Skip to content

Commit 4199be6

Browse files
committed
ScalarLayer supports in-place computation
1 parent 0437545 commit 4199be6

File tree

4 files changed

+169
-11
lines changed

4 files changed

+169
-11
lines changed

include/caffe/layers/scalar_layer.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class ScalarLayer: public Layer<Dtype> {
6565

6666
Blob<Dtype> sum_multiplier_;
6767
Blob<Dtype> sum_result_;
68+
Blob<Dtype> temp_;
6869
int axis_;
6970
int outer_dim_, scalar_dim_, inner_dim_;
7071
};

src/caffe/layers/scalar_layer.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,6 @@ void ScalarLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
4444
template <typename Dtype>
4545
void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
4646
const vector<Blob<Dtype>*>& top) {
47-
// TODO: make ScalarLayer usable in-place.
48-
// Currently, in-place computation is broken during Backward with
49-
// propagate_down[0] && propagate_down[1], as bottom[0]'s diff is used for
50-
// temporary storage of an intermediate result, overwriting top[0]'s diff
51-
// if using in-place computation.
52-
CHECK_NE(bottom[0], top[0]) << "ScalarLayer cannot be used in-place";
5347
const ScalarParameter& param = this->layer_param_.scalar_param();
5448
Blob<Dtype>* scalar = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
5549
// Always set axis_ == 0 in special case where scalar is an actual scalar
@@ -71,7 +65,11 @@ void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
7165
outer_dim_ = bottom[0]->count(0, axis_);
7266
scalar_dim_ = scalar->count();
7367
inner_dim_ = bottom[0]->count(axis_ + scalar->num_axes());
74-
top[0]->ReshapeLike(*bottom[0]);
68+
if (bottom[0] == top[0]) { // in-place computation
69+
temp_.ReshapeLike(*bottom[0]);
70+
} else {
71+
top[0]->ReshapeLike(*bottom[0]);
72+
}
7573
sum_result_.Reshape(vector<int>(1, outer_dim_ * scalar_dim_));
7674
const int sum_mult_size = std::max(outer_dim_, inner_dim_);
7775
sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
@@ -84,6 +82,14 @@ template <typename Dtype>
8482
void ScalarLayer<Dtype>::Forward_cpu(
8583
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
8684
const Dtype* bottom_data = bottom[0]->cpu_data();
85+
if (bottom[0] == top[0]) {
86+
// In-place computation; need to store bottom data before overwriting it.
87+
// Note that this is only necessary for Backward; we could skip this if not
88+
// doing Backward, but Caffe currently provides no way of knowing whether
89+
// we'll need to do Backward at the time of the Forward call.
90+
caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
91+
temp_.mutable_cpu_data());
92+
}
8793
const Dtype* scalar_data =
8894
((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
8995
Dtype* top_data = top[0]->mutable_cpu_data();
@@ -105,12 +111,16 @@ void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
105111
if ((!scalar_param && propagate_down[1]) ||
106112
(scalar_param && this->param_propagate_down_[0])) {
107113
const Dtype* top_diff = top[0]->cpu_diff();
108-
const Dtype* bottom_data = bottom[0]->cpu_data();
114+
const bool in_place = (bottom[0] == top[0]);
115+
const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
109116
// Hack: store big eltwise product in bottom[0] diff, except in the special
110117
// case where this layer itself does the eltwise product, in which case we
111118
// can store it directly in the scalar diff, and we're done.
119+
// If we're computing in-place (and not doing eltwise computation), this
120+
// hack doesn't work and we store the product in temp_.
112121
const bool is_eltwise = (bottom[0]->count() == scalar->count());
113-
Dtype* product = (is_eltwise ? scalar : bottom[0])->mutable_cpu_diff();
122+
Dtype* product = (is_eltwise ? scalar->mutable_cpu_diff() :
123+
(in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
114124
caffe_mul(top[0]->count(), top_diff, bottom_data, product);
115125
if (!is_eltwise) {
116126
Dtype* sum_result = NULL;

src/caffe/layers/scalar_layer.cu

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ void ScalarLayer<Dtype>::Forward_gpu(
2121
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
2222
const int count = top[0]->count();
2323
const Dtype* bottom_data = bottom[0]->gpu_data();
24+
if (bottom[0] == top[0]) {
25+
// In-place computation; need to store bottom data before overwriting it.
26+
// Note that this is only necessary for Backward; we could skip this if not
27+
// doing Backward, but Caffe currently provides no way of knowing whether
28+
// we'll need to do Backward at the time of the Forward call.
29+
caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(),
30+
temp_.mutable_gpu_data());
31+
}
2432
const Dtype* scalar_data =
2533
((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
2634
Dtype* top_data = top[0]->mutable_gpu_data();
@@ -37,12 +45,16 @@ void ScalarLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
3745
if ((!scalar_param && propagate_down[1]) ||
3846
(scalar_param && this->param_propagate_down_[0])) {
3947
const Dtype* top_diff = top[0]->gpu_diff();
40-
const Dtype* bottom_data = bottom[0]->gpu_data();
48+
const bool in_place = (bottom[0] == top[0]);
49+
const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data();
4150
// Hack: store big eltwise product in bottom[0] diff, except in the special
4251
// case where this layer itself does the eltwise product, in which case we
4352
// can store it directly in the scalar diff, and we're done.
53+
// If we're computing in-place (and not doing eltwise computation), this
54+
// hack doesn't work and we store the product in temp_.
4455
const bool is_eltwise = (bottom[0]->count() == scalar->count());
45-
Dtype* product = (is_eltwise ? scalar : bottom[0])->mutable_gpu_diff();
56+
Dtype* product = (is_eltwise ? scalar->mutable_gpu_diff() :
57+
(in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff()));
4658
caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product);
4759
if (!is_eltwise) {
4860
Dtype* sum_result = NULL;

src/caffe/test/test_scalar_layer.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,70 @@ TYPED_TEST(ScalarLayerTest, TestForwardEltwise) {
8686
}
8787
}
8888

89+
TYPED_TEST(ScalarLayerTest, TestForwardEltwiseInPlace) {
90+
typedef typename TypeParam::Dtype Dtype;
91+
this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
92+
Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
93+
orig_bottom.CopyFrom(*this->blob_bottom_);
94+
this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
95+
LayerParameter layer_param;
96+
shared_ptr<ScalarLayer<Dtype> > layer(new ScalarLayer<Dtype>(layer_param));
97+
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
98+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
99+
const Dtype* data = this->blob_bottom_->cpu_data();
100+
const int count = this->blob_bottom_->count();
101+
const Dtype* in_data_a = orig_bottom.cpu_data();
102+
const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
103+
for (int i = 0; i < count; ++i) {
104+
EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
105+
}
106+
}
107+
108+
TYPED_TEST(ScalarLayerTest, TestBackwardEltwiseInPlace) {
109+
typedef typename TypeParam::Dtype Dtype;
110+
Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
111+
orig_bottom.CopyFrom(*this->blob_bottom_);
112+
this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
113+
LayerParameter layer_param;
114+
shared_ptr<ScalarLayer<Dtype> > layer(new ScalarLayer<Dtype>(layer_param));
115+
Blob<Dtype> top_diff(this->blob_bottom_->shape());
116+
FillerParameter filler_param;
117+
filler_param.set_type("gaussian");
118+
filler_param.set_std(1);
119+
GaussianFiller<Dtype> filler(filler_param);
120+
filler.Fill(&top_diff);
121+
vector<bool> propagate_down(2, true);
122+
// Run forward + backward without in-place computation;
123+
// save resulting bottom diffs.
124+
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
125+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
126+
caffe_copy(top_diff.count(), top_diff.cpu_data(),
127+
this->blob_top_->mutable_cpu_diff());
128+
layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
129+
const bool kReshape = true;
130+
const bool kCopyDiff = true;
131+
Blob<Dtype> orig_bottom_diff;
132+
orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
133+
Blob<Dtype> orig_scalar_diff;
134+
orig_scalar_diff.CopyFrom(*this->blob_bottom_eltwise_,
135+
kCopyDiff, kReshape);
136+
// Rerun forward + backward with in-place computation;
137+
// check that resulting bottom diffs are the same.
138+
this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
139+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
140+
caffe_copy(top_diff.count(), top_diff.cpu_data(),
141+
this->blob_bottom_->mutable_cpu_diff());
142+
layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
143+
for (int i = 0; i < this->blob_bottom_->count(); ++i) {
144+
EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
145+
this->blob_bottom_->cpu_diff()[i], 1e-5);
146+
}
147+
for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) {
148+
EXPECT_NEAR(orig_scalar_diff.cpu_diff()[i],
149+
this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5);
150+
}
151+
}
152+
89153
TYPED_TEST(ScalarLayerTest, TestForwardEltwiseWithParam) {
90154
typedef typename TypeParam::Dtype Dtype;
91155
LayerParameter layer_param;
@@ -151,6 +215,77 @@ TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddle) {
151215
}
152216
}
153217

218+
TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddleInPlace) {
219+
typedef typename TypeParam::Dtype Dtype;
220+
this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
221+
Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
222+
orig_bottom.CopyFrom(*this->blob_bottom_);
223+
this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
224+
LayerParameter layer_param;
225+
layer_param.mutable_scalar_param()->set_axis(1);
226+
shared_ptr<ScalarLayer<Dtype> > layer(new ScalarLayer<Dtype>(layer_param));
227+
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
228+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
229+
for (int n = 0; n < this->blob_bottom_->num(); ++n) {
230+
for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
231+
for (int h = 0; h < this->blob_bottom_->height(); ++h) {
232+
for (int w = 0; w < this->blob_bottom_->width(); ++w) {
233+
EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w),
234+
orig_bottom.data_at(n, c, h, w) *
235+
this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
236+
1e-5);
237+
}
238+
}
239+
}
240+
}
241+
}
242+
243+
TYPED_TEST(ScalarLayerTest, TestBackwardBroadcastMiddleInPlace) {
244+
typedef typename TypeParam::Dtype Dtype;
245+
Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
246+
orig_bottom.CopyFrom(*this->blob_bottom_);
247+
this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
248+
LayerParameter layer_param;
249+
layer_param.mutable_scalar_param()->set_axis(1);
250+
shared_ptr<ScalarLayer<Dtype> > layer(new ScalarLayer<Dtype>(layer_param));
251+
Blob<Dtype> top_diff(this->blob_bottom_->shape());
252+
FillerParameter filler_param;
253+
filler_param.set_type("gaussian");
254+
filler_param.set_std(1);
255+
GaussianFiller<Dtype> filler(filler_param);
256+
filler.Fill(&top_diff);
257+
vector<bool> propagate_down(2, true);
258+
// Run forward + backward without in-place computation;
259+
// save resulting bottom diffs.
260+
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
261+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
262+
caffe_copy(top_diff.count(), top_diff.cpu_data(),
263+
this->blob_top_->mutable_cpu_diff());
264+
layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
265+
const bool kReshape = true;
266+
const bool kCopyDiff = true;
267+
Blob<Dtype> orig_bottom_diff;
268+
orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
269+
Blob<Dtype> orig_scalar_diff;
270+
orig_scalar_diff.CopyFrom(*this->blob_bottom_broadcast_1_,
271+
kCopyDiff, kReshape);
272+
// Rerun forward + backward with in-place computation;
273+
// check that resulting bottom diffs are the same.
274+
this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
275+
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
276+
caffe_copy(top_diff.count(), top_diff.cpu_data(),
277+
this->blob_bottom_->mutable_cpu_diff());
278+
layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
279+
for (int i = 0; i < this->blob_bottom_->count(); ++i) {
280+
EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
281+
this->blob_bottom_->cpu_diff()[i], 1e-5);
282+
}
283+
for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) {
284+
EXPECT_NEAR(orig_scalar_diff.cpu_diff()[i],
285+
this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5);
286+
}
287+
}
288+
154289
TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddleWithParam) {
155290
typedef typename TypeParam::Dtype Dtype;
156291
LayerParameter layer_param;

0 commit comments

Comments
 (0)