Skip to content

Commit c647d9c

Browse files
committed
add message ParamSpec to replace param name, blobs_lr, weight_decay, ...
1 parent 18f1926 commit c647d9c

File tree

8 files changed

+526
-438
lines changed

8 files changed

+526
-438
lines changed

src/caffe/net.cpp

Lines changed: 23 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -112,36 +112,19 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
112112
memory_used_ += top_vecs_[layer_id][top_id]->count();
113113
}
114114
DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
115-
const int blobs_lr_size = layer_param.blobs_lr_size();
115+
const int param_size = layer_param.param_size();
116116
const int num_param_blobs = layers_[layer_id]->blobs().size();
117-
CHECK(blobs_lr_size == num_param_blobs || blobs_lr_size == 0)
118-
<< "Incorrect blobs lr size: should be either 0 "
119-
<< "or the same as the number of the layer's parameter blobs.";
120-
if (blobs_lr_size) {
121-
// Check if this layer needs backward operation itself
122-
for (int param_id = 0; param_id < blobs_lr_size; ++param_id) {
123-
const bool param_need_backward = layer_param.blobs_lr(param_id) > 0;
124-
need_backward |= param_need_backward;
125-
layers_[layer_id]->set_param_propagate_down(param_id,
126-
param_need_backward);
127-
}
128-
} else if (layers_[layer_id]->blobs().size()) {
129-
// catch: if a layer param does not specify blobs_lr, we should assume the
130-
// learning rate to be 1. Thus we will need to perform backward.
131-
need_backward = true;
132-
for (int param_id = 0; param_id < blobs_lr_size; ++param_id) {
133-
layers_[layer_id]->set_param_propagate_down(param_id, true);
134-
}
117+
CHECK_LE(param_size, num_param_blobs)
118+
<< "Too many params specified for layer " << layer_param.name();
119+
ParamSpec default_param_spec;
120+
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
121+
const ParamSpec* param_spec = (param_id < param_size) ?
122+
&layer_param.param(param_id) : &default_param_spec;
123+
const bool param_need_backward = param_spec->lr_mult() > 0;
124+
need_backward |= param_need_backward;
125+
layers_[layer_id]->set_param_propagate_down(param_id,
126+
param_need_backward);
135127
}
136-
const int param_size = layer_param.param_size();
137-
CHECK(param_size == num_param_blobs || param_size == 0)
138-
<< "Incorrect param size: should be either 0 or the same as "
139-
"the number of the layer's parameter blobs: " << num_param_blobs;
140-
const int param_share_mode_size = layer_param.param_share_mode_size();
141-
CHECK(param_share_mode_size == num_param_blobs ||
142-
param_share_mode_size == 0)
143-
<< "Incorrect param_share_mode size: should be either 0 or the same as "
144-
"the number of the layer's parameter blobs: " << num_param_blobs;
145128
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
146129
AppendParam(param, layer_id, param_id);
147130
}
@@ -407,7 +390,8 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
407390
const int param_id) {
408391
const LayerParameter& layer_param = layers_[layer_id]->layer_param();
409392
const int param_size = layer_param.param_size();
410-
string param_name = param_size ? layer_param.param(param_id) : "";
393+
string param_name =
394+
(param_size > param_id) ? layer_param.param(param_id).name() : "";
411395
if (param_name.size()) {
412396
param_display_names_.push_back(param_name);
413397
} else {
@@ -441,10 +425,9 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
441425
Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
442426
Blob<Dtype>* owner_blob =
443427
layers_[owner_layer_id]->blobs()[owner_param_id].get();
444-
const int param_share_mode_size = layer_param.param_share_mode_size();
445-
if (param_share_mode_size > param_id &&
446-
(layer_param.param_share_mode(param_id) ==
447-
LayerParameter_DimCheckMode_PERMISSIVE)) {
428+
const int param_size = layer_param.param_size();
429+
if (param_size > param_id && (layer_param.param(param_id).share_mode() ==
430+
ParamSpec_DimCheckMode_PERMISSIVE)) {
448431
// Permissive dimension checking -- only check counts are the same.
449432
CHECK_EQ(this_blob->count(), owner_blob->count())
450433
<< "Shared parameter blobs must have the same count.";
@@ -467,34 +450,15 @@ void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
467450
template <typename Dtype>
468451
void Net<Dtype>::GetLearningRateAndWeightDecay() {
469452
LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
453+
ParamSpec default_param_spec;
470454
for (int i = 0; i < layers_.size(); ++i) {
471455
vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
472-
// push the learning rate mutlipliers
473-
if (layers_[i]->layer_param().blobs_lr_size()) {
474-
CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
475-
for (int j = 0; j < layer_blobs.size(); ++j) {
476-
float local_lr = layers_[i]->layer_param().blobs_lr(j);
477-
CHECK_GE(local_lr, 0.);
478-
params_lr_.push_back(local_lr);
479-
}
480-
} else {
481-
for (int j = 0; j < layer_blobs.size(); ++j) {
482-
params_lr_.push_back(1.);
483-
}
484-
}
485-
// push the weight decay multipliers
486-
if (layers_[i]->layer_param().weight_decay_size()) {
487-
CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
488-
layer_blobs.size());
489-
for (int j = 0; j < layer_blobs.size(); ++j) {
490-
float local_decay = layers_[i]->layer_param().weight_decay(j);
491-
CHECK_GE(local_decay, 0.);
492-
params_weight_decay_.push_back(local_decay);
493-
}
494-
} else {
495-
for (int j = 0; j < layer_blobs.size(); ++j) {
496-
params_weight_decay_.push_back(1.);
497-
}
456+
for (int j = 0; j < layer_blobs.size(); ++j) {
457+
const ParamSpec* param_spec =
458+
(layers_[i]->layer_param().param_size() > j) ?
459+
&layers_[i]->layer_param().param(j) : &default_param_spec;
460+
params_lr_.push_back(param_spec->lr_mult());
461+
params_weight_decay_.push_back(param_spec->decay_mult());
498462
}
499463
}
500464
}

src/caffe/proto/caffe.proto

Lines changed: 30 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,31 @@ message NetStateRule {
209209
repeated string not_stage = 5;
210210
}
211211

212+
// Specifies training parameters (multipliers on global learning constants,
213+
// and the name and other settings used for weight sharing).
214+
message ParamSpec {
215+
// The names of the parameter blobs -- useful for sharing parameters among
216+
// layers, but never required otherwise. To share a parameter between two
217+
// layers, give it a (non-empty) name.
218+
optional string name = 1;
219+
220+
// Whether to require shared weights to have the same shape, or just the same
221+
// count -- defaults to STRICT if unspecified.
222+
optional DimCheckMode share_mode = 2;
223+
enum DimCheckMode {
224+
// STRICT (default) requires that num, channels, height, width each match.
225+
STRICT = 0;
226+
// PERMISSIVE requires only the count (num*channels*height*width) to match.
227+
PERMISSIVE = 1;
228+
}
229+
230+
// The multiplier on the global learning rate for this parameter.
231+
optional float lr_mult = 3 [default = 1.0];
232+
233+
// The multiplier on the global weight decay for this parameter.
234+
optional float decay_mult = 4 [default = 1.0];
235+
}
236+
212237
// NOTE
213238
// Update the next available ID when you add a new LayerParameter field.
214239
//
@@ -224,29 +249,12 @@ message LayerParameter {
224249
// to each top blob.
225250
repeated float loss_weight = 5;
226251

227-
// The blobs containing the numeric parameters of the layer
228-
repeated BlobProto blobs = 6;
252+
// Specifies training parameters (multipliers on global learning constants,
253+
// and the name and other settings used for weight sharing).
254+
repeated ParamSpec param = 6;
229255

230-
// The names of the parameter blobs -- useful for sharing parameters among
231-
// layers (but never required).
232-
repeated string param = 7;
233-
234-
// Whether to require shared weights to have the same shape, or just the same
235-
// count -- defaults to STRICT if unspecified.
236-
repeated DimCheckMode param_share_mode = 8;
237-
enum DimCheckMode {
238-
// STRICT (default) requires that num, channels, height, width each match.
239-
STRICT = 0;
240-
// PERMISSIVE requires only the count (num*channels*height*width) to match.
241-
PERMISSIVE = 1;
242-
}
243-
244-
// The ratio that is multiplied on the global learning rate. If you want to
245-
// set the learning ratio for one blob, you need to set it for all blobs.
246-
repeated float blobs_lr = 9;
247-
248-
// The weight decay that is multiplied on the global weight decay.
249-
repeated float weight_decay = 10;
256+
// The blobs containing the numeric parameters of the layer.
257+
repeated BlobProto blobs = 7;
250258

251259
// Rules controlling whether and when a layer is included in the network,
252260
// based on the current NetState. You may specify a non-zero number of rules

src/caffe/test/test_gradient_based_solver.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
6464
"lr_policy: 'fixed' "
6565
"net_param { "
6666
" name: 'TestNetwork' "
67-
" layers: { "
67+
" layer { "
6868
" name: 'data' "
6969
" type: 'DummyData' "
7070
" dummy_data_param { "
@@ -83,7 +83,7 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
8383
" top: 'data' "
8484
" top: 'targets' "
8585
" } "
86-
" layers: { "
86+
" layer { "
8787
" name: 'innerprod' "
8888
" type: 'InnerProduct' "
8989
" inner_product_param { "
@@ -100,7 +100,7 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
100100
" bottom: 'data' "
101101
" top: 'innerprod' "
102102
" } "
103-
" layers: { "
103+
" layer { "
104104
" name: 'loss' "
105105
" type: 'EuclideanLoss' "
106106
" bottom: 'innerprod' "

0 commit comments

Comments (0)