15 changes: 15 additions & 0 deletions include/caffe/loss_layers.hpp
@@ -696,6 +696,14 @@ template <typename Dtype> class SoftmaxLayer;
template <typename Dtype>
class SoftmaxWithLossLayer : public LossLayer<Dtype> {
public:
/**
* @param param provides LossParameter loss_param, with options:
* - ignore_label (optional)
* Specify a label value that should be ignored when computing the loss.
* - normalize (optional, default true)
 *    If true, the loss is normalized by the number of (non-ignored) labels
 *    present; otherwise the loss is summed over spatial locations and divided
 *    only by the batch size.
*/
explicit SoftmaxWithLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param),
softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
@@ -758,6 +766,13 @@ class SoftmaxWithLossLayer<Dtype> {
vector<Blob<Dtype>*> softmax_bottom_vec_;
/// top vector holder used in call to the underlying SoftmaxLayer::Forward
vector<Blob<Dtype>*> softmax_top_vec_;
/// Whether to ignore instances with a certain label.
bool has_ignore_label_;
/// The label indicating that an instance should be ignored.
int ignore_label_;
/// Whether to normalize the loss by the total number of values present
/// (otherwise just by the batch size).
bool normalize_;
};

} // namespace caffe
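For context, here is a minimal prototxt sketch of how these new options could be set on a softmax loss layer. It is not part of the patch: the layer and blob names and the ignore value 255 are illustrative, and it assumes the V1 layers syntax Caffe uses at this point.

layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "score"
  bottom: "label"
  top: "loss"
  loss_param {
    ignore_label: 255   # hypothetical "void" label to skip
    normalize: false    # divide by batch size only
  }
}

With normalize: false the reported loss is divided only by the batch size, so its magnitude grows with the number of spatial locations per image.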
44 changes: 35 additions & 9 deletions src/caffe/layers/softmax_loss_layer.cpp
@@ -17,6 +17,13 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
softmax_top_vec_.clear();
softmax_top_vec_.push_back(&prob_);
softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);

has_ignore_label_ =
this->layer_param_.loss_param().has_ignore_label();
if (has_ignore_label_) {
ignore_label_ = this->layer_param_.loss_param().ignore_label();
}
normalize_ = this->layer_param_.loss_param().normalize();
}

template <typename Dtype>
@@ -40,27 +47,34 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
int num = prob_.num();
int dim = prob_.count() / num;
int spatial_dim = prob_.height() * prob_.width();
int count = 0;
Dtype loss = 0;
for (int i = 0; i < num; ++i) {
for (int j = 0; j < spatial_dim; j++) {
const int label_value = static_cast<int>(label[i * spatial_dim + j]);
if (has_ignore_label_ && label_value == ignore_label_) {
continue;
}
DCHECK_GE(label_value, 0);
DCHECK_LT(label_value, prob_.channels());
loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
                     Dtype(FLT_MIN)));
++count;
}
}
if (normalize_) {
  top[0]->mutable_cpu_data()[0] = loss / count;
} else {
  top[0]->mutable_cpu_data()[0] = loss / num;
}
if (top.size() == 2) {
top[1]->ShareData(prob_);
}
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[1]) {
LOG(FATAL) << this->type_name()
<< " Layer cannot backpropagate to label inputs.";
@@ -73,15 +87,27 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
int num = prob_.num();
int dim = prob_.count() / num;
int spatial_dim = prob_.height() * prob_.width();
int count = 0;
for (int i = 0; i < num; ++i) {
for (int j = 0; j < spatial_dim; ++j) {
const int label_value = static_cast<int>(label[i * spatial_dim + j]);
if (has_ignore_label_ && label_value == ignore_label_) {
  for (int c = 0; c < bottom[0]->channels(); ++c) {
    bottom_diff[i * dim + c * spatial_dim + j] = 0;
  }
} else {
  bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
  ++count;
}
}
}
// Scale gradient
const Dtype loss_weight = top[0]->cpu_diff()[0];
if (normalize_) {
  caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
} else {
  caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
}
}
}

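As a sketch of what the forward and backward passes above compute (the notation is mine, not from the patch): let V be the set of non-ignored locations (i, j), y_ij the label at that location, p the softmax output, and N the batch size (num). Then

L = -\frac{1}{M} \sum_{(i,j) \in V} \log p_{i,\, y_{ij},\, j},
\qquad
M = \begin{cases} |V| & \text{if normalize} \\ N & \text{otherwise.} \end{cases}

The backward pass forms the usual softmax cross-entropy gradient (probabilities minus the one-hot label), zeroes it across all channels at ignored locations, and applies the same 1/M scaling times the loss weight; the count variable in the code is |V|.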
14 changes: 13 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -206,7 +206,7 @@ message NetStateRule {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available ID: 43 (last added: loss_param)
message LayerParameter {
repeated string bottom = 2; // the name of the bottom blobs
repeated string top = 3; // the name of the top blobs
@@ -331,6 +331,9 @@ message LayerParameter {
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 36;

// Parameters shared by loss layers.
optional LossParameter loss_param = 42;

// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
@@ -361,6 +364,15 @@ message TransformationParameter {
repeated float mean_value = 5;
}

// Message that stores parameters shared by loss layers
message LossParameter {
// If specified, ignore instances with the given label.
optional int32 ignore_label = 1;
// If true, normalize each batch across all instances (including spatial
// dimensions, but not ignored instances); else, divide by batch size only.
optional bool normalize = 2 [default = true];
}

// Message that stores parameters used by AccuracyLayer
message AccuracyParameter {
// When computing accuracy, count as correct by comparing the true label to
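The tests below exercise these same fields through the generated protobuf API. A hedged C++ sketch of that pattern outside the test harness (the include path is the standard generated header, and 255 is again only an illustrative ignore value):

#include "caffe/proto/caffe.pb.h"

int main() {
  // Skip instances labeled 255 and divide the loss by batch size only,
  // via LossParameter's ignore_label and normalize fields.
  caffe::LayerParameter layer_param;
  layer_param.mutable_loss_param()->set_ignore_label(255);
  layer_param.mutable_loss_param()->set_normalize(false);
  return 0;
}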
48 changes: 47 additions & 1 deletion src/caffe/test/test_softmax_with_loss_layer.cpp
@@ -3,6 +3,7 @@
#include <cstring>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gtest/gtest.h"

#include "caffe/blob.hpp"
@@ -13,6 +14,8 @@
#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

using boost::scoped_ptr;

namespace caffe {

template <typename TypeParam>
@@ -50,7 +53,6 @@ class SoftmaxWithLossLayerTest : public MultiDeviceTest<TypeParam> {

TYPED_TEST_CASE(SoftmaxWithLossLayerTest, TestDtypesAndDevices);


TYPED_TEST(SoftmaxWithLossLayerTest, TestGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -61,4 +63,48 @@ TYPED_TEST(SoftmaxWithLossLayerTest, TestGradient) {
this->blob_top_vec_, 0);
}

TYPED_TEST(SoftmaxWithLossLayerTest, TestForwardIgnoreLabel) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
layer_param.mutable_loss_param()->set_normalize(false);
// First, compute the loss with all labels
scoped_ptr<SoftmaxWithLossLayer<Dtype> > layer(
new SoftmaxWithLossLayer<Dtype>(layer_param));
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
Dtype full_loss = this->blob_top_loss_->cpu_data()[0];
// Now, accumulate the loss, ignoring each label in {0, ..., 4} in turn.
Dtype accum_loss = 0;
for (int label = 0; label < 5; ++label) {
layer_param.mutable_loss_param()->set_ignore_label(label);
layer.reset(new SoftmaxWithLossLayer<Dtype>(layer_param));
layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
accum_loss += this->blob_top_loss_->cpu_data()[0];
}
// Each label is ignored in exactly one of the five runs, so every instance
// contributes to four of them: the accumulated loss should be four times
// the full loss.
EXPECT_NEAR(4 * full_loss, accum_loss, 1e-4);
}

TYPED_TEST(SoftmaxWithLossLayerTest, TestGradientIgnoreLabel) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
// labels are in {0, ..., 4}, so we'll ignore about a fifth of them
layer_param.mutable_loss_param()->set_ignore_label(0);
SoftmaxWithLossLayer<Dtype> layer(layer_param);
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_, 0);
}

TYPED_TEST(SoftmaxWithLossLayerTest, TestGradientUnnormalized) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
layer_param.mutable_loss_param()->set_normalize(false);
SoftmaxWithLossLayer<Dtype> layer(layer_param);
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
this->blob_top_vec_, 0);
}

} // namespace caffe