Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions include/caffe/util/im2col.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,27 @@ namespace caffe {

// CPU im2col declaration as rendered in this diff, which carries no +/-
// markers: the two-line parameter tail (ksize, pad, stride) is the removed
// square-only signature, and the three-line tail (kernel_h/kernel_w,
// pad_h/pad_w, stride_h/stride_w) is its per-axis replacement.
// data_im is a const pointer; data_col is the non-const pointer that
// presumably receives the result (definition not visible here).
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_col);
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);

// CPU col2im declaration as rendered in this diff (no +/- markers): the
// two-line tail (psize, pad, stride) is the removed square-only signature and
// the three-line tail (patch_h/patch_w, pad_h/pad_w, stride_h/stride_w) is
// its per-axis replacement.
// Pointer roles are mirrored relative to im2col_cpu: data_col is const and
// data_im is the non-const pointer.
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int psize, const int pad,
const int stride, Dtype* data_im);
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);

// GPU im2col declaration as rendered in this diff (no +/- markers): the
// two-line tail (ksize, pad, stride) is the removed square-only signature and
// the three-line tail (kernel_h/kernel_w, pad_h/pad_w, stride_h/stride_w) is
// its per-axis replacement. The parameter list matches im2col_cpu exactly.
// NOTE(review): pointers are presumably device pointers -- confirm against
// the .cu definition, which is not part of this view.
template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_col);
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);

// GPU col2im declaration as rendered in this diff (no +/- markers): the
// two-line tail (psize, pad, stride) is the removed square-only signature and
// the three-line tail (patch_h/patch_w, pad_h/pad_w, stride_h/stride_w) is
// its per-axis replacement. The parameter list matches col2im_cpu exactly.
// NOTE(review): pointers are presumably device pointers -- confirm against
// the .cu definition, which is not part of this view.
template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int psize, const int pad,
const int stride, Dtype* data_im);
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);

} // namespace caffe

Expand Down
12 changes: 6 additions & 6 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ class ConvolutionLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

int kernel_size_;
int stride_;
int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
int num_;
int channels_;
int pad_;
int pad_h_, pad_w_;
int height_;
int width_;
int num_output_;
Expand Down Expand Up @@ -119,12 +119,12 @@ class Im2colLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

int kernel_size_;
int stride_;
int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
int channels_;
int height_;
int width_;
int pad_;
int pad_h_, pad_w_;
};

/* InnerProductLayer
Expand Down
60 changes: 48 additions & 12 deletions src/caffe/layers/conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,42 @@ template <typename Dtype>
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
Layer<Dtype>::SetUp(bottom, top);
kernel_size_ = this->layer_param_.convolution_param().kernel_size();
stride_ = this->layer_param_.convolution_param().stride();
ConvolutionParameter conv_param = this->layer_param_.convolution_param();
CHECK(!conv_param.has_kernel_size() !=
!(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
<< "Filter size is kernel_size OR kernel_h and kernel_w; not both";
CHECK(conv_param.has_kernel_size() ||
(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
<< "For non-square filters both kernel_h and kernel_w are required.";
CHECK((!conv_param.has_pad() && conv_param.has_pad_h()
&& conv_param.has_pad_w())
|| (!conv_param.has_pad_h() && !conv_param.has_pad_w()))
<< "pad is pad OR pad_h and pad_w are required.";
CHECK((!conv_param.has_stride() && conv_param.has_stride_h()
&& conv_param.has_stride_w())
|| (!conv_param.has_stride_h() && !conv_param.has_stride_w()))
<< "Stride is stride OR stride_h and stride_w are required.";
if (conv_param.has_kernel_size()) {
kernel_h_ = kernel_w_ = conv_param.kernel_size();
} else {
kernel_h_ = conv_param.kernel_h();
kernel_w_ = conv_param.kernel_w();
}
CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
if (!conv_param.has_pad_h()) {
pad_h_ = pad_w_ = conv_param.pad();
} else {
pad_h_ = conv_param.pad_h();
pad_w_ = conv_param.pad_w();
}
if (!conv_param.has_stride_h()) {
stride_h_ = stride_w_ = conv_param.stride();
} else {
stride_h_ = conv_param.stride_h();
stride_w_ = conv_param.stride_w();
}
group_ = this->layer_param_.convolution_param().group();
pad_ = this->layer_param_.convolution_param().pad();
num_ = bottom[0]->num();
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
Expand All @@ -37,17 +69,18 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
CHECK_EQ(channels_ % group_, 0);
// The im2col result buffer would only hold one image at a time to avoid
// overly large memory usage.
int height_out = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
int width_out = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
int height_out =
(height_ + 2 * pad_h_ - kernel_h_) / stride_h_ + 1;
int width_out = (width_ + 2 * pad_w_ - kernel_w_) / stride_w_ + 1;
col_buffer_.Reshape(
1, channels_ * kernel_size_ * kernel_size_, height_out, width_out);
1, channels_ * kernel_h_ * kernel_w_, height_out, width_out);
// Set the parameters
CHECK_EQ(num_output_ % group_, 0)
<< "Number of output should be multiples of group.";
bias_term_ = this->layer_param_.convolution_param().bias_term();
// Figure out the dimensions for individual gemms.
M_ = num_output_ / group_;
K_ = channels_ * kernel_size_ * kernel_size_ / group_;
K_ = channels_ * kernel_h_ * kernel_w_ / group_;
N_ = height_out * width_out;
for (int top_id = 0; top_id < top->size(); ++top_id) {
(*top)[top_id]->Reshape(num_, num_output_, height_out, width_out);
Expand All @@ -63,7 +96,7 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
}
// Initialize the weight
this->blobs_[0].reset(new Blob<Dtype>(
num_output_, channels_ / group_, kernel_size_, kernel_size_));
num_output_, channels_ / group_, kernel_h_, kernel_w_));
// fill the weights
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().weight_filler()));
Expand Down Expand Up @@ -99,7 +132,8 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
for (int n = 0; n < num_; ++n) {
// First, im2col
im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
col_data);
// Second, innerproduct with groups
for (int g = 0; g < group_; ++g) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
Expand Down Expand Up @@ -160,7 +194,8 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
// Since we saved memory in the forward pass by not storing all col
// data, we will need to recompute them.
im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
if (this->param_propagate_down_[0]) {
for (int g = 0; g < group_; ++g) {
Expand All @@ -179,8 +214,9 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
(Dtype)0., col_diff + col_offset * g);
}
// col2im back to the data
col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
stride_, bottom_diff + (*bottom)[i]->offset(n));
col2im_cpu(col_diff, channels_, height_, width_,
kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, bottom_diff + (*bottom)[i]->offset(n));
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/caffe/layers/conv_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
for (int n = 0; n < num_; ++n) {
// First, im2col
im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
col_data);
// Second, innerproduct with groups
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
Expand Down Expand Up @@ -85,7 +86,8 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
// Since we saved memory in the forward pass by not storing all col
// data, we will need to recompute them.
im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
if (this->param_propagate_down_[0]) {
for (int g = 0; g < group_; ++g) {
Expand All @@ -104,8 +106,9 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
(Dtype)0., col_diff + col_offset * g);
}
// col2im back to the data
col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
stride_, bottom_diff + (*bottom)[i]->offset(n));
col2im_gpu(col_diff, channels_, height_, width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
bottom_diff + (*bottom)[i]->offset(n));
}
}
}
Expand Down
51 changes: 43 additions & 8 deletions src/caffe/layers/im2col_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,48 @@ template <typename Dtype>
// Im2colLayer::SetUp as rendered in this diff: removed and added lines appear
// together without +/- markers. The added code reads kernel, pad and stride
// settings from convolution_param, validates them with CHECK macros, caches
// the first bottom blob's channels/height/width and reshapes the first top
// blob accordingly.
void Im2colLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
Layer<Dtype>::SetUp(bottom, top);
// Removed lines: the old code stored one square kernel_size_/stride_/pad_.
kernel_size_ = this->layer_param_.convolution_param().kernel_size();
stride_ = this->layer_param_.convolution_param().stride();
pad_ = this->layer_param_.convolution_param().pad();
// Added lines start here. conv_param is a local copy of the parameter message.
ConvolutionParameter conv_param = this->layer_param_.convolution_param();
// Exclusive-or: passes only when exactly one of kernel_size or the complete
// (kernel_h, kernel_w) pair is present.
CHECK(!conv_param.has_kernel_size() !=
!(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
<< "Filter size is kernel_size OR kernel_h and kernel_w; not both";
// At least one form must be given; this is already implied by the
// exclusive-or check above.
CHECK(conv_param.has_kernel_size() ||
(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
<< "For non-square filters both kernel_h and kernel_w are required.";
// Either pad_h and pad_w are both set and pad is not, or neither pad_h nor
// pad_w is set (pad itself may then be set or left unset).
CHECK((!conv_param.has_pad() && conv_param.has_pad_h()
&& conv_param.has_pad_w())
|| (!conv_param.has_pad_h() && !conv_param.has_pad_w()))
<< "pad is pad OR pad_h and pad_w are required.";
// Same rule for stride: the per-axis pair excludes the single value.
CHECK((!conv_param.has_stride() && conv_param.has_stride_h()
&& conv_param.has_stride_w())
|| (!conv_param.has_stride_h() && !conv_param.has_stride_w()))
<< "Stride is stride OR stride_h and stride_w are required.";
// Square kernel when kernel_size is given, otherwise explicit height/width.
if (conv_param.has_kernel_size()) {
kernel_h_ = kernel_w_ = conv_param.kernel_size();
} else {
kernel_h_ = conv_param.kernel_h();
kernel_w_ = conv_param.kernel_w();
}
CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
// The pad check above guarantees pad_w is unset whenever pad_h is unset, so
// testing has_pad_h() alone is enough to pick the single-value form.
if (!conv_param.has_pad_h()) {
pad_h_ = pad_w_ = conv_param.pad();
} else {
pad_h_ = conv_param.pad_h();
pad_w_ = conv_param.pad_w();
}
// Likewise for stride: has_stride_h() alone selects the form.
// NOTE(review): nothing here checks that the resulting strides are non-zero
// before they are used as divisors in the Reshape call below.
if (!conv_param.has_stride_h()) {
stride_h_ = stride_w_ = conv_param.stride();
} else {
stride_h_ = conv_param.stride_h();
stride_w_ = conv_param.stride_w();
}
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();
// Removed lines: output shape computed from the square kernel/pad/stride.
(*top)[0]->Reshape(bottom[0]->num(), channels_ * kernel_size_ * kernel_size_,
(height_ + 2 * pad_ - kernel_size_) / stride_ + 1,
(width_ + 2 * pad_ - kernel_size_) / stride_ + 1);
// Added lines: top shape is num x (channels * kernel_h * kernel_w) x out_h x
// out_w, each output extent being (in + 2 * pad - kernel) / stride + 1 in
// integer arithmetic on the per-axis values.
(*top)[0]->Reshape(
bottom[0]->num(), channels_ * kernel_h_ * kernel_w_,
(height_ + 2 * pad_h_ - kernel_h_) / stride_h_ + 1,
(width_ + 2 * pad_w_ - kernel_w_) / stride_w_ + 1);
}

template <typename Dtype>
Expand All @@ -31,7 +64,8 @@ Dtype Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
Dtype* top_data = (*top)[0]->mutable_cpu_data();
for (int n = 0; n < bottom[0]->num(); ++n) {
im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, top_data + (*top)[0]->offset(n));
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, top_data + (*top)[0]->offset(n));
}
return Dtype(0.);
}
Expand All @@ -43,7 +77,8 @@ void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
for (int n = 0; n < top[0]->num(); ++n) {
col2im_cpu(top_diff + top[0]->offset(n), channels_, height_, width_,
kernel_size_, pad_, stride_, bottom_diff + (*bottom)[0]->offset(n));
kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, bottom_diff + (*bottom)[0]->offset(n));
}
}

Expand Down
6 changes: 4 additions & 2 deletions src/caffe/layers/im2col_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ Dtype Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
Dtype* top_data = (*top)[0]->mutable_gpu_data();
for (int n = 0; n < bottom[0]->num(); ++n) {
im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, top_data + (*top)[0]->offset(n));
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, top_data + (*top)[0]->offset(n));
}
return Dtype(0.);
}
Expand All @@ -28,7 +29,8 @@ void Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
for (int n = 0; n < top[0]->num(); ++n) {
col2im_gpu(top_diff + top[0]->offset(n), channels_, height_, width_,
kernel_size_, pad_, stride_, bottom_diff + (*bottom)[0]->offset(n));
kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, bottom_diff + (*bottom)[0]->offset(n));
}
}

Expand Down
14 changes: 11 additions & 3 deletions src/caffe/proto/caffe.proto
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,18 @@ message ConcatParameter {
// Settings message read by the convolution and im2col layers.
// This hunk shows removed and added lines together without +/- markers, so
// field numbers 3 (pad), 4 (kernel_size) and 6 (stride) each appear twice:
// once with the old trailing comment and once with the new one. The added
// per-axis fields use the previously unused numbers 9 through 14.
message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
// Removed lines: old pad / kernel_size declarations.
optional uint32 pad = 3 [default = 0]; // The padding size
optional uint32 kernel_size = 4; // The kernel size
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs.
optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X)
optional uint32 pad_h = 9 [default = 0]; // The padding height
optional uint32 pad_w = 10 [default = 0]; // The padding width
optional uint32 kernel_size = 4; // The kernel size (square)
optional uint32 kernel_h = 11; // The kernel height
optional uint32 kernel_w = 12; // The kernel width
optional uint32 group = 5 [default = 1]; // The group size for group conv
// Removed line: old stride declaration.
optional uint32 stride = 6 [default = 1]; // The stride
optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X)
// Unlike pad_h/pad_w, stride_h and stride_w declare no explicit default.
optional uint32 stride_h = 13; // The stride height
optional uint32 stride_w = 14; // The stride width
optional FillerParameter weight_filler = 7; // The filler for the weight
optional FillerParameter bias_filler = 8; // The filler for the bias
}
Expand Down
Loading