Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/caffe/blob.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/math_functions.hpp"

namespace caffe {

Expand Down
1 change: 1 addition & 0 deletions include/caffe/syncedmem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cstdlib>

#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

Expand Down
5 changes: 2 additions & 3 deletions include/caffe/util/math_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,14 @@ void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
template <typename Dtype>
void caffe_copy(const int N, const Dtype *X, Dtype *Y);

void caffe_memcpy(const size_t N, const void *X, void *Y);

template <typename Dtype>
void caffe_set(const int N, const Dtype alpha, Dtype *X);

template <typename Dtype>
void caffe_gpu_set(const int N, const Dtype alpha, Dtype *X);

template <typename Dtype>
void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y);

template <typename Dtype>
void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X);

Expand Down
38 changes: 18 additions & 20 deletions matlab/caffe/matcaffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ static mxArray* do_forward(const mxArray* const bottom) {
reinterpret_cast<const float* const>(mxGetPr(elem));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count());
caffe_copy(input_blobs[i]->count(), data_ptr,
input_blobs[i]->mutable_cpu_data());
break;
case Caffe::GPU:
cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice);
caffe_copy(input_blobs[i]->count(), data_ptr,
input_blobs[i]->mutable_gpu_data());
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
Expand All @@ -77,12 +77,12 @@ static mxArray* do_forward(const mxArray* const bottom) {
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, output_blobs[i]->cpu_data(),
sizeof(float) * output_blobs[i]->count());
caffe_copy(output_blobs[i]->count(), output_blobs[i]->cpu_data(),
data_ptr);
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(),
sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost);
caffe_copy(output_blobs[i]->count(), output_blobs[i]->gpu_data(),
data_ptr);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
Expand All @@ -104,12 +104,12 @@ static mxArray* do_backward(const mxArray* const top_diff) {
reinterpret_cast<const float* const>(mxGetPr(elem));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(output_blobs[i]->mutable_cpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count());
caffe_copy(output_blobs[i]->count(), data_ptr,
output_blobs[i]->mutable_cpu_diff());
break;
case Caffe::GPU:
cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count(), cudaMemcpyHostToDevice);
caffe_copy(output_blobs[i]->count(), data_ptr,
output_blobs[i]->mutable_gpu_diff());
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
Expand All @@ -129,12 +129,10 @@ static mxArray* do_backward(const mxArray* const top_diff) {
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, input_blobs[i]->cpu_diff(),
sizeof(float) * input_blobs[i]->count());
caffe_copy(input_blobs[i]->count(), input_blobs[i]->cpu_diff(), data_ptr);
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(),
sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost);
caffe_copy(input_blobs[i]->count(), input_blobs[i]->gpu_diff(), data_ptr);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
Expand Down Expand Up @@ -206,12 +204,12 @@ static mxArray* do_get_weights() {

switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(weights_ptr, layer_blobs[j]->cpu_data(),
sizeof(float) * layer_blobs[j]->count());
caffe_copy(layer_blobs[j]->count(), layer_blobs[j]->cpu_data(),
weights_ptr);
break;
case Caffe::GPU:
CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(),
sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDeviceToHost));
caffe_copy(layer_blobs[j]->count(), layer_blobs[j]->gpu_data(),
weights_ptr);
break;
default:
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
Expand Down
32 changes: 16 additions & 16 deletions src/caffe/blob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,25 @@ const Dtype* Blob<Dtype>::gpu_diff() const {
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
CHECK(data_);
return reinterpret_cast<Dtype*>(data_->mutable_cpu_data());
return static_cast<Dtype*>(data_->mutable_cpu_data());
}

template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
CHECK(data_);
return reinterpret_cast<Dtype*>(data_->mutable_gpu_data());
return static_cast<Dtype*>(data_->mutable_gpu_data());
}

template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_diff() {
CHECK(diff_);
return reinterpret_cast<Dtype*>(diff_->mutable_cpu_data());
return static_cast<Dtype*>(diff_->mutable_cpu_data());
}

template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_diff() {
CHECK(diff_);
return reinterpret_cast<Dtype*>(diff_->mutable_gpu_data());
return static_cast<Dtype*>(diff_->mutable_gpu_data());
}

template <typename Dtype>
Expand Down Expand Up @@ -121,15 +121,15 @@ void Blob<Dtype>::Update() {
case SyncedMemory::HEAD_AT_CPU:
// perform computation on CPU
caffe_axpy<Dtype>(count_, Dtype(-1),
reinterpret_cast<const Dtype*>(diff_->cpu_data()),
reinterpret_cast<Dtype*>(data_->mutable_cpu_data()));
static_cast<const Dtype*>(diff_->cpu_data()),
static_cast<Dtype*>(data_->mutable_cpu_data()));
break;
case SyncedMemory::HEAD_AT_GPU:
case SyncedMemory::SYNCED:
// perform computation on GPU
caffe_gpu_axpy<Dtype>(count_, Dtype(-1),
reinterpret_cast<const Dtype*>(diff_->gpu_data()),
reinterpret_cast<Dtype*>(data_->mutable_gpu_data()));
static_cast<const Dtype*>(diff_->gpu_data()),
static_cast<Dtype*>(data_->mutable_gpu_data()));
break;
default:
LOG(FATAL) << "Syncedmem not initialized.";
Expand All @@ -149,20 +149,20 @@ void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
switch (Caffe::mode()) {
case Caffe::GPU:
if (copy_diff) {
CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(),
sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
caffe_copy(count_, source.gpu_diff(),
static_cast<Dtype*>(diff_->mutable_gpu_data()));
} else {
CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(),
sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
caffe_copy(count_, source.gpu_data(),
static_cast<Dtype*>(data_->mutable_gpu_data()));
}
break;
case Caffe::CPU:
if (copy_diff) {
memcpy(diff_->mutable_cpu_data(), source.cpu_diff(),
sizeof(Dtype) * count_);
caffe_copy(count_, source.cpu_diff(),
static_cast<Dtype*>(diff_->mutable_cpu_data()));
} else {
memcpy(data_->mutable_cpu_data(), source.cpu_data(),
sizeof(Dtype) * count_);
caffe_copy(count_, source.cpu_data(),
static_cast<Dtype*>(data_->mutable_cpu_data()));
}
break;
default:
Expand Down
8 changes: 4 additions & 4 deletions src/caffe/layers/concat_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Dtype ConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
int offset_num = 0;
for (int i = 0; i < bottom.size(); ++i) {
const Dtype* bottom_data = bottom[i]->gpu_data();
caffe_gpu_copy(bottom[i]->count(), bottom_data,
caffe_copy(bottom[i]->count(), bottom_data,
top_data + (*top)[0]->offset(offset_num));
offset_num += bottom[i]->num();
}
Expand All @@ -27,7 +27,7 @@ Dtype ConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
int num_elem =
bottom[i]->channels() * bottom[i]->height() * bottom[i]->width();
for (int n = 0; n < num_; ++n) {
caffe_gpu_copy(num_elem, bottom_data+bottom[i]->offset(n),
caffe_copy(num_elem, bottom_data+bottom[i]->offset(n),
top_data + (*top)[0]->offset(n, offset_channel));
}
offset_channel += bottom[i]->channels();
Expand All @@ -49,7 +49,7 @@ void ConcatLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
Blob<Dtype>* blob = (*bottom)[i];
if (propagate_down[i]) {
Dtype* bottom_diff = blob->mutable_gpu_diff();
caffe_gpu_copy(blob->count(), top_diff + top[0]->offset(offset_num),
caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num),
bottom_diff);
}
offset_num += blob->num();
Expand All @@ -62,7 +62,7 @@ void ConcatLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
Dtype* bottom_diff = blob->mutable_gpu_diff();
int num_elem = blob->channels()*blob->height()*blob->width();
for (int n = 0; n < num_; ++n) {
caffe_gpu_copy(num_elem, top_diff + top[0]->offset(n, offset_channel),
caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel),
bottom_diff + blob->offset(n));
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/caffe/layers/conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* weight = this->blobs_[0]->cpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_cpu_diff();
memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
}
const int weight_offset = M_ * K_;
const int col_offset = K_ * N_;
Expand Down
6 changes: 2 additions & 4 deletions src/caffe/layers/conv_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,13 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
CUDA_CHECK(cudaMemset(weight_diff, 0,
sizeof(Dtype) * this->blobs_[0]->count()));
caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
Dtype* col_data = col_buffer_.mutable_gpu_data();
Dtype* col_diff = col_buffer_.mutable_gpu_diff();
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
CUDA_CHECK(cudaMemset(bias_diff, 0,
sizeof(Dtype) * this->blobs_[1]->count()));
caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
}
const int weight_offset = M_ * K_;
const int col_offset = K_ * N_;
Expand Down
10 changes: 4 additions & 6 deletions src/caffe/layers/data_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ Dtype DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
cudaMemcpyHostToDevice));
caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
(*top)[0]->mutable_gpu_data());
if (output_labels_) {
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
cudaMemcpyHostToDevice));
caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
(*top)[1]->mutable_gpu_data());
}
// Start a new prefetch thread
CreatePrefetchThread();
Expand Down
4 changes: 2 additions & 2 deletions src/caffe/layers/dropout_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Dtype DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
count, bottom_data, mask, uint_thres_, scale_, top_data);
CUDA_POST_KERNEL_CHECK;
} else {
caffe_gpu_copy(count, bottom_data, top_data);
caffe_copy(count, bottom_data, top_data);
}
return Dtype(0);
}
Expand Down Expand Up @@ -71,7 +71,7 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
CUDA_POST_KERNEL_CHECK;
} else {
caffe_gpu_copy(top[0]->count(), top_diff, bottom_diff);
caffe_copy(top[0]->count(), top_diff, bottom_diff);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/caffe/layers/eltwise_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
break;
case EltwiseParameter_EltwiseOp_SUM:
if (coeffs_[i] == Dtype(1.)) {
caffe_gpu_copy(count, top_diff, bottom_diff);
caffe_copy(count, top_diff, bottom_diff);
} else {
caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
Expand Down
16 changes: 6 additions & 10 deletions src/caffe/layers/hdf5_data_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,12 @@ Dtype HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
}
current_row_ = 0;
}
CUDA_CHECK(cudaMemcpy(
&(*top)[0]->mutable_gpu_data()[i * data_count],
&data_blob_.cpu_data()[current_row_ * data_count],
sizeof(Dtype) * data_count,
cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(
&(*top)[1]->mutable_gpu_data()[i * label_data_count],
&label_blob_.cpu_data()[current_row_ * label_data_count],
sizeof(Dtype) * label_data_count,
cudaMemcpyHostToDevice));
caffe_copy(data_count,
&data_blob_.cpu_data()[current_row_ * data_count],
&(*top)[0]->mutable_gpu_data()[i * data_count]);
caffe_copy(label_data_count,
&label_blob_.cpu_data()[current_row_ * label_data_count],
&(*top)[1]->mutable_gpu_data()[i * label_data_count]);
}
return Dtype(0.);
}
Expand Down
10 changes: 4 additions & 6 deletions src/caffe/layers/hdf5_output_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,10 @@ Dtype HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const int label_datum_dim = bottom[1]->count() / bottom[1]->num();

for (int i = 0; i < bottom[0]->num(); ++i) {
memcpy(&data_blob_.mutable_cpu_data()[i * data_datum_dim],
&bottom[0]->cpu_data()[i * data_datum_dim],
sizeof(Dtype) * data_datum_dim);
memcpy(&label_blob_.mutable_cpu_data()[i * label_datum_dim],
&bottom[1]->cpu_data()[i * label_datum_dim],
sizeof(Dtype) * label_datum_dim);
caffe_copy(data_datum_dim, &bottom[0]->cpu_data()[i * data_datum_dim],
&data_blob_.mutable_cpu_data()[i * data_datum_dim]);
caffe_copy(label_datum_dim, &bottom[1]->cpu_data()[i * label_datum_dim],
&label_blob_.mutable_cpu_data()[i * label_datum_dim]);
}
SaveBlobs();
return Dtype(0.);
Expand Down
10 changes: 4 additions & 6 deletions src/caffe/layers/hdf5_output_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@ Dtype HDF5OutputLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const int label_datum_dim = bottom[1]->count() / bottom[1]->num();

for (int i = 0; i < bottom[0]->num(); ++i) {
CUDA_CHECK(cudaMemcpy(&data_blob_.mutable_cpu_data()[i * data_datum_dim],
&bottom[0]->gpu_data()[i * data_datum_dim],
sizeof(Dtype) * data_datum_dim, cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpy(&label_blob_.mutable_cpu_data()[i * label_datum_dim],
&bottom[1]->gpu_data()[i * label_datum_dim],
sizeof(Dtype) * label_datum_dim, cudaMemcpyDeviceToHost));
caffe_copy(data_datum_dim, &bottom[0]->gpu_data()[i * data_datum_dim],
&data_blob_.mutable_cpu_data()[i * data_datum_dim]);
caffe_copy(label_datum_dim, &bottom[1]->gpu_data()[i * label_datum_dim],
&label_blob_.mutable_cpu_data()[i * label_datum_dim]);
}
SaveBlobs();
return Dtype(0.);
Expand Down
10 changes: 4 additions & 6 deletions src/caffe/layers/image_data_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@ Dtype ImageDataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
cudaMemcpyHostToDevice));
caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
(*top)[0]->mutable_gpu_data());
caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
(*top)[1]->mutable_gpu_data());
// Start a new prefetch thread
CreatePrefetchThread();
return Dtype(0.);
Expand Down
Loading