aten/src/ATen/Context.h (1 addition, 1 deletion)

@@ -57,7 +57,7 @@ class TORCH_API Context {
       AT_ERROR(DeviceTypeName(device_type), " device type not enabled.");
     }
   }
-  static bool isPinnedPtr(void* data) {
+  static bool isPinnedPtr(const void* data) {
    return detail::getCUDAHooks().isPinnedPtr(data);
  }
  static bool hasOpenMP();
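
Taking the pointer as const void* means call sites that only hold read-only memory no longer need a cast. A minimal sketch of such a caller (the helper name is hypothetical):

    #include <ATen/Context.h>

    // Hypothetical helper: checks whether a read-only buffer is pinned.
    // No const_cast is needed now that isPinnedPtr accepts const void*.
    bool is_buffer_pinned(const void* buf) {
      return at::Context::isPinnedPtr(buf);
    }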

aten/src/ATen/cuda/detail/CUDAHooks.cpp (4 additions, 2 deletions)

@@ -120,7 +120,7 @@ Device CUDAHooks::getDeviceFromPtr(void* data) const {
   return at::cuda::getDeviceFromPtr(data);
 }
 
-bool CUDAHooks::isPinnedPtr(void* data) const {
+bool CUDAHooks::isPinnedPtr(const void* data) const {
   // First check if driver is broken/missing, in which case PyTorch CPU
   // functionalities should still work, we should report `false` here.
   if (!at::cuda::is_available()) {
@@ -134,7 +134,9 @@ bool CUDAHooks::isPinnedPtr(void* data) const {
     device_guard.reset_device(at::Device(at::DeviceType::CUDA, *primary_ctx_device_index));
   }
   cudaPointerAttributes attr;
-  cudaError_t err = cudaPointerGetAttributes(&attr, data);
+  // We do not believe that CUDA needs mutable access to the data
+  // here.
+  cudaError_t err = cudaPointerGetAttributes(&attr, const_cast<void*>(data));
 #if !defined(USE_ROCM)
   if (err == cudaErrorInvalidValue) {
     cudaGetLastError();
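
The const_cast here is the standard pattern for C APIs that are declared with a non-const pointer but only inspect it. A condensed sketch of the query (assuming the CUDA runtime headers and the attr.type field available in CUDA 10+):

    #include <cuda_runtime.h>

    // Condensed sketch of the pinned-pointer query above. The runtime only
    // inspects the address, so casting away const does not enable mutation.
    bool query_is_pinned(const void* data) {
      cudaPointerAttributes attr;
      cudaError_t err = cudaPointerGetAttributes(&attr, const_cast<void*>(data));
      if (err != cudaSuccess) {
        cudaGetLastError();  // clear the sticky error state
        return false;
      }
      return attr.type == cudaMemoryTypeHost;  // pinned host memory
    }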

aten/src/ATen/cuda/detail/CUDAHooks.h (1 addition, 1 deletion)

@@ -21,7 +21,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
   CUDAHooks(at::CUDAHooksArgs) {}
   void initCUDA() const override;
   Device getDeviceFromPtr(void* data) const override;
-  bool isPinnedPtr(void* data) const override;
+  bool isPinnedPtr(const void* data) const override;
   const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
   bool hasCUDA() const override;
   bool hasMAGMA() const override;

aten/src/ATen/detail/CUDAHooksInterface.h (1 addition, 1 deletion)

@@ -82,7 +82,7 @@ struct TORCH_API CUDAHooksInterface {
     TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP);
   }
 
-  virtual bool isPinnedPtr(void* /*data*/) const {
+  virtual bool isPinnedPtr(const void* /*data*/) const {
     return false;
   }
 

aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp (3 additions, 3 deletions)

@@ -1030,7 +1030,7 @@ magma_trans_t to_magma(TransposeType trans) {
 
 #define ALLOCATE_ARRAY(name, type, size) \
   auto storage_##name = pin_memory<type>(size); \
-  name = static_cast<type*>(storage_##name.data());
+  name = static_cast<type*>(storage_##name.mutable_data());
 
 namespace {
 
@@ -1927,7 +1927,7 @@ static void apply_magma_eigh(const Tensor& values, const Tensor& vectors, const
   if (vectors.is_complex()) {
     lrwork = magma_int_cast(std::max<int64_t>(1, rwkopt), "rwork_size");
     storage_rwork = pin_memory<value_t>(lrwork);
-    rwork = static_cast<value_t*>(storage_rwork.data());
+    rwork = static_cast<value_t*>(storage_rwork.mutable_data());
   }
 
   for (decltype(batch_size) i = 0; i < batch_size; i++) {
@@ -2125,7 +2125,7 @@ AT_ERROR("linalg.svd: MAGMA library not found in "
   if (A.is_complex()) {
     auto lrwork = computeLRWorkDim(compute_uv ? (full_matrices ? 'A' : 'S') : 'N', m, n);
     storage_rwork = pin_memory<value_t>(lrwork);
-    rwork = static_cast<value_t*>(storage_rwork.data());
+    rwork = static_cast<value_t*>(storage_rwork.mutable_data());
   }
 
   magma_int_t* iwork;
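
These MAGMA scratch buffers are written into, so they must take the mutable accessor. For reference, a hypothetical invocation ALLOCATE_ARRAY(ipiv, magma_int_t, n) now expands to:

    // Literal expansion of the macro above for the hypothetical
    // arguments (ipiv, magma_int_t, n): pinned allocation, then an
    // explicitly mutable pointer into it.
    auto storage_ipiv = pin_memory<magma_int_t>(n);
    ipiv = static_cast<magma_int_t*>(storage_ipiv.mutable_data());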

c10/core/Storage.h (5 additions, 1 deletion)

@@ -82,7 +82,11 @@ struct C10_API Storage {
   }
   // get() use here is to get const-correctness
 
-  void* data() const {
+  const void* data() const {
     return storage_impl_->data();
   }
+
+  void* mutable_data() const {
+    return storage_impl_->mutable_data();
+  }
 
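
This is the core of the change: data() becomes a read-only accessor, and writers must opt in explicitly through mutable_data(). A usage sketch (assuming the storage holds floats):

    #include <c10/core/Storage.h>

    // Sketch of the intended split, assuming `storage` holds floats.
    void fill_first(const c10::Storage& storage) {
      const void* ro = storage.data();  // read-only view, no cast needed
      (void)ro;
      auto* rw = static_cast<float*>(storage.mutable_data());  // explicit opt-in
      rw[0] = 1.0f;
    }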

c10/core/TensorImpl.h (8 additions, 8 deletions)

@@ -1522,7 +1522,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
         "Caffe2 uses a lazy allocation, so you will need to call "
         "mutable_data() or raw_mutable_data() to actually allocate memory.");
     // Caller does the type check.
-    return static_cast<T*>(storage_.data()) + storage_offset_;
+    return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
   }
 
   /**
@@ -1546,7 +1546,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     // Computing an offset into an empty tensor would be UB, since an empty
     // tensor's storage will be nullptr, and adding a nonzero offset to nullptr
     // is UB. So we skip the offset computation in this case.
-    char* const data = static_cast<char*>(storage_.data());
+    char* const data = static_cast<char*>(storage_.mutable_data());
     if (data == nullptr) {
       return nullptr;
     }
@@ -1559,7 +1559,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    */
   template <typename T>
   inline T* unsafe_data() const {
-    return static_cast<T*>(storage_.data()) + storage_offset_;
+    return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
   }
 
   /**
@@ -2145,7 +2145,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     // For 0-size tensors it's fine to return any pointer (including nullptr)
     if (data_type_ == meta && storage_initialized()) {
       return static_cast<void*>(
-          static_cast<char*>(storage_.data()) +
+          static_cast<char*>(storage_.mutable_data()) +
           storage_offset_ * meta.itemsize());
     } else {
       bool had_special_dtor = data_type_.placementDelete() != nullptr;
@@ -2161,7 +2161,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
           (storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
         TORCH_INTERNAL_ASSERT(
             storage_offset_ == 0); // because we just reallocated
-        return storage_.data();
+        return storage_.mutable_data();
       }
       const Allocator* allocator = storage_.allocator();
       // Storage might have nullptr allocator in rare cases, for example, if
@@ -2180,7 +2180,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
        auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
        storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
            std::move(data_ptr), dtor, size, storage_.device()));
-        data_type_.placementNew()(storage_.data(), numel_);
+        data_type_.placementNew()(storage_.mutable_data(), numel_);
       } else {
         // For fundamental type, new and delete is easier.
         storage_.set_data_ptr_noswap(
@@ -2190,7 +2190,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
       TORCH_INTERNAL_ASSERT(
           storage_offset_ == 0); // because we just reallocated
       device_opt_ = storage_.device();
-      return storage_.data();
+      return storage_.mutable_data();
     }
   }
 
@@ -2203,7 +2203,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   template <typename T>
   inline T* mutable_data() {
     if (storage_initialized() && data_type_.Match<T>()) {
-      return static_cast<T*>(storage_.data()) + storage_offset_;
+      return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
     }
     // Check it here statically - otherwise TypeMeta would throw the runtime
     // error in attempt to invoke TypeMeta::ctor()
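
All of these call sites hand a mutable T* out of a const member function, so once data() returns const void* they must route through mutable_data(); the const overload no longer converts to T*. A reduced sketch of the pattern (a free function standing in for the member functions above):

    #include <c10/core/Storage.h>
    #include <cstdint>

    // Reduced sketch: code that returns a mutable pointer must now name
    // the mutable accessor explicitly.
    template <typename T>
    T* typed_data(const c10::Storage& storage, int64_t offset) {
      // static_cast<T*>(storage.data()) would no longer compile:
      // data() now returns const void*.
      return static_cast<T*>(storage.mutable_data()) + offset;
    }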

test/cpp/api/rnn.cpp (2 additions, 2 deletions)

@@ -190,15 +190,15 @@ TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
   LSTM model(2, 2);
   for (auto& v : model->parameters()) {
     float size = v.numel();
-    auto p = static_cast<float*>(v.storage().data());
+    auto p = static_cast<float*>(v.storage().mutable_data());
     for (size_t i = 0; i < size; i++) {
       p[i] = i / size;
     }
   }
 
   auto x = torch::empty({3, 4, 2}, torch::requires_grad());
   float size = x.numel();
-  auto p = static_cast<float*>(x.storage().data());
+  auto p = static_cast<float*>(x.storage().mutable_data());
   for (size_t i = 0; i < size; i++) {
     p[i] = (size - i) / size;
   }

torch/csrc/Storage.cpp (2 additions, 2 deletions)

@@ -173,7 +173,7 @@ static PyObject* THPStorage_pynew(
       uint8_t value = THPByteUtils_unpackReal(item.get());
       const auto& storage = THPStorage_Unpack(self);
       if (allocator == c10::GetDefaultCPUAllocator()) {
-        static_cast<uint8_t*>(storage.data())[i] = value;
+        static_cast<uint8_t*>(storage.mutable_data())[i] = value;
       } else {
         // TODO: this might be slow - consider batched updates?
         storage_set(storage, i, value);
@@ -236,7 +236,7 @@ static PyObject* THPStorage_get(THPStorage* self, PyObject* index) {
   }
 
   const auto& storage = THPStorage_Unpack(self);
-  auto data = static_cast<uint8_t*>(storage.data());
+  auto data = static_cast<uint8_t*>(storage.mutable_data());
 
   at::StorageImpl* old_storage_impl = storage.unsafeGetStorageImpl();
   c10::raw::intrusive_ptr::incref(old_storage_impl);

torch/csrc/StorageMethods.cpp (3 additions, 1 deletion)

@@ -47,7 +47,9 @@ static PyObject* THPStorage_nbytes(PyObject* self, PyObject* noargs) {
 
 static PyObject* THPStorage_dataPtr(PyObject* self, PyObject* noargs) {
   HANDLE_TH_ERRORS
-  return PyLong_FromVoidPtr(THPStorage_Unpack(self).data());
+  // PyLong_FromVoidPtr should not need to mutate the pointer in order
+  // to extract a new long object from it.
+  return PyLong_FromVoidPtr(const_cast<void*>(THPStorage_Unpack(self).data()));
   END_HANDLE_TH_ERRORS
 }
 
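
CPython declares PyLong_FromVoidPtr as taking a plain void* but only reads the address, so the cast mirrors the CUDA hook change above. A minimal sketch:

    #include <Python.h>

    // Sketch: expose a read-only address to Python as an int.
    // PyLong_FromVoidPtr(void*) only reads the pointer value, so the
    // const_cast does not enable mutation.
    PyObject* address_as_pylong(const void* p) {
      return PyLong_FromVoidPtr(const_cast<void*>(p));
    }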

torch/csrc/StorageSharing.cpp (3 additions, 3 deletions)

@@ -295,7 +295,7 @@ static PyObject* THPStorage_shareCuda(PyObject* self, PyObject* noargs) {
     // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
     size_t base_size;
     void* base_ptr = c10::cuda::CUDACachingAllocator::getBaseAllocation(
-        storage.data(), &base_size);
+        storage.mutable_data(), &base_size);
     ptrdiff_t offset_bytes = (char*)storage.data() - (char*)base_ptr;
 
     // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@@ -307,8 +307,8 @@ static PyObject* THPStorage_shareCuda(PyObject* self, PyObject* noargs) {
 
     // Put Storage Data behind new ref counting context
     // See Note [CUDA IPC Refcounting implementation explained]
-    at::DataPtr sent_data_ptr =
-        torch::GetNewRefCountedSentData(storage.data(), storage.device());
+    at::DataPtr sent_data_ptr = torch::GetNewRefCountedSentData(
+        storage.mutable_data(), storage.device());
     auto old_data_ptr = storage.set_data_ptr(std::move(sent_data_ptr));
     auto sent_data =
         static_cast<torch::CudaIPCSentData*>(storage.data_ptr().get_context());

torch/csrc/distributed/rpc/tensorpipe_cuda.cpp (1 addition, 1 deletion)

@@ -83,7 +83,7 @@ class TensorpipeCudaConverter : public TensorpipeDeviceTypeConverter {
     c10::cuda::CUDACachingAllocator::recordStream(storage.data_ptr(), stream);
 
     tensorpipe::CudaBuffer buffer;
-    buffer.ptr = static_cast<char*>(storage.data());
+    buffer.ptr = static_cast<char*>(storage.mutable_data());
     buffer.stream = stream.stream();
 
     tensorpipe::Message::Tensor tensor;

torch/csrc/distributed/rpc/tensorpipe_utils.cpp (3 additions, 3 deletions)

@@ -44,8 +44,8 @@ class TensorpipeCpuConverter : public TensorpipeDeviceTypeConverter {
     bool storageHasDeleter = storage.data_ptr().get_context() != nullptr;
     if (!storageHasDeleter) {
       std::vector<char> storageData(
-          static_cast<char*>(storage.data()),
-          static_cast<char*>(storage.data()) + storage.nbytes());
+          static_cast<const char*>(storage.data()),
+          static_cast<const char*>(storage.data()) + storage.nbytes());
 
       tensorpipe::CpuBuffer buffer;
       buffer.ptr = storageData.data();
@@ -59,7 +59,7 @@ class TensorpipeCpuConverter : public TensorpipeDeviceTypeConverter {
       return c10::make_optional(std::move(storageData));
     } else {
       tensorpipe::CpuBuffer buffer;
-      buffer.ptr = static_cast<char*>(storage.data());
+      buffer.ptr = static_cast<char*>(storage.mutable_data());
 
       tensorpipe::Message::Tensor tensor;
       tensor.buffer = buffer;

torch/csrc/profiler/data_flow.h (1 addition, 1 deletion)

@@ -49,7 +49,7 @@ using TensorImplAddress = strong::type<
     strong::boolean>;
 
 using StorageImplData = strong::type<
-    void*,
+    const void*,
     struct StorageImplData_,
     strong::regular,
     strong::hashable,

torch/csrc/profiler/standalone/execution_graph_observer.cpp (2 additions, 2 deletions)

@@ -164,7 +164,7 @@ struct TORCH_API ExecutionGraphObserver {
   std::map<size_t, std::stack<ID>> op_stack{};
   // Uses the underlying TensorImpl object pointer as the key and map to its
   // unique id.
-  std::map<void*, ID> object_id{};
+  std::map<const void*, ID> object_id{};
   // Observer run state.
   enum class RunState { uninitialized, disabled, enabled };
 
@@ -362,7 +362,7 @@ void finalizeExecutionGraphOutput(ExecutionGraphObserver& ob) {
 
 inline ExecutionGraphObserver::ID getObjectID(
     ExecutionGraphObserver& ob,
-    void* t) {
+    const void* t) {
   auto iter = ob.object_id.find(t);
   if (iter == ob.object_id.end()) {
     ExecutionGraphObserver::ID object_id = ob.getNewID();
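
Widening the key type to const void* lets the observer record ids for pointers that come from the now-const accessors, with no cast at the call site. A usage sketch (the wrapper function is hypothetical):

    // Sketch: a read-only pointer from the const-qualified Storage::data()
    // serves directly as the identity key in the const void* map.
    ExecutionGraphObserver::ID record_storage(
        ExecutionGraphObserver& ob, const at::Tensor& t) {
      return getObjectID(ob, t.storage().data());
    }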