aten/src/ATen/Context.h (1 addition, 1 deletion)

@@ -57,7 +57,7 @@ class TORCH_API Context {
       AT_ERROR(DeviceTypeName(device_type), " device type not enabled.");
     }
   }
-  static bool isPinnedPtr(void* data) {
+  static bool isPinnedPtr(const void* data) {
    return detail::getCUDAHooks().isPinnedPtr(data);
  }
  static bool hasOpenMP();
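
Taking the pointer as const void* means call sites that only hold read-only memory no longer need a cast. A minimal sketch of such a caller (the helper name is hypothetical):

    #include <ATen/Context.h>

    // Hypothetical helper: checks whether a read-only buffer is pinned.
    // No const_cast is needed now that isPinnedPtr accepts const void*.
    bool is_buffer_pinned(const void* buf) {
      return at::Context::isPinnedPtr(buf);
    }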

aten/src/ATen/cuda/detail/CUDAHooks.cpp (4 additions, 2 deletions)

@@ -120,7 +120,7 @@ Device CUDAHooks::getDeviceFromPtr(void* data) const {
   return at::cuda::getDeviceFromPtr(data);
 }
 
-bool CUDAHooks::isPinnedPtr(void* data) const {
+bool CUDAHooks::isPinnedPtr(const void* data) const {
   // First check if driver is broken/missing, in which case PyTorch CPU
   // functionalities should still work, we should report `false` here.
   if (!at::cuda::is_available()) {
@@ -134,7 +134,9 @@ bool CUDAHooks::isPinnedPtr(void* data) const {
     device_guard.reset_device(at::Device(at::DeviceType::CUDA, *primary_ctx_device_index));
   }
   cudaPointerAttributes attr;
-  cudaError_t err = cudaPointerGetAttributes(&attr, data);
+  // We do not believe that CUDA needs mutable access to the data
+  // here.
+  cudaError_t err = cudaPointerGetAttributes(&attr, const_cast<void*>(data));
 #if !defined(USE_ROCM)
   if (err == cudaErrorInvalidValue) {
     cudaGetLastError();
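
The const_cast here is the standard pattern for C APIs that are declared with a non-const pointer but only inspect it. A condensed sketch of the query (assuming the CUDA runtime headers and the attr.type field available in CUDA 10+):

    #include <cuda_runtime.h>

    // Condensed sketch of the pinned-pointer query above. The runtime only
    // inspects the address, so casting away const does not enable mutation.
    bool query_is_pinned(const void* data) {
      cudaPointerAttributes attr;
      cudaError_t err = cudaPointerGetAttributes(&attr, const_cast<void*>(data));
      if (err != cudaSuccess) {
        cudaGetLastError();  // clear the sticky error state
        return false;
      }
      return attr.type == cudaMemoryTypeHost;  // pinned host memory
    }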

aten/src/ATen/cuda/detail/CUDAHooks.h (1 addition, 1 deletion)

@@ -21,7 +21,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
   CUDAHooks(at::CUDAHooksArgs) {}
   void initCUDA() const override;
   Device getDeviceFromPtr(void* data) const override;
-  bool isPinnedPtr(void* data) const override;
+  bool isPinnedPtr(const void* data) const override;
   const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
   bool hasCUDA() const override;
   bool hasMAGMA() const override;

aten/src/ATen/detail/CUDAHooksInterface.h (1 addition, 1 deletion)

@@ -82,7 +82,7 @@ struct TORCH_API CUDAHooksInterface {
     TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP);
   }
 
-  virtual bool isPinnedPtr(void* /*data*/) const {
+  virtual bool isPinnedPtr(const void* /*data*/) const {
     return false;
   }
 

aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp (3 additions, 3 deletions)

@@ -1030,7 +1030,7 @@ magma_trans_t to_magma(TransposeType trans) {
 
 #define ALLOCATE_ARRAY(name, type, size) \
   auto storage_##name = pin_memory<type>(size); \
-  name = static_cast<type*>(storage_##name.data());
+  name = static_cast<type*>(storage_##name.mutable_data());
 
 namespace {
 
@@ -1927,7 +1927,7 @@ static void apply_magma_eigh(const Tensor& values, const Tensor& vectors, const
   if (vectors.is_complex()) {
     lrwork = magma_int_cast(std::max<int64_t>(1, rwkopt), "rwork_size");
     storage_rwork = pin_memory<value_t>(lrwork);
-    rwork = static_cast<value_t*>(storage_rwork.data());
+    rwork = static_cast<value_t*>(storage_rwork.mutable_data());
   }
 
   for (decltype(batch_size) i = 0; i < batch_size; i++) {
@@ -2125,7 +2125,7 @@ AT_ERROR("linalg.svd: MAGMA library not found in "
   if (A.is_complex()) {
     auto lrwork = computeLRWorkDim(compute_uv ? (full_matrices ? 'A' : 'S') : 'N', m, n);
     storage_rwork = pin_memory<value_t>(lrwork);
-    rwork = static_cast<value_t*>(storage_rwork.data());
+    rwork = static_cast<value_t*>(storage_rwork.mutable_data());
   }
 
   magma_int_t* iwork;
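
These MAGMA scratch buffers are written into, so they must take the mutable accessor. For reference, a hypothetical invocation ALLOCATE_ARRAY(ipiv, magma_int_t, n) now expands to:

    // Literal expansion of the macro above for the hypothetical
    // arguments (ipiv, magma_int_t, n): pinned allocation, then an
    // explicitly mutable pointer into it.
    auto storage_ipiv = pin_memory<magma_int_t>(n);
    ipiv = static_cast<magma_int_t*>(storage_ipiv.mutable_data());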

c10/core/Storage.h (5 additions, 1 deletion)

@@ -82,7 +82,11 @@ struct C10_API Storage {
   }
   // get() use here is to get const-correctness
 
-  void* data() const {
+  const void* data() const {
     return storage_impl_->data();
   }
+
+  void* mutable_data() const {
+    return storage_impl_->mutable_data();
+  }
 
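
This is the core of the change: data() becomes a read-only accessor, and writers must opt in explicitly through mutable_data(). A usage sketch (assuming the storage holds floats):

    #include <c10/core/Storage.h>

    // Sketch of the intended split, assuming `storage` holds floats.
    void fill_first(const c10::Storage& storage) {
      const void* ro = storage.data();  // read-only view, no cast needed
      (void)ro;
      auto* rw = static_cast<float*>(storage.mutable_data());  // explicit opt-in
      rw[0] = 1.0f;
    }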

c10/core/TensorImpl.h (8 additions, 8 deletions)

@@ -1522,7 +1522,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
         "Caffe2 uses a lazy allocation, so you will need to call "
         "mutable_data() or raw_mutable_data() to actually allocate memory.");
     // Caller does the type check.
-    return static_cast<T*>(storage_.data()) + storage_offset_;
+    return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
   }
 
   /**
@@ -1546,7 +1546,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     // Computing an offset into an empty tensor would be UB, since an empty
     // tensor's storage will be nullptr, and adding a nonzero offset to nullptr
     // is UB. So we skip the offset computation in this case.
-    char* const data = static_cast<char*>(storage_.data());
+    char* const data = static_cast<char*>(storage_.mutable_data());
     if (data == nullptr) {
       return nullptr;
     }
@@ -1559,7 +1559,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    */
   template <typename T>
   inline T* unsafe_data() const {
-    return static_cast<T*>(storage_.data()) + storage_offset_;
+    return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
   }
 
   /**
@@ -2145,7 +2145,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     // For 0-size tensors it's fine to return any pointer (including nullptr)
     if (data_type_ == meta && storage_initialized()) {
       return static_cast<void*>(
-          static_cast<char*>(storage_.data()) +
+          static_cast<char*>(storage_.mutable_data()) +
           storage_offset_ * meta.itemsize());
     } else {
       bool had_special_dtor = data_type_.placementDelete() != nullptr;
@@ -2161,7 +2161,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
           (storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
         TORCH_INTERNAL_ASSERT(
             storage_offset_ == 0); // because we just reallocated
-        return storage_.data();
+        return storage_.mutable_data();
       }
       const Allocator* allocator = storage_.allocator();
       // Storage might have nullptr allocator in rare cases, for example, if
@@ -2180,7 +2180,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
        auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
        storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
            std::move(data_ptr), dtor, size, storage_.device()));
-        data_type_.placementNew()(storage_.data(), numel_);
+        data_type_.placementNew()(storage_.mutable_data(), numel_);
       } else {
         // For fundamental type, new and delete is easier.
         storage_.set_data_ptr_noswap(
@@ -2190,7 +2190,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
       TORCH_INTERNAL_ASSERT(
           storage_offset_ == 0); // because we just reallocated
       device_opt_ = storage_.device();
-      return storage_.data();
+      return storage_.mutable_data();
     }
   }
 
@@ -2203,7 +2203,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   template <typename T>
   inline T* mutable_data() {
     if (storage_initialized() && data_type_.Match<T>()) {
-      return static_cast<T*>(storage_.data()) + storage_offset_;
+      return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
     }
     // Check it here statically - otherwise TypeMeta would throw the runtime
     // error in attempt to invoke TypeMeta::ctor()
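
All of these call sites hand a mutable T* out of a const member function, so once data() returns const void* they must route through mutable_data(); the const overload no longer converts to T*. A reduced sketch of the pattern (a free function standing in for the member functions above):

    #include <c10/core/Storage.h>
    #include <cstdint>

    // Reduced sketch: code that returns a mutable pointer must now name
    // the mutable accessor explicitly.
    template <typename T>
    T* typed_data(const c10::Storage& storage, int64_t offset) {
      // static_cast<T*>(storage.data()) would no longer compile:
      // data() now returns const void*.
      return static_cast<T*>(storage.mutable_data()) + offset;
    }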

test/cpp/api/rnn.cpp (2 additions, 2 deletions)

@@ -190,15 +190,15 @@ TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
   LSTM model(2, 2);
   for (auto& v : model->parameters()) {
     float size = v.numel();
-    auto p = static_cast<float*>(v.storage().data());
+    auto p = static_cast<float*>(v.storage().mutable_data());
     for (size_t i = 0; i < size; i++) {
       p[i] = i / size;
     }
   }
 
   auto x = torch::empty({3, 4, 2}, torch::requires_grad());
   float size = x.numel();
-  auto p = static_cast<float*>(x.storage().data());
+  auto p = static_cast<float*>(x.storage().mutable_data());
   for (size_t i = 0; i < size; i++) {
     p[i] = (size - i) / size;
   }

torch/csrc/Storage.cpp (2 additions, 2 deletions)

@@ -173,7 +173,7 @@ static PyObject* THPStorage_pynew(
       uint8_t value = THPByteUtils_unpackReal(item.get());
       const auto& storage = THPStorage_Unpack(self);
       if (allocator == c10::GetDefaultCPUAllocator()) {
-        static_cast<uint8_t*>(storage.data())[i] = value;
+        static_cast<uint8_t*>(storage.mutable_data())[i] = value;
       } else {
         // TODO: this might be slow - consider batched updates?
         storage_set(storage, i, value);
@@ -236,7 +236,7 @@ static PyObject* THPStorage_get(THPStorage* self, PyObject* index) {
   }
 
   const auto& storage = THPStorage_Unpack(self);
-  auto data = static_cast<uint8_t*>(storage.data());
+  auto data = static_cast<uint8_t*>(storage.mutable_data());
 
   at::StorageImpl* old_storage_impl = storage.unsafeGetStorageImpl();
   c10::raw::intrusive_ptr::incref(old_storage_impl);

torch/csrc/StorageMethods.cpp (3 additions, 1 deletion)

@@ -47,7 +47,9 @@ static PyObject* THPStorage_nbytes(PyObject* self, PyObject* noargs) {
 
 static PyObject* THPStorage_dataPtr(PyObject* self, PyObject* noargs) {
   HANDLE_TH_ERRORS
-  return PyLong_FromVoidPtr(THPStorage_Unpack(self).data());
+  // PyLong_FromVoidPtr should not need to mutate the pointer in order
+  // to extract a new long object from it.
+  return PyLong_FromVoidPtr(const_cast<void*>(THPStorage_Unpack(self).data()));
   END_HANDLE_TH_ERRORS
 }
 
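
CPython declares PyLong_FromVoidPtr as taking a plain void* but only reads the address, so the cast mirrors the CUDA hook change above. A minimal sketch:

    #include <Python.h>

    // Sketch: expose a read-only address to Python as an int.
    // PyLong_FromVoidPtr(void*) only reads the pointer value, so the
    // const_cast does not enable mutation.
    PyObject* address_as_pylong(const void* p) {
      return PyLong_FromVoidPtr(const_cast<void*>(p));
    }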

torch/csrc/StorageSharing.cpp (3 additions, 3 deletions)

@@ -295,7 +295,7 @@ static PyObject* THPStorage_shareCuda(PyObject* self, PyObject* noargs) {
     // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
     size_t base_size;
     void* base_ptr = c10::cuda::CUDACachingAllocator::getBaseAllocation(
-        storage.data(), &base_size);
+        storage.mutable_data(), &base_size);
     ptrdiff_t offset_bytes = (char*)storage.data() - (char*)base_ptr;
 
     // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@@ -307,8 +307,8 @@ static PyObject* THPStorage_shareCuda(PyObject* self, PyObject* noargs) {
 
     // Put Storage Data behind new ref counting context
     // See Note [CUDA IPC Refcounting implementation explained]
-    at::DataPtr sent_data_ptr =
-        torch::GetNewRefCountedSentData(storage.data(), storage.device());
+    at::DataPtr sent_data_ptr = torch::GetNewRefCountedSentData(
+        storage.mutable_data(), storage.device());
     auto old_data_ptr = storage.set_data_ptr(std::move(sent_data_ptr));
     auto sent_data =
         static_cast<torch::CudaIPCSentData*>(storage.data_ptr().get_context());

torch/csrc/distributed/rpc/tensorpipe_cuda.cpp (1 addition, 1 deletion)

@@ -83,7 +83,7 @@ class TensorpipeCudaConverter : public TensorpipeDeviceTypeConverter {
     c10::cuda::CUDACachingAllocator::recordStream(storage.data_ptr(), stream);
 
     tensorpipe::CudaBuffer buffer;
-    buffer.ptr = static_cast<char*>(storage.data());
+    buffer.ptr = static_cast<char*>(storage.mutable_data());
     buffer.stream = stream.stream();
 
     tensorpipe::Message::Tensor tensor;

torch/csrc/distributed/rpc/tensorpipe_utils.cpp (3 additions, 3 deletions)

@@ -44,8 +44,8 @@ class TensorpipeCpuConverter : public TensorpipeDeviceTypeConverter {
     bool storageHasDeleter = storage.data_ptr().get_context() != nullptr;
     if (!storageHasDeleter) {
       std::vector<char> storageData(
-          static_cast<char*>(storage.data()),
-          static_cast<char*>(storage.data()) + storage.nbytes());
+          static_cast<const char*>(storage.data()),
+          static_cast<const char*>(storage.data()) + storage.nbytes());
 
       tensorpipe::CpuBuffer buffer;
       buffer.ptr = storageData.data();
@@ -59,7 +59,7 @@ class TensorpipeCpuConverter : public TensorpipeDeviceTypeConverter {
       return c10::make_optional(std::move(storageData));
     } else {
       tensorpipe::CpuBuffer buffer;
-      buffer.ptr = static_cast<char*>(storage.data());
+      buffer.ptr = static_cast<char*>(storage.mutable_data());
 
       tensorpipe::Message::Tensor tensor;
       tensor.buffer = buffer;

torch/csrc/profiler/data_flow.h (1 addition, 1 deletion)

@@ -49,7 +49,7 @@ using TensorImplAddress = strong::type<
     strong::boolean>;
 
 using StorageImplData = strong::type<
-    void*,
+    const void*,
     struct StorageImplData_,
     strong::regular,
     strong::hashable,

torch/csrc/profiler/standalone/execution_graph_observer.cpp (2 additions, 2 deletions)

@@ -164,7 +164,7 @@ struct TORCH_API ExecutionGraphObserver {
   std::map<size_t, std::stack<ID>> op_stack{};
   // Uses the underlying TensorImpl object pointer as the key and map to its
   // unique id.
-  std::map<void*, ID> object_id{};
+  std::map<const void*, ID> object_id{};
   // Observer run state.
   enum class RunState { uninitialized, disabled, enabled };
 
@@ -362,7 +362,7 @@ void finalizeExecutionGraphOutput(ExecutionGraphObserver& ob) {
 
 inline ExecutionGraphObserver::ID getObjectID(
     ExecutionGraphObserver& ob,
-    void* t) {
+    const void* t) {
   auto iter = ob.object_id.find(t);
   if (iter == ob.object_id.end()) {
     ExecutionGraphObserver::ID object_id = ob.getNewID();
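
Widening the key type to const void* lets the observer record ids for pointers that come from the now-const accessors, with no cast at the call site. A usage sketch (the wrapper function is hypothetical):

    // Sketch: a read-only pointer from the const-qualified Storage::data()
    // serves directly as the identity key in the const void* map.
    ExecutionGraphObserver::ID record_storage(
        ExecutionGraphObserver& ob, const at::Tensor& t) {
      return getObjectID(ob, t.storage().data());
    }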