Skip to content

Commit 6d0fb85

Browse files
bdhirsh authored and facebook-github-bot committed
Revert D28833086: beef up at::_ops API
Test Plan: revert-hammer Differential Revision: D28833086 (pytorch@e2129d1) Original commit changeset: 55f322a8378c fbshipit-source-id: e55bf812ec411bb6bee87654f1d65ff10c046106
1 parent 0cbb5e1 commit 6d0fb85

File tree

18 files changed

+510
-495
lines changed

18 files changed

+510
-495
lines changed

BUILD.bazel

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,22 +136,20 @@ genrule(
136136
"aten/src/ATen/RegisterMeta.cpp",
137137
"aten/src/ATen/RegisterSchema.cpp",
138138
"aten/src/ATen/CPUFunctions.h",
139-
"aten/src/ATen/CPUFunctions_inl.h",
140139
"aten/src/ATen/CUDAFunctions.h",
141-
"aten/src/ATen/CUDAFunctions_inl.h",
142140
"aten/src/ATen/CompositeExplicitAutogradFunctions.h",
143-
"aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
144141
"aten/src/ATen/CompositeImplicitAutogradFunctions.h",
145-
"aten/src/ATen/CompositeImplicitAutogradFunctions_inl.h",
146142
"aten/src/ATen/Functions.h",
143+
"aten/src/ATen/Functions.cpp",
147144
"aten/src/ATen/RedispatchFunctions.h",
145+
"aten/src/ATen/RedispatchFunctions.cpp",
148146
"aten/src/ATen/Operators.h",
149147
"aten/src/ATen/Operators.cpp",
150148
"aten/src/ATen/NativeFunctions.h",
151149
"aten/src/ATen/MetaFunctions.h",
152-
"aten/src/ATen/MetaFunctions_inl.h",
153150
"aten/src/ATen/NativeMetaFunctions.h",
154151
"aten/src/ATen/core/TensorBody.h",
152+
"aten/src/ATen/core/TensorMethods.cpp",
155153
"aten/src/ATen/core/ATenOpList.cpp",
156154
],
157155
cmd = "$(location :gen) --source-path aten/src/ATen --install_dir `dirname $(location aten/src/ATen/Declarations.yaml)`",

aten/src/ATen/core/op_registration/adaption.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,26 @@
4343
namespace c10 {
4444
namespace impl {
4545

46+
// Validates a (TensorOptions, memory_format) argument pair and resolves the
// effective memory format: the explicit argument wins when present,
// otherwise the format embedded in `options` (if any) is used.
inline c10::optional<MemoryFormat>
check_tensor_options_and_extract_memory_format(
    const TensorOptions& options,
    c10::optional<MemoryFormat> memory_format) {
  // requires_grad must be unset or false on TensorOptions passed to operators.
  TORCH_CHECK(
      options.requires_grad_opt() == c10::nullopt ||
          options.requires_grad_opt().value() == false,
      "Operators taking TensorOptions cannot take a TensorOptions with "
      "options.requires_grad set as true. This isn't implemented yet.");
  // The two sources of memory_format are mutually exclusive.
  TORCH_CHECK(
      !(options.has_memory_format() && memory_format.has_value()),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");
  // Explicit argument takes precedence over the TensorOptions field.
  return memory_format.has_value() ? memory_format
                                   : options.memory_format_opt();
}
65+
4666
TORCH_API void common_device_check_failure(optional<Device>& common_device, const at::Tensor& tensor, at::CheckedFrom methodName, at::CheckedFrom argName);
4767

4868
inline void check_and_update_common_device(optional<Device>& common_device, const at::Tensor& tensor, at::CheckedFrom methodName, at::CheckedFrom argName) {
Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,14 @@
1-
#include <ATen/core/TensorBody.h>
2-
// Note [Avoiding Include Cycles In Static Dispatch]
3-
// In order to avoid #include cycles in the static dispatch build, we've carefully split out
4-
// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h.
5-
//
6-
// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h.
7-
// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods
8-
// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all
9-
// directly inlined into TensorBody.h.
10-
// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API,
11-
// which include functions that have defaultable optional<Tensor> arguments.
12-
// That requires knowing the full Tensor class definition.
13-
//
14-
// We break the cycle by doing the following:
15-
// - Split out CPUFunction.h into two files: CPUFunctions.h and CPUFunctions_inl.h
16-
// - CPUFunction.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl.,
17-
// - CPUFunctions_inl.h includes everything else
18-
// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class,
19-
// and then it includes CPUFunctions_inl.h.
20-
// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly.
21-
// - This also means that static dispatch build, CPUFunctions.h only needs to
22-
// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h.
23-
${inline_headers_for_nonstatic_build}
1+
// ${generated_comment}
2+
3+
// NB: The implementing C++ file is RegisterDispatchKey.cpp
4+
5+
// TODO: tighten this include
6+
#include <ATen/Functions.h>
7+
8+
namespace at {
9+
namespace ${dispatch_namespace} {
10+
11+
${dispatch_namespaced_declarations}
12+
13+
} // namespace ${dispatch_namespace}
14+
} // namespace at

aten/src/ATen/templates/DispatchKeyFunctions_inl.h

Lines changed: 0 additions & 16 deletions
This file was deleted.
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// ${generated_comment}
2+
3+
#include <array>
4+
5+
#include <ATen/Functions.h>
6+
#include <ATen/Utils.h>
7+
8+
#include <ATen/core/dispatch/Dispatcher.h>
9+
#include <ATen/core/op_registration/adaption.h>
10+
11+
${static_dispatch_extra_headers}
12+
13+
namespace at {
14+
15+
// Convenience overload: variance along a single dimension given as a plain int.
Tensor var(const Tensor& self, int dim) {
  const int64_t dims[] = {dim};
  return at::var(self, IntArrayRef(dims));
}
18+
19+
std::tuple<Tensor, Tensor> var_mean(const Tensor& self, int dim) {
20+
return at::var_mean(self, IntArrayRef{dim});
21+
}
22+
23+
// Convenience overload: standard deviation along one dimension given as an int.
Tensor std(const Tensor& self, int dim) {
  const int64_t dims[] = {dim};
  return at::std(self, IntArrayRef(dims));
}
26+
27+
std::tuple<Tensor, Tensor> std_mean(const Tensor& self, int dim) {
28+
return at::std_mean(self, IntArrayRef{dim});
29+
}
30+
31+
// conv1d overload taking padding as a braced list of ints
// (e.g. at::conv1d(t, w, b, 1, {1})); forwards to the IntArrayRef overload.
at::Tensor conv1d(
    const Tensor& input,
    const Tensor& weight,
    const Tensor& bias,
    IntArrayRef stride,
    std::initializer_list<int64_t> padding_,
    IntArrayRef dilation,
    int64_t groups) {
  return at::conv1d(
      input, weight, bias, stride, IntArrayRef(padding_), dilation, groups);
}
42+
43+
// conv2d overload taking padding as a braced list of ints
// (e.g. at::conv2d(t, w, b, 1, {1, 1})); forwards to the IntArrayRef overload.
at::Tensor conv2d(
    const Tensor& input,
    const Tensor& weight,
    const Tensor& bias,
    IntArrayRef stride,
    std::initializer_list<int64_t> padding_,
    IntArrayRef dilation,
    int64_t groups) {
  return at::conv2d(
      input, weight, bias, stride, IntArrayRef(padding_), dilation, groups);
}
54+
55+
// conv3d overload taking padding as a braced list of ints
// (e.g. at::conv3d(t, w, b, 1, {1, 1, 1})); forwards to the IntArrayRef overload.
at::Tensor conv3d(
    const Tensor& input,
    const Tensor& weight,
    const Tensor& bias,
    IntArrayRef stride,
    std::initializer_list<int64_t> padding_,
    IntArrayRef dilation,
    int64_t groups) {
  return at::conv3d(
      input, weight, bias, stride, IntArrayRef(padding_), dilation, groups);
}
66+
67+
namespace detail {

// Deleter that intentionally does nothing; used when the caller retains
// ownership of the underlying buffer.
void noopDelete(void* /*unused*/) {}

} // namespace detail
72+
73+
// Builds a Tensor wrapping the externally provided memory in `data_`,
// validating the configured options along the way.
Tensor TensorMaker::make_tensor() {
  AutoDispatchBelowADInplaceOrView guard{}; // TODO: Remove.
  tracer::impl::NoTracerDispatchMode tracer_guard{};

  check_size_nonnegative(sizes_);

  TORCH_CHECK_VALUE(
      !deleter_ || !ctx_,
      "The deleter and context arguments are mutually exclusive.");

  // Infer the device from the data pointer when none was given explicitly.
  if (device_ == nullopt) {
    device_ = globalContext().getDeviceFromPtr(data_, opts_.device().type());
  }

  if (opts_.device().has_index()) {
    // clang-format off
    TORCH_CHECK_VALUE(
        opts_.device() == *device_,
        "Specified device ", opts_.device(), " does not match device of data ", *device_);
    // clang-format on
  }

  const std::size_t nbytes = computeStorageSize();

  // A user-supplied deleter wins; otherwise adopt the context's deleter.
  DataPtr data_ptr =
      deleter_ ? makeDataPtrFromDeleter() : makeDataPtrFromContext();

  Storage storage{Storage::use_byte_size_t{}, nbytes, std::move(data_ptr)};

  Tensor tensor = detail::make_tensor<TensorImpl>(
      std::move(storage), opts_.computeDispatchKey(), opts_.dtype());

  // For anything other than the default empty 1-D tensor, install the
  // requested sizes (and strides, when provided).
  if (sizes_.size() != 1 || sizes_[0] != 0) {
    TensorImpl* impl = tensor.unsafeGetTensorImpl();
    if (strides_) {
      impl->set_sizes_and_strides(sizes_, *strides_);
    } else {
      impl->set_sizes_contiguous(sizes_);
    }
  }

  return tensor;
}
121+
122+
std::size_t TensorMaker::computeStorageSize() const noexcept {
123+
std::size_t itemsize = opts_.dtype().itemsize();
124+
125+
if (strides_) {
126+
return detail::computeStorageNbytes(sizes_, *strides_, itemsize);
127+
}
128+
129+
std::size_t size = 1;
130+
for (std::int64_t s : sizes_) {
131+
size *= static_cast<std::size_t>(s);
132+
}
133+
return size * itemsize;
134+
}
135+
136+
// Wraps `data_` in a DataPtr whose release invokes the user-supplied deleter.
inline DataPtr TensorMaker::makeDataPtrFromDeleter() const {
  return InefficientStdFunctionContext::makeDataPtr(data_, deleter_, *device_);
}
139+
140+
// Wraps `data_` in a DataPtr that adopts the context's deleter; releases
// ownership of `ctx_` into the resulting DataPtr.
inline DataPtr TensorMaker::makeDataPtrFromContext() noexcept {
  return DataPtr{data_, ctx_.release(), ctx_.get_deleter(), *device_};
}
143+
144+
// Placeholder sizes used before the real sizes are installed; the rank must
// match the requested memory format (4-D for ChannelsLast, 5-D for
// ChannelsLast3d, otherwise 1-D).
IntArrayRef TensorMaker::makeTempSizes() const noexcept {
  static std::int64_t zeros[5] = {0, 0, 0, 0, 0};
  if (opts_.has_memory_format()) {
    switch (*opts_.memory_format_opt()) {
      case MemoryFormat::ChannelsLast:
        return IntArrayRef(zeros, 4);
      case MemoryFormat::ChannelsLast3d:
        return IntArrayRef(zeros, 5);
      default:
        break;
    }
  }
  return IntArrayRef(zeros, 1);
}
157+
158+
${function_definitions}
159+
160+
} // namespace at

0 commit comments

Comments
 (0)