Skip to content
Closed
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ genrule(
"aten/src/ATen/Functions.h",
"aten/src/ATen/Functions.cpp",
"aten/src/ATen/NativeFunctions.h",
"aten/src/ATen/MetaFunctions.h",
"aten/src/ATen/core/TensorBody.h",
"aten/src/ATen/core/TensorMethods.cpp",
"aten/src/ATen/core/ATenOpList.cpp",
Expand Down
27 changes: 27 additions & 0 deletions aten/src/ATen/TensorMeta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once

#include <ATen/ATen.h> // TODO: improve
// #include <ATen/NativeFunctions.h>

namespace at {

struct TensorMeta {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to this PR - just curious what's your thoughts about how it might be evolving in the future, are we going to add more and more TensorImpl fields here, e.g. contiguous, channels_last, etc, which are more "internal" properties?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

strides will subsume contiguous/channels_last, and will be enough to fully specify the output without getting into non dense tensors. There might be more extensibility here when TensorIterator comes in the mix though.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or should we consider this class to replace the existing "collection of fields" on TensorImpl?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to keep them decoupled for now, to make it easier to make representational changes in one or the other.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, we can capture congruences later if they arise naturally (and pay their way)

DimVector sizes;
// TODO: DimVector strides;
TensorOptions options;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would an explicit constructor for TensorMeta spare us redundant default initialization?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, hoping to flush this and other inefficiencies out when I'm ready for benchmarking.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have a constructor now.


TensorMeta(IntArrayRef _sizes, TensorOptions _options)
: sizes(_sizes), options(_options) {}
};

// Materialize an actual (uninitialized) tensor described by `meta`.
inline Tensor tensor_from_meta(const TensorMeta& meta) {
  // TODO: eliminate indirection
  return at::empty(meta.sizes, meta.options);
}

// Analogous to self.new_empty(sizes): metadata for a fresh tensor with the
// given sizes and self's options (dtype/device/layout carried over).
inline TensorMeta new_meta(const Tensor& self, IntArrayRef sizes) {
  return TensorMeta(sizes, self.options());
}

} // namespace at
126 changes: 46 additions & 80 deletions aten/src/ATen/native/UpSampleNearest1d.cpp
Original file line number Diff line number Diff line change
@@ -1,47 +1,12 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/UpSample.h>
#include <ATen/MetaFunctions.h>

namespace at {
namespace native {
namespace {

static void upsample_nearest1d_out_cpu_template(
Tensor& output,
const Tensor& input,
IntArrayRef output_size,
c10::optional<double> scales) {
TORCH_CHECK(
output_size.size() == 1,
"It is expected output_size equals to 1, but got size ",
output_size.size());

int64_t output_width = output_size[0];

int64_t nbatch = input.size(0);
int64_t channels = input.size(1);
int64_t input_width = input.size(2);

upsample_1d_shape_check(
input,
Tensor(),
nbatch,
channels,
input_width,
output_width);
namespace meta {

output.resize_({nbatch, channels, output_width});

AT_ASSERT(input_width > 0 && output_width > 0);
upsample_nearest1d_kernel(kCPU, output, input, scales);
}

static void upsample_nearest1d_backward_out_cpu_template(
Tensor& grad_input,
const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
c10::optional<double> scales) {
static std::array<int64_t, 3> upsample_nearest1d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
TORCH_CHECK(
output_size.size() == 1,
"It is expected output_size equals to 1, but got size ",
Expand All @@ -58,36 +23,50 @@ static void upsample_nearest1d_backward_out_cpu_template(
int64_t channels = input_size[1];
int64_t input_width = input_size[2];

upsample_1d_shape_check(
Tensor(),
grad_output,
nbatch,
channels,
TORCH_CHECK(
input_width > 0 && output_width > 0,
"Input and output sizes should be greater than 0, but got input (W: ",
input_width,
output_width);
") and output (W: ",
output_width,
")");

grad_input.resize_({nbatch, channels, input_width});
grad_input.zero_();
return {nbatch, channels, output_width};
}

upsample_nearest1d_backward_kernel(kCPU, grad_input, grad_output, scales);
// Meta function for upsample_nearest1d: validates the input and computes the
// output metadata (shape + options) without allocating the output tensor.
TensorMeta upsample_nearest1d(const Tensor& input, IntArrayRef output_size, c10::optional<double> scales) {
  // Allow for empty batch size but not other dimensions.
  // Check the rank first (and rely on && short-circuiting): the size(1)/size(2)
  // accesses below — and the dim-1/dim-2 indexing inside
  // upsample_nearest1d_common_check — would otherwise raise a confusing
  // out-of-range error on a non-3D input instead of this message.
  TORCH_CHECK(
      input.dim() == 3 && input.size(1) != 0 && input.size(2) != 0,
      "Non-empty 3D data tensor expected but got a tensor with sizes ",
      input.sizes());

  auto full_output_size = upsample_nearest1d_common_check(input.sizes(), output_size);

  return new_meta(input, full_output_size);
}
} // namespace

Tensor& upsample_nearest1d_out_cpu(
Tensor& output,
const Tensor& input,
IntArrayRef output_size,
c10::optional<double> scales) {
upsample_nearest1d_out_cpu_template(output, input, output_size, scales);
return output;
// Meta function for upsample_nearest1d_backward: validates grad_output against
// the forward output shape and yields metadata for the (input-shaped) gradient.
TensorMeta upsample_nearest1d_backward(const Tensor& grad_output, IntArrayRef output_size, IntArrayRef input_size, c10::optional<double> scales) {
  auto full_output_size = upsample_nearest1d_common_check(input_size, output_size);

  // grad_output must match the forward output shape in every dimension.
  for (int64_t dim = 0; dim < 3; ++dim) {
    check_dim_size(grad_output, 3, dim, full_output_size[dim]);
  }

  return new_meta(grad_output, input_size);
}

Tensor upsample_nearest1d_cpu(
} // namespace meta


namespace native {

Tensor& upsample_nearest1d_out_cpu(
Tensor& output,
const Tensor& input,
IntArrayRef output_size,
c10::optional<double> scales) {
auto output = at::empty({0}, input.options());
upsample_nearest1d_out_cpu_template(output, input, output_size, scales);
upsample_nearest1d_kernel(kCPU, output, input, scales);
return output;
}

Expand All @@ -97,51 +76,38 @@ Tensor& upsample_nearest1d_backward_out_cpu(
IntArrayRef output_size,
IntArrayRef input_size,
c10::optional<double> scales) {
upsample_nearest1d_backward_out_cpu_template(
grad_input, grad_output, output_size, input_size, scales);
return grad_input;
}

Tensor upsample_nearest1d_backward_cpu(
const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
c10::optional<double> scales) {
auto grad_input = at::zeros(input_size, grad_output.options());
upsample_nearest1d_backward_out_cpu_template(
grad_input, grad_output, output_size, input_size, scales);
grad_input.zero_();
upsample_nearest1d_backward_kernel(kCPU, grad_input, grad_output, scales);
return grad_input;
}

using at::native::upsample::compute_output_size;
using at::native::upsample::get_scale_value;

Tensor upsample_nearest1d_cpu(
// vec variants

Tensor upsample_nearest1d(
const Tensor& input,
c10::optional<IntArrayRef> output_size,
c10::optional<ArrayRef<double>> scale_factors) {
auto output = at::empty({0}, input.options());
auto osize = compute_output_size(input.sizes(), output_size, scale_factors);
auto scale_w = get_scale_value(scale_factors, 0);
upsample_nearest1d_out_cpu_template(output, input, osize, scale_w);
return output;
return at::upsample_nearest1d(input, osize, scale_w);
}

Tensor upsample_nearest1d_backward_cpu(
Tensor upsample_nearest1d_backward(
const Tensor& grad_output,
c10::optional<IntArrayRef> output_size,
IntArrayRef input_size,
c10::optional<ArrayRef<double>> scale_factors) {
auto osize = compute_output_size(input_size, output_size, scale_factors);
auto scale_w = get_scale_value(scale_factors, 0);
auto grad_input = at::zeros(input_size, grad_output.options());
upsample_nearest1d_backward_out_cpu_template(
grad_input, grad_output, osize, input_size, scale_w);
return grad_input;
return at::upsample_nearest1d_backward(grad_output, osize, input_size, scale_w);
}

DEFINE_DISPATCH(upsample_nearest1d_kernel);
DEFINE_DISPATCH(upsample_nearest1d_backward_kernel);

} // namespace native

} // namespace at
16 changes: 6 additions & 10 deletions aten/src/ATen/native/native_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8253,15 +8253,13 @@
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest1d_cpu
CUDA: upsample_nearest1d_cuda
DefaultBackend: upsample_nearest1d

- func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest1d_backward_cpu
CUDA: upsample_nearest1d_backward_cuda
DefaultBackend: upsample_nearest1d_backward

- func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
use_c10_dispatcher: full
Expand Down Expand Up @@ -8401,29 +8399,27 @@

- func: upsample_nearest1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest1d_out_cpu
CUDA: upsample_nearest1d_out_cuda

- func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest1d_cpu
CUDA: upsample_nearest1d_cuda
structured_delegate: upsample_nearest1d.out

- func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest1d_backward_out_cpu
CUDA: upsample_nearest1d_backward_out_cuda

- func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest1d_backward_cpu
CUDA: upsample_nearest1d_backward_cuda
structured_delegate: upsample_nearest1d_backward.grad_input

- func: upsample_nearest2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
Expand Down
14 changes: 14 additions & 0 deletions aten/src/ATen/templates/MetaFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

// ${generated_comment}

// Codegen template for ATen/MetaFunctions.h: the generator substitutes
// ${declarations} with the at::meta:: declarations (functions returning
// TensorMeta) for structured operators.

#include <ATen/ATen.h> // TODO: improve
#include <ATen/TensorMeta.h>

namespace at {
namespace meta {

${declarations}

} // namespace meta
} // namespace at
1 change: 1 addition & 0 deletions aten/src/ATen/templates/RegisterDispatchKey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <c10/core/Allocator.h>
#include <ATen/DeviceGuard.h>
#include <ATen/NativeFunctions.h>
#include <ATen/MetaFunctions.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Utils.h>
#include <ATen/WrapDimUtils.h>
Expand Down
59 changes: 59 additions & 0 deletions tools/codegen/api/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from tools.codegen.model import *
from tools.codegen.api.types import MetaArgument

import tools.codegen.api.cpp as cpp
import tools.codegen.api.dispatcher as dispatcher

from typing import Sequence
import itertools

# Follows dispatcher calling convention, but:
# - Mutable arguments not allowed. Meta functions are always
# written in functional form. Look at FunctionSchema.signature()
# - No tensor returns; instead we return a TensorMeta describing
# the tensor in question

def name(f: FunctionSchema) -> str:
    # Meta functions are generated from the *functional* signature
    # (FunctionSchema.signature()), which strips the overload name;
    # this assert enforces that the caller did so.
    assert f.name.overload_name == ""
    return str(f.name.name)

def argument_type(a: Argument) -> str:
    # Meta functions never take mutable arguments (see module comment),
    # so render the dispatcher type in its immutable form.
    assert not a.is_write
    return dispatcher.argumenttype_type(a.type, mutable=False)

def returntype_type(t: Type) -> str:
    # C++ spelling of a single return type in the meta API.
    # Value types (ints, bools, ...) render as in the C++ API; a Tensor
    # return becomes a TensorMeta describing it.
    r = cpp.valuetype_type(t)
    if r is not None:
        return r
    if isinstance(t, BaseType) and t.name == BaseTy.Tensor:
        return 'TensorMeta'
    if isinstance(t, ListType):
        raise NotImplementedError("list returns not supported yet")
    raise AssertionError(f"unrecognized return type {t}")

def return_type(r: Return) -> str:
    # C++ spelling for one schema Return; mutable (aliasing) returns are
    # not permitted in the meta API.
    assert not r.is_write
    return returntype_type(r.type)

def returns_type(rs: Sequence[Return]) -> str:
    # C++ return type for the whole returns list:
    # void for none, the bare type for one, std::tuple for several.
    if len(rs) == 0:
        return 'void'
    if len(rs) == 1:
        return return_type(rs[0])
    args = ','.join(return_type(r) for r in rs)
    return f'std::tuple<{args}>'

def argument(a: Argument) -> MetaArgument:
    # Pair a schema Argument with its rendered C++ type for the meta API.
    meta_type = argument_type(a)
    return MetaArgument(type=meta_type, name=a.name, argument=a)

def arguments(func: FunctionSchema) -> Sequence[MetaArgument]:
    # Meta functions are functional, so out= arguments must already have
    # been stripped from the schema.
    assert not func.out_arguments
    return [
        argument(a)
        for a in itertools.chain(func.arguments, func.kwarg_only_arguments)
    ]
Loading