-
Notifications
You must be signed in to change notification settings - Fork 27.4k
Structured kernel definitions #45277
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c351469
729d277
4265406
d53c573
79a235b
dee36d8
6ade500
fb44a18
cad2cad
3a1a4c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| #pragma once | ||
|
|
||
| #include <ATen/ATen.h> // TODO: improve | ||
| // #include <ATen/NativeFunctions.h> | ||
|
|
||
| namespace at { | ||
|
|
||
| struct TensorMeta { | ||
| DimVector sizes; | ||
ezyang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // TODO: DimVector strides; | ||
| TensorOptions options; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would an explicit constructor for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, hoping to flush this and other inefficiencies out when I'm ready for benchmarking.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have a constructor now. |
||
|
|
||
| TensorMeta(IntArrayRef _sizes, TensorOptions _options) | ||
| : sizes(_sizes), options(_options) {} | ||
| }; | ||
|
|
||
| inline Tensor tensor_from_meta(const TensorMeta& meta) { | ||
| // TODO: eliminate indirection | ||
| return at::empty(meta.sizes, meta.options); | ||
| } | ||
|
|
||
| // Analogous to self.new_empty(sizes) | ||
| inline TensorMeta new_meta(const Tensor& self, IntArrayRef sizes) { | ||
| return TensorMeta(sizes, self.options()); | ||
| } | ||
|
|
||
| } // namespace at | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,47 +1,12 @@ | ||
| #include <ATen/ATen.h> | ||
| #include <ATen/NativeFunctions.h> | ||
| #include <ATen/native/UpSample.h> | ||
| #include <ATen/MetaFunctions.h> | ||
|
|
||
| namespace at { | ||
| namespace native { | ||
| namespace { | ||
|
|
||
| static void upsample_nearest1d_out_cpu_template( | ||
| Tensor& output, | ||
| const Tensor& input, | ||
| IntArrayRef output_size, | ||
| c10::optional<double> scales) { | ||
| TORCH_CHECK( | ||
| output_size.size() == 1, | ||
| "It is expected output_size equals to 1, but got size ", | ||
| output_size.size()); | ||
|
|
||
| int64_t output_width = output_size[0]; | ||
|
|
||
| int64_t nbatch = input.size(0); | ||
| int64_t channels = input.size(1); | ||
| int64_t input_width = input.size(2); | ||
|
|
||
| upsample_1d_shape_check( | ||
| input, | ||
| Tensor(), | ||
| nbatch, | ||
| channels, | ||
| input_width, | ||
| output_width); | ||
| namespace meta { | ||
|
|
||
| output.resize_({nbatch, channels, output_width}); | ||
|
|
||
| AT_ASSERT(input_width > 0 && output_width > 0); | ||
| upsample_nearest1d_kernel(kCPU, output, input, scales); | ||
| } | ||
|
|
||
| static void upsample_nearest1d_backward_out_cpu_template( | ||
| Tensor& grad_input, | ||
| const Tensor& grad_output, | ||
| IntArrayRef output_size, | ||
| IntArrayRef input_size, | ||
| c10::optional<double> scales) { | ||
| static std::array<int64_t, 3> upsample_nearest1d_common_check(IntArrayRef input_size, IntArrayRef output_size) { | ||
| TORCH_CHECK( | ||
| output_size.size() == 1, | ||
| "It is expected output_size equals to 1, but got size ", | ||
|
|
@@ -58,36 +23,50 @@ static void upsample_nearest1d_backward_out_cpu_template( | |
| int64_t channels = input_size[1]; | ||
| int64_t input_width = input_size[2]; | ||
|
|
||
| upsample_1d_shape_check( | ||
| Tensor(), | ||
| grad_output, | ||
| nbatch, | ||
| channels, | ||
| TORCH_CHECK( | ||
| input_width > 0 && output_width > 0, | ||
| "Input and output sizes should be greater than 0, but got input (W: ", | ||
| input_width, | ||
| output_width); | ||
| ") and output (W: ", | ||
| output_width, | ||
| ")"); | ||
|
|
||
| grad_input.resize_({nbatch, channels, input_width}); | ||
| grad_input.zero_(); | ||
| return {nbatch, channels, output_width}; | ||
| } | ||
|
|
||
| upsample_nearest1d_backward_kernel(kCPU, grad_input, grad_output, scales); | ||
| TensorMeta upsample_nearest1d(const Tensor& input, IntArrayRef output_size, c10::optional<double> scales) { | ||
| auto full_output_size = upsample_nearest1d_common_check(input.sizes(), output_size); | ||
|
|
||
| // Allow for empty batch size but not other dimensions | ||
| TORCH_CHECK( | ||
| (input.size(1) != 0 && input.size(2) != 0) && input.dim() == 3, | ||
| "Non-empty 3D data tensor expected but got a tensor with sizes ", | ||
| input.sizes()); | ||
|
|
||
| return new_meta(input, full_output_size); | ||
| } | ||
| } // namespace | ||
|
|
||
| Tensor& upsample_nearest1d_out_cpu( | ||
| Tensor& output, | ||
| const Tensor& input, | ||
| IntArrayRef output_size, | ||
| c10::optional<double> scales) { | ||
| upsample_nearest1d_out_cpu_template(output, input, output_size, scales); | ||
| return output; | ||
| TensorMeta upsample_nearest1d_backward(const Tensor& grad_output, IntArrayRef output_size, IntArrayRef input_size, c10::optional<double> scales) { | ||
| auto full_output_size = upsample_nearest1d_common_check(input_size, output_size); | ||
|
|
||
| check_dim_size(grad_output, 3, 0, full_output_size[0]); | ||
| check_dim_size(grad_output, 3, 1, full_output_size[1]); | ||
| check_dim_size(grad_output, 3, 2, full_output_size[2]); | ||
|
|
||
| return new_meta(grad_output, input_size); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The way you've factored this totally conjures the notion of canned forward and backward utility functions wrapped around an op-specific lambda like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hope there are opportunities for utility functions; need to do more functions to say though. |
||
| } | ||
|
|
||
| Tensor upsample_nearest1d_cpu( | ||
| } // namespace meta | ||
|
|
||
|
|
||
| namespace native { | ||
|
|
||
| Tensor& upsample_nearest1d_out_cpu( | ||
| Tensor& output, | ||
| const Tensor& input, | ||
| IntArrayRef output_size, | ||
| c10::optional<double> scales) { | ||
| auto output = at::empty({0}, input.options()); | ||
| upsample_nearest1d_out_cpu_template(output, input, output_size, scales); | ||
| upsample_nearest1d_kernel(kCPU, output, input, scales); | ||
| return output; | ||
| } | ||
|
|
||
|
|
@@ -97,51 +76,38 @@ Tensor& upsample_nearest1d_backward_out_cpu( | |
| IntArrayRef output_size, | ||
| IntArrayRef input_size, | ||
| c10::optional<double> scales) { | ||
| upsample_nearest1d_backward_out_cpu_template( | ||
| grad_input, grad_output, output_size, input_size, scales); | ||
| return grad_input; | ||
| } | ||
|
|
||
| Tensor upsample_nearest1d_backward_cpu( | ||
| const Tensor& grad_output, | ||
| IntArrayRef output_size, | ||
| IntArrayRef input_size, | ||
| c10::optional<double> scales) { | ||
| auto grad_input = at::zeros(input_size, grad_output.options()); | ||
| upsample_nearest1d_backward_out_cpu_template( | ||
| grad_input, grad_output, output_size, input_size, scales); | ||
| grad_input.zero_(); | ||
| upsample_nearest1d_backward_kernel(kCPU, grad_input, grad_output, scales); | ||
| return grad_input; | ||
| } | ||
|
|
||
| using at::native::upsample::compute_output_size; | ||
| using at::native::upsample::get_scale_value; | ||
|
|
||
| Tensor upsample_nearest1d_cpu( | ||
| // vec variants | ||
|
|
||
| Tensor upsample_nearest1d( | ||
| const Tensor& input, | ||
| c10::optional<IntArrayRef> output_size, | ||
| c10::optional<ArrayRef<double>> scale_factors) { | ||
| auto output = at::empty({0}, input.options()); | ||
| auto osize = compute_output_size(input.sizes(), output_size, scale_factors); | ||
| auto scale_w = get_scale_value(scale_factors, 0); | ||
| upsample_nearest1d_out_cpu_template(output, input, osize, scale_w); | ||
| return output; | ||
| return at::upsample_nearest1d(input, osize, scale_w); | ||
| } | ||
|
|
||
| Tensor upsample_nearest1d_backward_cpu( | ||
| Tensor upsample_nearest1d_backward( | ||
| const Tensor& grad_output, | ||
| c10::optional<IntArrayRef> output_size, | ||
| IntArrayRef input_size, | ||
| c10::optional<ArrayRef<double>> scale_factors) { | ||
| auto osize = compute_output_size(input_size, output_size, scale_factors); | ||
| auto scale_w = get_scale_value(scale_factors, 0); | ||
| auto grad_input = at::zeros(input_size, grad_output.options()); | ||
| upsample_nearest1d_backward_out_cpu_template( | ||
| grad_input, grad_output, osize, input_size, scale_w); | ||
| return grad_input; | ||
| return at::upsample_nearest1d_backward(grad_output, osize, input_size, scale_w); | ||
| } | ||
|
|
||
| DEFINE_DISPATCH(upsample_nearest1d_kernel); | ||
| DEFINE_DISPATCH(upsample_nearest1d_backward_kernel); | ||
|
|
||
| } // namespace native | ||
|
|
||
| } // namespace at | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| #pragma once | ||
|
|
||
| // ${generated_comment} | ||
|
|
||
| #include <ATen/ATen.h> // TODO: improve | ||
| #include <ATen/TensorMeta.h> | ||
|
|
||
| namespace at { | ||
| namespace meta { | ||
|
|
||
| ${declarations} | ||
|
|
||
| } // namespace meta | ||
| } // namespace at |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| from tools.codegen.model import * | ||
| from tools.codegen.api.types import MetaArgument | ||
|
|
||
| import tools.codegen.api.cpp as cpp | ||
| import tools.codegen.api.dispatcher as dispatcher | ||
|
|
||
| from typing import Sequence | ||
| import itertools | ||
|
|
||
| # Follows dispatcher calling convention, but: | ||
| # - Mutable arguments not allowed. Meta functions are always | ||
| # written in functional form. Look at FunctionSchema.signature() | ||
| # - No tensor returns; instead we return a TensorMeta describing | ||
| # the tensor in question | ||
|
|
||
def name(f: FunctionSchema) -> str:
    # Meta functions are registered under the base name only. An empty
    # overload name enforces that the caller already stripped the overload
    # via FunctionSchema.signature() to reach the functional form.
    assert f.name.overload_name == ""
    return str(f.name.name)
|
|
||
def argument_type(a: Argument) -> str:
    # Meta functions are purely functional; a writable argument here means
    # the schema was not normalized through signature() first.
    assert not a.is_write
    # Reuse the dispatcher's C++ type mapping, forced to the immutable form.
    return dispatcher.argumenttype_type(a.type, mutable=False)
|
|
||
def returntype_type(t: Type) -> str:
    # Plain value types (ints, bools, ...) translate exactly as in the C++
    # API, so delegate to the shared mapping first.
    value = cpp.valuetype_type(t)
    if value is not None:
        return value

    # Tensor results are described by a TensorMeta rather than returned as
    # materialized tensors.
    if isinstance(t, BaseType) and t.name == BaseTy.Tensor:
        return 'TensorMeta'
    if isinstance(t, ListType):
        raise NotImplementedError("list returns not supported yet")

    raise AssertionError(f"unrecognized return type {t}")
|
|
||
def return_type(r: Return) -> str:
    # Writable returns imply out= semantics, which meta functions never have.
    assert not r.is_write
    return returntype_type(r.type)
|
|
||
def returns_type(rs: Sequence[Return]) -> str:
    # Render the C++ return type of a meta function: void for no returns,
    # the bare type for one, and a std::tuple for several.
    if not rs:
        return 'void'
    if len(rs) == 1:
        return return_type(rs[0])
    inner = ','.join(return_type(r) for r in rs)
    return f'std::tuple<{inner}>'
|
|
||
def argument(a: Argument) -> MetaArgument:
    # Pair the rendered C++ type with the original schema argument so
    # downstream codegen can recover either view.
    return MetaArgument(type=argument_type(a), name=a.name, argument=a)
|
|
||
def arguments(func: FunctionSchema) -> Sequence[MetaArgument]:
    # out= arguments must already have been stripped (signature() form);
    # positional and kwarg-only arguments are rendered in declaration order.
    assert not func.out_arguments
    return [argument(a) for a in itertools.chain(func.arguments, func.kwarg_only_arguments)]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not related to this PR - just curious what's your thoughts about how it might be evolving in the future, are we going to add more and more TensorImpl fields here, e.g. contiguous, channels_last, etc, which are more "internal" properties?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
strides will subsume contiguous/channels_last, and will be enough to fully specify the output without getting into non dense tensors. There might be more extensibility here when TensorIterator comes in the mix though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or should we consider this class to replace the existing "collection of fields" on TensorImpl?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer to keep them decoupled for now, to make it easier to make representational changes in one or the other.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, we can capture congruences later if they arise naturally (and pay their way)