Commit 574e808

xuhdev authored and facebook-github-bot committed
Add a bitwise NOT operator for integer and Boolean types (CUDA).
Summary: Pull Request resolved: #22320
Test Plan: Imported from OSS
Differential Revision: D16183578
Pulled By: colesbury
fbshipit-source-id: 2f72cce5e10fd637be1ac87e1bbfe0937a661034
1 parent e2dc1fc commit 574e808
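
As a quick illustration of what this commit enables, here is a minimal usage sketch of the new CUDA path (assuming a CUDA-enabled build; the printed values are illustrative but follow two's-complement semantics):

    import torch

    # Integer dtypes: bitwise NOT flips every bit, so ~x == -x - 1 in two's complement.
    a = torch.tensor([0, 1, 2], dtype=torch.int8, device='cuda')
    print(a.bitwise_not())        # tensor([-1, -2, -3], device='cuda:0', dtype=torch.int8)

    # Boolean dtype: bitwise NOT acts as logical negation.
    b = torch.tensor([True, False], device='cuda')
    print(torch.bitwise_not(b))   # tensor([False,  True], device='cuda:0')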

File tree: 5 files changed (+57 −18 lines)


aten/src/ATen/native/UnaryOps.cpp

Lines changed: 20 additions & 1 deletion
@@ -36,6 +36,26 @@
 namespace at {
 namespace native {
 
+Tensor bitwise_not(const Tensor& self) {
+  Tensor result = at::empty({0}, self.options());
+  return at::bitwise_not_out(result, self);
+}
+
+Tensor& bitwise_not_(Tensor& self) {
+  return at::bitwise_not_out(self, self);
+}
+
+Tensor& bitwise_not_out(Tensor& result, const Tensor& self) {
+  checkBackend("bitwise_not", result, self.type().backend());
+  assert_no_internal_overlap(result, "bitwise_not");
+  auto iter = TensorIterator::unary_op(result, self);
+  bitwise_not_stub(iter->device_type(), *iter);
+#ifdef BUILD_NAMEDTENSOR
+  at::namedinference::propagate_names(result, self);
+#endif
+  return result;
+}
+
 Tensor clamp(const Tensor& self, optional<Scalar> min, optional<Scalar> max) {
   Tensor result = at::empty({0}, self.options());
   return clamp_out(result, self, min, max);

@@ -167,7 +187,6 @@ IMPLEMENT_UNARY_OP_VEC(abs)
 IMPLEMENT_UNARY_OP_VEC(acos)
 IMPLEMENT_UNARY_OP_VEC(asin)
 IMPLEMENT_UNARY_OP_VEC(atan)
-IMPLEMENT_UNARY_OP_VEC(bitwise_not)
 IMPLEMENT_UNARY_OP_VEC(ceil)
 IMPLEMENT_UNARY_OP_VEC(cos)
 IMPLEMENT_UNARY_OP_VEC(cosh)
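
The three entry points added above correspond to the three call forms the tests exercise: bitwise_not allocates a new result, bitwise_not_ rewrites its input, and bitwise_not_out writes into a caller-provided tensor, with all three funnelling through bitwise_not_stub. A small sketch of the Python-side behaviour (illustrative; CPU or CUDA tensors work alike once this commit is in):

    import torch

    a = torch.arange(4, dtype=torch.int32)
    out = torch.empty(0, dtype=torch.int32)

    r = a.bitwise_not()               # new tensor
    torch.bitwise_not(a, out=out)     # out= variant, fills the preallocated `out`
    a.bitwise_not_()                  # in-place, mutates `a`

    assert torch.equal(r, out) and torch.equal(r, a)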

aten/src/ATen/native/cuda/UnaryOpsKernel.cu

Lines changed: 16 additions & 1 deletion
@@ -1,14 +1,28 @@
+#include <limits>
 #include <ATen/native/UnaryOps.h>
 #include <ATen/native/cuda/Loops.cuh>
 #include <ATen/Context.h>
 #include <ATen/Dispatch.h>
 #include <ATen/native/cuda/Loops.cuh>
 #include <ATen/native/DispatchStub.h>
 #include <ATen/native/TensorIterator.h>
-#include <limits>
 
 namespace at { namespace native {
 
+void bitwise_not_kernel_cuda(TensorIterator& iter) {
+  if (iter.dtype() == ScalarType::Bool) {
+    gpu_kernel(iter, []GPU_LAMBDA(bool a) {
+      return !a;
+    });
+  } else {
+    AT_DISPATCH_INTEGRAL_TYPES(iter.dtype(), "bitwise_not_cuda", [&]() {
+      gpu_kernel(iter, []GPU_LAMBDA(scalar_t a) -> scalar_t {
+        return ~a;
+      });
+    });
+  }
+}
+
 template <typename scalar_t>
 void fill_kernel_impl(TensorIterator& iter, Scalar value_scalar) {
   auto value = value_scalar.to<scalar_t>();

@@ -24,5 +38,6 @@ static void fill_kernel_cuda(TensorIterator& iter, Scalar value) {
 }
 
 REGISTER_DISPATCH(fill_stub, &fill_kernel_cuda);
+REGISTER_DISPATCH(bitwise_not_stub, &bitwise_not_kernel_cuda);
 
 }}
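
Note that the kernel handles torch.bool separately: booleans are negated logically with !, while integral dtypes go through AT_DISPATCH_INTEGRAL_TYPES and use the bit-flipping ~. A short sketch of the observable difference (CPU tensors shown for brevity; the CUDA kernel added here follows the same rule):

    import torch

    x = torch.tensor([True, False])
    print(torch.bitwise_not(x))   # tensor([False,  True])  -- logical NOT, stays 0/1

    y = torch.tensor([1, 0], dtype=torch.uint8)
    print(torch.bitwise_not(y))   # tensor([254, 255], dtype=torch.uint8)  -- all bits flipped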

aten/src/ATen/native/native_functions.yaml

Lines changed: 2 additions & 3 deletions
@@ -362,12 +362,11 @@
 
 - func: bitwise_not_(Tensor(a!) self) -> Tensor(a!)
   variants: method
-  dispatch:
-    CPU: _bitwise_not__cpu
 
 - func: bitwise_not(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU: _bitwise_not_out_cpu
+    CPU: bitwise_not_out
+    CUDA: bitwise_not_out
 
 - func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
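
With the backend-specific _bitwise_not_out_cpu kernel gone, both CPU and CUDA now dispatch to the shared bitwise_not_out, which picks the device kernel via bitwise_not_stub. A quick parity check one might run (assuming a CUDA-enabled build):

    import torch

    cpu = torch.arange(127, dtype=torch.int16)
    assert torch.equal(cpu.bitwise_not(), cpu.cuda().bitwise_not().cpu())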

test/test_cuda.py

Lines changed: 3 additions & 0 deletions
@@ -1088,6 +1088,9 @@ def test_type_conversions_same_gpu(self):
     def test_neg(self):
         _TestTorchMixin._test_neg(self, lambda t: t.cuda())
 
+    def test_bitwise_not(self):
+        _TestTorchMixin._test_bitwise_not(self, 'cuda')
+
     def test_isinf(self):
         _TestTorchMixin._test_isinf(self, lambda t: t.cuda())
test/test_torch.py

Lines changed: 16 additions & 13 deletions
@@ -1748,40 +1748,43 @@ def _test_neg(self, cast):
     def test_neg(self):
         self._test_neg(self, lambda t: t)
 
-    def test_bitwise_not(self):
-        res = 0xffff - torch.arange(127, dtype=torch.int8)
-        for t in (torch.BoolTensor,
-                  torch.ByteTensor, torch.LongTensor, torch.IntTensor, torch.ShortTensor, torch.CharTensor):
-            if t == torch.BoolTensor:
-                a = torch.tensor([True, False])
-                expected_res = torch.tensor([False, True])
+    @staticmethod
+    def _test_bitwise_not(self, device):
+        res = 0xffff - torch.arange(127, dtype=torch.int8, device=device)
+        for dtype in (torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
+            if dtype == torch.bool:
+                a = torch.tensor([True, False], device=device)
+                expected_res = torch.tensor([False, True], device=device)
             else:
-                a = torch.arange(127, dtype=t.dtype)
-                expected_res = res.type(t)
+                a = torch.arange(127, dtype=dtype, device=device)
+                expected_res = res.type(dtype)
             # new tensor
             self.assertEqual(expected_res, a.bitwise_not())
             # out
-            b = t()
+            b = torch.empty(0, dtype=dtype, device=device)
             torch.bitwise_not(a, out=b)
             self.assertEqual(expected_res, b)
             # in-place
            a.bitwise_not_()
             self.assertEqual(expected_res, a)
 
         # test exceptions
-        for t in(torch.HalfTensor, torch.FloatTensor, torch.DoubleTensor):
-            a = torch.zeros(10, dtype=t.dtype)
+        for dtype in(torch.half, torch.float, torch.double):
+            a = torch.zeros(10, dtype=dtype, device=device)
             # new tensor
             with self.assertRaises(RuntimeError):
                 a.bitwise_not()
             # out
-            b = t()
+            b = torch.empty(0, dtype=dtype, device=device)
             with self.assertRaises(RuntimeError):
                 torch.bitwise_not(a, out=b)
             # in-place
             with self.assertRaises(RuntimeError):
                 a.bitwise_not_()
 
+    def test_bitwise_not(self):
+        self._test_bitwise_not(self, 'cpu')
+
     def test_threshold(self):
         for dtype in torch.testing.get_all_math_dtypes('cpu'):
             if dtype != torch.uint8 and dtype != torch.float16: