Merge branch 'main' into refactoring/5523-random-horizontal-flip

pmeier · web-flow · commit 52dc45250678 · 2022-03-14T22:56:38.000+01:00
diff --git a/README.rst b/README.rst
@@ -21,7 +21,9 @@ supported Python versions.
 +--------------------------+--------------------------+---------------------------------+
 | ``torch``                | ``torchvision``          | ``python``                      |
 +==========================+==========================+=================================+
-| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.7``, ``<=3.9``            |
+| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.7``, ``<=3.10``           |
++--------------------------+--------------------------+---------------------------------+
+| ``1.11.0``               | ``0.12.0``               | ``>=3.7``, ``<=3.10``           |
 +--------------------------+--------------------------+---------------------------------+
 | ``1.10.2``               | ``0.11.3``               | ``>=3.6``, ``<=3.9``            |
 +--------------------------+--------------------------+---------------------------------+
diff --git a/references/detection/transforms.py b/references/detection/transforms.py
@@ -1,4 +1,4 @@
-from typing import List, Tuple, Dict, Optional
+from typing import List, Tuple, Dict, Optional, Union
 
 import torch
 import torchvision
@@ -326,3 +326,114 @@ def forward(
                 )
 
         return image, target
+
+
+class FixedSizeCrop(nn.Module):
+    """Crop and/or pad an image (and its detection target) to a fixed ``(height, width)``.
+
+    ``forward`` first crops every dimension that exceeds the requested size at a random
+    offset, then pads the right/bottom edges of every dimension that is still too small.
+    When a ``target`` dict is given, its ``"boxes"``, ``"labels"`` and optional
+    ``"masks"`` entries are updated to match.
+    """
+
+    def __init__(self, size, fill=0, padding_mode="constant"):
+        """Store the output size and the padding configuration.
+
+        Args:
+            size: Requested output size; passed through ``T._setup_size`` and then read
+                as ``(height, width)``.
+            fill: Fill value forwarded to ``F.pad`` when padding the image.
+            padding_mode: Padding mode forwarded to ``F.pad`` when padding the image.
+        """
+        super().__init__()
+        size = tuple(T._setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))
+        self.crop_height = size[0]
+        self.crop_width = size[1]
+        self.fill = fill  # TODO: Fill is currently respected only on PIL. Apply tensor patch.
+        self.padding_mode = padding_mode
+
+    def _pad(self, img, target, padding):
+        """Pad ``img`` and shift/pad the annotations in ``target`` accordingly.
+
+        ``padding`` may be an int or a sequence of 1, 2 or 4 ints; it is expanded below
+        to ``[left, top, right, bottom]``. Returns the ``(img, target)`` pair.
+        """
+        # Taken from the functional_tensor.py pad
+        if isinstance(padding, int):
+            pad_left = pad_right = pad_top = pad_bottom = padding
+        elif len(padding) == 1:
+            pad_left = pad_right = pad_top = pad_bottom = padding[0]
+        elif len(padding) == 2:
+            pad_left = pad_right = padding[0]
+            pad_top = pad_bottom = padding[1]
+        else:
+            pad_left = padding[0]
+            pad_top = padding[1]
+            pad_right = padding[2]
+            pad_bottom = padding[3]
+
+        padding = [pad_left, pad_top, pad_right, pad_bottom]
+        img = F.pad(img, padding, self.fill, self.padding_mode)
+        if target is not None:
+            # The box tensor is modified in place: even columns are shifted by the left
+            # padding and odd columns by the top padding.
+            # NOTE(review): this presumes an (x1, y1, x2, y2) box layout - confirm.
+            target["boxes"][:, 0::2] += pad_left
+            target["boxes"][:, 1::2] += pad_top
+            if "masks" in target:
+                # Masks are always padded with constant zeros, independent of self.fill.
+                target["masks"] = F.pad(target["masks"], padding, 0, "constant")
+
+        return img, target
+
+    def _crop(self, img, target, top, left, height, width):
+        """Crop ``img`` and translate, clamp and filter the annotations in ``target``.
+
+        Boxes are shifted by the crop origin, clamped to the crop window, and removed
+        (together with their labels and masks) when they no longer have positive
+        width and height. Returns the ``(img, target)`` pair.
+        """
+        img = F.crop(img, top, left, height, width)
+        if target is not None:
+            # All four operations below mutate the original box tensor in place.
+            boxes = target["boxes"]
+            boxes[:, 0::2] -= left
+            boxes[:, 1::2] -= top
+            boxes[:, 0::2].clamp_(min=0, max=width)
+            boxes[:, 1::2].clamp_(min=0, max=height)
+
+            # Keep only boxes whose column 0 < column 2 and column 1 < column 3 after clamping.
+            is_valid = (boxes[:, 0] < boxes[:, 2]) & (boxes[:, 1] < boxes[:, 3])
+
+            target["boxes"] = boxes[is_valid]
+            # "labels" is read unconditionally, so a target without it raises KeyError.
+            target["labels"] = target["labels"][is_valid]
+            if "masks" in target:
+                target["masks"] = F.crop(target["masks"][is_valid], top, left, height, width)
+
+        return img, target
+
+    def forward(self, img, target=None):
+        """Return ``(img, target)`` cropped and/or padded to the configured size."""
+        _, height, width = F.get_dimensions(img)
+        new_height = min(height, self.crop_height)
+        new_width = min(width, self.crop_width)
+
+        # Crop only when at least one dimension is larger than requested.
+        if new_height != height or new_width != width:
+            offset_height = max(height - self.crop_height, 0)
+            offset_width = max(width - self.crop_width, 0)
+
+            # A single random fraction scales both offsets, so top and left are not
+            # drawn independently.
+            # NOTE(review): presumably intentional - confirm before changing.
+            r = torch.rand(1)
+            top = int(offset_height * r)
+            left = int(offset_width * r)
+
+            img, target = self._crop(img, target, top, left, new_height, new_width)
+
+        # Whatever is still missing is added on the right and bottom edges only.
+        pad_bottom = max(self.crop_height - new_height, 0)
+        pad_right = max(self.crop_width - new_width, 0)
+        if pad_bottom != 0 or pad_right != 0:
+            img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom])
+
+        return img, target
+
+
+class RandomShortestSize(nn.Module):
+    """Resize an image so its shorter side matches a randomly picked target length.
+
+    One entry of ``min_size`` is drawn per call. The scale factor is the smaller of
+    "shorter side -> drawn value" and "longer side -> ``max_size``", so the longer side
+    never ends up above ``max_size``. Boxes and masks in ``target`` are rescaled too.
+    """
+
+    def __init__(
+        self,
+        min_size: Union[List[int], Tuple[int], int],
+        max_size: int,
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+    ):
+        super().__init__()
+        # Always keep the candidates as a list, even when a single int is given.
+        if isinstance(min_size, int):
+            min_size = [min_size]
+        self.min_size = list(min_size)
+        self.max_size = max_size
+        self.interpolation = interpolation
+
+    def forward(
+        self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+        _, orig_height, orig_width = F.get_dimensions(image)
+        short_side = min(orig_height, orig_width)
+        long_side = max(orig_height, orig_width)
+
+        # Draw which candidate length the shorter side should be scaled to.
+        choice = torch.randint(len(self.min_size), (1,)).item()
+        wanted_short_side = self.min_size[choice]
+        scale = min(wanted_short_side / short_side, self.max_size / long_side)
+
+        new_width = int(orig_width * scale)
+        new_height = int(orig_height * scale)
+        output_size = [new_height, new_width]
+
+        image = F.resize(image, output_size, interpolation=self.interpolation)
+
+        if target is None:
+            return image, target
+
+        # Boxes are rescaled in place with the effective (post-truncation) ratios.
+        target["boxes"][:, 0::2] *= new_width / orig_width
+        target["boxes"][:, 1::2] *= new_height / orig_height
+        if "masks" in target:
+            target["masks"] = F.resize(target["masks"], output_size, interpolation=InterpolationMode.NEAREST)
+
+        return image, target
diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
@@ -19,8 +19,8 @@
 from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file
 from torch.nn.functional import one_hot
 from torch.testing import make_tensor as _make_tensor
+from torchvision._utils import sequence_to_str
 from torchvision.prototype.datasets._api import find
-from torchvision.prototype.utils._internal import sequence_to_str
 
 make_tensor = functools.partial(_make_tensor, device="cpu")
 make_scalar = functools.partial(make_tensor, ())
@@ -1329,20 +1329,20 @@ def cub200(info, root, config):
 
 @register_mock
 def eurosat(info, root, config):
-    data_folder = pathlib.Path(root, "eurosat", "2750")
+    data_folder = root / "2750"
     data_folder.mkdir(parents=True)
 
     num_examples_per_class = 3
-    classes = ("AnnualCrop", "Forest")
-    for cls in classes:
+    categories = ["AnnualCrop", "Forest"]
+    for category in categories:
         create_image_folder(
             root=data_folder,
-            name=cls,
-            file_name_fn=lambda idx: f"{cls}_{idx}.jpg",
+            name=category,
+            file_name_fn=lambda idx: f"{category}_{idx + 1}.jpg",
             num_examples=num_examples_per_class,
         )
     make_zip(root, "EuroSAT.zip", data_folder)
-    return len(classes) * num_examples_per_class
+    return len(categories) * num_examples_per_class
 
 
 @register_mock
diff --git a/test/test_internal_utils.py b/test/test_internal_utils.py
@@ -1,5 +1,5 @@
 import pytest
-from torchvision.prototype.utils._internal import sequence_to_str
+from torchvision._utils import sequence_to_str
 
 
 @pytest.mark.parametrize(
diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py
@@ -10,8 +10,8 @@
 from torch.utils.data.datapipes.iter.grouping import ShardingFilterIterDataPipe as ShardingFilter
 from torch.utils.data.graph import traverse
 from torchdata.datapipes.iter import IterDataPipe, Shuffler
+from torchvision._utils import sequence_to_str
 from torchvision.prototype import transforms, datasets
-from torchvision.prototype.utils._internal import sequence_to_str
 
 
 assert_samples_equal = functools.partial(
@@ -53,6 +53,8 @@ def test_sample(self, test_home, dataset_mock, config):
 
         try:
             sample = next(iter(dataset))
+        except StopIteration:
+            raise AssertionError("Unable to draw any sample.") from None
         except Exception as error:
             raise AssertionError("Drawing a sample raised the error above.") from error
 
diff --git a/torchvision/_utils.py b/torchvision/_utils.py
@@ -1,5 +1,5 @@
 import enum
-from typing import TypeVar, Type
+from typing import Sequence, TypeVar, Type
 
 T = TypeVar("T", bound=enum.Enum)
 
@@ -18,3 +18,15 @@ def from_str(self: Type[T], member: str) -> T:  # type: ignore[misc]
 
 class StrEnum(enum.Enum, metaclass=StrEnumMeta):
     pass
+
+
+def sequence_to_str(seq: Sequence, separate_last: str = "") -> str:
+    """Render ``seq`` as a comma-separated list of single-quoted items.
+
+    An empty sequence gives ``""`` and a single item gives just that quoted item.
+    ``separate_last`` is inserted verbatim right before the final item (for example
+    ``"and "``); with exactly two items and a non-empty ``separate_last`` the comma
+    before the final item is omitted.
+    """
+    if not seq:
+        return ""
+
+    last = f"'{seq[-1]}'"
+    if len(seq) == 1:
+        return last
+
+    leading = ", ".join("'" + str(item) + "'" for item in seq[:-1])
+    comma = "" if separate_last and len(seq) == 2 else ","
+
+    return f"{leading}{comma} {separate_last}{last}"
diff --git a/torchvision/prototype/datasets/utils/_dataset.py b/torchvision/prototype/datasets/utils/_dataset.py
@@ -7,7 +7,8 @@
 from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Collection
 
 from torch.utils.data import IterDataPipe
-from torchvision.prototype.utils._internal import FrozenBunch, make_repr, add_suggestion, sequence_to_str
+from torchvision._utils import sequence_to_str
+from torchvision.prototype.utils._internal import FrozenBunch, make_repr, add_suggestion
 
 from .._home import use_sharded_dataset
 from ._internal import BUILTIN_DIR, _make_sharded_datapipe
diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py
@@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
         from torchvision.prototype.transforms.functional import convert_bounding_box_format
 
         if isinstance(format, str):
-            format = BoundingBoxFormat[format]
+            format = BoundingBoxFormat.from_str(format.upper())
 
         return BoundingBox.new_like(
             self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py
@@ -15,6 +15,7 @@
     TenCrop,
     BatchMultiCrop,
     RandomHorizontalFlip,
+    RandomZoomOut,
 )
 from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
 from ._misc import Identity, Normalize, ToDtype, Lambda
diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py
@@ -270,3 +270,88 @@ def apply_recursively(obj: Any) -> Any:
                 return obj
 
         return apply_recursively(inputs if len(inputs) > 1 else inputs[0])
+
+
+class RandomZoomOut(Transform):
+    """Randomly place the input on a larger canvas filled with ``fill`` ("zoom out").
+
+    With probability ``p`` the image is padded on all four sides so that the canvas is
+    ``r`` times the original size, where ``r`` is drawn uniformly from ``side_range``,
+    and the image is positioned at a random location on that canvas. Bounding boxes
+    are shifted and their ``image_size`` is enlarged; other inputs pass through.
+
+    Args:
+        fill: Canvas value, either one number for all channels or one per channel.
+            ``None`` is treated as ``0.0``.
+        side_range: ``(min, max)`` range of the canvas-to-image size ratio.
+        p: Probability of applying the transform.
+
+    Raises:
+        ValueError: If ``side_range[0]`` is below ``1.0`` or above ``side_range[1]``.
+    """
+
+    def __init__(
+        self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5
+    ) -> None:
+        super().__init__()
+
+        if fill is None:
+            fill = 0.0
+        self.fill = fill
+
+        self.side_range = side_range
+        if side_range[0] < 1.0 or side_range[0] > side_range[1]:
+            raise ValueError(f"Invalid canvas side range provided {side_range}.")
+
+        self.p = p
+
+    def _get_params(self, sample: Any) -> Dict[str, Any]:
+        """Draw the canvas size and image position; return ``padding`` and per-channel ``fill``."""
+        image = query_image(sample)
+        orig_c, orig_h, orig_w = get_image_dimensions(image)
+
+        # Canvas-to-image ratio, uniform in [side_range[0], side_range[1]).
+        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
+        canvas_width = int(orig_w * r)
+        canvas_height = int(orig_h * r)
+
+        # Independent draws for the horizontal and vertical placement of the image.
+        r = torch.rand(2)
+        left = int((canvas_width - orig_w) * r[0])
+        top = int((canvas_height - orig_h) * r[1])
+        right = canvas_width - (left + orig_w)
+        bottom = canvas_height - (top + orig_h)
+        padding = [left, top, right, bottom]
+
+        # A scalar fill is repeated once per image channel.
+        fill = self.fill
+        if not isinstance(fill, collections.abc.Sequence):
+            fill = [fill] * orig_c
+
+        return dict(padding=padding, fill=fill)
+
+    def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
+        """Apply the sampled padding to a tensor image, PIL image or bounding box."""
+        if isinstance(input, features.Image) or is_simple_tensor(input):
+            # PyTorch's pad supports only integers on fill. So we need to overwrite the colour
+            output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant")
+
+            left, top, right, bottom = params["padding"]
+            # Shaped (C, 1, 1) so that it broadcasts over the padded border regions.
+            fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).view(-1, 1, 1)
+
+            # The `> 0` guards matter: a `-0:` slice would select the whole axis.
+            if top > 0:
+                output[..., :top, :] = fill
+            if left > 0:
+                output[..., :, :left] = fill
+            if bottom > 0:
+                output[..., -bottom:, :] = fill
+            if right > 0:
+                output[..., :, -right:] = fill
+
+            if isinstance(input, features.Image):
+                output = features.Image.new_like(input, output)
+
+            return output
+        elif isinstance(input, PIL.Image.Image):
+            return F.pad_image_pil(
+                input,
+                params["padding"],
+                # Fill values are cast to int unless the image mode is "F".
+                fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]),
+                padding_mode="constant",
+            )
+        elif isinstance(input, features.BoundingBox):
+            output = F.pad_bounding_box(input, params["padding"], format=input.format)
+
+            # The box now lives on the enlarged canvas, so its image_size grows too.
+            left, top, right, bottom = params["padding"]
+            height, width = input.image_size
+            height += top + bottom
+            width += left + right
+
+            return features.BoundingBox.new_like(input, output, image_size=(height, width))
+        else:
+            return input
+
+    def forward(self, *inputs: Any) -> Any:
+        """Return the sample unchanged with probability ``1 - p``, otherwise transform it."""
+        sample = inputs if len(inputs) > 1 else inputs[0]
+        if torch.rand(1) >= self.p:
+            return sample
+
+        return super().forward(sample)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
@@ -55,6 +55,7 @@
     rotate_image_pil,
     pad_image_tensor,
     pad_image_pil,
+    pad_bounding_box,
     crop_image_tensor,
     crop_image_pil,
     perspective_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
@@ -31,7 +31,7 @@ def horizontal_flip_bounding_box(
     bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]]
 
     return convert_bounding_box_format(
-        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
     ).view(shape)
 
 
@@ -214,6 +214,26 @@ def rotate_image_pil(
 pad_image_tensor = _FT.pad
 pad_image_pil = _FP.pad
 
+
+def pad_bounding_box(
+    bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat
+) -> torch.Tensor:
+    """Shift bounding boxes by the left/top amount of ``padding``.
+
+    The boxes are converted to XYXY, translated, and converted back to ``format``;
+    the result has the same shape as the input.
+    """
+    # NOTE(review): the unpacking presumes the helper returns (left, right, top, bottom) - confirm.
+    pad_left, _, pad_top, _ = _FT._parse_pad_padding(padding)
+
+    original_shape = bounding_box.shape
+
+    xyxy = convert_bounding_box_format(
+        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
+    ).view(-1, 4)
+
+    # Even columns receive the horizontal offset, odd columns the vertical one.
+    xyxy[:, 0::2] += pad_left
+    xyxy[:, 1::2] += pad_top
+
+    shifted = convert_bounding_box_format(
+        xyxy, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
+    )
+    return shifted.view(original_shape)
+
+
 crop_image_tensor = _FT.crop
 crop_image_pil = _FP.crop
 
diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py
diff --git a/torchvision/prototype/utils/_internal.py b/torchvision/prototype/utils/_internal.py
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@`
`15`	`15`	`TenCrop,`
`16`	`16`	`BatchMultiCrop,`
`17`	`17`	`RandomHorizontalFlip,`
	`18`	`+ RandomZoomOut,`
`18`	`19`	`)`
`19`	`20`	`from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace`
`20`	`21`	`from ._misc import Identity, Normalize, ToDtype, Lambda`