Commit afabfc4

Update on "Refactor layout constraint selection logic"
Significant cleanup of the code (it has gotten bad over time). This PR:
- does some deduplication
- cleans up the "lazy registration path", which seems to never get hit anymore...

Test Plan:
- tests + CI

[ghstack-poisoned]
2 parents b415282 + 4e80e2b commit afabfc4
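
Reviewer note (not part of the diff): the net effect is that an op's layout constraint is now read from a single tag, chosen by priority, instead of ad-hoc checks spread across graph.py and lowering.py. Below is a minimal sketch of how an op author opts in, using the same lib.define(..., tags=...) pattern as the test changes in this commit; the "mylib" namespace and "my_add" op are hypothetical.

import torch

lib = torch.library.Library("mylib", "FRAGMENT")  # hypothetical test namespace
lib.define(
    "my_add(Tensor x, Tensor y) -> Tensor",
    # Inductor now picks the layout constraint straight from this tag.
    tags=[torch._C.Tag.needs_exact_strides],
)

@torch.library.impl(lib, "my_add", "CompositeExplicitAutograd")
def my_add(x, y):
    return x + y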

File tree

7 files changed: +52 -61 lines changed

- test/inductor/test_triton_kernels.py
- torch/_inductor/config.py
- torch/_inductor/graph.py
- torch/_inductor/lowering.py
- torch/_library/custom_ops.py
- torch/_library/utils.py
- torch/fx/experimental/proxy_tensor.py

test/inductor/test_triton_kernels.py

Lines changed: 2 additions & 0 deletions
@@ -3451,6 +3451,7 @@ def impl2(x):
 
         lib.define(
             "add_op(Tensor x, Tensor y) -> Tensor",
+            tags=[torch._C.Tag.needs_exact_strides],
         )
 
         def impl(x, y):
@@ -3464,6 +3465,7 @@ def meta(x, y):
 
         lib.define(
             "add_out_op(Tensor x, Tensor y, Tensor(a!) out) -> ()",
+            tags=[torch._C.Tag.needs_exact_strides],
         )
 
         def impl_out(x, y, out):

torch/_inductor/config.py

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ def prologue_fusion_enabled() -> bool:
 # If the custom op does not have a layout constraint tag already
 # then we assume the following applies.
 custom_op_default_layout_constraint: Literal[
-    "needs_fixed_stride_order", "flexible_layout"
+    "needs_exact_strides", "needs_fixed_stride_order", "flexible_layout"
 ] = "needs_fixed_stride_order"
 
 # The default layout constraint for user-defined triton kernels.
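
Reviewer note (not part of the diff): since "needs_exact_strides" is now a legal value here, the treatment of untagged custom ops can be tightened per run; a minimal sketch of setting the config, assuming the usual module-attribute style.

import torch._inductor.config as inductor_config

# Untagged custom ops are then lowered as if tagged needs_exact_strides
# (the shipped default stays "needs_fixed_stride_order").
inductor_config.custom_op_default_layout_constraint = "needs_exact_strides"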

torch/_inductor/graph.py

Lines changed: 21 additions & 27 deletions
@@ -80,11 +80,13 @@
     FALLBACK_ALLOW_LIST,
     fallback_handler,
     fallback_node_due_to_unsupported_type,
+    get_layout_constraint_tag,
     lowerings,
     make_fallback,
     maybe_layout_constraints,
     needs_realized_inputs,
     require_contiguous,
+    tag_to_layout_constraint,
     unsupported_output_tensor,
 )
 from .runtime import autotune_cache
@@ -1149,34 +1151,26 @@ def call_function(self, target: Callable, args: Any, kwargs: dict[str, Any]) ->
                 error.operator_str(target, args, kwargs),
             )
 
-            # use contiguous unless the (custom) op asks something else
-            # explicitly
-            if torch._library.utils.needs_exact_strides(target):
-                decided_constraint = constrain_to_fake_tensors
-            elif torch._C.Tag.needs_fixed_stride_order in target.tags:
-                decided_constraint = constrain_to_fx_strides  # type: ignore[assignment]
-            elif torch._C.Tag.flexible_layout in target.tags:
-                decided_constraint = None  # type: ignore[assignment]
-            else:
-                # If there are no tags, we do different things depending on
-                # if it's a builtin ATen/prim ops or custom ops.
-                # For ATen ops, we require_contiguous to fix https://github.com/pytorch/pytorch/issues/140452
-                # For custom ops, we constrain_to_fx_strides to maintain the
-                # behavior of PyTorch 2.5: https://github.com/pytorch/pytorch/issues/148356
+            tag = get_layout_constraint_tag(target, with_default=False)
+            if (
+                tag is None
+                and torch._library.utils.is_builtin(target)
+                and self.is_backward
+            ):
+                # for implicit fallback ATen ops during backward, if there
+                # is no layout constraint tag, we conservatively require contiguous
+                # input since some eager kernels do not
+                # support non-contiguous inputs. Otherwise they may silently cause
+                # accuracy problems. Check https://github.com/pytorch/pytorch/issues/140452
+                # We only do this For ATen ops and for backward.
                 #
-                # For ATen ops, only apply the constraint for backward
-                # ops since fwd ops should work for any strides.
-                if torch._library.utils.is_builtin(target) and self.is_backward:
-                    decided_constraint = require_contiguous  # type: ignore[assignment]
-                else:
-                    # maybe_layout_constraints will decide the layout constraint for the custom op
-                    # lazily
-                    decided_constraint = None  # type: ignore[assignment]
-
-            # for implicitly fallback ops, we conservatively requires
-            # contiguous input since some eager kernels does not
-            # support non-contiguous inputs. They may silently cause
-            # accuracy problems. Check https://github.com/pytorch/pytorch/issues/140452
+                # TODO: should really switch to "needs_fixed_stride" constraint on these
+                # and identify them one by one.
+                decided_constraint = require_contiguous  # type: ignore[assignment]
+            else:
+                tag = get_layout_constraint_tag(target, with_default=True)
+                decided_constraint = tag_to_layout_constraint(tag)
+
             make_fallback(target, layout_constraint=decided_constraint)
 
         elif get_decompositions([target]):
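
Reviewer note (not part of the diff): the branch above boils down to one conservative special case plus a tag-to-constraint lookup. A paraphrased sketch, not the literal implementation, using the helpers imported from torch._inductor.lowering:

import torch
from torch._inductor.lowering import (
    get_layout_constraint_tag,
    require_contiguous,
    tag_to_layout_constraint,
)


def pick_fallback_constraint(target: torch._ops.OpOverload, is_backward: bool):
    # Untagged builtin ATen op in a backward graph: stay conservative.
    if (
        get_layout_constraint_tag(target, with_default=False) is None
        and torch._library.utils.is_builtin(target)
        and is_backward
    ):
        return require_contiguous
    # Everything else: default the tag, then map it to a constraint.
    tag = get_layout_constraint_tag(target, with_default=True)
    return tag_to_layout_constraint(tag)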

torch/_inductor/lowering.py

Lines changed: 24 additions & 21 deletions
@@ -156,37 +156,40 @@ def maybe_layout_constraints(fn: Callable[..., Any]) -> Optional[Callable[..., A
         return None
     if fn in _maybe_layout_constraints:
         return _maybe_layout_constraints[fn]
-    # OpOverload with custom lowerings override tag-based layout constraints
-    if fn in lowerings:
-        _maybe_layout_constraints[fn] = None
-        return None
-    # We lazily register tag-based layout constraints.
-
-    def handle_layout_constraint_tag(tag):
-        if tag is torch._C.Tag.needs_fixed_stride_order:
-            _maybe_layout_constraints[fn] = constrain_to_fx_strides
-            return _maybe_layout_constraints[fn]
-        elif tag is torch._C.Tag.flexible_layout:
-            _maybe_layout_constraints[fn] = None
-            return None
-        else:
-            raise AssertionError(f"Unknown layout constraint tag: {tag}")
+    return None
+
 
-    tag = get_layout_constraint_tag(fn)
-    return handle_layout_constraint_tag(tag)
+tags_by_priority = [
+    torch._C.Tag.needs_exact_strides,
+    torch._C.Tag.needs_fixed_stride_order,
+    torch._C.Tag.flexible_layout,
+]
 
 
-def get_layout_constraint_tag(fn):
+def get_layout_constraint_tag(fn, *, with_default=True):
     tags_by_priority = [
+        torch._C.Tag.needs_exact_strides,
         torch._C.Tag.needs_fixed_stride_order,
         torch._C.Tag.flexible_layout,
     ]
     for tag in tags_by_priority:
         if tag in fn.tags:
             return tag
-    if torch._library.utils.is_builtin(fn):
-        return torch._C.Tag.flexible_layout
-    return getattr(torch._C.Tag, config.custom_op_default_layout_constraint)
+    if with_default:
+        if torch._library.utils.is_builtin(fn):
+            return torch._C.Tag.flexible_layout
+        return getattr(torch._C.Tag, config.custom_op_default_layout_constraint)
+    return None
+
+
+def tag_to_layout_constraint(tag):
+    if tag == torch._C.Tag.needs_exact_strides:
+        return constrain_to_fake_tensors
+    if tag == torch._C.Tag.needs_fixed_stride_order:
+        return constrain_to_fx_strides
+    if tag == torch._C.Tag.flexible_layout:
+        return None
+    raise AssertionError(f"Unknown layout constraint tag: {tag}")
 
 
 def assert_nyi(cond, msg):
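
Reviewer note (not part of the diff): a minimal sketch of how the split helpers compose, assuming torch.ops.aten.mm.default carries no layout-constraint tag (any untagged builtin behaves the same).

import torch
from torch._inductor.lowering import get_layout_constraint_tag, tag_to_layout_constraint

op = torch.ops.aten.mm.default  # assumed to be an untagged builtin

# with_default=False lets callers (e.g. graph.py above) distinguish "no tag at all".
print(get_layout_constraint_tag(op, with_default=False))  # expected: None

# with_default=True applies the fallback: flexible_layout for builtins,
# config.custom_op_default_layout_constraint for custom ops.
tag = get_layout_constraint_tag(op, with_default=True)
print(tag, tag_to_layout_constraint(tag))  # expected: flexible_layout and None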

torch/_library/custom_ops.py

Lines changed: 1 addition & 1 deletion
@@ -615,7 +615,7 @@ def _register_to_dispatcher(self, tags: Sequence[_C.Tag]) -> None:
 
         lib.define(
             schema_str,
-            tags=[_C.Tag.pt2_compliant_tag, *tags],
+            tags=[_C.Tag.pt2_compliant_tag, _C.Tag.needs_fixed_stride_order, *tags],
         )
         self._opoverload = utils.lookup_op(self._qualname)
 
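
Reviewer note (not part of the diff): every op registered through torch.library.custom_op now carries needs_fixed_stride_order out of the box, which tag_to_layout_constraint above maps to constrain_to_fx_strides. A minimal sketch with a hypothetical op to illustrate checking the tag:

import torch


@torch.library.custom_op("mylib::scale", mutates_args=())
def scale(x: torch.Tensor, factor: float) -> torch.Tensor:
    return x * factor


op = torch.ops.mylib.scale.default
# The dispatcher registration now adds needs_fixed_stride_order by default.
print(torch._C.Tag.needs_fixed_stride_order in op.tags)  # expected: True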

torch/_library/utils.py

Lines changed: 0 additions & 10 deletions
@@ -215,16 +215,6 @@ def zip_schema(
     return
 
 
-def needs_exact_strides(op: torch._ops.OpOverload):
-    if torch._C.Tag.needs_exact_strides in op.tags:
-        return True
-    if torch._C.Tag.flexible_layout in op.tags:
-        return False
-    if torch._C.Tag.needs_fixed_stride_order in op.tags:
-        return False
-    return not is_builtin(op)
-
-
 def hop_schema_from_fx_node(node):
     from torchgen.gen_schema_utils import FunctionSchemaGen
 
torch/fx/experimental/proxy_tensor.py

Lines changed: 3 additions & 1 deletion
@@ -1169,7 +1169,9 @@ def _should_save_eager_input_vals(
             f"propagate the FakeTensor vals. Please file an issue."
         )
     if isinstance(target, torch._ops.OpOverload):
-        return torch._library.utils.needs_exact_strides(target)
+        from torch._inductor.lowering import get_layout_constraint_tag
+
+        return get_layout_constraint_tag(target)
     return False
 
 