Skip to content

Commit aa430e6

Browse files
committed
infra: golden HIR oracle for LOAD_ATTR specializations
Adds a regression oracle for the three LOAD_ATTR specializations (LOAD_ATTR_SLOT, LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_MODULE) that the wiring gate's force_compile harness does not exercise. Without this, HIR divergence introduced when the C emit methods replace their C++ counterparts (Tier 5 conversion, in flight) cannot be detected behaviourally. Components: - Lib/test/test_phoenix_jit_loadattr_golden.py — runs an attr-heavy function in a subprocess with PHOENIX_GOLDEN_CAPTURE=1, asserts that dis(adaptive=True) reports the three specialized opcodes, force_compiles, captures the GOLDEN_HIR_FINAL/COMPILE blocks from stderr, canonicalizes embedded pointer addresses, and diffs against the saved golden. - docs/golden/loadattr_hir.txt — captured HIR (288 lines) at HEAD (2ea6b0a) with C++ baseline still active for these paths. Pointer addresses are canonicalized to 0xPTR so the file is process-stable. Regeneration: PHOENIX_REGEN_GOLDEN=1 ./python -m test test_phoenix_jit_loadattr_golden
1 parent 2ea6b0a commit aa430e6

2 files changed

Lines changed: 539 additions & 0 deletions

File tree

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
"""Phoenix JIT LoadAttr Golden HIR Test.
2+
3+
Regression oracle for the three LOAD_ATTR specializations:
4+
5+
- LOAD_ATTR_SLOT (class with __slots__)
6+
- LOAD_ATTR_INSTANCE_VALUE (regular dict-backed attribute)
7+
- LOAD_ATTR_MODULE (module-level attribute)
8+
9+
The wiring gate's force_compile harness exercises NONE of these paths (verified
10+
by testkeeper 2026-04-21), so HIR divergence introduced when the C emit methods
11+
replace their C++ counterparts cannot be caught behaviourally without an
12+
explicit oracle. This test fills that gap by:
13+
14+
1. Running a self-contained subprocess that warms up an attr-heavy function
15+
past the adaptive interpreter's specialization threshold, asserts that
16+
dis() reports the three specialized opcodes, then force-compiles the
17+
function with PHOENIX_GOLDEN_CAPTURE=1.
18+
2. Capturing the GOLDEN_HIR_FINAL and GOLDEN_HIR_COMPILE blocks from stderr.
19+
3. Canonicalizing pointer addresses, then comparing the result exactly against
20+
docs/golden/loadattr_hir.txt.
21+
22+
To regenerate the golden file (e.g. after an intentional HIR change):
23+
24+
PHOENIX_REGEN_GOLDEN=1 ./python -m test test_phoenix_jit_loadattr_golden
25+
26+
Run with: ./python -m test test_phoenix_jit_loadattr_golden
27+
"""
28+
29+
import os
30+
import re
31+
import subprocess
32+
import sys
33+
import textwrap
34+
import unittest
35+
from pathlib import Path
36+
37+
# Probe for the JIT extension modules. ``_cinderx`` is imported before
# ``cinderjit``; if either import fails the test class below is skipped.
try:
    import _cinderx  # noqa: F401
    import cinderjit  # noqa: F401
except ImportError:
    HAS_JIT = False
else:
    HAS_JIT = True


# REPO_ROOT is the directory two levels above the one containing this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
GOLDEN_PATH = REPO_ROOT.joinpath("docs", "golden", "loadattr_hir.txt")
47+
48+
49+
# Source text of the child-process harness, executed with
# ``sys.executable -c HARNESS_SOURCE``.
#
# Contract with the parent test, as implemented in the text below:
#   * exit status 2 + "SPECIALIZATION_MISSING ..." on stderr when one of the
#     three specialized opcode names is absent from the adaptive disassembly;
#   * exit status 3 + "FORCE_COMPILE_FAILED" when the function is not reported
#     as JIT-compiled after force_compile;
#   * exit status 4 + "RESULT_MISMATCH ..." when the compiled function returns
#     an unexpected value;
#   * "OK" on stdout (and a normal exit) otherwise.
#
# The doubled backslashes (``\\n``) are deliberate: this outer literal is not
# raw, so they become ``\n`` escape sequences in the harness source itself.
HARNESS_SOURCE = textwrap.dedent(
    """\
    import dis
    import io
    import sys

    # _cinderx must be imported first; importing it registers the cinderjit
    # module entry in sys.modules. Without this, ``import cinderjit`` fails.
    import _cinderx  # noqa: F401
    import cinderjit


    class Pt:
        __slots__ = ("x", "y")

        def __init__(self, x, y):
            self.x = x
            self.y = y


    class Inst:
        def __init__(self, v):
            self.v = v


    def attr_probe(p, i):
        a = p.x  # LOAD_ATTR_SLOT after warmup
        b = i.v  # LOAD_ATTR_INSTANCE_VALUE after warmup
        c = sys.maxsize  # LOAD_ATTR_MODULE after warmup
        return a + b + c


    def main():
        p = Pt(1, 2)
        i = Inst(3)

        # Warmup so the adaptive interpreter specializes each LOAD_ATTR site.
        # ADAPTIVE_WARMUP_VALUE is 1 in CPython 3.12; 64 iterations is well past
        # the threshold for all three sites.
        for _ in range(64):
            attr_probe(p, i)

        # adaptive=True is REQUIRED — without it, dis prints the un-adapted
        # bytecode and the LOAD_ATTR_SLOT/MODULE/INSTANCE_VALUE specializations
        # are invisible even when the adaptive interpreter has applied them.
        buf = io.StringIO()
        dis.dis(attr_probe, file=buf, adaptive=True)
        disasm = buf.getvalue()

        for needed in (
            "LOAD_ATTR_SLOT",
            "LOAD_ATTR_INSTANCE_VALUE",
            "LOAD_ATTR_MODULE",
        ):
            if needed not in disasm:
                sys.stderr.write(
                    "SPECIALIZATION_MISSING {}\\n{}\\n".format(needed, disasm)
                )
                sys.exit(2)

        cinderjit.force_compile(attr_probe)
        if not cinderjit.is_jit_compiled(attr_probe):
            sys.stderr.write("FORCE_COMPILE_FAILED\\n")
            sys.exit(3)

        # Sanity: the compiled function must still produce the expected value.
        expected = 1 + 3 + sys.maxsize
        got = attr_probe(p, i)
        if got != expected:
            sys.stderr.write(
                "RESULT_MISMATCH expected={} got={}\\n".format(expected, got)
            )
            sys.exit(4)

        sys.stdout.write("OK\\n")


    if __name__ == "__main__":
        main()
    """
)
130+
131+
132+
# Embedded pointer addresses (e.g. `GuardIs<0x7fd75bd8e7a0>`) are
133+
# process-dependent — they vary between runs because they identify Python
134+
# objects allocated by the host process. Canonicalize them so the golden file
135+
# stays stable.
136+
_PTR_RE = re.compile(r"0x[0-9a-fA-F]{6,}")
137+
138+
139+
def _canonicalize(text: str) -> str:
140+
return _PTR_RE.sub("0xPTR", text)
141+
142+
143+
def _extract_blocks(stderr_text: str) -> str:
144+
"""Concatenate the HIR_FINAL + HIR_COMPILE blocks for ``attr_probe``.
145+
146+
Output format matches docs/golden/loadattr_hir.txt:
147+
148+
GOLDEN_HIR_FINAL <fullname>
149+
<body>
150+
END_GOLDEN_HIR_FINAL
151+
GOLDEN_HIR_COMPILE <fullname>
152+
<body>
153+
END_GOLDEN_HIR_COMPILE
154+
"""
155+
pattern = re.compile(
156+
r"^GOLDEN_HIR_(?P<kind>FINAL|COMPILE) (?P<name>[^\n]+)\n"
157+
r"(?P<body>.*?)\n"
158+
r"END_GOLDEN_HIR_(?P=kind)\n",
159+
re.MULTILINE | re.DOTALL,
160+
)
161+
162+
chunks = []
163+
for match in pattern.finditer(stderr_text):
164+
if "attr_probe" not in match.group("name"):
165+
continue
166+
chunks.append(
167+
"GOLDEN_HIR_{kind} {name}\n{body}\nEND_GOLDEN_HIR_{kind}".format(
168+
kind=match.group("kind"),
169+
name=match.group("name"),
170+
body=match.group("body"),
171+
)
172+
)
173+
return "\n".join(chunks) + "\n" if chunks else ""
174+
175+
176+
@unittest.skipUnless(HAS_JIT, "requires JIT")
class TestLoadAttrGolden(unittest.TestCase):
    """Golden-HIR oracle for the three LOAD_ATTR specializations."""

    def _run_harness(self) -> str:
        """Run ``HARNESS_SOURCE`` in a child interpreter and return its stderr.

        The child inherits the current environment plus
        ``PHOENIX_GOLDEN_CAPTURE=1`` and ``PYTHONUNBUFFERED=1``. The test
        fails if the child exits non-zero or does not print ``OK`` on stdout.
        """
        env = dict(os.environ)
        env["PHOENIX_GOLDEN_CAPTURE"] = "1"
        # Force unbuffered stderr so the GOLDEN_HIR_* blocks land before exit.
        env["PYTHONUNBUFFERED"] = "1"

        proc = subprocess.run(
            [sys.executable, "-c", HARNESS_SOURCE],
            env=env,
            capture_output=True,
            text=True,
            timeout=120,
        )
        self.assertEqual(
            proc.returncode,
            0,
            msg=(
                "harness exited {rc}\nstdout:\n{out}\nstderr (last 80 lines):\n{err}"
            ).format(
                rc=proc.returncode,
                out=proc.stdout,
                # Only the tail is shown to keep the failure message readable.
                err="\n".join(proc.stderr.splitlines()[-80:]),
            ),
        )
        self.assertIn("OK", proc.stdout, "harness did not report OK")
        return proc.stderr

    def test_loadattr_hir_matches_golden(self):
        """Compare the canonicalized captured HIR with the saved golden file.

        With ``PHOENIX_REGEN_GOLDEN=1`` in the environment the golden file is
        rewritten from the captured output and the test is reported as
        skipped instead of compared.
        """
        stderr_text = self._run_harness()
        captured = _canonicalize(_extract_blocks(stderr_text))
        self.assertTrue(
            captured,
            "no GOLDEN_HIR_* blocks for attr_probe found in harness stderr",
        )

        if os.environ.get("PHOENIX_REGEN_GOLDEN") == "1":
            # The target directory may not exist yet (e.g. first capture).
            GOLDEN_PATH.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 so the file does not depend on the host locale.
            GOLDEN_PATH.write_text(captured, encoding="utf-8")
            self.skipTest(
                "regenerated {}; rerun without PHOENIX_REGEN_GOLDEN".format(
                    GOLDEN_PATH
                )
            )

        self.assertTrue(
            GOLDEN_PATH.exists(),
            "golden file missing: {}".format(GOLDEN_PATH),
        )
        # Canonicalize the golden side too, so a golden file that still holds
        # raw pointer addresses compares equal.
        expected = _canonicalize(GOLDEN_PATH.read_text(encoding="utf-8"))

        if captured != expected:
            # Surface a precise diff so divergence is debuggable.
            import difflib

            diff = "".join(
                difflib.unified_diff(
                    expected.splitlines(keepends=True),
                    captured.splitlines(keepends=True),
                    fromfile=str(GOLDEN_PATH),
                    tofile="captured",
                    n=3,
                )
            )
            self.fail(
                "LoadAttr HIR diverged from golden — a LOAD_ATTR specialization "
                "emit method changed observable HIR. Inspect the diff and, if "
                "intentional, regenerate via PHOENIX_REGEN_GOLDEN=1.\n\n"
                # Cap the diff so a large divergence does not flood the report.
                + diff[:8000]
            )
248+
249+
250+
# Allow running this file directly as a script via unittest's CLI entry point.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)