Skip to content

Commit 8b7b935

Browse files
committed
builder: emitUnpackSequence → C (99/144 → 100/144 = 69.4%)
Convert HIRBuilder::emitUnpackSequence (UNPACK_SEQUENCE handler with specialized-opcode dispatch + 5-block + multi-path type narrowing) to a thin delegation stub. New C function hir_builder_emit_unpack_sequence_c (178 lines, 7 phases) handles the largest non-exception multi-block conversion in the remaining set. Tier 5 milestone: crossed the 100/144 = 69.4% line. 4 emit methods remain (emitInlineExceptionMatch, emitCallExceptionHandler, emitAnyCall plus any I haven't enumerated). W25-defer-aware A1 design (theologian chat 2026-04-22 14:30:30Z pre-audit): ZERO new bridges. Pre-audit bridge-count gate per supervisor chat 14:16:55Z policy (>1 new bridges → HALT for Option B reconsideration); this method passed at 0 ≤ 1. 7 phases per pre-audit: P0: specialized-opcode dispatch (UNPACK_SEQUENCE_LIST → GuardType(TListExact); _TUPLE/_TWO_TUPLE → GuardType(TTupleExact)) Skipped if specialized_op = -1 (passed from C++ stub when jit_get_config()->specialized_opcodes is false) P1: deopt_path setup with frame copy BEFORE pop, Snapshot+Deopt with setGuiltyReg(seq) + setDescr("UNPACK_SEQUENCE") P2: 4 block allocations (fast_path, list_check_path, list_fast_path, tuple_fast_path) + list_mem temp + POP seq P3: 3 mutually-exclusive type-narrowing paths via hir_type_is_subtype static-narrow checks (TTupleExact / TListExact / runtime CondBranchCheckType chain) P4: tuple_fast_path — LoadConst(ob_item offset) + LoadFieldAddress P5: list_fast_path — LoadField(ob_item, TCPtr) P6: fast_path size check — LoadVarObjectSize + LoadConst(target_size) + PrimitiveCompare(Equal) + RE-ALLOC fast_path_extract_loop (distinct from P2's fast_path per pitfall PB) + CondBranch P7: item extraction loop — for idx = oparg-1 down to 0, LoadConst idx + LoadArrayItem + push to stack Pitfalls addressed inline: PA: deopt_path frame_state_copy BEFORE pop (preserves pre-pop stack with seq for interpreter resume on deopt) PB: fast_path RE-ALLOCATED at P6 distinct from P2's initial fast_path (semantically "fast path 
1" vs "fast path 2", named fast_path_extract_loop in C body for clarity) PC: emitGuardType in-place SSA-rename: GuardType(seq, type, seq) with seq as both input and output operand (matches emitLoadMethodOrAttrSuper precedent push 52) PD: Py_GIL_DISABLED branches DROPPED for 3.12-only project (3.13+ would need re-introduction; documented inline in C body) PE: phx_frame_state_destroy on deopt_tc at function end (RAII → manual) C++ stub passes (tc, current_func_, this, oparg, baseOffset.value(), specialized_op) — 6 args. CFG& cfg unused (C uses hir_cfg_alloc_block(func)). specialized_op = -1 sentinel signals C body to skip P0 dispatch entirely. Carrying-cost validation extending to n=3 zero-bridge methods (push 56 1-bridge + push 57 0-bridge + this 0-bridge). A1 stub-heavy strategy holds across the 122-line largest-multi-block conversion in remaining 4. testkeeper pre-commit verify (chat 2026-04-22 14:35:08Z): JIT_BUILD_EXIT=0, full --clean BUILD_EXIT=0, 7/7 phoenix-jit tests PASS at HEAD+uncommitted. W26 gate-hardening not triggered (clean build path). ZERO new bridges held.
1 parent 6c154a9 commit 8b7b935

2 files changed

Lines changed: 196 additions & 118 deletions

File tree

Python/jit/hir/builder.cpp

Lines changed: 18 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -4753,127 +4753,27 @@ void HIRBuilder::emitUnpackEx(
47534753
static_cast<void*>(&tc), static_cast<void*>(current_func_), bc_instr.oparg());
47544754
}
47554755

4756+
extern "C" void hir_builder_emit_unpack_sequence_c(
4757+
void* tc, void* func, void* builder,
4758+
int oparg, int bc_offset, int specialized_op);
4759+
47564760
void HIRBuilder::emitUnpackSequence(
4757-
CFG& cfg,
4761+
CFG& /*cfg*/,
47584762
TranslationContext& tc,
47594763
const jit::BytecodeInstruction& bc_instr) {
4760-
PhxPtrArray& stack = tc.frame.stack;
4761-
Register* seq = static_cast<Register*>(stack.data[stack.count - 1]);
4762-
4763-
if (jit_get_config()->specialized_opcodes) {
4764-
// Bug 6 fix: ensure dominating Snapshot for specialised-opcode GuardType
4765-
tc.emitSnapshot();
4766-
switch (bc_instr.specializedOpcode()) {
4767-
case UNPACK_SEQUENCE_LIST:
4768-
tc.emitGuardType(seq, TListExact, seq, tc.frame);
4769-
break;
4770-
case UNPACK_SEQUENCE_TUPLE:
4771-
case UNPACK_SEQUENCE_TWO_TUPLE:
4772-
tc.emitGuardType(seq, TTupleExact, seq, tc.frame);
4773-
break;
4774-
default:
4775-
break;
4776-
}
4777-
}
4778-
4779-
TranslationContext deopt_path{cfg.AllocateBlock(), tc.frame};
4780-
deopt_path.frame.cur_instr_offs = bc_instr.baseOffset();
4781-
deopt_path.emitSnapshot();
4782-
auto* deopt = static_cast<DeoptBase*>(deopt_path.emitDeopt());
4783-
deopt->setGuiltyReg(seq);
4784-
deopt->setDescr("UNPACK_SEQUENCE");
4785-
4786-
BasicBlock* fast_path = cfg.AllocateBlock();
4787-
BasicBlock* list_check_path = cfg.AllocateBlock();
4788-
BasicBlock* list_fast_path = cfg.AllocateBlock();
4789-
BasicBlock* tuple_fast_path = cfg.AllocateBlock();
4790-
Register* list_mem = temps_.AllocateStack();
4791-
static_cast<Register*>(phx_ptr_arr_pop(&stack));
4792-
4793-
// TODO: The manual type checks and branches should go away once we get
4794-
// PGO support to be able to optimize to known types.
4795-
4796-
//---
4797-
// +-main------------------------------+ +-tuple_fast_path------+
4798-
// | CondBranchCheckType (TTupleExact) |-truthy->| LoadConst (ob_item) |
4799-
// +-----------------------------------+ | LoadFieldAddress |
4800-
// | | Branch |--+
4801-
// falsy +----------------------+ |
4802-
// | |
4803-
// v |
4804-
// +-list_check_path------------------+ +-list_fast_path------+ |
4805-
// | CondBranchCheckType (TListExact) |-truthy->| LoadField (ob_item) | |
4806-
// +----------------------------------+ | Branch |----+
4807-
// | +---------------------+ |
4808-
// falsy |
4809-
// | +-fast_path---------+ |
4810-
// | | LoadVarObjectSize |<-----+
4811-
// v | LoadConst |
4812-
// +-deopt_path-+ | PrimitiveCompare |
4813-
// | Deopt |<----------falsy---------------| CondBranch |------+
4814-
// +------------+ +-------------------+ |
4815-
// |
4816-
// +-fast_path-----+ |
4817-
// | LoadConst |<-truthy--+
4818-
// | LoadArrayItem |
4819-
// +---------------+
4820-
//---
4821-
4822-
if (seq->isA(TTupleExact)) {
4823-
tc.emitBranch(tuple_fast_path);
4824-
} else if (seq->isA(TListExact)) {
4825-
// TODO(T255264577). Enable this again. See P2169677587.
4826-
#ifdef Py_GIL_DISABLED
4827-
tc.emitBranch(deopt_path.block);
4828-
#else
4829-
tc.emitBranch(list_fast_path);
4830-
#endif
4831-
} else {
4832-
tc.emitCondBranchCheckType(
4833-
seq, TTupleExact, tuple_fast_path, list_check_path);
4834-
4835-
tc.block = list_check_path;
4836-
// TODO(T255264577). Enable this again. See P2169677587.
4837-
#ifdef Py_GIL_DISABLED
4838-
tc.emitBranch(deopt_path.block);
4839-
#else
4840-
tc.emitCondBranchCheckType(
4841-
seq, TListExact, list_fast_path, deopt_path.block);
4842-
#endif
4843-
}
4844-
4845-
tc.block = tuple_fast_path;
4846-
Register* offset_reg = temps_.AllocateStack();
4847-
tc.emitLoadConst(
4848-
offset_reg, Type::fromCInt(offsetof(PyTupleObject, ob_item), TCInt64));
4849-
tc.emitLoadFieldAddress(list_mem, seq, offset_reg);
4850-
tc.emitBranch(fast_path);
4851-
4852-
tc.block = list_fast_path;
4853-
tc.emitLoadField(
4854-
list_mem, seq, "ob_item", offsetof(PyListObject, ob_item), TCPtr);
4855-
tc.emitBranch(fast_path);
4856-
4857-
tc.block = fast_path;
4858-
4859-
Register* seq_size = temps_.AllocateStack();
4860-
Register* target_size = temps_.AllocateStack();
4861-
Register* is_equal = temps_.AllocateStack();
4862-
tc.emitLoadVarObjectSize(seq_size, seq);
4863-
tc.emitLoadConst(target_size, Type::fromCInt(bc_instr.oparg(), TCInt64));
4864-
tc.emitPrimitiveCompare(
4865-
is_equal, PrimitiveCompareOp::kEqual, seq_size, target_size);
4866-
fast_path = cfg.AllocateBlock();
4867-
tc.emitCondBranch(is_equal, fast_path, deopt_path.block);
4868-
tc.block = fast_path;
4869-
4870-
Register* idx_reg = temps_.AllocateStack();
4871-
for (int idx = bc_instr.oparg() - 1; idx >= 0; --idx) {
4872-
Register* item = temps_.AllocateStack();
4873-
tc.emitLoadConst(idx_reg, Type::fromCInt(idx, TCInt64));
4874-
tc.emitLoadArrayItem(item, list_mem, idx_reg, seq, 0, TObject);
4875-
phx_ptr_arr_push(&stack, item);
4876-
}
4764+
// CFG& cfg unused — C body uses hir_cfg_alloc_block(func) instead.
4765+
// specialized_op = -1 signals C body to skip the P0 dispatch entirely
4766+
// (matches jit_get_config()->specialized_opcodes==false in C++ original).
4767+
int specialized_op = jit_get_config()->specialized_opcodes
4768+
? bc_instr.specializedOpcode()
4769+
: -1;
4770+
hir_builder_emit_unpack_sequence_c(
4771+
static_cast<void*>(&tc),
4772+
static_cast<void*>(current_func_),
4773+
static_cast<void*>(this),
4774+
bc_instr.oparg(),
4775+
bc_instr.baseOffset().value(),
4776+
specialized_op);
48774777
}
48784778

48794779
extern "C" void hir_builder_emit_setup_finally_c(void *tc, int handler_off);

Python/jit/hir/builder_emit_c.c

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2836,3 +2836,181 @@ void hir_builder_emit_load_method_static_c(
28362836
phx_ptr_arr_push(&tc->frame.stack, self);
28372837
}
28382838

2839+
/* emitUnpackSequence — UNPACK_SEQUENCE handler. Most-complex 7-phase
2840+
* multi-block conversion remaining (122-line C++ source). Mirrors C++
2841+
* HIRBuilder::emitUnpackSequence @ builder.cpp:4759.
2842+
*
2843+
* Theologian pre-audit (chat 2026-04-22 14:30:30Z): 7 phases, 22
2844+
* invariants, 5 pitfalls. ZERO new bridges per W25-defer A1 design.
2845+
*
2846+
* 5 BLOCKS allocated upfront (deopt_path, fast_path_1, list_check_path,
2847+
* list_fast_path, tuple_fast_path) + 1 RE-ALLOC (fast_path_2 inside P6
2848+
* — distinct from fast_path_1 per pitfall PB).
2849+
*
2850+
* 3 mutually-exclusive type-narrowing paths (P3):
2851+
* isA(TTupleExact) — static narrow → Branch(tuple_fast_path)
2852+
* isA(TListExact) — static narrow → Branch(list_fast_path)
2853+
* else — runtime CondBranchCheckType chain
2854+
*
2855+
* Pitfalls addressed inline:
2856+
* (PA) deopt_path frame_state_copy BEFORE pop (preserves pre-pop stack
2857+
* with seq for interpreter resume on deopt)
2858+
* (PB) fast_path RE-ALLOCATED at P6 (distinct from initial alloc at
2859+
* P2 — semantically "fast path 1" vs "fast path 2"; named
2860+
* fast_path_extract_loop in C body for clarity)
2861+
* (PC) emitGuardType in-place SSA-rename: GuardType(seq, type, seq)
2862+
* (PD) Py_GIL_DISABLED branches DROPPED for 3.12 (3.13+ would need
2863+
* re-introduction)
2864+
* (PE) phx_frame_state_destroy on deopt_path TC at function end
2865+
*
2866+
* specialized_op = -1 means jit_get_config()->specialized_opcodes was
2867+
* disabled (C++ stub passes -1 to skip P0). Other values are opcode
2868+
* integers per opcode.h. */
2869+
extern void *hir_c_create_load_var_object_size_reg(void *dst, void *src);
2870+
extern void *hir_c_create_load_array_item_reg(
2871+
void *dst, void *arr, void *idx, void *container,
2872+
intptr_t offset, HirType type);
2873+
extern void *hir_c_create_load_field_address_reg(
2874+
void *dst, void *object, void *offset);
2875+
extern void *hir_c_create_primitive_compare(
2876+
void *dst, int32_t op, void *left, void *right);
2877+
extern void hir_c_set_guilty_reg(void *instr, void *reg);
2878+
extern void hir_c_set_descr(void *instr, const char *descr);
2879+
extern void *hir_c_create_load_const(void *reg, HirType type);
2880+
extern void *hir_c_create_guard_type_reg(void *dst, HirType target, void *src);
2881+
2882+
void hir_builder_emit_unpack_sequence_c(
2883+
PhxTranslationContext *tc,
2884+
void *func,
2885+
void *builder,
2886+
int oparg,
2887+
int bc_offset,
2888+
int specialized_op) {
2889+
/* P0: PEEK seq (do not pop yet — see PA). */
2890+
void *seq = tc->frame.stack.data[tc->frame.stack.count - 1];
2891+
2892+
/* P0 specialized-opcode dispatch (skipped if stub passed -1). */
2893+
if (specialized_op >= 0) {
2894+
phx_tc_emit(tc, hir_c_create_snapshot(&tc->frame));
2895+
switch (specialized_op) {
2896+
case UNPACK_SEQUENCE_LIST: {
2897+
HirType t_list = HIR_TYPE_LISTEXACT;
2898+
/* (PC) in-place SSA rename: seq is both input and output. */
2899+
void *guard = hir_c_create_guard_type_reg(seq, t_list, seq);
2900+
hir_deopt_set_frame_state(guard, &tc->frame);
2901+
phx_tc_emit(tc, guard);
2902+
break;
2903+
}
2904+
case UNPACK_SEQUENCE_TUPLE:
2905+
case UNPACK_SEQUENCE_TWO_TUPLE: {
2906+
HirType t_tuple = HIR_TYPE_TUPLEEXACT;
2907+
void *guard = hir_c_create_guard_type_reg(seq, t_tuple, seq);
2908+
hir_deopt_set_frame_state(guard, &tc->frame);
2909+
phx_tc_emit(tc, guard);
2910+
break;
2911+
}
2912+
default:
2913+
break;
2914+
}
2915+
}
2916+
2917+
/* P1: deopt_path setup. (PA) frame_state_copy BEFORE pop. */
2918+
void *deopt_block = hir_cfg_alloc_block(func);
2919+
PhxTranslationContext deopt_tc;
2920+
deopt_tc.block = deopt_block;
2921+
phx_frame_state_copy(&deopt_tc.frame, &tc->frame);
2922+
deopt_tc.frame.cur_instr_offs = bc_offset;
2923+
phx_tc_emit(&deopt_tc, hir_c_create_snapshot(&deopt_tc.frame));
2924+
void *deopt_instr = hir_c_create_deopt();
2925+
phx_tc_emit(&deopt_tc, deopt_instr);
2926+
hir_c_set_guilty_reg(deopt_instr, seq);
2927+
hir_c_set_descr(deopt_instr, "UNPACK_SEQUENCE");
2928+
2929+
/* P2: block allocations + pop. */
2930+
void *fast_path = hir_cfg_alloc_block(func);
2931+
void *list_check_path = hir_cfg_alloc_block(func);
2932+
void *list_fast_path = hir_cfg_alloc_block(func);
2933+
void *tuple_fast_path = hir_cfg_alloc_block(func);
2934+
void *list_mem = hir_builder_temps_alloc_stack(builder);
2935+
/* Now pop seq from main tc.stack (deopt_tc.frame already preserved
2936+
* the pre-pop state per PA). */
2937+
phx_ptr_arr_pop(&tc->frame.stack);
2938+
2939+
/* P3: 3 mutually-exclusive type-narrowing paths. */
2940+
HirType seq_type = hir_register_type(seq);
2941+
HirType t_tuple_exact = HIR_TYPE_TUPLEEXACT;
2942+
HirType t_list_exact = HIR_TYPE_LISTEXACT;
2943+
if (hir_type_is_subtype(seq_type, t_tuple_exact)) {
2944+
phx_tc_emit(tc, hir_c_create_branch_cpp(tuple_fast_path));
2945+
} else if (hir_type_is_subtype(seq_type, t_list_exact)) {
2946+
/* (PD) Py_GIL_DISABLED branch dropped for 3.12 (3.13+ would
2947+
* Branch(deopt_block) instead). */
2948+
phx_tc_emit(tc, hir_c_create_branch_cpp(list_fast_path));
2949+
} else {
2950+
phx_tc_emit(tc, hir_c_create_cond_branch_check_type_cpp(
2951+
seq, t_tuple_exact, tuple_fast_path, list_check_path));
2952+
tc->block = list_check_path;
2953+
/* (PD) same: 3.12 always uses list_fast_path. */
2954+
phx_tc_emit(tc, hir_c_create_cond_branch_check_type_cpp(
2955+
seq, t_list_exact, list_fast_path, deopt_block));
2956+
}
2957+
2958+
/* P4: tuple_fast_path emission. */
2959+
tc->block = tuple_fast_path;
2960+
{
2961+
void *offset_reg = hir_builder_temps_alloc_stack(builder);
2962+
HirType t_cint64 = (HirType)HIR_TYPE_CINT64;
2963+
HirType offset_type = hir_type_from_cint(
2964+
(int64_t)offsetof(PyTupleObject, ob_item), t_cint64);
2965+
phx_tc_emit(tc, hir_c_create_load_const(offset_reg, offset_type));
2966+
phx_tc_emit(tc, hir_c_create_load_field_address_reg(
2967+
list_mem, seq, offset_reg));
2968+
phx_tc_emit(tc, hir_c_create_branch_cpp(fast_path));
2969+
}
2970+
2971+
/* P5: list_fast_path emission. */
2972+
tc->block = list_fast_path;
2973+
{
2974+
HirType t_cptr = (HirType)HIR_TYPE_CPTR;
2975+
phx_tc_emit(tc, hir_c_create_load_field_reg(
2976+
list_mem, seq, "ob_item",
2977+
(intptr_t)offsetof(PyListObject, ob_item), t_cptr, 0));
2978+
phx_tc_emit(tc, hir_c_create_branch_cpp(fast_path));
2979+
}
2980+
2981+
/* P6: fast_path size check. */
2982+
tc->block = fast_path;
2983+
{
2984+
void *seq_size = hir_builder_temps_alloc_stack(builder);
2985+
void *target_size = hir_builder_temps_alloc_stack(builder);
2986+
void *is_equal = hir_builder_temps_alloc_stack(builder);
2987+
HirType t_cint64 = (HirType)HIR_TYPE_CINT64;
2988+
phx_tc_emit(tc, hir_c_create_load_var_object_size_reg(seq_size, seq));
2989+
phx_tc_emit(tc, hir_c_create_load_const(
2990+
target_size, hir_type_from_cint((int64_t)oparg, t_cint64)));
2991+
phx_tc_emit(tc, hir_c_create_primitive_compare(
2992+
is_equal, /*HIR_PCMP_Equal=*/2, seq_size, target_size));
2993+
/* (PB) RE-ALLOC fast_path — distinct block from initial. */
2994+
void *fast_path_extract_loop = hir_cfg_alloc_block(func);
2995+
phx_tc_emit(tc, hir_c_create_cond_branch_cpp(
2996+
is_equal, fast_path_extract_loop, deopt_block));
2997+
2998+
/* P7: item extraction loop. */
2999+
tc->block = fast_path_extract_loop;
3000+
void *idx_reg = hir_builder_temps_alloc_stack(builder);
3001+
HirType t_object = HIR_TYPE_OBJECT;
3002+
for (int idx = oparg - 1; idx >= 0; --idx) {
3003+
void *item = hir_builder_temps_alloc_stack(builder);
3004+
phx_tc_emit(tc, hir_c_create_load_const(
3005+
idx_reg, hir_type_from_cint((int64_t)idx, t_cint64)));
3006+
phx_tc_emit(tc, hir_c_create_load_array_item_reg(
3007+
item, list_mem, idx_reg, seq, /*offset=*/0, t_object));
3008+
phx_ptr_arr_push(&tc->frame.stack, item);
3009+
}
3010+
}
3011+
3012+
/* (PE) frame_state_destroy on deopt_path TC. */
3013+
phx_frame_state_destroy(&deopt_tc.frame);
3014+
}
3015+
3016+

0 commit comments

Comments
 (0)