fix(context-engine): snapshot pre-assembly messages before assemble

Eva · Eva · commit f69efaa66a07 · 2026-05-01T18:29:46.000+07:00
Address PR #74255 review feedback:

- Snapshot activeSession.messages before calling assembleAttemptContextEngine, so that engines that window history in place (which the assemble contract allows) cannot leave the precheck reading already-windowed messages instead of the true pre-assembly state. Add a regression test that wires up an in-place windowing engine and asserts that unwindowedMessages still reflects the pre-assembly transcript. (Codex P2)
- Clarify the AssembleResult.promptAuthority docstring to spell out the two precheck modes (assembled-only vs. max(assembled, preassembly)) so that engine authors do not misimplement the opt-in. (Copilot)
- Document promptAuthority in docs/concepts/context-engine.md, regenerate the plugin-sdk API baseline, and add a CHANGELOG "Unreleased / Fixes" entry for the public contract addition. (Codex P2/P3)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Context-engine/embedded-runner: treat the assembled prompt as the default authority for preemptive overflow prechecks, so engines that return a windowed, self-contained context no longer trigger false hard-fail compactions when the raw history is huge. Engines whose assembled view can hide overflow risk can opt back into the legacy behavior with `AssembleResult.promptAuthority: "preassembly_may_overflow"`. (#74255)
 - Google Meet: interrupt Realtime provider output when local barge-in clears playback, so command-pair audio stops model speech instead of only restarting Chrome playback. Fixes #73850. (#73834) Thanks @shhtheonlyperson.
 - Voice Call/Twilio: honor stored pre-connect TwiML before realtime webhook shortcuts and reject DTMF sequences outside conversation mode, so Meet PIN entry cannot be skipped or silently dropped. Thanks @donkeykong91 and @PfanP.
 - Google Meet/Voice Call: play Twilio Meet DTMF before opening the realtime media stream and carry the intro as the initial Voice Call message, so the greeting is generated after Meet admits the phone participant instead of racing a live-call TwiML update. Thanks @donkeykong91 and @PfanP.
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-e75701dd791461feb4893e7106362dbbb41668bc4341e8b42becc346001e9f0e  plugin-sdk-api-baseline.json
-077e30997781d3a064f00491d55f7ac78465868b02fdcfb70e07e03555bb2afe  plugin-sdk-api-baseline.jsonl
+af5ccb35cf806839288e347323c9958d8a4d6a09f90d2525aa465fc051e6ecce  plugin-sdk-api-baseline.json
+44666c7f08e1b29ca1b1c47ca7140689af8706b1a19add2a0dd476ba2500c9c4  plugin-sdk-api-baseline.jsonl
diff --git a/docs/concepts/context-engine.md b/docs/concepts/context-engine.md
@@ -197,6 +197,17 @@ Required members:
 <ParamField path="systemPromptAddition" type="string">
   Prepended to the system prompt.
 </ParamField>
+<ParamField path="promptAuthority" type='"assembled" | "preassembly_may_overflow"'>
+  Controls which token estimate the runner uses for preemptive overflow
+  prechecks. Defaults to `"assembled"`, which means only the assembled
+  prompt's estimate is checked — appropriate for engines that return a
+  windowed, self-contained context. Set to `"preassembly_may_overflow"` only
+  when your assembled view can hide overflow risk in the underlying
+  transcript; the runner then takes the maximum of the assembled estimate
+  and the pre-assembly (unwindowed) session-history estimate when deciding
+  whether to preemptively compact. Either way, the messages you return are
+  still what the model sees — `promptAuthority` only affects the precheck.
+</ParamField>
 
 `compact` returns a `CompactResult`. When compaction rotates the active
 transcript, `result.sessionId` and `result.sessionFile` identify the successor
diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
@@ -391,6 +391,49 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
     expect(hoisted.preemptiveCompactionCalls.at(-1)).toHaveProperty("unwindowedMessages");
   });
 
+  it("snapshots pre-assembly messages before assemble even when the engine windows in place", async () => {
+    const hugeHistory = "large raw history ".repeat(25_000);
+    const preassemblyMarker = { role: "user", content: hugeHistory, timestamp: 1 } as AgentMessage;
+
+    await createContextEngineAttemptRunner({
+      contextEngine: createTestContextEngine({
+        assemble: async ({ messages }: { messages: AgentMessage[] }) => {
+          // Simulate an engine that windows the input array IN PLACE.
+          // The assemble contract does not require immutability, so the
+          // runner must have already snapshotted before calling us.
+          messages.length = 0;
+          messages.push({ role: "user", content: "windowed", timestamp: 2 } as AgentMessage);
+          return {
+            messages: [
+              { role: "user", content: "small assembled context", timestamp: 1 },
+            ] as AgentMessage[],
+            estimatedTokens: 8,
+            promptAuthority: "preassembly_may_overflow",
+          };
+        },
+      }),
+      sessionKey,
+      tempPaths,
+      sessionMessages: [preassemblyMarker],
+      attemptOverrides: {
+        contextTokenBudget: 500,
+      },
+      sessionPrompt: async (session) => {
+        session.messages = [
+          ...session.messages,
+          { role: "assistant", content: "done", timestamp: 3 },
+        ];
+      },
+    });
+
+    const lastCall = hoisted.preemptiveCompactionCalls.at(-1);
+    expect(lastCall).toHaveProperty("unwindowedMessages");
+    const unwindowed = (lastCall as { unwindowedMessages?: AgentMessage[] }).unwindowedMessages;
+    // The snapshot must reflect the true pre-assembly state, not the in-place
+    // windowed array that assemble mutated.
+    expect(unwindowed).toEqual([preassemblyMarker]);
+  });
+
   it("keeps gateway model runs independent from agent context and session history", async () => {
     const bootstrap = vi.fn(async () => ({ bootstrapped: true }));
     const assemble = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => ({
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2059,7 +2059,11 @@ export async function runEmbeddedAttempt(
 
         if (activeContextEngine) {
           try {
-            const preassemblyContextEngineMessagesForPrecheck = activeSession.messages;
+            // Snapshot before assemble: the assemble contract does not require
+            // the input array to be treated immutably, so an engine that windows
+            // history in place would otherwise leave the precheck reading
+            // already-windowed messages instead of the true pre-assembly state.
+            const preassemblyContextEngineMessagesForPrecheck = activeSession.messages.slice();
             const assembled = await assembleAttemptContextEngine({
               contextEngine: activeContextEngine,
               sessionId: params.sessionId,
@@ -2080,7 +2084,7 @@ export async function runEmbeddedAttempt(
             contextEnginePromptAuthority = assembled.promptAuthority ?? "assembled";
             if (contextEnginePromptAuthority === "preassembly_may_overflow") {
               unwindowedContextEngineMessagesForPrecheck =
-                preassemblyContextEngineMessagesForPrecheck.slice();
+                preassemblyContextEngineMessagesForPrecheck;
             }
             if (assembled.systemPromptAddition) {
               systemPromptText = prependSystemPromptAddition({
diff --git a/src/context-engine/types.ts b/src/context-engine/types.ts
@@ -9,10 +9,16 @@ export type AssembleResult = {
   /** Estimated total tokens in assembled context */
   estimatedTokens: number;
   /**
-   * Declares which message set overflow prechecks should treat as authoritative.
-   * "assembled" means the returned messages are already windowed and complete;
-   * "preassembly_may_overflow" asks the runner to also check pre-assembly
-   * session history because the context engine may hide an overflow risk.
+   * Controls which token estimate the runner treats as authoritative for
+   * preemptive overflow prechecks. The returned `messages` are always the
+   * prompt sent to the model; this only affects the precheck's token comparison.
+   *
+   * - "assembled": the precheck uses only the assembled prompt's estimate.
+   * - "preassembly_may_overflow": the precheck takes the maximum of the
+   *   assembled estimate and the pre-assembly (unwindowed) session-history
+   *   estimate. Engines opt into this when their assembled view can hide an
+   *   overflow that would still affect the underlying transcript.
+   *
    * Defaults to "assembled".
    */
   promptAuthority?: "assembled" | "preassembly_may_overflow";