Skip to content

Commit 66daafc

Browse files
Takhoffman authored and steipete committed
Refine cause-aware precheck overflow routing
1 parent e55c82a commit 66daafc

8 files changed

Lines changed: 541 additions & 116 deletions

File tree

src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,68 @@ describe("overflow compaction in run loop", () => {
171171
expect(result.meta.error).toBeUndefined();
172172
});
173173

174+
it("retries without hitting compaction when attempt-level preflight truncation already handled the overflow", async () => {
175+
mockedRunEmbeddedAttempt
176+
.mockResolvedValueOnce(
177+
makeAttemptResult({
178+
promptError: null,
179+
preflightRecovery: {
180+
route: "truncate_tool_results_only",
181+
handled: true,
182+
truncatedCount: 2,
183+
},
184+
}),
185+
)
186+
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
187+
188+
const result = await runEmbeddedPiAgent(baseParams);
189+
190+
expect(mockedCompactDirect).not.toHaveBeenCalled();
191+
expect(mockedTruncateOversizedToolResultsInSession).not.toHaveBeenCalled();
192+
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
193+
expect(mockedLog.info).toHaveBeenCalledWith(
194+
expect.stringContaining("early recovery route=truncate_tool_results_only"),
195+
);
196+
expect(result.meta.error).toBeUndefined();
197+
});
198+
199+
it("runs post-compaction tool-result truncation before retry for mixed precheck routes", async () => {
200+
mockedRunEmbeddedAttempt
201+
.mockResolvedValueOnce(
202+
makeAttemptResult({
203+
promptError: makeOverflowError(
204+
"Context overflow: prompt too large for the model (precheck).",
205+
),
206+
preflightRecovery: { route: "compact_then_truncate" },
207+
}),
208+
)
209+
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
210+
211+
mockedCompactDirect.mockResolvedValueOnce(
212+
makeCompactionSuccess({
213+
summary: "Compacted session",
214+
firstKeptEntryId: "entry-5",
215+
tokensBefore: 150000,
216+
}),
217+
);
218+
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
219+
truncated: true,
220+
truncatedCount: 2,
221+
});
222+
223+
const result = await runEmbeddedPiAgent(baseParams);
224+
225+
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
226+
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
227+
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
228+
);
229+
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
230+
expect(mockedLog.info).toHaveBeenCalledWith(
231+
expect.stringContaining("post-compaction tool-result truncation succeeded"),
232+
);
233+
expect(result.meta.error).toBeUndefined();
234+
});
235+
174236
it("retries compaction up to 3 times before giving up", async () => {
175237
const overflowError = makeOverflowError();
176238

src/agents/pi-embedded-runner/run.ts

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -64,10 +64,6 @@ import { runContextEngineMaintenance } from "./context-engine-maintenance.js";
6464
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
6565
import { log } from "./logger.js";
6666
import { resolveModelAsync } from "./model.js";
67-
import {
68-
sessionLikelyHasOversizedToolResults,
69-
truncateOversizedToolResultsInSession,
70-
} from "./tool-result-truncation.js";
7167
import { handleAssistantFailover } from "./run/assistant-failover.js";
7268
import { runEmbeddedAttempt } from "./run/attempt.js";
7369
import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
@@ -95,6 +91,10 @@ import type { RunEmbeddedPiAgentParams } from "./run/params.js";
9591
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
9692
import { handleRetryLimitExhaustion } from "./run/retry-limit.js";
9793
import { resolveEffectiveRuntimeModel, resolveHookModelSelection } from "./run/setup.js";
94+
import {
95+
sessionLikelyHasOversizedToolResults,
96+
truncateOversizedToolResultsInSession,
97+
} from "./tool-result-truncation.js";
9898
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
9999
import { createUsageAccumulator, mergeUsageIntoAccumulator } from "./usage-accumulator.js";
100100
import { describeUnknownError } from "./utils.js";
@@ -616,6 +616,7 @@ export async function runEmbeddedPiAgent(
616616
const {
617617
aborted,
618618
promptError,
619+
preflightRecovery,
619620
timedOut,
620621
timedOutDuringCompaction,
621622
sessionIdUsed,
@@ -663,6 +664,13 @@ export async function runEmbeddedPiAgent(
663664
!attempt.lastToolError &&
664665
attempt.toolMetas.length === 0 &&
665666
attempt.assistantTexts.length === 0;
667+
if (preflightRecovery?.handled) {
668+
log.info(
669+
`[context-overflow-precheck] early recovery route=${preflightRecovery.route} ` +
670+
`completed for ${provider}/${modelId}; retrying prompt`,
671+
);
672+
continue;
673+
}
666674
const requestedSelection = shouldSwitchToLiveModel({
667675
cfg: params.config,
668676
sessionKey: params.sessionKey,
@@ -919,6 +927,25 @@ export async function runEmbeddedPiAgent(
919927
}
920928
await runOwnsCompactionAfterHook("overflow recovery", compactResult);
921929
if (compactResult.compacted) {
930+
if (preflightRecovery?.route === "compact_then_truncate") {
931+
const truncResult = await truncateOversizedToolResultsInSession({
932+
sessionFile: params.sessionFile,
933+
contextWindowTokens: ctxInfo.tokens,
934+
sessionId: params.sessionId,
935+
sessionKey: params.sessionKey,
936+
});
937+
if (truncResult.truncated) {
938+
log.info(
939+
`[context-overflow-precheck] post-compaction tool-result truncation succeeded for ` +
940+
`${provider}/${modelId}; truncated ${truncResult.truncatedCount} tool result(s)`,
941+
);
942+
} else {
943+
log.warn(
944+
`[context-overflow-precheck] post-compaction tool-result truncation did not help for ` +
945+
`${provider}/${modelId}: ${truncResult.reason ?? "unknown"}`,
946+
);
947+
}
948+
}
922949
autoCompactionCount += 1;
923950
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
924951
continue;
@@ -960,7 +987,8 @@ export async function runEmbeddedPiAgent(
960987
}
961988
}
962989
if (
963-
(isCompactionFailure || overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) &&
990+
(isCompactionFailure ||
991+
overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) &&
964992
log.isEnabled("debug")
965993
) {
966994
log.debug(

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 70 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -120,10 +120,6 @@ import {
120120
} from "../prompt-cache-observability.js";
121121
import { resolveCacheRetention } from "../prompt-cache-retention.js";
122122
import { sanitizeSessionHistory, validateReplayTurns } from "../replay-history.js";
123-
import {
124-
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
125-
shouldPreemptivelyCompactBeforePrompt,
126-
} from "./preemptive-compaction.js";
127123
import {
128124
clearActiveEmbeddedRun,
129125
type EmbeddedPiQueueHandle,
@@ -149,6 +145,7 @@ import {
149145
import { dropThinkingBlocks } from "../thinking.js";
150146
import { collectAllowedToolNames } from "../tool-name-allowlist.js";
151147
import { installToolResultContextGuard } from "../tool-result-context-guard.js";
148+
import { truncateOversizedToolResultsInSessionManager } from "../tool-result-truncation.js";
152149
import {
153150
logProviderToolSchemaDiagnostics,
154151
normalizeProviderToolSchemas,
@@ -210,6 +207,10 @@ import { pruneProcessedHistoryImages } from "./history-image-prune.js";
210207
import { detectAndLoadPromptImages } from "./images.js";
211208
import { buildAttemptReplayMetadata } from "./incomplete-turn.js";
212209
import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js";
210+
import {
211+
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
212+
shouldPreemptivelyCompactBeforePrompt,
213+
} from "./preemptive-compaction.js";
213214
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
214215

215216
export {
@@ -1525,8 +1526,10 @@ export async function runEmbeddedAttempt(
15251526
const hookAgentId = sessionAgentId;
15261527

15271528
let promptError: unknown = null;
1529+
let preflightRecovery: EmbeddedRunAttemptResult["preflightRecovery"];
15281530
let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null;
15291531
let prePromptMessageCount = activeSession.messages.length;
1532+
let skipPromptSubmission = false;
15301533
try {
15311534
const promptStartedAt = Date.now();
15321535

@@ -1773,32 +1776,81 @@ export async function runEmbeddedAttempt(
17731776
contextTokenBudget: params.contextTokenBudget,
17741777
reserveTokens,
17751778
});
1779+
if (preemptiveCompaction.route === "truncate_tool_results_only") {
1780+
const truncationResult = truncateOversizedToolResultsInSessionManager({
1781+
sessionManager,
1782+
contextWindowTokens: params.contextTokenBudget,
1783+
sessionFile: params.sessionFile,
1784+
sessionId: params.sessionId,
1785+
sessionKey: params.sessionKey,
1786+
});
1787+
if (truncationResult.truncated) {
1788+
preflightRecovery = {
1789+
route: "truncate_tool_results_only",
1790+
handled: true,
1791+
truncatedCount: truncationResult.truncatedCount,
1792+
};
1793+
log.info(
1794+
`[context-overflow-precheck] early tool-result truncation succeeded for ` +
1795+
`${params.provider}/${params.modelId} route=${preemptiveCompaction.route} ` +
1796+
`truncatedCount=${truncationResult.truncatedCount} ` +
1797+
`estimatedPromptTokens=${preemptiveCompaction.estimatedPromptTokens} ` +
1798+
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
1799+
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
1800+
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
1801+
`sessionFile=${params.sessionFile}`,
1802+
);
1803+
skipPromptSubmission = true;
1804+
}
1805+
if (!skipPromptSubmission) {
1806+
log.warn(
1807+
`[context-overflow-precheck] early tool-result truncation did not help for ` +
1808+
`${params.provider}/${params.modelId}; falling back to compaction ` +
1809+
`reason=${truncationResult.reason ?? "unknown"} sessionFile=${params.sessionFile}`,
1810+
);
1811+
preflightRecovery = { route: "compact_only" };
1812+
promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
1813+
promptErrorSource = "precheck";
1814+
skipPromptSubmission = true;
1815+
}
1816+
}
17761817
if (preemptiveCompaction.shouldCompact) {
1818+
preflightRecovery =
1819+
preemptiveCompaction.route === "compact_then_truncate"
1820+
? { route: "compact_then_truncate" }
1821+
: { route: "compact_only" };
17771822
promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
17781823
promptErrorSource = "precheck";
17791824
log.warn(
17801825
`[context-overflow-precheck] sessionKey=${params.sessionKey ?? params.sessionId} ` +
17811826
`provider=${params.provider}/${params.modelId} ` +
1827+
`route=${preemptiveCompaction.route} ` +
17821828
`estimatedPromptTokens=${preemptiveCompaction.estimatedPromptTokens} ` +
17831829
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
1830+
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
1831+
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
17841832
`reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`,
17851833
);
1786-
return;
1834+
skipPromptSubmission = true;
17871835
}
17881836

1789-
const btwSnapshotMessages = activeSession.messages.slice(-MAX_BTW_SNAPSHOT_MESSAGES);
1790-
updateActiveEmbeddedRunSnapshot(params.sessionId, {
1791-
transcriptLeafId,
1792-
messages: btwSnapshotMessages,
1793-
inFlightPrompt: effectivePrompt,
1794-
});
1837+
if (!skipPromptSubmission) {
1838+
const btwSnapshotMessages = activeSession.messages.slice(-MAX_BTW_SNAPSHOT_MESSAGES);
1839+
updateActiveEmbeddedRunSnapshot(params.sessionId, {
1840+
transcriptLeafId,
1841+
messages: btwSnapshotMessages,
1842+
inFlightPrompt: effectivePrompt,
1843+
});
17951844

1796-
// Only pass images option if there are actually images to pass
1797-
// This avoids potential issues with models that don't expect the images parameter
1798-
if (imageResult.images.length > 0) {
1799-
await abortable(activeSession.prompt(effectivePrompt, { images: imageResult.images }));
1800-
} else {
1801-
await abortable(activeSession.prompt(effectivePrompt));
1845+
// Only pass images option if there are actually images to pass
1846+
// This avoids potential issues with models that don't expect the images parameter
1847+
if (imageResult.images.length > 0) {
1848+
await abortable(
1849+
activeSession.prompt(effectivePrompt, { images: imageResult.images }),
1850+
);
1851+
} else {
1852+
await abortable(activeSession.prompt(effectivePrompt));
1853+
}
18021854
}
18031855
} catch (err) {
18041856
// Yield-triggered abort is intentional — treat as clean stop, not error.
@@ -2160,6 +2212,7 @@ export async function runEmbeddedAttempt(
21602212
timedOut,
21612213
timedOutDuringCompaction,
21622214
promptError,
2215+
preflightRecovery,
21632216
sessionIdUsed,
21642217
bootstrapPromptWarningSignaturesSeen: bootstrapPromptWarning.warningSignaturesSeen,
21652218
bootstrapPromptWarningSignature: bootstrapPromptWarning.signature,

src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import { describe, expect, it } from "vitest";
2+
import { estimateToolResultReductionPotential } from "../tool-result-truncation.js";
23
import {
34
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
45
estimatePrePromptTokens,
@@ -43,6 +44,7 @@ describe("preemptive-compaction", () => {
4344
});
4445

4546
expect(result.shouldCompact).toBe(true);
47+
expect(result.route).toBe("compact_only");
4648
expect(result.estimatedPromptTokens).toBeGreaterThan(result.promptBudgetBeforeReserve);
4749
});
4850

@@ -56,6 +58,85 @@ describe("preemptive-compaction", () => {
5658
});
5759

5860
expect(result.shouldCompact).toBe(false);
61+
expect(result.route).toBe("fits");
5962
expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
6063
});
64+
65+
it("routes to direct tool-result truncation when recent tool tails can clearly absorb the overflow", () => {
66+
const medium = "alpha beta gamma delta epsilon ".repeat(2200);
67+
const messages = [
68+
{ role: "assistant", content: "short history" },
69+
{
70+
role: "toolResult",
71+
content: [
72+
{ type: "text", text: medium },
73+
{ type: "text", text: medium },
74+
{ type: "text", text: medium },
75+
{ type: "text", text: medium },
76+
],
77+
} as never,
78+
];
79+
const reserveTokens = 2_000;
80+
const contextTokenBudget = 26_000;
81+
const estimatedPromptTokens = estimatePrePromptTokens({
82+
messages,
83+
systemPrompt: "sys",
84+
prompt: "hello",
85+
});
86+
const desiredOverflowTokens = 200;
87+
const adjustedContextTokenBudget =
88+
estimatedPromptTokens - desiredOverflowTokens + reserveTokens;
89+
const result = shouldPreemptivelyCompactBeforePrompt({
90+
messages,
91+
systemPrompt: "sys",
92+
prompt: "hello",
93+
contextTokenBudget: Math.max(contextTokenBudget, adjustedContextTokenBudget),
94+
reserveTokens,
95+
});
96+
97+
expect(result.route).toBe("truncate_tool_results_only");
98+
expect(result.shouldCompact).toBe(false);
99+
expect(result.overflowTokens).toBeGreaterThan(0);
100+
expect(result.toolResultReducibleChars).toBeGreaterThan(0);
101+
});
102+
103+
it("routes to compact then truncate when recent tool tails help but cannot fully cover the overflow", () => {
104+
const medium = "alpha beta gamma delta epsilon ".repeat(220);
105+
const longHistory = "old discussion with substantial retained context and decisions ".repeat(
106+
5000,
107+
);
108+
const messages = [
109+
{ role: "assistant", content: longHistory },
110+
{ role: "toolResult", content: [{ type: "text", text: medium }] } as never,
111+
{ role: "toolResult", content: [{ type: "text", text: medium }] } as never,
112+
{ role: "toolResult", content: [{ type: "text", text: medium }] } as never,
113+
];
114+
const reserveTokens = 500;
115+
const baseContextTokenBudget = 3_500;
116+
const estimatedPromptTokens = estimatePrePromptTokens({
117+
messages,
118+
systemPrompt: verboseSystem,
119+
prompt: verbosePrompt,
120+
});
121+
const toolResultPotential = estimateToolResultReductionPotential({
122+
messages: messages as never,
123+
contextWindowTokens: baseContextTokenBudget,
124+
});
125+
const desiredOverflowTokens = Math.ceil((toolResultPotential.maxReducibleChars + 4_096) / 4);
126+
const result = shouldPreemptivelyCompactBeforePrompt({
127+
messages,
128+
systemPrompt: verboseSystem,
129+
prompt: verbosePrompt,
130+
contextTokenBudget: Math.max(
131+
baseContextTokenBudget,
132+
estimatedPromptTokens - desiredOverflowTokens + reserveTokens,
133+
),
134+
reserveTokens,
135+
});
136+
137+
expect(result.route).toBe("compact_then_truncate");
138+
expect(result.shouldCompact).toBe(true);
139+
expect(result.overflowTokens).toBeGreaterThan(0);
140+
expect(result.toolResultReducibleChars).toBeGreaterThan(0);
141+
});
61142
});

0 commit comments

Comments
 (0)