Skip to content

Commit 70f78dd

Browse files
committed
fix(agents): preserve thinking blocks in latest assistant message during compaction
Anthropic's API requires that thinking/redacted_thinking blocks in the latest assistant message remain exactly as received. The dropThinkingBlocks() function was stripping ALL thinking blocks from ALL assistant messages, which caused API 400 errors during context compaction in longer conversations. The function now preserves thinking blocks in the latest assistant message while still stripping them from older assistant messages (the stripping is still needed for GitHub Copilot compatibility). Fixes #25347
1 parent 191ab17 commit 70f78dd

File tree

3 files changed

+164
-15
lines changed

3 files changed

+164
-15
lines changed

src/agents/pi-embedded-runner.sanitize-session-history.test.ts

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -466,17 +466,21 @@ describe("sanitizeSessionHistory", () => {
466466
).toBe(false);
467467
});
468468

469-
it("drops assistant thinking blocks for github-copilot models", async () => {
469+
it("preserves thinking blocks in latest assistant message for github-copilot models (Anthropic API requirement)", async () => {
470470
setNonGoogleModelApi();
471471

472472
const messages = makeThinkingAndTextAssistantMessages("reasoning_text");
473473

474474
const result = await sanitizeGithubCopilotHistory({ messages });
475475
const assistant = getAssistantMessage(result);
476-
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
476+
// Latest assistant message thinking blocks must be preserved for Anthropic API
477+
expect(assistant.content).toEqual([
478+
{ type: "thinking", thinking: "internal", thinkingSignature: "reasoning_text" },
479+
{ type: "text", text: "hi" },
480+
]);
477481
});
478482

479-
it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => {
483+
it("preserves thinking blocks in latest assistant when all content is thinking (github-copilot)", async () => {
480484
setNonGoogleModelApi();
481485

482486
const messages = [
@@ -496,13 +500,15 @@ describe("sanitizeSessionHistory", () => {
496500

497501
const result = await sanitizeGithubCopilotHistory({ messages });
498502

499-
// Assistant turn should be preserved (not dropped) to maintain turn alternation
503+
// Assistant turn should be preserved with thinking blocks intact (latest assistant)
500504
expect(result).toHaveLength(3);
501505
const assistant = getAssistantMessage(result);
502-
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
506+
expect(assistant.content).toEqual([
507+
{ type: "thinking", thinking: "some reasoning", thinkingSignature: "reasoning_text" },
508+
]);
503509
});
504510

505-
it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => {
511+
it("preserves thinking blocks in latest assistant message with tool_use blocks (github-copilot)", async () => {
506512
setNonGoogleModelApi();
507513

508514
const messages = [
@@ -523,9 +529,10 @@ describe("sanitizeSessionHistory", () => {
523529

524530
const result = await sanitizeGithubCopilotHistory({ messages });
525531
const types = getAssistantContentTypes(result);
532+
// Latest assistant preserves all blocks including thinking
533+
expect(types).toContain("thinking");
526534
expect(types).toContain("toolCall");
527535
expect(types).toContain("text");
528-
expect(types).not.toContain("thinking");
529536
});
530537

531538
it("does not drop thinking blocks for non-copilot providers", async () => {
@@ -555,4 +562,39 @@ describe("sanitizeSessionHistory", () => {
555562
const types = getAssistantContentTypes(result);
556563
expect(types).toContain("thinking");
557564
});
565+
566+
it("drops thinking blocks from older assistant messages but preserves in latest (github-copilot)", async () => {
567+
setNonGoogleModelApi();
568+
569+
const messages = [
570+
{ role: "user", content: "first question" },
571+
{
572+
role: "assistant",
573+
content: [
574+
{ type: "thinking", thinking: "old reasoning" },
575+
{ type: "text", text: "old response" },
576+
],
577+
},
578+
{ role: "user", content: "second question" },
579+
{
580+
role: "assistant",
581+
content: [
582+
{ type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" },
583+
{ type: "text", text: "latest response" },
584+
],
585+
},
586+
] as unknown as AgentMessage[];
587+
588+
const result = await sanitizeGithubCopilotHistory({ messages });
589+
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
590+
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
591+
592+
// Old assistant: thinking blocks dropped
593+
expect(oldAssistant.content).toEqual([{ type: "text", text: "old response" }]);
594+
// Latest assistant: thinking blocks preserved (Anthropic API requirement)
595+
expect(latestAssistant.content).toEqual([
596+
{ type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" },
597+
{ type: "text", text: "latest response" },
598+
]);
599+
});
558600
});

src/agents/pi-embedded-runner/thinking.test.ts

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,19 @@ describe("dropThinkingBlocks", () => {
3434
role: "assistant",
3535
content: [
3636
{ type: "thinking", thinking: "internal" },
37-
{ type: "text", text: "final" },
37+
{ type: "text", text: "first" },
3838
],
3939
} as unknown as AgentMessage,
40+
{ role: "user", content: "follow-up" } as AgentMessage,
41+
{
42+
role: "assistant",
43+
content: [{ type: "text", text: "latest" }],
44+
} as AgentMessage,
4045
];
4146

4247
const result = dropThinkingBlocks(messages);
43-
const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
44-
expect(result).not.toBe(messages);
45-
expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
48+
const firstAssistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
49+
expect(firstAssistant.content).toEqual([{ type: "text", text: "first" }]);
4650
});
4751

4852
it("keeps assistant turn structure when all content blocks were thinking", () => {
@@ -51,10 +55,94 @@ describe("dropThinkingBlocks", () => {
5155
role: "assistant",
5256
content: [{ type: "thinking", thinking: "internal-only" }],
5357
} as unknown as AgentMessage,
58+
{ role: "user", content: "follow-up" } as AgentMessage,
59+
{
60+
role: "assistant",
61+
content: [{ type: "text", text: "latest" }],
62+
} as AgentMessage,
63+
];
64+
65+
const result = dropThinkingBlocks(messages);
66+
const firstAssistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
67+
expect(firstAssistant.content).toEqual([{ type: "text", text: "" }]);
68+
});
69+
70+
it("preserves thinking blocks in the latest assistant message (Anthropic API requirement)", () => {
71+
const messages: AgentMessage[] = [
72+
{ role: "user", content: "first" } as AgentMessage,
73+
{
74+
role: "assistant",
75+
content: [
76+
{ type: "thinking", thinking: "old reasoning" },
77+
{ type: "text", text: "old response" },
78+
],
79+
} as unknown as AgentMessage,
80+
{ role: "user", content: "second" } as AgentMessage,
81+
{
82+
role: "assistant",
83+
content: [
84+
{ type: "thinking", thinking: "latest reasoning" },
85+
{ type: "text", text: "latest response" },
86+
],
87+
} as unknown as AgentMessage,
88+
];
89+
90+
const result = dropThinkingBlocks(messages);
91+
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
92+
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
93+
94+
// Old assistant: thinking blocks should be dropped
95+
expect(oldAssistant.content).toEqual([{ type: "text", text: "old response" }]);
96+
// Latest assistant: thinking blocks must be preserved
97+
expect(latestAssistant.content).toEqual([
98+
{ type: "thinking", thinking: "latest reasoning" },
99+
{ type: "text", text: "latest response" },
100+
]);
101+
});
102+
103+
it("preserves thinking blocks when latest assistant is the only assistant message", () => {
104+
const messages: AgentMessage[] = [
105+
{ role: "user", content: "hello" } as AgentMessage,
106+
{
107+
role: "assistant",
108+
content: [
109+
{ type: "thinking", thinking: "my reasoning" },
110+
{ type: "text", text: "response" },
111+
],
112+
} as unknown as AgentMessage,
54113
];
55114

56115
const result = dropThinkingBlocks(messages);
57-
const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
58-
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
116+
expect(result).toBe(messages);
117+
});
118+
119+
it("handles multiple assistant messages with only the latest preserved", () => {
120+
const messages: AgentMessage[] = [
121+
{
122+
role: "assistant",
123+
content: [{ type: "thinking", thinking: "first" }],
124+
} as unknown as AgentMessage,
125+
{ role: "user", content: "follow-up" } as AgentMessage,
126+
{
127+
role: "assistant",
128+
content: [{ type: "thinking", thinking: "second" }],
129+
} as unknown as AgentMessage,
130+
{ role: "user", content: "last" } as AgentMessage,
131+
{
132+
role: "assistant",
133+
content: [{ type: "thinking", thinking: "third" }],
134+
} as unknown as AgentMessage,
135+
];
136+
137+
const result = dropThinkingBlocks(messages);
138+
const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
139+
const second = result[2] as Extract<AgentMessage, { role: "assistant" }>;
140+
const third = result[4] as Extract<AgentMessage, { role: "assistant" }>;
141+
142+
// First two assistants: thinking blocks dropped
143+
expect(first.content).toEqual([{ type: "text", text: "" }]);
144+
expect(second.content).toEqual([{ type: "text", text: "" }]);
145+
// Latest assistant: thinking blocks preserved
146+
expect(third.content).toEqual([{ type: "thinking", thinking: "third" }]);
59147
});
60148
});

src/agents/pi-embedded-runner/thinking.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,13 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
1313
}
1414

1515
/**
16-
* Strip all `type: "thinking"` content blocks from assistant messages.
16+
* Strip `type: "thinking"` content blocks from assistant messages, EXCEPT
17+
* the latest assistant message.
18+
*
19+
* Anthropic's API requires that thinking/redacted_thinking blocks in the
20+
* latest assistant message remain exactly as received - they cannot be
21+
* modified or removed. Stripping them causes API 400 errors:
22+
* "thinking or redacted_thinking blocks in the latest assistant message cannot be modified"
1723
*
1824
* If an assistant message becomes empty after stripping, it is replaced with
1925
* a synthetic `{ type: "text", text: "" }` block to preserve turn structure
@@ -23,13 +29,26 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
2329
* use reference equality to skip downstream work).
2430
*/
2531
export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
32+
let latestAssistantIndex = -1;
33+
for (let i = messages.length - 1; i >= 0; i -= 1) {
34+
if (isAssistantMessageWithContent(messages[i])) {
35+
latestAssistantIndex = i;
36+
break;
37+
}
38+
}
39+
2640
let touched = false;
2741
const out: AgentMessage[] = [];
28-
for (const msg of messages) {
42+
for (let i = 0; i < messages.length; i += 1) {
43+
const msg = messages[i];
2944
if (!isAssistantMessageWithContent(msg)) {
3045
out.push(msg);
3146
continue;
3247
}
48+
if (i === latestAssistantIndex) {
49+
out.push(msg);
50+
continue;
51+
}
3352
const nextContent: AssistantContentBlock[] = [];
3453
let changed = false;
3554
for (const block of msg.content) {

0 commit comments

Comments
 (0)