fix(agents): preserve thinking blocks in latest assistant message during compaction

Nipurn123 · Nipurn123 · commit 70f78dd4c279 · 2026-02-24T17:57:43.000+05:30
Anthropic's API requires that thinking/redacted_thinking blocks in the latest assistant message be sent back exactly as received. dropThinkingBlocks() was stripping every thinking block from every assistant message, including the latest one, which caused API 400 errors ("thinking or redacted_thinking blocks in the latest assistant message cannot be modified") during context compaction in longer conversations. The function now leaves the latest assistant message untouched and strips thinking blocks only from older assistant messages, which is still needed for GitHub Copilot compatibility. Fixes #25347
diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -466,17 +466,21 @@ describe("sanitizeSessionHistory", () => {
     ).toBe(false);
   });
 
-  it("drops assistant thinking blocks for github-copilot models", async () => {
+  it("preserves thinking blocks in latest assistant message for github-copilot models (Anthropic API requirement)", async () => {
     setNonGoogleModelApi();
 
     const messages = makeThinkingAndTextAssistantMessages("reasoning_text");
 
     const result = await sanitizeGithubCopilotHistory({ messages });
     const assistant = getAssistantMessage(result);
-    expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
+    // Latest assistant message thinking blocks must be preserved for Anthropic API
+    expect(assistant.content).toEqual([
+      { type: "thinking", thinking: "internal", thinkingSignature: "reasoning_text" },
+      { type: "text", text: "hi" },
+    ]);
   });
 
-  it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => {
+  it("preserves thinking blocks in latest assistant when all content is thinking (github-copilot)", async () => {
     setNonGoogleModelApi();
 
     const messages = [
@@ -496,13 +500,15 @@ describe("sanitizeSessionHistory", () => {
 
     const result = await sanitizeGithubCopilotHistory({ messages });
 
-    // Assistant turn should be preserved (not dropped) to maintain turn alternation
+    // Assistant turn should be preserved with thinking blocks intact (latest assistant)
     expect(result).toHaveLength(3);
     const assistant = getAssistantMessage(result);
-    expect(assistant.content).toEqual([{ type: "text", text: "" }]);
+    expect(assistant.content).toEqual([
+      { type: "thinking", thinking: "some reasoning", thinkingSignature: "reasoning_text" },
+    ]);
   });
 
-  it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => {
+  it("preserves thinking blocks in latest assistant message with tool_use blocks (github-copilot)", async () => {
     setNonGoogleModelApi();
 
     const messages = [
@@ -523,9 +529,10 @@ describe("sanitizeSessionHistory", () => {
 
     const result = await sanitizeGithubCopilotHistory({ messages });
     const types = getAssistantContentTypes(result);
+    // Latest assistant preserves all blocks including thinking
+    expect(types).toContain("thinking");
     expect(types).toContain("toolCall");
     expect(types).toContain("text");
-    expect(types).not.toContain("thinking");
   });
 
   it("does not drop thinking blocks for non-copilot providers", async () => {
@@ -555,4 +562,39 @@ describe("sanitizeSessionHistory", () => {
     const types = getAssistantContentTypes(result);
     expect(types).toContain("thinking");
   });
+
+  it("drops thinking blocks from older assistant messages but preserves in latest (github-copilot)", async () => {
+    setNonGoogleModelApi();
+
+    const messages = [
+      { role: "user", content: "first question" },
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "old reasoning" },
+          { type: "text", text: "old response" },
+        ],
+      },
+      { role: "user", content: "second question" },
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" },
+          { type: "text", text: "latest response" },
+        ],
+      },
+    ] as unknown as AgentMessage[];
+
+    const result = await sanitizeGithubCopilotHistory({ messages });
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+    const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
+
+    // Old assistant: thinking blocks dropped
+    expect(oldAssistant.content).toEqual([{ type: "text", text: "old response" }]);
+    // Latest assistant: thinking blocks preserved (Anthropic API requirement)
+    expect(latestAssistant.content).toEqual([
+      { type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" },
+      { type: "text", text: "latest response" },
+    ]);
+  });
 });
diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts
@@ -34,15 +34,19 @@ describe("dropThinkingBlocks", () => {
         role: "assistant",
         content: [
           { type: "thinking", thinking: "internal" },
-          { type: "text", text: "final" },
+          { type: "text", text: "first" },
         ],
       } as unknown as AgentMessage,
+      { role: "user", content: "follow-up" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "latest" }],
+      } as AgentMessage,
     ];
 
     const result = dropThinkingBlocks(messages);
-    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
-    expect(result).not.toBe(messages);
-    expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
+    const firstAssistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(firstAssistant.content).toEqual([{ type: "text", text: "first" }]);
   });
 
   it("keeps assistant turn structure when all content blocks were thinking", () => {
@@ -51,10 +55,94 @@ describe("dropThinkingBlocks", () => {
         role: "assistant",
         content: [{ type: "thinking", thinking: "internal-only" }],
       } as unknown as AgentMessage,
+      { role: "user", content: "follow-up" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "latest" }],
+      } as AgentMessage,
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const firstAssistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(firstAssistant.content).toEqual([{ type: "text", text: "" }]);
+  });
+
+  it("preserves thinking blocks in the latest assistant message (Anthropic API requirement)", () => {
+    const messages: AgentMessage[] = [
+      { role: "user", content: "first" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "old reasoning" },
+          { type: "text", text: "old response" },
+        ],
+      } as unknown as AgentMessage,
+      { role: "user", content: "second" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "latest reasoning" },
+          { type: "text", text: "latest response" },
+        ],
+      } as unknown as AgentMessage,
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+    const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
+
+    // Old assistant: thinking blocks should be dropped
+    expect(oldAssistant.content).toEqual([{ type: "text", text: "old response" }]);
+    // Latest assistant: thinking blocks must be preserved
+    expect(latestAssistant.content).toEqual([
+      { type: "thinking", thinking: "latest reasoning" },
+      { type: "text", text: "latest response" },
+    ]);
+  });
+
+  it("preserves thinking blocks when latest assistant is the only assistant message", () => {
+    const messages: AgentMessage[] = [
+      { role: "user", content: "hello" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "my reasoning" },
+          { type: "text", text: "response" },
+        ],
+      } as unknown as AgentMessage,
     ];
 
     const result = dropThinkingBlocks(messages);
-    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
-    expect(assistant.content).toEqual([{ type: "text", text: "" }]);
+    expect(result).toBe(messages);
+  });
+
+  it("handles multiple assistant messages with only the latest preserved", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "first" }],
+      } as unknown as AgentMessage,
+      { role: "user", content: "follow-up" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "second" }],
+      } as unknown as AgentMessage,
+      { role: "user", content: "last" } as AgentMessage,
+      {
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "third" }],
+      } as unknown as AgentMessage,
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    const second = result[2] as Extract<AgentMessage, { role: "assistant" }>;
+    const third = result[4] as Extract<AgentMessage, { role: "assistant" }>;
+
+    // First two assistants: thinking blocks dropped
+    expect(first.content).toEqual([{ type: "text", text: "" }]);
+    expect(second.content).toEqual([{ type: "text", text: "" }]);
+    // Latest assistant: thinking blocks preserved
+    expect(third.content).toEqual([{ type: "thinking", thinking: "third" }]);
   });
 });
diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts
@@ -13,7 +13,13 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
 }
 
 /**
- * Strip all `type: "thinking"` content blocks from assistant messages.
+ * Strip `type: "thinking"` content blocks from assistant messages, EXCEPT
+ * the latest assistant message.
+ *
+ * Anthropic's API requires that thinking/redacted_thinking blocks in the
+ * latest assistant message remain exactly as received - they cannot be
+ * modified or removed. Stripping them causes API 400 errors:
+ * "thinking or redacted_thinking blocks in the latest assistant message cannot be modified"
  *
  * If an assistant message becomes empty after stripping, it is replaced with
  * a synthetic `{ type: "text", text: "" }` block to preserve turn structure
@@ -23,13 +29,26 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
  * use reference equality to skip downstream work).
  */
 export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
+  let latestAssistantIndex = -1;
+  for (let i = messages.length - 1; i >= 0; i -= 1) {
+    if (isAssistantMessageWithContent(messages[i])) {
+      latestAssistantIndex = i;
+      break;
+    }
+  }
+
   let touched = false;
   const out: AgentMessage[] = [];
-  for (const msg of messages) {
+  for (let i = 0; i < messages.length; i += 1) {
+    const msg = messages[i];
     if (!isAssistantMessageWithContent(msg)) {
       out.push(msg);
       continue;
     }
+    if (i === latestAssistantIndex) {
+      out.push(msg);
+      continue;
+    }
     const nextContent: AssistantContentBlock[] = [];
     let changed = false;
     for (const block of msg.content) {