Skip to content

Commit 5ca0233

Browse files
frankekn and jmcte authored
fix(agents): drop Anthropic thinking blocks on replay (openclaw#44843)
* agents: drop Anthropic thinking blocks on replay
* fix: extend anthropic replay sanitization openclaw#44429 thanks @jmcte
* fix: extend anthropic replay sanitization openclaw#44843 thanks @jmcte
* test: add bedrock replay sanitization coverage openclaw#44843
* test: cover anthropic provider drop-thinking hints openclaw#44843

---------

Co-authored-by: johnmteneyckjr <[email protected]>
1 parent 0705225 commit 5ca0233

File tree

6 files changed

+67
-16
lines changed

6 files changed

+67
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ Docs: https://docs.openclaw.ai
9595
- Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) thanks @Cypherm.
9696
- Delivery/dedupe: trim completed direct-cron delivery cache correctly and keep mirrored transcript dedupe active even when transcript files contain malformed lines. (#44666) thanks @frankekn.
9797
- CLI/thinking help: add the missing `xhigh` level hints to `openclaw cron add`, `openclaw cron edit`, and `openclaw agent` so the help text matches the levels already accepted at runtime. (#44819) Thanks @kiki830621.
98+
- Agents/Anthropic replay: drop replayed assistant thinking blocks for native Anthropic and Bedrock Claude providers so persisted follow-up turns no longer fail on stored thinking blocks. (#44843) Thanks @jmcte.
9899

99100
## 2026.3.11
100101

src/agents/pi-embedded-runner.sanitize-session-history.test.ts

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,21 @@ describe("sanitizeSessionHistory", () => {
5252
sessionId: TEST_SESSION_ID,
5353
});
5454

55+
const sanitizeAnthropicHistory = async (params: {
56+
messages: AgentMessage[];
57+
provider?: string;
58+
modelApi?: string;
59+
modelId?: string;
60+
}) =>
61+
sanitizeSessionHistory({
62+
messages: params.messages,
63+
modelApi: params.modelApi ?? "anthropic-messages",
64+
provider: params.provider ?? "anthropic",
65+
modelId: params.modelId ?? "claude-opus-4-6",
66+
sessionManager: makeMockSessionManager(),
67+
sessionId: TEST_SESSION_ID,
68+
});
69+
5570
const getAssistantMessage = (messages: AgentMessage[]) => {
5671
expect(messages[1]?.role).toBe("assistant");
5772
return messages[1] as Extract<AgentMessage, { role: "assistant" }>;
@@ -760,22 +775,30 @@ describe("sanitizeSessionHistory", () => {
760775
expect(types).not.toContain("thinking");
761776
});
762777

763-
it("does not drop thinking blocks for non-copilot providers", async () => {
778+
it("drops assistant thinking blocks for anthropic replay", async () => {
764779
setNonGoogleModelApi();
765780

766781
const messages = makeThinkingAndTextAssistantMessages();
767782

768-
const result = await sanitizeSessionHistory({
783+
const result = await sanitizeAnthropicHistory({ messages });
784+
785+
const assistant = getAssistantMessage(result);
786+
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
787+
});
788+
789+
it("drops assistant thinking blocks for amazon-bedrock replay", async () => {
790+
setNonGoogleModelApi();
791+
792+
const messages = makeThinkingAndTextAssistantMessages();
793+
794+
const result = await sanitizeAnthropicHistory({
769795
messages,
770-
modelApi: "anthropic-messages",
771-
provider: "anthropic",
772-
modelId: "claude-opus-4-6",
773-
sessionManager: makeMockSessionManager(),
774-
sessionId: TEST_SESSION_ID,
796+
provider: "amazon-bedrock",
797+
modelApi: "bedrock-converse-stream",
775798
});
776799

777-
const types = getAssistantContentTypes(result);
778-
expect(types).toContain("thinking");
800+
const assistant = getAssistantMessage(result);
801+
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
779802
});
780803

781804
it("does not drop thinking blocks for non-claude copilot models", async () => {

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,9 +1947,10 @@ export async function runEmbeddedAttempt(
19471947
activeSession.agent.streamFn = cacheTrace.wrapStreamFn(activeSession.agent.streamFn);
19481948
}
19491949

1950-
// Copilot/Claude can reject persisted `thinking` blocks (e.g. thinkingSignature:"reasoning_text")
1951-
// on *any* follow-up provider call (including tool continuations). Wrap the stream function
1952-
// so every outbound request sees sanitized messages.
1950+
// Anthropic Claude endpoints can reject replayed `thinking` blocks
1951+
// (e.g. thinkingSignature:"reasoning_text") on any follow-up provider
1952+
// call, including tool continuations. Wrap the stream function so every
1953+
// outbound request sees sanitized messages.
19531954
if (transcriptPolicy.dropThinkingBlocks) {
19541955
const inner = activeSession.agent.streamFn;
19551956
activeSession.agent.streamFn = (model, context, options) => {

src/agents/provider-capabilities.test.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,19 @@ describe("resolveProviderCapabilities", () => {
2222
transcriptToolCallIdMode: "default",
2323
transcriptToolCallIdModelHints: [],
2424
geminiThoughtSignatureModelHints: [],
25-
dropThinkingBlockModelHints: [],
25+
dropThinkingBlockModelHints: ["claude"],
26+
});
27+
expect(resolveProviderCapabilities("amazon-bedrock")).toEqual({
28+
anthropicToolSchemaMode: "native",
29+
anthropicToolChoiceMode: "native",
30+
providerFamily: "anthropic",
31+
preserveAnthropicThinkingSignatures: true,
32+
openAiCompatTurnValidation: true,
33+
geminiThoughtSignatureSanitization: false,
34+
transcriptToolCallIdMode: "default",
35+
transcriptToolCallIdModelHints: [],
36+
geminiThoughtSignatureModelHints: [],
37+
dropThinkingBlockModelHints: ["claude"],
2638
});
2739
});
2840

@@ -82,6 +94,18 @@ describe("resolveProviderCapabilities", () => {
8294
it("tracks provider families and model-specific transcript quirks in the registry", () => {
8395
expect(isOpenAiProviderFamily("openai")).toBe(true);
8496
expect(isAnthropicProviderFamily("amazon-bedrock")).toBe(true);
97+
expect(
98+
shouldDropThinkingBlocksForModel({
99+
provider: "anthropic",
100+
modelId: "claude-opus-4-6",
101+
}),
102+
).toBe(true);
103+
expect(
104+
shouldDropThinkingBlocksForModel({
105+
provider: "amazon-bedrock",
106+
modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
107+
}),
108+
).toBe(true);
85109
expect(
86110
shouldDropThinkingBlocksForModel({
87111
provider: "github-copilot",

src/agents/provider-capabilities.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ const DEFAULT_PROVIDER_CAPABILITIES: ProviderCapabilities = {
2929
const PROVIDER_CAPABILITIES: Record<string, Partial<ProviderCapabilities>> = {
3030
anthropic: {
3131
providerFamily: "anthropic",
32+
dropThinkingBlockModelHints: ["claude"],
3233
},
3334
"amazon-bedrock": {
3435
providerFamily: "anthropic",
36+
dropThinkingBlockModelHints: ["claude"],
3537
},
3638
// kimi-coding natively supports Anthropic tool framing (input_schema);
3739
// converting to OpenAI format causes XML text fallback instead of tool_use blocks.

src/agents/transcript-policy.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ export function resolveTranscriptPolicy(params: {
8080
});
8181
const requiresOpenAiCompatibleToolIdSanitization = params.modelApi === "openai-completions";
8282

83-
// GitHub Copilot's Claude endpoints can reject persisted `thinking` blocks with
84-
// non-binary/non-base64 signatures (e.g. thinkingSignature: "reasoning_text").
85-
// Drop these blocks at send-time to keep sessions usable.
83+
// Anthropic Claude endpoints can reject replayed `thinking` blocks unless the
84+
// original signatures are preserved byte-for-byte. Drop them at send-time to
85+
// keep persisted sessions usable across follow-up turns.
8686
const dropThinkingBlocks = shouldDropThinkingBlocksForModel({ provider, modelId });
8787

8888
const needsNonImageSanitize =

0 commit comments

Comments
 (0)