Skip to content

Commit 38c96bc

Browse files
Sriram Naidu Thota, claude
authored and committed
fix: validate base64 image data before API submission
Adds explicit base64 format validation in sanitizeContentBlocksImages() to prevent invalid image data from being sent to the Anthropic API.

The Problem:
- Node's Buffer.from(str, "base64") silently ignores invalid characters
- Invalid base64 passes local validation but fails at Anthropic's stricter API
- Once corrupted data persists in session history, every API call fails

The Fix:
- Add validateAndNormalizeBase64() function that:
  - Strips data URL prefixes (e.g., "data:image/png;base64,...")
  - Validates base64 character set with regex
  - Checks for valid padding (0-2 '=' chars)
  - Validates length is proper for base64 encoding
- Invalid images are replaced with descriptive text blocks
- Prevents permanent session corruption

Tests:
- Rejects invalid base64 characters
- Strips data URL prefixes correctly
- Rejects invalid padding
- Rejects invalid length
- Handles empty data gracefully

Closes #18212

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent aeec95f commit 38c96bc

File tree

2 files changed

+155
-2
lines changed

2 files changed

+155
-2
lines changed

src/agents/tool-images.e2e.test.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,106 @@ import sharp from "sharp";
22
import { describe, expect, it } from "vitest";
33
import { sanitizeContentBlocksImages, sanitizeImageBlocks } from "./tool-images.js";
44

5+
describe("base64 validation", () => {
  /**
   * Wraps `data` in a single image content block and runs it through
   * sanitizeContentBlocksImages with the label "test".
   */
  async function sanitizeSingleImage(data: string, mimeType: string) {
    const input = [{ type: "image" as const, data, mimeType }];
    return sanitizeContentBlocksImages(input, "test");
  }

  /**
   * Asserts that the sanitizer produced exactly one block, that it is a text
   * block, and that its text contains every expected fragment.
   */
  function expectSingleTextBlock(
    result: Awaited<ReturnType<typeof sanitizeSingleImage>>,
    fragments: readonly string[],
  ): void {
    expect(result.length).toBe(1);
    expect(result[0].type).toBe("text");
    if (result[0].type === "text") {
      for (const fragment of fragments) {
        expect(result[0].text).toContain(fragment);
      }
    }
  }

  it("rejects invalid base64 characters and replaces with error text", async () => {
    const result = await sanitizeSingleImage("not-valid-base64!!!@#$%", "image/png");
    expectSingleTextBlock(result, ["omitted image payload", "invalid"]);
  });

  it("strips data URL prefix and processes valid base64", async () => {
    // Build a tiny, genuinely decodable JPEG so the image survives sanitizing.
    const redSquare = sharp({
      create: {
        width: 10,
        height: 10,
        channels: 3,
        background: { r: 255, g: 0, b: 0 },
      },
    });
    const jpegBytes = await redSquare.jpeg().toBuffer();
    const encoded = jpegBytes.toString("base64");

    const result = await sanitizeSingleImage(`data:image/jpeg;base64,${encoded}`, "image/jpeg");
    expect(result.length).toBe(1);
    expect(result[0].type).toBe("image");
  });

  it("rejects base64 with invalid padding", async () => {
    // Three trailing '=' is one more than base64 ever allows.
    const result = await sanitizeSingleImage("SGVsbG8===", "image/png");
    expectSingleTextBlock(result, ["omitted image payload"]);
  });

  it("rejects base64 with invalid length", async () => {
    // Five unpadded characters leave a remainder of 1, which cannot encode any byte.
    const result = await sanitizeSingleImage("AAAAA", "image/png");
    expectSingleTextBlock(result, ["omitted image payload"]);
  });

  it("handles empty base64 data gracefully", async () => {
    const result = await sanitizeSingleImage(" ", "image/png");
    expectSingleTextBlock(result, ["omitted empty image payload"]);
  });
});
104+
5105
describe("tool image sanitizing", () => {
6106
it("shrinks oversized images to <=5MB", async () => {
7107
const width = 2800;

src/agents/tool-images.ts

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,54 @@ const MAX_IMAGE_DIMENSION_PX = 2000;
1717
const MAX_IMAGE_BYTES = 5 * 1024 * 1024;
1818
const log = createSubsystemLogger("agents/tool-images");
1919

20+
// Valid base64 character set (standard + URL-safe variants)
const BASE64_REGEX = /^[A-Za-z0-9+/=_-]*$/;

/**
 * Validates base64 image data and returns it in canonical form.
 *
 * Normalization steps:
 * - Strips a data URL prefix (e.g. "data:image/png;base64,"), including
 *   media types that carry extra parameters or are empty.
 * - Removes whitespace, so line-wrapped payloads are accepted.
 * - Converts the URL-safe alphabet ('-', '_') to the standard one ('+', '/').
 * - Rewrites trailing padding so the result length is a multiple of 4.
 *
 * Validation steps:
 * - Only base64 alphabet characters are allowed.
 * - '=' may appear only as a trailing run of at most two characters.
 * - The unpadded length must not leave a remainder of 1 (mod 4), since a
 *   single leftover character cannot encode a byte.
 *
 * @param base64 Raw base64 string, optionally wrapped in a data URL.
 * @returns Standard-alphabet, correctly padded base64.
 * @throws Error if the data is empty, contains invalid characters, has
 *   invalid padding, or has an invalid length.
 */
function validateAndNormalizeBase64(base64: string): string {
  let data = base64.trim();

  // Strip data URL prefix if present (e.g., "data:image/png;base64,...").
  // [^,]* tolerates parameters such as ";charset=utf-8"; [\s\S]* lets the
  // payload span multiple lines.
  const dataUrlMatch = data.match(/^data:[^,]*;base64,([\s\S]*)$/i);
  if (dataUrlMatch) {
    data = dataUrlMatch[1] ?? "";
  }

  // Whitespace (e.g. MIME-style line wrapping) carries no data.
  data = data.replace(/\s+/g, "");

  if (!data) {
    throw new Error("Base64 data is empty");
  }

  // Check for valid base64 characters
  // Node's Buffer.from silently ignores invalid chars, but Anthropic API rejects them
  if (!BASE64_REGEX.test(data)) {
    throw new Error("Base64 data contains invalid characters");
  }

  // Everything from the first '=' onward must be padding: at most two
  // characters, all of them '='. This also rejects '=' in the middle.
  const firstPad = data.indexOf("=");
  const body = firstPad === -1 ? data : data.slice(0, firstPad);
  const padding = firstPad === -1 ? "" : data.slice(firstPad);
  if (padding.length > 2 || /[^=]/.test(padding)) {
    throw new Error("Base64 data has invalid padding");
  }

  // A string made only of padding encodes nothing.
  if (!body) {
    throw new Error("Base64 data is empty");
  }

  const remainder = body.length % 4;
  if (remainder === 1) {
    // A single char remainder is always invalid in base64
    throw new Error("Base64 data has invalid length");
  }

  // Emit the canonical form: standard alphabet, with exactly the padding the
  // body length requires (missing padding added, surplus padding dropped).
  const standard = body.replace(/-/g, "+").replace(/_/g, "/");
  return remainder === 0 ? standard : standard + "=".repeat(4 - remainder);
}
67+
2068
function isImageBlock(block: unknown): block is ImageContentBlock {
2169
if (!block || typeof block !== "object") {
2270
return false;
@@ -160,8 +208,8 @@ export async function sanitizeContentBlocksImages(
160208
continue;
161209
}
162210

163-
const data = block.data.trim();
164-
if (!data) {
211+
const rawData = block.data.trim();
212+
if (!rawData) {
165213
out.push({
166214
type: "text",
167215
text: `[${label}] omitted empty image payload`,
@@ -170,6 +218,11 @@ export async function sanitizeContentBlocksImages(
170218
}
171219

172220
try {
221+
// Validate and normalize base64 before processing
222+
// This catches invalid base64 that Buffer.from() would silently accept
223+
// but Anthropic's API would reject, preventing permanent session corruption
224+
const data = validateAndNormalizeBase64(rawData);
225+
173226
const inferredMimeType = inferMimeTypeFromBase64(data);
174227
const mimeType = inferredMimeType ?? block.mimeType;
175228
const resized = await resizeImageBase64IfNeeded({

0 commit comments

Comments
 (0)