Skip to content

Commit 9521e61

Browse files
authored
Gateway: follow up HEIC input image handling (openclaw#38146)
* Media: scope HEIC MIME sniffing
* Media: hermeticize HEIC input tests
* Gateway: fix HEIC image budget accounting
* Gateway: add HEIC image budget regression test
* Changelog: note HEIC follow-up fix
1 parent f9d86b9 commit 9521e61

File tree

5 files changed

+116
-10
lines changed

5 files changed

+116
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Docs: https://docs.openclaw.ai
138138
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
139139
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
140140
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
141+
- Gateway/HEIC input follow-up: keep non-HEIC `input_image` MIME handling unchanged, make HEIC tests hermetic, and enforce chat-completions `maxTotalImageBytes` against post-normalization image payload size. Thanks @vincentkoc.
141142
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
142143
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
143144
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { beforeEach, describe, expect, it, vi } from "vitest";
2+
3+
const extractImageContentFromSourceMock = vi.fn();
4+
5+
vi.mock("../media/input-files.js", async (importOriginal) => {
6+
const actual = await importOriginal<typeof import("../media/input-files.js")>();
7+
return {
8+
...actual,
9+
extractImageContentFromSource: (...args: unknown[]) =>
10+
extractImageContentFromSourceMock(...args),
11+
};
12+
});
13+
14+
import { __testOnlyOpenAiHttp } from "./openai-http.js";
15+
16+
describe("openai image budget accounting", () => {
17+
beforeEach(() => {
18+
vi.clearAllMocks();
19+
});
20+
21+
it("counts normalized base64 image bytes against maxTotalImageBytes", async () => {
22+
extractImageContentFromSourceMock.mockResolvedValueOnce({
23+
type: "image",
24+
data: Buffer.alloc(10, 1).toString("base64"),
25+
mimeType: "image/jpeg",
26+
});
27+
28+
const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
29+
maxTotalImageBytes: 5,
30+
});
31+
32+
await expect(
33+
__testOnlyOpenAiHttp.resolveImagesForRequest(
34+
{
35+
urls: ["data:image/heic;base64,QUJD"],
36+
},
37+
limits,
38+
),
39+
).rejects.toThrow(/Total image payload too large/);
40+
});
41+
42+
it("does not double-count unchanged base64 image payloads", async () => {
43+
extractImageContentFromSourceMock.mockResolvedValueOnce({
44+
type: "image",
45+
data: "QUJDRA==",
46+
mimeType: "image/jpeg",
47+
});
48+
49+
const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
50+
maxTotalImageBytes: 4,
51+
});
52+
53+
await expect(
54+
__testOnlyOpenAiHttp.resolveImagesForRequest(
55+
{
56+
urls: ["data:image/jpeg;base64,QUJDRA=="],
57+
},
58+
limits,
59+
),
60+
).resolves.toEqual([
61+
{
62+
type: "image",
63+
data: "QUJDRA==",
64+
mimeType: "image/jpeg",
65+
},
66+
]);
67+
});
68+
});

src/gateway/openai-http.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -300,18 +300,16 @@ async function resolveImagesForRequest(
300300
for (const url of urls) {
301301
const source = parseImageUrlToSource(url);
302302
if (source.type === "base64") {
303-
totalBytes += estimateBase64DecodedBytes(source.data);
304-
if (totalBytes > limits.maxTotalImageBytes) {
303+
const sourceBytes = estimateBase64DecodedBytes(source.data);
304+
if (totalBytes + sourceBytes > limits.maxTotalImageBytes) {
305305
throw new Error(
306-
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
306+
`Total image payload too large (${totalBytes + sourceBytes}; limit ${limits.maxTotalImageBytes})`,
307307
);
308308
}
309309
}
310310

311311
const image = await extractImageContentFromSource(source, limits.images);
312-
if (source.type !== "base64") {
313-
totalBytes += estimateBase64DecodedBytes(image.data);
314-
}
312+
totalBytes += estimateBase64DecodedBytes(image.data);
315313
if (totalBytes > limits.maxTotalImageBytes) {
316314
throw new Error(
317315
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
@@ -322,6 +320,11 @@ async function resolveImagesForRequest(
322320
return images;
323321
}
324322

323+
export const __testOnlyOpenAiHttp = {
324+
resolveImagesForRequest,
325+
resolveOpenAiChatCompletionsLimits,
326+
};
327+
325328
function buildAgentPrompt(
326329
messagesUnknown: unknown,
327330
activeUserMessageIndex: number,

src/media/input-files.fetch-guard.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
22

33
const fetchWithSsrFGuardMock = vi.fn();
44
const convertHeicToJpegMock = vi.fn();
5+
const detectMimeMock = vi.fn();
56

67
vi.mock("../infra/net/fetch-guard.js", () => ({
78
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
@@ -11,6 +12,10 @@ vi.mock("./image-ops.js", () => ({
1112
convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
1213
}));
1314

15+
vi.mock("./mime.js", () => ({
16+
detectMime: (...args: unknown[]) => detectMimeMock(...args),
17+
}));
18+
1419
async function waitForMicrotaskTurn(): Promise<void> {
1520
await new Promise<void>((resolve) => queueMicrotask(resolve));
1621
}
@@ -31,6 +36,7 @@ beforeEach(() => {
3136
describe("HEIC input image normalization", () => {
3237
it("converts base64 HEIC images to JPEG before returning them", async () => {
3338
const normalized = Buffer.from("jpeg-normalized");
39+
detectMimeMock.mockResolvedValueOnce("image/heic");
3440
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
3541

3642
const image = await extractImageContentFromSource(
@@ -67,6 +73,7 @@ describe("HEIC input image normalization", () => {
6773
finalUrl: "https://example.com/photo.heic",
6874
});
6975
const normalized = Buffer.from("jpeg-url-normalized");
76+
detectMimeMock.mockResolvedValueOnce("image/heic");
7077
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
7178

7279
const image = await extractImageContentFromSource(
@@ -91,6 +98,31 @@ describe("HEIC input image normalization", () => {
9198
});
9299
expect(release).toHaveBeenCalledTimes(1);
93100
});
101+
102+
it("keeps declared MIME for non-HEIC images without sniffing", async () => {
103+
const image = await extractImageContentFromSource(
104+
{
105+
type: "base64",
106+
data: Buffer.from("png-like").toString("base64"),
107+
mediaType: "image/png",
108+
},
109+
{
110+
allowUrl: false,
111+
allowedMimes: new Set(["image/png"]),
112+
maxBytes: 1024 * 1024,
113+
maxRedirects: 0,
114+
timeoutMs: 1,
115+
},
116+
);
117+
118+
expect(detectMimeMock).not.toHaveBeenCalled();
119+
expect(convertHeicToJpegMock).not.toHaveBeenCalled();
120+
expect(image).toEqual({
121+
type: "image",
122+
data: Buffer.from("png-like").toString("base64"),
123+
mimeType: "image/png",
124+
});
125+
});
94126
});
95127

96128
describe("fetchWithGuard", () => {

src/media/input-files.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,12 @@ async function normalizeInputImage(params: {
234234
mimeType?: string;
235235
limits: InputImageLimits;
236236
}): Promise<InputImageContent> {
237-
const sourceMime =
238-
normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ??
239-
normalizeMimeType(params.mimeType) ??
240-
"application/octet-stream";
237+
const declaredMime = normalizeMimeType(params.mimeType) ?? "application/octet-stream";
238+
const sourceMime = HEIC_INPUT_IMAGE_MIMES.has(declaredMime)
239+
? (normalizeMimeType(
240+
await detectMime({ buffer: params.buffer, headerMime: params.mimeType }),
241+
) ?? declaredMime)
242+
: declaredMime;
241243
if (!params.limits.allowedMimes.has(sourceMime)) {
242244
throw new Error(`Unsupported image MIME type: ${sourceMime}`);
243245
}

0 commit comments

Comments
 (0)