Skip to content

Commit 24032dc

Browse files
authored
Reply: fix generated image delivery to Discord (openclaw#52489)
1 parent 6d34d62 commit 24032dc

File tree

10 files changed

+242
-4
lines changed

10 files changed

+242
-4
lines changed

extensions/discord/src/actions/runtime.messaging.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ export async function handleDiscordMessagingAction(
386386
...cfgOptions,
387387
...(accountId ? { accountId } : {}),
388388
mediaUrl,
389+
filename: filename ?? undefined,
389390
mediaLocalRoots: options?.mediaLocalRoots,
390391
replyTo,
391392
components,

extensions/discord/src/actions/runtime.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,28 @@ describe("handleDiscordMessagingAction", () => {
395395
);
396396
});
397397

398+
it("forwards the optional filename into sendMessageDiscord", async () => {
399+
sendMessageDiscord.mockClear();
400+
await handleDiscordMessagingAction(
401+
"sendMessage",
402+
{
403+
to: "channel:123",
404+
content: "hello",
405+
mediaUrl: "/tmp/generated-image",
406+
filename: "image.png",
407+
},
408+
enableAllActions,
409+
);
410+
expect(sendMessageDiscord).toHaveBeenCalledWith(
411+
"channel:123",
412+
"hello",
413+
expect.objectContaining({
414+
mediaUrl: "/tmp/generated-image",
415+
filename: "image.png",
416+
}),
417+
);
418+
});
419+
398420
it("rejects voice messages that include content", async () => {
399421
await expect(
400422
handleDiscordMessagingAction(

extensions/discord/src/send.outbound.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type DiscordSendOpts = {
4848
token?: string;
4949
accountId?: string;
5050
mediaUrl?: string;
51+
filename?: string;
5152
mediaLocalRoots?: readonly string[];
5253
verbose?: boolean;
5354
rest?: RequestClient;
@@ -214,6 +215,7 @@ export async function sendMessageDiscord(
214215
threadId,
215216
mediaCaption ?? "",
216217
opts.mediaUrl,
218+
opts.filename,
217219
opts.mediaLocalRoots,
218220
mediaMaxBytes,
219221
undefined,
@@ -275,6 +277,7 @@ export async function sendMessageDiscord(
275277
channelId,
276278
textWithMentions,
277279
opts.mediaUrl,
280+
opts.filename,
278281
opts.mediaLocalRoots,
279282
mediaMaxBytes,
280283
opts.replyTo,

extensions/discord/src/send.sends-basic-channel-messages.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,27 @@ describe("sendMessageDiscord", () => {
272272
);
273273
});
274274

275+
it("prefers the caller-provided filename for media attachments", async () => {
276+
const { rest, postMock } = makeDiscordRest();
277+
postMock.mockResolvedValue({ id: "msg", channel_id: "789" });
278+
279+
await sendMessageDiscord("channel:789", "photo", {
280+
rest,
281+
token: "t",
282+
mediaUrl: "file:///tmp/generated-image",
283+
filename: "renderable.png",
284+
});
285+
286+
expect(postMock).toHaveBeenCalledWith(
287+
Routes.channelMessages("789"),
288+
expect.objectContaining({
289+
body: expect.objectContaining({
290+
files: [expect.objectContaining({ name: "renderable.png" })],
291+
}),
292+
}),
293+
);
294+
});
295+
275296
it("uses configured discord mediaMaxMb for uploads", async () => {
276297
const { rest, postMock } = makeDiscordRest();
277298
postMock.mockResolvedValue({ id: "msg", channel_id: "789" });

extensions/discord/src/send.shared.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { Routes, type APIChannel, type APIEmbed } from "discord-api-types/v10";
1212
import { loadConfig, type OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
1313
import type { RetryRunner } from "openclaw/plugin-sdk/infra-runtime";
1414
import { buildOutboundMediaLoadOptions } from "openclaw/plugin-sdk/media-runtime";
15+
import { extensionForMime } from "openclaw/plugin-sdk/media-runtime";
1516
import {
1617
normalizePollDurationHours,
1718
normalizePollInput,
@@ -416,6 +417,7 @@ async function sendDiscordMedia(
416417
channelId: string,
417418
text: string,
418419
mediaUrl: string,
420+
filename: string | undefined,
419421
mediaLocalRoots: readonly string[] | undefined,
420422
maxBytes: number | undefined,
421423
replyTo: string | undefined,
@@ -430,6 +432,12 @@ async function sendDiscordMedia(
430432
mediaUrl,
431433
buildOutboundMediaLoadOptions({ maxBytes, mediaLocalRoots }),
432434
);
435+
const requestedFileName = filename?.trim();
436+
const resolvedFileName =
437+
requestedFileName ||
438+
media.fileName ||
439+
(media.contentType ? `upload${extensionForMime(media.contentType) ?? ""}` : "") ||
440+
"upload";
433441
const chunks = text ? buildDiscordTextChunks(text, { maxLinesPerMessage, chunkMode }) : [];
434442
const caption = chunks[0] ?? "";
435443
const messageReference = replyTo ? { message_id: replyTo, fail_if_not_exists: false } : undefined;
@@ -449,7 +457,7 @@ async function sendDiscordMedia(
449457
files: [
450458
{
451459
data: fileData,
452-
name: media.fileName ?? "upload",
460+
name: resolvedFileName,
453461
},
454462
],
455463
});

src/agents/tools/image-generate-tool.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@ function stubImageGenerationProviders() {
4343
generate: {
4444
maxCount: 4,
4545
supportsSize: true,
46+
supportsAspectRatio: true,
4647
},
4748
edit: {
4849
enabled: false,
4950
maxInputImages: 0,
5051
},
5152
geometry: {
5253
sizes: ["1024x1024", "1024x1536", "1536x1024"],
54+
aspectRatios: ["1:1", "16:9"],
5355
},
5456
},
5557
generateImage: vi.fn(async () => {
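
src/auto-reply/reply/reply-delivery.test.ts (file header missing from this capture; directory taken from the relative imports below, file name inferred)

Lines changed: 63 additions & 0 deletions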
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { describe, expect, it, vi } from "vitest";
2+
import { createBlockReplyDeliveryHandler } from "./reply-delivery.js";
3+
import type { TypingSignaler } from "./typing-mode.js";
4+
5+
describe("createBlockReplyDeliveryHandler", () => {
6+
it("sends media-bearing block replies even when block streaming is disabled", async () => {
7+
const onBlockReply = vi.fn(async () => {});
8+
const normalizeStreamingText = vi.fn((payload: { text?: string }) => ({
9+
text: payload.text,
10+
skip: false,
11+
}));
12+
const typingSignals = {
13+
signalTextDelta: vi.fn(async () => {}),
14+
} as unknown as TypingSignaler;
15+
16+
const handler = createBlockReplyDeliveryHandler({
17+
onBlockReply,
18+
normalizeStreamingText,
19+
applyReplyToMode: (payload) => payload,
20+
typingSignals,
21+
blockStreamingEnabled: false,
22+
blockReplyPipeline: null,
23+
directlySentBlockKeys: new Set(),
24+
});
25+
26+
await handler({
27+
text: "here's the vibe",
28+
mediaUrls: ["/tmp/generated.png"],
29+
replyToCurrent: true,
30+
});
31+
32+
expect(onBlockReply).toHaveBeenCalledWith({
33+
text: undefined,
34+
mediaUrl: "/tmp/generated.png",
35+
mediaUrls: ["/tmp/generated.png"],
36+
replyToCurrent: true,
37+
replyToId: undefined,
38+
replyToTag: undefined,
39+
audioAsVoice: false,
40+
});
41+
expect(typingSignals.signalTextDelta).toHaveBeenCalledWith("here's the vibe");
42+
});
43+
44+
it("keeps text-only block replies buffered when block streaming is disabled", async () => {
45+
const onBlockReply = vi.fn(async () => {});
46+
47+
const handler = createBlockReplyDeliveryHandler({
48+
onBlockReply,
49+
normalizeStreamingText: (payload) => ({ text: payload.text, skip: false }),
50+
applyReplyToMode: (payload) => payload,
51+
typingSignals: {
52+
signalTextDelta: vi.fn(async () => {}),
53+
} as unknown as TypingSignaler,
54+
blockStreamingEnabled: false,
55+
blockReplyPipeline: null,
56+
directlySentBlockKeys: new Set(),
57+
});
58+
59+
await handler({ text: "text only" });
60+
61+
expect(onBlockReply).not.toHaveBeenCalled();
62+
});
63+
});

src/auto-reply/reply/reply-delivery.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,12 @@ export function createBlockReplyDeliveryHandler(params: {
128128
// Track sent key to avoid duplicate in final payloads.
129129
params.directlySentBlockKeys.add(createBlockReplyContentKey(blockPayload));
130130
await params.onBlockReply(blockPayload);
131+
} else if (blockHasMedia) {
132+
// When block streaming is disabled, text-only block replies are accumulated into the
133+
// final response. Media cannot be reconstructed later, so send it immediately and let
134+
// the assistant's final text arrive through the normal final-reply path.
135+
await params.onBlockReply({ ...blockPayload, text: undefined });
131136
}
132-
// When streaming is disabled entirely, blocks are accumulated in final text instead.
137+
// When streaming is disabled entirely, text-only blocks are accumulated in final text.
133138
};
134139
}

src/image-generation/providers/openai.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,81 @@ describe("OpenAI image-generation provider", () => {
6767
});
6868
});
6969

70+
it("maps supported aspect ratios onto OpenAI size presets", async () => {
71+
vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
72+
apiKey: "sk-test",
73+
source: "env",
74+
mode: "api-key",
75+
});
76+
const fetchMock = vi.fn().mockResolvedValue({
77+
ok: true,
78+
json: async () => ({
79+
data: [{ b64_json: Buffer.from("png-data").toString("base64") }],
80+
}),
81+
});
82+
vi.stubGlobal("fetch", fetchMock);
83+
84+
const provider = buildOpenAIImageGenerationProvider();
85+
await provider.generateImage({
86+
provider: "openai",
87+
model: "gpt-image-1.5",
88+
prompt: "draw a portrait",
89+
aspectRatio: "9:16",
90+
cfg: {},
91+
authStore: { version: 1, profiles: {} },
92+
});
93+
94+
expect(fetchMock).toHaveBeenCalledWith(
95+
"https://api.openai.com/v1/images/generations",
96+
expect.objectContaining({
97+
body: JSON.stringify({
98+
model: "gpt-image-1.5",
99+
prompt: "draw a portrait",
100+
n: 1,
101+
size: "1024x1536",
102+
}),
103+
}),
104+
);
105+
});
106+
107+
it("prefers an explicit size over aspect ratio mapping", async () => {
108+
vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
109+
apiKey: "sk-test",
110+
source: "env",
111+
mode: "api-key",
112+
});
113+
const fetchMock = vi.fn().mockResolvedValue({
114+
ok: true,
115+
json: async () => ({
116+
data: [{ b64_json: Buffer.from("png-data").toString("base64") }],
117+
}),
118+
});
119+
vi.stubGlobal("fetch", fetchMock);
120+
121+
const provider = buildOpenAIImageGenerationProvider();
122+
await provider.generateImage({
123+
provider: "openai",
124+
model: "gpt-image-1.5",
125+
prompt: "draw a landscape",
126+
size: "1024x1024",
127+
aspectRatio: "16:9",
128+
cfg: {},
129+
authStore: { version: 1, profiles: {} },
130+
});
131+
132+
expect(fetchMock).toHaveBeenCalledWith(
133+
"https://api.openai.com/v1/images/generations",
134+
expect.objectContaining({
135+
body: JSON.stringify({
136+
model: "gpt-image-1.5",
137+
prompt: "draw a landscape",
138+
n: 1,
139+
size: "1024x1024",
140+
}),
141+
}),
142+
);
143+
});
144+
70145
it("rejects reference-image edits for now", async () => {
71146
const provider = buildOpenAIImageGenerationProvider();
72147

src/image-generation/providers/openai.ts

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,18 @@ const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1";
66
const DEFAULT_OUTPUT_MIME = "image/png";
77
const DEFAULT_SIZE = "1024x1024";
88
const OPENAI_SUPPORTED_SIZES = ["1024x1024", "1024x1536", "1536x1024"] as const;
9+
const OPENAI_SUPPORTED_ASPECT_RATIOS = [
10+
"1:1",
11+
"2:3",
12+
"3:2",
13+
"3:4",
14+
"4:3",
15+
"4:5",
16+
"5:4",
17+
"9:16",
18+
"16:9",
19+
"21:9",
20+
] as const;
921

1022
type OpenAIImageApiResponse = {
1123
data?: Array<{
@@ -19,6 +31,31 @@ function resolveOpenAIBaseUrl(cfg: Parameters<typeof resolveApiKeyForProvider>[0
1931
return direct || DEFAULT_OPENAI_IMAGE_BASE_URL;
2032
}
2133

34+
function resolveOpenAISize(params: { size?: string; aspectRatio?: string }): string {
35+
const explicitSize = params.size?.trim();
36+
if (explicitSize) {
37+
return explicitSize;
38+
}
39+
40+
switch (params.aspectRatio?.trim()) {
41+
case "1:1":
42+
return "1024x1024";
43+
case "2:3":
44+
case "3:4":
45+
case "4:5":
46+
case "9:16":
47+
return "1024x1536";
48+
case "3:2":
49+
case "4:3":
50+
case "5:4":
51+
case "16:9":
52+
case "21:9":
53+
return "1536x1024";
54+
default:
55+
return DEFAULT_SIZE;
56+
}
57+
}
58+
2259
export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlugin {
2360
return {
2461
id: "openai",
@@ -29,7 +66,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlu
2966
generate: {
3067
maxCount: 4,
3168
supportsSize: true,
32-
supportsAspectRatio: false,
69+
supportsAspectRatio: true,
3370
supportsResolution: false,
3471
},
3572
edit: {
@@ -42,6 +79,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlu
4279
},
4380
geometry: {
4481
sizes: [...OPENAI_SUPPORTED_SIZES],
82+
aspectRatios: [...OPENAI_SUPPORTED_ASPECT_RATIOS],
4583
},
4684
},
4785
async generateImage(req) {
@@ -75,7 +113,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlu
75113
model: req.model || DEFAULT_OPENAI_IMAGE_MODEL,
76114
prompt: req.prompt,
77115
n: req.count ?? 1,
78-
size: req.size ?? DEFAULT_SIZE,
116+
size: resolveOpenAISize({ size: req.size, aspectRatio: req.aspectRatio }),
79117
}),
80118
signal: controller.signal,
81119
}).finally(() => {

0 commit comments

Comments
 (0)