Skip to content

Commit aae90cb

Browse files
authored
fix(telegram): include replied media files in reply context (#28488)
* fix(telegram): include replied media files in reply context
* fix(telegram): keep reply media fields nullable
* perf(telegram): defer reply-media fetch to debounce flush
* fix(telegram): gate and preserve reply media attachments
* fix(telegram): preserve cached-sticker reply media context
* fix: update changelog for telegram reply-media context fixes (#28488) (thanks @obviyus)
1 parent a7929ab commit aae90cb

10 files changed

+376
-30
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
3737
- Typing/Cross-channel leakage: unify run-scoped typing suppression for cross-channel/internal-webchat routes, preserve current inbound origin as embedded run message channel context, harden shared typing keepalive with consecutive-failure circuit breaker edge-case handling, and enforce dispatcher completion/idle waits in extension dispatcher callsites (Feishu, Matrix, Mattermost, MSTeams) so typing indicators always clean up on success/error paths. Related: #27647, #27493, #27598. Supersedes/replaces draft PRs: #27640, #27593, #27540.
3838
- Telegram/sendChatAction 401 handling: add bounded exponential backoff + temporary local typing suppression after repeated unauthorized failures to stop unbounded `sendChatAction` retry loops that can trigger Telegram abuse enforcement and bot deletion. (#27415) Thanks @widingmarcus-cyber.
3939
- Telegram/Webhook startup: clarify webhook config guidance, allow `channels.telegram.webhookPort: 0` for ephemeral listener binding, and log both the local listener URL and Telegram-advertised webhook URL with the bound port. (#25732) thanks @huntharo.
40+
- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
4041
- Config/Doctor allowlist safety: reject `dmPolicy: "allowlist"` configs with empty `allowFrom`, add Telegram account-level inheritance-aware validation, and teach `openclaw doctor --fix` to restore missing `allowFrom` entries from pairing-store files when present, preventing silent DM drops after upgrades. (#27936) Thanks @widingmarcus-cyber.
4142
- Browser/Chrome extension handshake: bind relay WS message handling before `onopen` and add non-blocking `connect.challenge` response handling for gateway-style handshake frames, avoiding stuck `…` badge states when challenge frames arrive immediately on connect. Landed from contributor PR #22571 by @pandego. (#22553)
4243
- Browser/Extension relay init: dedupe concurrent same-port relay startup with shared in-flight initialization promises so callers await one startup lifecycle and receive consistent success/failure results. Landed from contributor PR #21277 by @HOYALIM. (Related #20688)

src/auto-reply/templating.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ export type MsgContext = {
8989
MediaTypes?: string[];
9090
/** Telegram sticker metadata (emoji, set name, file IDs, cached description). */
9191
Sticker?: StickerMetadata;
92+
/** True when current-turn sticker media is present in MediaPaths (false for cached-description path). */
93+
StickerMediaIncluded?: boolean;
9294
OutputDir?: string;
9395
OutputBase?: string;
9496
/** Remote host for SCP when media lives on a different machine (e.g., user@remote-host). */

src/telegram/bot-handlers.ts

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ function hasInboundMedia(msg: Message): boolean {
8181
);
8282
}
8383

84+
/**
 * Reports whether the message this update replies to carries media.
 *
 * The reply target is `reply_to_message` when present, otherwise the
 * `external_reply` field (read through a cast because it is not on the
 * `Message` type used here). Media detection is delegated to
 * `hasInboundMedia`.
 */
function hasReplyTargetMedia(msg: Message): boolean {
  const target =
    msg.reply_to_message ?? (msg as Message & { external_reply?: Message }).external_reply;
  if (!target) {
    return false;
  }
  return hasInboundMedia(target);
}
89+
90+
/**
 * Picks the Telegram `file_id` of the media attached to a message.
 *
 * Candidates are checked in a fixed priority order: sticker, photo, video,
 * video note, document, audio, voice. For photos the last entry of the
 * `photo` array is used.
 * NOTE(review): Telegram appears to list photo sizes smallest-first, which
 * would make the last entry the largest; confirm against the Bot API docs.
 *
 * @returns the first available file id, or `undefined` when the message has
 *   none of the supported media kinds.
 */
function resolveInboundMediaFileId(msg: Message): string | undefined {
  const lastPhotoSize = msg.photo?.[msg.photo.length - 1];
  const candidatesByPriority = [
    msg.sticker?.file_id,
    lastPhotoSize?.file_id,
    msg.video?.file_id,
    msg.video_note?.file_id,
    msg.document?.file_id,
    msg.audio?.file_id,
    msg.voice?.file_id,
  ];
  for (const fileId of candidatesByPriority) {
    if (fileId != null) {
      return fileId;
    }
  }
  return undefined;
}
101+
84102
export const registerTelegramHandlers = ({
85103
cfg,
86104
accountId,
@@ -198,7 +216,8 @@ export const registerTelegramHandlers = ({
198216
return;
199217
}
200218
if (entries.length === 1) {
201-
await processMessage(last.ctx, last.allMedia, last.storeAllowFrom);
219+
const replyMedia = await resolveReplyMediaForMessage(last.ctx, last.msg);
220+
await processMessage(last.ctx, last.allMedia, last.storeAllowFrom, undefined, replyMedia);
202221
return;
203222
}
204223
const combinedText = entries
@@ -217,11 +236,14 @@ export const registerTelegramHandlers = ({
217236
date: last.msg.date ?? first.msg.date,
218237
});
219238
const messageIdOverride = last.msg.message_id ? String(last.msg.message_id) : undefined;
239+
const syntheticCtx = buildSyntheticContext(baseCtx, syntheticMessage);
240+
const replyMedia = await resolveReplyMediaForMessage(baseCtx, syntheticMessage);
220241
await processMessage(
221-
buildSyntheticContext(baseCtx, syntheticMessage),
242+
syntheticCtx,
222243
combinedMedia,
223244
first.storeAllowFrom,
224245
messageIdOverride ? { messageIdOverride } : undefined,
246+
replyMedia,
225247
);
226248
},
227249
onError: (err) => {
@@ -336,7 +358,8 @@ export const registerTelegramHandlers = ({
336358
}
337359

338360
const storeAllowFrom = await loadStoreAllowFrom();
339-
await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom);
361+
const replyMedia = await resolveReplyMediaForMessage(primaryEntry.ctx, primaryEntry.msg);
362+
await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom, undefined, replyMedia);
340363
} catch (err) {
341364
runtime.error?.(danger(`media group handler failed: ${String(err)}`));
342365
}
@@ -398,6 +421,45 @@ export const registerTelegramHandlers = ({
398421
const loadStoreAllowFrom = async () =>
399422
readChannelAllowFromStore("telegram", process.env, accountId).catch(() => []);
400423

424+
/**
 * Downloads the media attached to the message that `msg` replies to, so it
 * can be passed to `processMessage` as reply context.
 *
 * Only `msg.reply_to_message` is consulted. Resolution is best-effort: a
 * missing reply, a reply without media, an unresolvable file id, an empty
 * `resolveMedia` result, or a thrown error all yield an empty array (errors
 * are logged as a warning, never rethrown).
 *
 * @param ctx Telegram context; only `ctx.me` is read here.
 * @param msg Inbound (or synthetic) message whose reply target is inspected.
 * @returns zero or one media reference describing the replied-to attachment.
 */
const resolveReplyMediaForMessage = async (
  ctx: TelegramContext,
  msg: Message,
): Promise<TelegramMediaRef[]> => {
  const repliedTo = msg.reply_to_message;
  const fileId =
    repliedTo && hasInboundMedia(repliedTo) ? resolveInboundMediaFileId(repliedTo) : undefined;
  if (!repliedTo || !fileId) {
    return [];
  }

  try {
    const resolved = await resolveMedia(
      {
        message: repliedTo,
        me: ctx.me,
        // The reply target is not the current update, so the file lookup
        // goes through the bot API with the replied message's own file id.
        getFile: async () => await bot.api.getFile(fileId),
      },
      mediaMaxBytes,
      opts.token,
      opts.proxyFetch,
    );
    if (!resolved) {
      return [];
    }
    const { path, contentType, stickerMetadata } = resolved;
    return [{ path, contentType, stickerMetadata }];
  } catch (err) {
    // Reply media is supplementary context; a failed fetch must not block
    // processing of the message itself.
    logger.warn({ chatId: msg.chat.id, error: String(err) }, "reply media fetch failed");
    return [];
  }
};
462+
401463
const isAllowlistAuthorized = (
402464
allow: NormalizedAllowFrom,
403465
senderId: string,
@@ -1301,7 +1363,7 @@ export const registerTelegramHandlers = ({
13011363
return;
13021364
}
13031365

1304-
if (!event.isGroup && hasInboundMedia(event.msg)) {
1366+
if (!event.isGroup && (hasInboundMedia(event.msg) || hasReplyTargetMedia(event.msg))) {
13051367
const dmAuthorized = await enforceTelegramDmAccess({
13061368
isGroup: event.isGroup,
13071369
dmPolicy,

src/telegram/bot-message-context.ts

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ type ResolveGroupRequireMention = (chatId: string | number) => boolean;
101101
export type BuildTelegramMessageContextParams = {
102102
primaryCtx: TelegramContext;
103103
allMedia: TelegramMediaRef[];
104+
replyMedia?: TelegramMediaRef[];
104105
storeAllowFrom: string[];
105106
options?: TelegramMessageContextOptions;
106107
bot: Bot;
@@ -143,6 +144,7 @@ async function resolveStickerVisionSupport(params: {
143144
export const buildTelegramMessageContext = async ({
144145
primaryCtx,
145146
allMedia,
147+
replyMedia = [],
146148
storeAllowFrom,
147149
options,
148150
bot,
@@ -640,6 +642,8 @@ export const buildTelegramMessageContext = async ({
640642
timestamp: entry.timestamp,
641643
}))
642644
: undefined;
645+
const currentMediaForContext = stickerCacheHit ? [] : allMedia;
646+
const contextMedia = [...currentMediaForContext, ...replyMedia];
643647
const ctxPayload = finalizeInboundContext({
644648
Body: combinedBody,
645649
// Agent prompt should be the raw user text only; metadata/context is provided via system prompt.
@@ -685,26 +689,18 @@ export const buildTelegramMessageContext = async ({
685689
ForwardedDate: forwardOrigin?.date ? forwardOrigin.date * 1000 : undefined,
686690
Timestamp: msg.date ? msg.date * 1000 : undefined,
687691
WasMentioned: isGroup ? effectiveWasMentioned : undefined,
688-
// Filter out cached stickers from media - their description is already in the message body
689-
MediaPath: stickerCacheHit ? undefined : allMedia[0]?.path,
690-
MediaType: stickerCacheHit ? undefined : allMedia[0]?.contentType,
691-
MediaUrl: stickerCacheHit ? undefined : allMedia[0]?.path,
692-
MediaPaths: stickerCacheHit
693-
? undefined
694-
: allMedia.length > 0
695-
? allMedia.map((m) => m.path)
696-
: undefined,
697-
MediaUrls: stickerCacheHit
698-
? undefined
699-
: allMedia.length > 0
700-
? allMedia.map((m) => m.path)
701-
: undefined,
702-
MediaTypes: stickerCacheHit
703-
? undefined
704-
: allMedia.length > 0
705-
? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
692+
// Filter out cached stickers from current-message media; reply media is still valid context.
693+
MediaPath: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
694+
MediaType: contextMedia.length > 0 ? contextMedia[0]?.contentType : undefined,
695+
MediaUrl: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
696+
MediaPaths: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
697+
MediaUrls: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
698+
MediaTypes:
699+
contextMedia.length > 0
700+
? (contextMedia.map((m) => m.contentType).filter(Boolean) as string[])
706701
: undefined,
707702
Sticker: allMedia[0]?.stickerMetadata,
703+
StickerMediaIncluded: allMedia[0]?.stickerMetadata ? !stickerCacheHit : undefined,
708704
...(locationData ? toLocationContext(locationData) : undefined),
709705
CommandAuthorized: commandAuthorized,
710706
// For groups: use resolved forum topic id; for DMs: use raw messageThreadId
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { describe, expect, it } from "vitest";
2+
import { pruneStickerMediaFromContext } from "./bot-message-dispatch.js";
3+
4+
describe("pruneStickerMediaFromContext", () => {
  const sticker = { path: "/tmp/sticker.webp", type: "image/webp" };
  const replied = { path: "/tmp/replied.jpg", type: "image/jpeg" };

  /** Builds a context payload whose singular fields mirror the first attachment. */
  const buildCtx = (...attachments: Array<{ path: string; type: string }>) => {
    const paths = attachments.map((attachment) => attachment.path);
    const types = attachments.map((attachment) => attachment.type);
    return {
      MediaPath: paths[0],
      MediaUrl: paths[0],
      MediaType: types[0],
      MediaPaths: paths,
      MediaUrls: [...paths],
      MediaTypes: types,
    };
  };

  /** Asserts that the replied image is the one and only attachment left in the payload. */
  const expectOnlyRepliedMedia = (ctx: ReturnType<typeof buildCtx>) => {
    expect(ctx.MediaPath).toBe(replied.path);
    expect(ctx.MediaUrl).toBe(replied.path);
    expect(ctx.MediaType).toBe(replied.type);
    expect(ctx.MediaPaths).toEqual([replied.path]);
    expect(ctx.MediaUrls).toEqual([replied.path]);
    expect(ctx.MediaTypes).toEqual([replied.type]);
  };

  it("preserves appended reply media while removing primary sticker media", () => {
    const ctx = buildCtx(sticker, replied);

    pruneStickerMediaFromContext(ctx);

    expectOnlyRepliedMedia(ctx);
  });

  it("clears media fields when sticker is the only media", () => {
    const ctx = buildCtx(sticker);

    pruneStickerMediaFromContext(ctx);

    expect(ctx.MediaPath).toBeUndefined();
    expect(ctx.MediaUrl).toBeUndefined();
    expect(ctx.MediaType).toBeUndefined();
    expect(ctx.MediaPaths).toBeUndefined();
    expect(ctx.MediaUrls).toBeUndefined();
    expect(ctx.MediaTypes).toBeUndefined();
  });

  it("does not prune when sticker media is already omitted from context", () => {
    const ctx = buildCtx(replied);

    pruneStickerMediaFromContext(ctx, { stickerMediaIncluded: false });

    expectOnlyRepliedMedia(ctx);
  });
});

src/telegram/bot-message-dispatch.ts

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,37 @@ async function resolveStickerVisionSupport(cfg: OpenClawConfig, agentId: string)
6060
}
6161
}
6262

63+
/**
 * Returns `values` without its first entry, or `undefined` when `values` is
 * not an array or nothing remains after dropping the first entry.
 */
function dropLeadingMediaEntry(values: string[] | undefined): string[] | undefined {
  if (!Array.isArray(values)) {
    return undefined;
  }
  const remaining = values.slice(1);
  return remaining.length > 0 ? remaining : undefined;
}

/**
 * Removes the leading attachment from a context payload's media fields while
 * keeping any attachments that follow it (e.g. replied-to media appended
 * after the current message's sticker).
 *
 * The payload is mutated in place:
 * - `MediaPaths`, `MediaUrls` and `MediaTypes` each lose their first entry and
 *   become `undefined` when that leaves them empty (the original arrays are
 *   not modified; fresh arrays are assigned).
 * - `MediaPath`, `MediaUrl` and `MediaType` are re-derived from the first
 *   remaining entry; `MediaUrl` falls back to `MediaPath` when `MediaUrls` is
 *   absent.
 *
 * This assumes the sticker occupies index 0 of every list. Each list is
 * sliced independently, so callers must keep the three lists index-aligned.
 *
 * @param ctxPayload Media fields of the inbound context to update.
 * @param opts.stickerMediaIncluded Pass `false` when the sticker was never
 *   added to the media lists; the payload is then left untouched. Any other
 *   value (including omission) prunes the first entry.
 */
export function pruneStickerMediaFromContext(
  ctxPayload: {
    MediaPath?: string;
    MediaUrl?: string;
    MediaType?: string;
    MediaPaths?: string[];
    MediaUrls?: string[];
    MediaTypes?: string[];
  },
  opts?: { stickerMediaIncluded?: boolean },
): void {
  if (opts?.stickerMediaIncluded === false) {
    return;
  }

  ctxPayload.MediaPaths = dropLeadingMediaEntry(ctxPayload.MediaPaths);
  ctxPayload.MediaUrls = dropLeadingMediaEntry(ctxPayload.MediaUrls);
  ctxPayload.MediaTypes = dropLeadingMediaEntry(ctxPayload.MediaTypes);

  // Singular fields always mirror the head of the (possibly now absent) lists.
  ctxPayload.MediaPath = ctxPayload.MediaPaths?.[0];
  ctxPayload.MediaUrl = ctxPayload.MediaUrls?.[0] ?? ctxPayload.MediaPath;
  ctxPayload.MediaType = ctxPayload.MediaTypes?.[0];
}
93+
6394
type DispatchTelegramMessageParams = {
6495
context: TelegramMessageContext;
6596
bot: Bot;
@@ -311,13 +342,10 @@ export const dispatchTelegramMessage = async ({
311342
// Update context to use description instead of image
312343
ctxPayload.Body = formattedDesc;
313344
ctxPayload.BodyForAgent = formattedDesc;
314-
// Clear media paths so native vision doesn't process the image again
315-
ctxPayload.MediaPath = undefined;
316-
ctxPayload.MediaType = undefined;
317-
ctxPayload.MediaUrl = undefined;
318-
ctxPayload.MediaPaths = undefined;
319-
ctxPayload.MediaUrls = undefined;
320-
ctxPayload.MediaTypes = undefined;
345+
// Drop only the sticker attachment; keep replied media context if present.
346+
pruneStickerMediaFromContext(ctxPayload, {
347+
stickerMediaIncluded: ctxPayload.StickerMediaIncluded,
348+
});
321349
}
322350

323351
// Cache the description for future encounters

src/telegram/bot-message.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ export const createTelegramMessageProcessor = (deps: TelegramMessageProcessorDep
5252
allMedia: TelegramMediaRef[],
5353
storeAllowFrom: string[],
5454
options?: { messageIdOverride?: string; forceWasMentioned?: boolean },
55+
replyMedia?: TelegramMediaRef[],
5556
) => {
5657
const context = await buildTelegramMessageContext({
5758
primaryCtx,
5859
allMedia,
60+
replyMedia,
5961
storeAllowFrom,
6062
options,
6163
bot,

src/telegram/bot-native-commands.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import { resolveThreadSessionKeys } from "../routing/session-key.js";
4242
import type { RuntimeEnv } from "../runtime.js";
4343
import { withTelegramApiErrorLogging } from "./api-logging.js";
4444
import { isSenderAllowed, normalizeDmAllowFromWithStore } from "./bot-access.js";
45+
import type { TelegramMediaRef } from "./bot-message-context.js";
4546
import {
4647
buildCappedTelegramMenuCommands,
4748
buildPluginTelegramMenuCommands,
@@ -101,12 +102,13 @@ export type RegisterTelegramHandlerParams = {
101102
shouldSkipUpdate: (ctx: TelegramUpdateKeyContext) => boolean;
102103
processMessage: (
103104
ctx: TelegramContext,
104-
allMedia: Array<{ path: string; contentType?: string }>,
105+
allMedia: TelegramMediaRef[],
105106
storeAllowFrom: string[],
106107
options?: {
107108
messageIdOverride?: string;
108109
forceWasMentioned?: boolean;
109110
},
111+
replyMedia?: TelegramMediaRef[],
110112
) => Promise<void>;
111113
logger: ReturnType<typeof getChildLogger>;
112114
};

src/telegram/bot.create-telegram-bot.test-harness.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ export const getMeSpy: AnyAsyncMock = vi.fn(async () => ({
120120
export const sendMessageSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 77 }));
121121
export const sendAnimationSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 78 }));
122122
export const sendPhotoSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 79 }));
123+
export const getFileSpy: AnyAsyncMock = vi.fn(async () => ({ file_path: "media/file.jpg" }));
123124

124125
type ApiStub = {
125126
config: { use: (arg: unknown) => void };
@@ -132,6 +133,7 @@ type ApiStub = {
132133
sendMessage: typeof sendMessageSpy;
133134
sendAnimation: typeof sendAnimationSpy;
134135
sendPhoto: typeof sendPhotoSpy;
136+
getFile: typeof getFileSpy;
135137
};
136138

137139
const apiStub: ApiStub = {
@@ -145,6 +147,7 @@ const apiStub: ApiStub = {
145147
sendMessage: sendMessageSpy,
146148
sendAnimation: sendAnimationSpy,
147149
sendPhoto: sendPhotoSpy,
150+
getFile: getFileSpy,
148151
};
149152

150153
vi.mock("grammy", () => ({
@@ -290,6 +293,8 @@ beforeEach(() => {
290293
sendPhotoSpy.mockResolvedValue({ message_id: 79 });
291294
sendMessageSpy.mockReset();
292295
sendMessageSpy.mockResolvedValue({ message_id: 77 });
296+
getFileSpy.mockReset();
297+
getFileSpy.mockResolvedValue({ file_path: "media/file.jpg" });
293298

294299
setMessageReactionSpy.mockReset();
295300
setMessageReactionSpy.mockResolvedValue(undefined);

0 commit comments

Comments
 (0)