Skip to content

Commit f9de2ac

Browse files
committed
fix(discord): retry outbound sends on transient network errors
Previously, Discord reply delivery only retried on HTTP 429/5xx status codes. Network-level failures (TypeError: fetch failed, ECONNRESET, etc.) were not retried, causing silent message loss on transient network issues. This adds network error detection to both the reply delivery retry predicate and the shared Discord retry runner, matching the pattern already used by the Telegram retry policy. Fixes #5152
1 parent 80e7da9 commit f9de2ac

File tree

4 files changed

+127
-3
lines changed

4 files changed

+127
-3
lines changed

src/discord/monitor/reply-delivery.test.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,77 @@ describe("deliverDiscordReply", () => {
354354
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(2);
355355
});
356356

357+
it("retries bot send on network TypeError then succeeds", async () => {
358+
sendMessageDiscordMock
359+
.mockRejectedValueOnce(new TypeError("fetch failed"))
360+
.mockResolvedValueOnce({ messageId: "msg-1", channelId: "channel-1" });
361+
362+
await deliverDiscordReply({
363+
replies: [{ text: "retry me" }],
364+
target: "channel:123",
365+
token: "token",
366+
runtime,
367+
cfg,
368+
textLimit: 2000,
369+
});
370+
371+
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(2);
372+
});
373+
374+
it("retries bot send on ECONNRESET then succeeds", async () => {
375+
const networkErr = Object.assign(new Error("socket hang up"), { code: "ECONNRESET" });
376+
sendMessageDiscordMock
377+
.mockRejectedValueOnce(networkErr)
378+
.mockResolvedValueOnce({ messageId: "msg-1", channelId: "channel-1" });
379+
380+
await deliverDiscordReply({
381+
replies: [{ text: "retry me" }],
382+
target: "channel:123",
383+
token: "token",
384+
runtime,
385+
cfg,
386+
textLimit: 2000,
387+
});
388+
389+
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(2);
390+
});
391+
392+
it("does not retry on 401 auth error", async () => {
393+
const authErr = Object.assign(new Error("unauthorized"), { status: 401 });
394+
sendMessageDiscordMock.mockRejectedValueOnce(authErr);
395+
396+
await expect(
397+
deliverDiscordReply({
398+
replies: [{ text: "fail" }],
399+
target: "channel:123",
400+
token: "token",
401+
runtime,
402+
cfg,
403+
textLimit: 2000,
404+
}),
405+
).rejects.toThrow("unauthorized");
406+
407+
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(1);
408+
});
409+
410+
it("does not retry on 400 validation error", async () => {
411+
const validationErr = Object.assign(new Error("bad request"), { status: 400 });
412+
sendMessageDiscordMock.mockRejectedValueOnce(validationErr);
413+
414+
await expect(
415+
deliverDiscordReply({
416+
replies: [{ text: "fail" }],
417+
target: "channel:123",
418+
token: "token",
419+
runtime,
420+
cfg,
421+
textLimit: 2000,
422+
}),
423+
).rejects.toThrow("bad request");
424+
425+
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(1);
426+
});
427+
357428
it("does not retry on 4xx client errors", async () => {
358429
const clientErr = Object.assign(new Error("bad request"), { status: 400 });
359430
sendMessageDiscordMock.mockRejectedValueOnce(clientErr);
@@ -372,6 +443,23 @@ describe("deliverDiscordReply", () => {
372443
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(1);
373444
});
374445

446+
it("exhausts retries on persistent network failure", async () => {
447+
sendMessageDiscordMock.mockRejectedValue(new TypeError("fetch failed"));
448+
449+
await expect(
450+
deliverDiscordReply({
451+
replies: [{ text: "persistent failure" }],
452+
target: "channel:123",
453+
token: "token",
454+
runtime,
455+
cfg,
456+
textLimit: 2000,
457+
}),
458+
).rejects.toThrow("fetch failed");
459+
460+
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(3);
461+
});
462+
375463
it("throws after exhausting retry attempts", async () => {
376464
const rateLimitErr = Object.assign(new Error("rate limited"), { status: 429 });
377465
sendMessageDiscordMock.mockRejectedValue(rateLimitErr);

src/discord/monitor/reply-delivery.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ import type { ChunkMode } from "../../auto-reply/chunk.js";
44
import type { ReplyPayload } from "../../auto-reply/types.js";
55
import type { OpenClawConfig } from "../../config/config.js";
66
import type { MarkdownTableMode, ReplyToMode } from "../../config/types.base.js";
7-
import { createDiscordRetryRunner, type RetryRunner } from "../../infra/retry-policy.js";
7+
import {
8+
createDiscordRetryRunner,
9+
isRetryableDiscordNetworkError,
10+
type RetryRunner,
11+
} from "../../infra/retry-policy.js";
812
import { resolveRetryConfig, retryAsync, type RetryConfig } from "../../infra/retry.js";
913
import { convertMarkdownTables } from "../../markdown/tables.js";
1014
import type { RuntimeEnv } from "../../runtime.js";
@@ -37,6 +41,9 @@ const DISCORD_DELIVERY_RETRY_DEFAULTS: ResolvedRetryConfig = {
3741
};
3842

3943
function isRetryableDiscordError(err: unknown): boolean {
44+
if (isRetryableDiscordNetworkError(err)) {
45+
return true;
46+
}
4047
const status = (err as { status?: number }).status ?? (err as { statusCode?: number }).statusCode;
4148
return status === 429 || (status !== undefined && status >= 500);
4249
}

src/infra/retry-policy.test.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
import { describe, expect, it, vi } from "vitest";
2-
import { createTelegramRetryRunner } from "./retry-policy.js";
2+
import { createDiscordRetryRunner, createTelegramRetryRunner } from "./retry-policy.js";
3+
4+
describe("createDiscordRetryRunner", () => {
5+
it("retries network errors", async () => {
6+
const fn = vi.fn().mockRejectedValueOnce(new TypeError("fetch failed")).mockResolvedValue("ok");
7+
const runner = createDiscordRetryRunner({
8+
retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 },
9+
});
10+
11+
await expect(runner(fn, "discord send")).resolves.toBe("ok");
12+
expect(fn).toHaveBeenCalledTimes(2);
13+
});
14+
});
315

416
describe("createTelegramRetryRunner", () => {
517
describe("strictShouldRetry", () => {

src/infra/retry-policy.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ export const TELEGRAM_RETRY_DEFAULTS = {
2020
};
2121

2222
const TELEGRAM_RETRY_RE = /429|timeout|connect|reset|closed|unavailable|temporarily/i;
23+
const DISCORD_NETWORK_ERROR_RE = /fetch failed|network/i;
24+
const DISCORD_RETRYABLE_ERROR_CODES = new Set([
25+
"ECONNRESET",
26+
"ECONNREFUSED",
27+
"ETIMEDOUT",
28+
"UND_ERR_CONNECT_TIMEOUT",
29+
]);
2330
const log = createSubsystemLogger("retry-policy");
2431

2532
function resolveTelegramShouldRetry(params: {
@@ -58,6 +65,16 @@ function getTelegramRetryAfterMs(err: unknown): number | undefined {
5865
return typeof candidate === "number" && Number.isFinite(candidate) ? candidate * 1000 : undefined;
5966
}
6067

68+
export function isRetryableDiscordNetworkError(err: unknown): boolean {
69+
if (!(err instanceof Error)) {
70+
return false;
71+
}
72+
if (typeof (err as { code?: unknown }).code === "string") {
73+
return DISCORD_RETRYABLE_ERROR_CODES.has((err as { code: string }).code);
74+
}
75+
return err instanceof TypeError && DISCORD_NETWORK_ERROR_RE.test(err.message);
76+
}
77+
6178
export function createDiscordRetryRunner(params: {
6279
retry?: RetryConfig;
6380
configRetry?: RetryConfig;
@@ -71,7 +88,7 @@ export function createDiscordRetryRunner(params: {
7188
retryAsync(fn, {
7289
...retryConfig,
7390
label,
74-
shouldRetry: (err) => err instanceof RateLimitError,
91+
shouldRetry: (err) => err instanceof RateLimitError || isRetryableDiscordNetworkError(err),
7592
retryAfterMs: (err) => (err instanceof RateLimitError ? err.retryAfter * 1000 : undefined),
7693
onRetry: params.verbose
7794
? (info) => {

0 commit comments

Comments
 (0)