Skip to content

Commit 8ad0ca3

Browse files
authored
Subagents: stop retrying external completion timeouts (openclaw#41235) (openclaw#43847)
* Changelog: add subagent announce timeout note
* Tests: cover subagent completion timeout no-retry
* Subagents: stop retrying external completion timeouts
* Config: update subagent announce timeout default docs
* Tests: use fake timers for subagent timeout retry guard
1 parent 7844bc8 commit 8ad0ca3

File tree

4 files changed

+59
-9
lines changed

4 files changed

+59
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
3434
- Mattermost/block streaming: fix duplicate message delivery (one threaded, one top-level) when block streaming is active by excluding `replyToId` from the block reply dedup key and adding an explicit `threading` dock to the Mattermost plugin. (#41362) Thanks @mathiasnagler and @vincentkoc.
3535
- BlueBubbles/self-chat echo dedupe: drop reflected duplicate webhook copies only when a matching `fromMe` event was just seen for the same chat, body, and timestamp, preventing self-chat loops without broad webhook suppression. Related to #32166. (#38442) Thanks @vincentkoc.
3636
- Models/Kimi Coding: send `anthropic-messages` tools in native Anthropic format again so `kimi-coding` stops degrading tool calls into XML/plain-text pseudo invocations instead of real `tool_use` blocks. (#38669, #39907, #40552) Thanks @opriz.
37+
- Subagents/completion announce retries: raise the default announce timeout to 90 seconds and stop retrying gateway-timeout failures for externally delivered completion announces, preventing duplicate user-facing completion messages after slow gateway responses. Fixes #41235. Thanks @vasujain00 and @vincentkoc.
3738
- Sandbox/write: preserve pinned mutation-helper payload stdin so sandboxed `write` no longer reports success while creating empty files. (#43876) Thanks @glitch418x.
3839
- Gateway/main-session routing: keep TUI and other `mode:UI` main-session sends on the internal surface when `deliver` is enabled, so replies no longer inherit the session's persisted Telegram/WhatsApp route. (#43918) Thanks @obviyus.
3940
- Doctor/gateway service audit: canonicalize service entrypoint paths before comparing them so symlink-vs-realpath installs no longer trigger false "entrypoint does not match the current install" repair prompts. (#43882) Thanks @ngutman.

src/agents/subagent-announce.timeout.test.ts

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ type GatewayCall = {
88
};
99

1010
const gatewayCalls: GatewayCall[] = [];
11+
let callGatewayImpl: (request: GatewayCall) => Promise<unknown> = async (request) => {
12+
if (request.method === "chat.history") {
13+
return { messages: [] };
14+
}
15+
return {};
16+
};
1117
let sessionStore: Record<string, Record<string, unknown>> = {};
1218
let configOverride: ReturnType<(typeof import("../config/config.js"))["loadConfig"]> = {
1319
session: {
@@ -27,10 +33,7 @@ let fallbackRequesterResolution: {
2733
vi.mock("../gateway/call.js", () => ({
2834
callGateway: vi.fn(async (request: GatewayCall) => {
2935
gatewayCalls.push(request);
30-
if (request.method === "chat.history") {
31-
return { messages: [] };
32-
}
33-
return {};
36+
return await callGatewayImpl(request);
3437
}),
3538
}));
3639

@@ -120,6 +123,12 @@ function findGatewayCall(predicate: (call: GatewayCall) => boolean): GatewayCall
120123
describe("subagent announce timeout config", () => {
121124
beforeEach(() => {
122125
gatewayCalls.length = 0;
126+
callGatewayImpl = async (request) => {
127+
if (request.method === "chat.history") {
128+
return { messages: [] };
129+
}
130+
return {};
131+
};
123132
sessionStore = {};
124133
configOverride = {
125134
session: defaultSessionConfig,
@@ -131,13 +140,13 @@ describe("subagent announce timeout config", () => {
131140
fallbackRequesterResolution = null;
132141
});
133142

134-
it("uses 60s timeout by default for direct announce agent call", async () => {
143+
it("uses 90s timeout by default for direct announce agent call", async () => {
135144
await runAnnounceFlowForTest("run-default-timeout");
136145

137146
const directAgentCall = findGatewayCall(
138147
(call) => call.method === "agent" && call.expectFinal === true,
139148
);
140-
expect(directAgentCall?.timeoutMs).toBe(60_000);
149+
expect(directAgentCall?.timeoutMs).toBe(90_000);
141150
});
142151

143152
it("honors configured announce timeout for direct announce agent call", async () => {
@@ -166,6 +175,35 @@ describe("subagent announce timeout config", () => {
166175
expect(completionDirectAgentCall?.timeoutMs).toBe(90_000);
167176
});
168177

178+
it("does not retry gateway timeout for externally delivered completion announces", async () => {
179+
vi.useFakeTimers();
180+
try {
181+
callGatewayImpl = async (request) => {
182+
if (request.method === "chat.history") {
183+
return { messages: [] };
184+
}
185+
throw new Error("gateway timeout after 90000ms");
186+
};
187+
188+
await expect(
189+
runAnnounceFlowForTest("run-completion-timeout-no-retry", {
190+
requesterOrigin: {
191+
channel: "telegram",
192+
to: "12345",
193+
},
194+
expectsCompletionMessage: true,
195+
}),
196+
).resolves.toBe(false);
197+
198+
const directAgentCalls = gatewayCalls.filter(
199+
(call) => call.method === "agent" && call.expectFinal === true,
200+
);
201+
expect(directAgentCalls).toHaveLength(1);
202+
} finally {
203+
vi.useRealTimers();
204+
}
205+
});
206+
169207
it("regression, skips parent announce while descendants are still pending", async () => {
170208
requesterDepthResolver = () => 1;
171209
pendingDescendantRuns = 2;

src/agents/subagent-announce.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ import { isAnnounceSkip } from "./tools/sessions-send-helpers.js";
5151

5252
const FAST_TEST_MODE = process.env.OPENCLAW_TEST_FAST === "1";
5353
const FAST_TEST_RETRY_INTERVAL_MS = 8;
54-
const DEFAULT_SUBAGENT_ANNOUNCE_TIMEOUT_MS = 60_000;
54+
const DEFAULT_SUBAGENT_ANNOUNCE_TIMEOUT_MS = 90_000;
5555
const MAX_TIMER_SAFE_TIMEOUT_MS = 2_147_000_000;
56+
const GATEWAY_TIMEOUT_PATTERN = /gateway timeout/i;
5657
let subagentRegistryRuntimePromise: Promise<
5758
typeof import("./subagent-registry-runtime.js")
5859
> | null = null;
@@ -107,7 +108,7 @@ const TRANSIENT_ANNOUNCE_DELIVERY_ERROR_PATTERNS: readonly RegExp[] = [
107108
/no active .* listener/i,
108109
/gateway not connected/i,
109110
/gateway closed \(1006/i,
110-
/gateway timeout/i,
111+
GATEWAY_TIMEOUT_PATTERN,
111112
/\b(econnreset|econnrefused|etimedout|enotfound|ehostunreach|network error)\b/i,
112113
];
113114

@@ -133,6 +134,11 @@ function isTransientAnnounceDeliveryError(error: unknown): boolean {
133134
return TRANSIENT_ANNOUNCE_DELIVERY_ERROR_PATTERNS.some((re) => re.test(message));
134135
}
135136

137+
function isGatewayTimeoutError(error: unknown): boolean {
138+
const message = summarizeDeliveryError(error);
139+
return Boolean(message) && GATEWAY_TIMEOUT_PATTERN.test(message);
140+
}
141+
136142
async function waitForAnnounceRetryDelay(ms: number, signal?: AbortSignal): Promise<void> {
137143
if (ms <= 0) {
138144
return;
@@ -160,6 +166,7 @@ async function waitForAnnounceRetryDelay(ms: number, signal?: AbortSignal): Prom
160166

161167
async function runAnnounceDeliveryWithRetry<T>(params: {
162168
operation: string;
169+
noRetryOnGatewayTimeout?: boolean;
163170
signal?: AbortSignal;
164171
run: () => Promise<T>;
165172
}): Promise<T> {
@@ -171,6 +178,9 @@ async function runAnnounceDeliveryWithRetry<T>(params: {
171178
try {
172179
return await params.run();
173180
} catch (err) {
181+
if (params.noRetryOnGatewayTimeout && isGatewayTimeoutError(err)) {
182+
throw err;
183+
}
174184
const delayMs = DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS[retryIndex];
175185
if (delayMs == null || !isTransientAnnounceDeliveryError(err) || params.signal?.aborted) {
176186
throw err;
@@ -789,6 +799,7 @@ async function sendSubagentAnnounceDirectly(params: {
789799
operation: params.expectsCompletionMessage
790800
? "completion direct announce agent call"
791801
: "direct announce agent call",
802+
noRetryOnGatewayTimeout: params.expectsCompletionMessage && shouldDeliverExternally,
792803
signal: params.signal,
793804
run: async () =>
794805
await callGateway({

src/config/types.agent-defaults.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ export type AgentDefaultsConfig = {
279279
thinking?: string;
280280
/** Default run timeout in seconds for spawned sub-agents (0 = no timeout). */
281281
runTimeoutSeconds?: number;
282-
/** Gateway timeout in ms for sub-agent announce delivery calls (default: 60000). */
282+
/** Gateway timeout in ms for sub-agent announce delivery calls (default: 90000). */
283283
announceTimeoutMs?: number;
284284
};
285285
/** Optional sandbox settings for non-main sessions. */

0 commit comments

Comments
 (0)