Skip to content

Commit d56ffe7

Browse files
committed
fix: improve WS handshake reliability on slow-startup environments (#48736)
On Windows with large dist bundles (46MB/639 files), heavy synchronous module loading blocks the event loop during CLI startup, preventing timely processing of the connect.challenge frame and causing ~80% handshake timeout failures.

Changes:
- Yield to the event loop (setImmediate) before starting the WS connection in callGateway, to let pending I/O drain after heavy module loading
- Add an OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS env var override for the client-side connect challenge timeout (the server already has OPENCLAW_HANDSHAKE_TIMEOUT_MS)
- Include diagnostic timing in challenge timeout error messages (elapsed vs. limit) for easier debugging
- Add tests for the env var override and resolution logic
1 parent 02d66fb commit d56ffe7

File tree

4 files changed

+60
-4
lines changed

4 files changed

+60
-4
lines changed

src/gateway/call.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,11 @@ async function executeGatewayRequestWithScopes<T>(params: {
800800
}): Promise<T> {
801801
const { opts, scopes, url, token, password, tlsFingerprint, timeoutMs, safeTimerTimeoutMs } =
802802
params;
803+
// Yield to the event loop before starting the WebSocket connection.
804+
// On Windows with large dist bundles, heavy synchronous module loading
805+
// can starve the event loop, preventing timely processing of the
806+
// connect.challenge frame and causing handshake timeouts (#48736).
807+
await new Promise<void>((r) => setImmediate(r));
803808
return await new Promise<T>((resolve, reject) => {
804809
let settled = false;
805810
let ignoreClose = false;

src/gateway/client.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -727,12 +727,18 @@ export class GatewayClient {
727727

728728
private armConnectChallengeTimeout() {
729729
const connectChallengeTimeoutMs = resolveGatewayClientConnectChallengeTimeoutMs(this.opts);
730+
const armedAt = Date.now();
730731
this.clearConnectChallengeTimeout();
731732
this.connectTimer = setTimeout(() => {
732733
if (this.connectSent || this.ws?.readyState !== WebSocket.OPEN) {
733734
return;
734735
}
735-
this.opts.onConnectError?.(new Error("gateway connect challenge timeout"));
736+
const elapsedMs = Date.now() - armedAt;
737+
this.opts.onConnectError?.(
738+
new Error(
739+
`gateway connect challenge timeout (waited ${elapsedMs}ms, limit ${connectChallengeTimeoutMs}ms)`,
740+
),
741+
);
736742
this.ws?.close(1008, "connect challenge timeout");
737743
}, connectChallengeTimeoutMs);
738744
}

src/gateway/handshake-timeouts.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { describe, expect, test } from "vitest";
22
import {
33
clampConnectChallengeTimeoutMs,
44
DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS,
5+
getConnectChallengeTimeoutMsFromEnv,
56
getPreauthHandshakeTimeoutMsFromEnv,
67
MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
78
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
@@ -34,4 +35,30 @@ describe("gateway handshake timeouts", () => {
3435
}),
3536
).toBe(20);
3637
});
38+
39+
test("getConnectChallengeTimeoutMsFromEnv reads OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS", () => {
40+
expect(getConnectChallengeTimeoutMsFromEnv({})).toBeUndefined();
41+
expect(
42+
getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "15000" }),
43+
).toBe(15_000);
44+
expect(
45+
getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "garbage" }),
46+
).toBeUndefined();
47+
});
48+
49+
test("resolveConnectChallengeTimeoutMs falls back to env override", () => {
50+
const original = process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
51+
try {
52+
process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS = "5000";
53+
expect(resolveConnectChallengeTimeoutMs()).toBe(5_000);
54+
// Explicit value still takes precedence over env
55+
expect(resolveConnectChallengeTimeoutMs(3_000)).toBe(3_000);
56+
} finally {
57+
if (original === undefined) {
58+
delete process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
59+
} else {
60+
process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS = original;
61+
}
62+
}
63+
});
3764
});

src/gateway/handshake-timeouts.ts

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,28 @@ export function clampConnectChallengeTimeoutMs(timeoutMs: number): number {
99
);
1010
}
1111

12+
export function getConnectChallengeTimeoutMsFromEnv(
13+
env: NodeJS.ProcessEnv = process.env,
14+
): number | undefined {
15+
const raw = env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
16+
if (raw) {
17+
const parsed = Number(raw);
18+
if (Number.isFinite(parsed) && parsed > 0) {
19+
return parsed;
20+
}
21+
}
22+
return undefined;
23+
}
24+
1225
export function resolveConnectChallengeTimeoutMs(timeoutMs?: number | null): number {
13-
return typeof timeoutMs === "number" && Number.isFinite(timeoutMs)
14-
? clampConnectChallengeTimeoutMs(timeoutMs)
15-
: DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
26+
if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
27+
return clampConnectChallengeTimeoutMs(timeoutMs);
28+
}
29+
const envOverride = getConnectChallengeTimeoutMsFromEnv();
30+
if (envOverride !== undefined) {
31+
return clampConnectChallengeTimeoutMs(envOverride);
32+
}
33+
return DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
1634
}
1735

1836
export function getPreauthHandshakeTimeoutMsFromEnv(env: NodeJS.ProcessEnv = process.env): number {

0 commit comments

Comments
 (0)