Skip to content

Commit dd07c06

Browse files
committed
fix: tighten gateway restart loop handling (#23416) (thanks @jeffwnli)
1 parent 26acb77 commit dd07c06

File tree

6 files changed

+67
-14
lines changed

6 files changed

+67
-14
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
1818

1919
### Fixes
2020

21+
- Gateway/Restart: fix restart-loop edge cases by keeping `openclaw.mjs -> dist/entry.js` bootstrap detection explicit, reacquiring the gateway lock for in-process restart fallback paths, and tightening restart-loop regression coverage. (#23416) Thanks @jeffwnli.
2122
- Security/Audit: add `openclaw security audit` detection for open group policies that expose runtime/filesystem tools without sandbox/workspace guards (`security.exposure.open_groups_with_runtime_or_fs`).
2223
- Security/Exec env: block request-scoped `HOME` and `ZDOTDIR` overrides in host exec env sanitizers (Node + macOS), preventing shell startup-file execution before allowlist-evaluated command bodies. This ships in the next npm release. Thanks @tdjackey for reporting.
2324
- Security/Gateway: emit a startup security warning when insecure/dangerous config flags are enabled (including `gateway.controlUi.dangerouslyDisableDeviceAuth=true`) and point operators to `openclaw security audit`.

src/cli/gateway-cli/run-loop.test.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@ const markGatewaySigusr1RestartHandled = vi.fn();
1111
const getActiveTaskCount = vi.fn(() => 0);
1212
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true }));
1313
const resetAllLanes = vi.fn();
14-
const restartGatewayProcessWithFreshPid = vi.fn(() => ({ mode: "skipped" as const }));
14+
const restartGatewayProcessWithFreshPid = vi.fn<
15+
() => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string }
16+
>(() => ({ mode: "disabled" }));
1517
const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart";
1618
const gatewayLog = {
1719
info: vi.fn(),
@@ -30,8 +32,7 @@ vi.mock("../../infra/restart.js", () => ({
3032
}));
3133

3234
vi.mock("../../infra/process-respawn.js", () => ({
33-
restartGatewayProcessWithFreshPid: (...args: unknown[]) =>
34-
restartGatewayProcessWithFreshPid(...args),
35+
restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(),
3536
}));
3637

3738
vi.mock("../../process/command-queue.js", () => ({
@@ -140,6 +141,7 @@ describe("runGatewayLoop", () => {
140141
});
141142
expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2);
142143
expect(resetAllLanes).toHaveBeenCalledTimes(2);
144+
expect(acquireGatewayLock).toHaveBeenCalledTimes(3);
143145
} finally {
144146
removeNewSignalListeners("SIGTERM", beforeSigterm);
145147
removeNewSignalListeners("SIGINT", beforeSigint);
@@ -153,8 +155,6 @@ describe("runGatewayLoop", () => {
153155
const lockRelease = vi.fn(async () => {});
154156
acquireGatewayLock.mockResolvedValueOnce({
155157
release: lockRelease,
156-
lockPath: "/tmp/test.lock",
157-
configPath: "/test/openclaw.json",
158158
});
159159

160160
// Override process-respawn to return "spawned" mode

src/cli/gateway-cli/run-loop.ts

Lines changed: 31 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ export async function runGatewayLoop(params: {
2323
start: () => Promise<Awaited<ReturnType<typeof startGatewayServer>>>;
2424
runtime: typeof defaultRuntime;
2525
}) {
26-
const lock = await acquireGatewayLock();
26+
let lock = await acquireGatewayLock();
2727
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
2828
let shuttingDown = false;
2929
let restartResolver: (() => void) | null = null;
@@ -83,8 +83,12 @@ export async function runGatewayLoop(params: {
8383
clearTimeout(forceExitTimer);
8484
server = null;
8585
if (isRestart) {
86+
const hadLock = lock != null;
8687
// Release the lock BEFORE spawning so the child can acquire it immediately.
87-
await lock?.release();
88+
if (lock) {
89+
await lock.release();
90+
lock = null;
91+
}
8892
const respawn = restartGatewayProcessWithFreshPid();
8993
if (respawn.mode === "spawned" || respawn.mode === "supervised") {
9094
const modeLabel =
@@ -102,11 +106,29 @@ export async function runGatewayLoop(params: {
102106
} else {
103107
gatewayLog.info("restart mode: in-process restart (OPENCLAW_NO_RESPAWN)");
104108
}
105-
shuttingDown = false;
106-
restartResolver?.();
109+
let canContinueInProcessRestart = true;
110+
if (hadLock) {
111+
try {
112+
lock = await acquireGatewayLock();
113+
} catch (err) {
114+
gatewayLog.error(
115+
`failed to reacquire gateway lock for in-process restart: ${String(err)}`,
116+
);
117+
cleanupSignals();
118+
params.runtime.exit(1);
119+
canContinueInProcessRestart = false;
120+
}
121+
}
122+
if (canContinueInProcessRestart) {
123+
shuttingDown = false;
124+
restartResolver?.();
125+
}
107126
}
108127
} else {
109-
await lock?.release();
128+
if (lock) {
129+
await lock.release();
130+
lock = null;
131+
}
110132
cleanupSignals();
111133
params.runtime.exit(0);
112134
}
@@ -161,7 +183,10 @@ export async function runGatewayLoop(params: {
161183
});
162184
}
163185
} finally {
164-
await lock?.release();
186+
if (lock) {
187+
await lock.release();
188+
lock = null;
189+
}
165190
cleanupSignals();
166191
}
167192
}

src/infra/infra-parsing.test.ts

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,17 @@ describe("infra parsing", () => {
5656
).toBe(true);
5757
});
5858

59+
it("returns true for dist/entry.js when launched via openclaw.mjs wrapper", () => {
60+
expect(
61+
isMainModule({
62+
currentFile: "/repo/dist/entry.js",
63+
argv: ["node", "/repo/openclaw.mjs"],
64+
cwd: "/repo",
65+
env: {},
66+
}),
67+
).toBe(true);
68+
});
69+
5970
it("returns false when running under PM2 but this module is imported", () => {
6071
expect(
6172
isMainModule({

src/infra/is-main.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,16 @@ export function isMainModule({
4141
return true;
4242
}
4343

44+
// The published/open-source wrapper binary is openclaw.mjs, which then imports
45+
// dist/entry.js. Treat that pair as the main module so entry bootstrap runs.
46+
if (normalizedCurrent && normalizedArgv1) {
47+
const currentBase = path.basename(normalizedCurrent);
48+
const argvBase = path.basename(normalizedArgv1);
49+
if (currentBase === "entry.js" && (argvBase === "openclaw.mjs" || argvBase === "openclaw.js")) {
50+
return true;
51+
}
52+
}
53+
4454
// Fallback: basename match (relative paths, symlinked bins).
4555
if (
4656
normalizedCurrent &&

src/shared/pid-alive.test.ts

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,22 @@ describe("isPidAlive", () => {
3131
});
3232

3333
// Override platform to linux so the zombie check runs
34-
const originalPlatform = process.platform;
35-
Object.defineProperty(process, "platform", { value: "linux", writable: true });
34+
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
35+
if (!originalPlatformDescriptor) {
36+
throw new Error("missing process.platform descriptor");
37+
}
38+
Object.defineProperty(process, "platform", {
39+
...originalPlatformDescriptor,
40+
value: "linux",
41+
});
3642

3743
try {
3844
// Re-import the module so it picks up the mocked platform and fs
3945
vi.resetModules();
4046
const { isPidAlive: freshIsPidAlive } = await import("./pid-alive.js");
4147
expect(freshIsPidAlive(zombiePid)).toBe(false);
4248
} finally {
43-
Object.defineProperty(process, "platform", { value: originalPlatform, writable: true });
49+
Object.defineProperty(process, "platform", originalPlatformDescriptor);
4450
vi.restoreAllMocks();
4551
}
4652
});

0 commit comments

Comments
 (0)