Skip to content

Commit b975711

Browse files
frankekn and gumadeiras authored
fix(daemon): stabilize LaunchAgent restart and proxy env passthrough (#27276)
Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: b08797a Co-authored-by: frankekn <[email protected]> Co-authored-by: gumadeiras <[email protected]> Reviewed-by: @gumadeiras
1 parent 96c7702 commit b975711

File tree

7 files changed

+334
-5
lines changed

7 files changed

+334
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
1010

1111
### Fixes
1212

13+
- Daemon/macOS launchd: forward proxy env vars into supervised service environments, switch LaunchAgent keepalive policy to crash-only with throttling, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) Thanks @frankekn.
1314
- Android/Node invoke: remove native gateway WebSocket `Origin` header to avoid false origin rejections, unify invoke command registry/policy/error parsing paths, and keep command availability checks centralized to reduce dispatcher/advertisement drift. (#27257) Thanks @obviyus.
1415
- CI/Windows: shard the Windows `checks-windows` test lane into two matrix jobs and honor explicit shard index overrides in `scripts/test-parallel.mjs` to reduce CI critical-path wall time. (#27234) Thanks @joshavant.
1516

src/daemon/launchd-plist.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import fs from "node:fs/promises";
22

3+
// Seconds written as the <integer> value of the plist's ThrottleInterval key by buildLaunchAgentPlist.
const LAUNCHD_THROTTLE_INTERVAL_SECONDS = 5;
4+
35
const plistEscape = (value: string): string =>
46
value
57
.replaceAll("&", "&amp;")
@@ -106,5 +108,5 @@ export function buildLaunchAgentPlist({
106108
? `\n <key>Comment</key>\n <string>${plistEscape(comment.trim())}</string>`
107109
: "";
108110
const envXml = renderEnvDict(environment);
109-
return `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n<plist version="1.0">\n <dict>\n <key>Label</key>\n <string>${plistEscape(label)}</string>\n ${commentXml}\n <key>RunAtLoad</key>\n <true/>\n <key>KeepAlive</key>\n <true/>\n <key>ProgramArguments</key>\n <array>${argsXml}\n </array>\n ${workingDirXml}\n <key>StandardOutPath</key>\n <string>${plistEscape(stdoutPath)}</string>\n <key>StandardErrorPath</key>\n <string>${plistEscape(stderrPath)}</string>${envXml}\n </dict>\n</plist>\n`;
111+
return `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n<plist version="1.0">\n <dict>\n <key>Label</key>\n <string>${plistEscape(label)}</string>\n ${commentXml}\n <key>RunAtLoad</key>\n <true/>\n <key>KeepAlive</key>\n <dict>\n <key>SuccessfulExit</key>\n <false/>\n </dict>\n <key>ThrottleInterval</key>\n <integer>${LAUNCHD_THROTTLE_INTERVAL_SECONDS}</integer>\n <key>ProgramArguments</key>\n <array>${argsXml}\n </array>\n ${workingDirXml}\n <key>StandardOutPath</key>\n <string>${plistEscape(stdoutPath)}</string>\n <key>StandardErrorPath</key>\n <string>${plistEscape(stderrPath)}</string>${envXml}\n </dict>\n</plist>\n`;
110112
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { spawnSync } from "node:child_process";
2+
import { randomUUID } from "node:crypto";
3+
import fs from "node:fs/promises";
4+
import os from "node:os";
5+
import path from "node:path";
6+
import { PassThrough } from "node:stream";
7+
import { afterAll, beforeAll, describe, expect, it } from "vitest";
8+
import {
9+
installLaunchAgent,
10+
readLaunchAgentRuntime,
11+
restartLaunchAgent,
12+
resolveLaunchAgentPlistPath,
13+
uninstallLaunchAgent,
14+
} from "./launchd.js";
15+
import type { GatewayServiceEnv } from "./service-types.js";
16+
17+
// Delay, in milliseconds, between successive polls in waitForRunningRuntime.
const WAIT_INTERVAL_MS = 200;
18+
// Default polling budget, in milliseconds, for waitForRunningRuntime when the caller passes no timeoutMs.
const WAIT_TIMEOUT_MS = 15_000;
19+
20+
/**
 * Reports whether the real-launchd integration suite can run on this host.
 *
 * The answer is `true` only when the platform is macOS, `process.getuid` is
 * available, and `launchctl print gui/<uid>` both spawns without error and
 * exits with status 0.
 *
 * @returns `true` when the probe succeeds, `false` in every other case.
 */
function canRunLaunchdIntegration(): boolean {
  // The gui domain is addressed by numeric uid, so both preconditions are required.
  if (process.platform !== "darwin" || typeof process.getuid !== "function") {
    return false;
  }
  const guiDomain = `gui/${process.getuid()}`;
  const { error, status } = spawnSync("launchctl", ["print", guiDomain], { encoding: "utf8" });
  // A spawn failure (e.g. launchctl missing) counts the same as a non-zero exit.
  return !error && status === 0;
}
34+
35+
// Registers the suite with `describe` when the launchctl probe passes; otherwise uses `describe.skip`.
const describeLaunchdIntegration = canRunLaunchdIntegration() ? describe : describe.skip;
36+
37+
/**
 * Polls `readLaunchAgentRuntime` until the service reports a running process.
 *
 * A runtime is accepted when its status is `"running"`, its pid is a number
 * greater than 1, and (if `pidNot` is given) the pid differs from `pidNot`.
 *
 * @param params.env - Service environment passed to `readLaunchAgentRuntime` on each poll.
 * @param params.pidNot - Optional pid that must NOT be reported by the accepted runtime.
 * @param params.timeoutMs - Polling budget in milliseconds; defaults to `WAIT_TIMEOUT_MS`.
 * @returns The pid of the first accepted runtime.
 * @throws Error describing the last observed status and pid once the budget is exhausted.
 */
async function waitForRunningRuntime(params: {
  env: GatewayServiceEnv;
  pidNot?: number;
  timeoutMs?: number;
}): Promise<{ pid: number }> {
  const { env, pidNot } = params;
  const stopAt = Date.now() + (params.timeoutMs ?? WAIT_TIMEOUT_MS);
  let observedStatus = "unknown";
  let observedPid: number | undefined;

  while (Date.now() < stopAt) {
    const { status, pid } = await readLaunchAgentRuntime(env);
    observedStatus = status;
    observedPid = pid;

    // `pid` is a number here, so comparing against an undefined `pidNot` always passes.
    if (status === "running" && typeof pid === "number" && pid > 1 && pid !== pidNot) {
      return { pid };
    }

    await new Promise((wake) => {
      setTimeout(wake, WAIT_INTERVAL_MS);
    });
  }

  const pidLabel = observedPid ?? "none";
  throw new Error(
    `Timed out waiting for launchd runtime (status=${observedStatus}, pid=${pidLabel})`,
  );
}
66+
67+
// End-to-end suite: installs a throwaway LaunchAgent under a temporary HOME,
// restarts it through restartLaunchAgent, and removes it again afterwards.
describeLaunchdIntegration("launchd integration", () => {
  const sink = new PassThrough();
  let serviceEnv: GatewayServiceEnv | undefined;
  let sandboxHome = "";

  beforeAll(async () => {
    // A short random suffix keeps the label, log prefix and temp dir unique per run.
    const suffix = randomUUID().slice(0, 8);
    sandboxHome = await fs.mkdtemp(path.join(os.tmpdir(), `openclaw-launchd-int-${suffix}-`));
    serviceEnv = {
      HOME: sandboxHome,
      OPENCLAW_LAUNCHD_LABEL: `ai.openclaw.launchd-int-${suffix}`,
      OPENCLAW_LOG_PREFIX: `gateway-launchd-int-${suffix}`,
    };
    // The supervised program is a Node one-liner that just keeps its event loop alive.
    await installLaunchAgent({
      env: serviceEnv,
      stdout: sink,
      programArguments: [process.execPath, "-e", "setInterval(() => {}, 1000);"],
    });
    await waitForRunningRuntime({ env: serviceEnv });
  }, 30_000);

  afterAll(async () => {
    if (serviceEnv !== undefined) {
      try {
        await uninstallLaunchAgent({ env: serviceEnv, stdout: sink });
      } catch {
        // Best-effort cleanup in case launchctl state already changed.
      }
    }
    if (sandboxHome !== "") {
      await fs.rm(sandboxHome, { recursive: true, force: true });
    }
  }, 30_000);

  it("restarts launchd service and keeps it running with a new pid", async () => {
    if (serviceEnv === undefined) {
      throw new Error("launchd integration env was not initialized");
    }
    const { pid: pidBefore } = await waitForRunningRuntime({ env: serviceEnv });
    await restartLaunchAgent({ env: serviceEnv, stdout: sink });
    const { pid: pidAfter } = await waitForRunningRuntime({
      env: serviceEnv,
      pidNot: pidBefore,
    });
    expect(pidAfter).toBeGreaterThan(1);
    expect(pidAfter).not.toBe(pidBefore);
    // fs.access rejects if the plist is missing after the restart.
    await fs.access(resolveLaunchAgentPlistPath(serviceEnv));
  }, 30_000);
});

src/daemon/launchd.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ import {
55
isLaunchAgentListed,
66
parseLaunchctlPrint,
77
repairLaunchAgentBootstrap,
8+
restartLaunchAgent,
89
resolveLaunchAgentPlistPath,
910
} from "./launchd.js";
1011

1112
const state = vi.hoisted(() => ({
1213
launchctlCalls: [] as string[][],
1314
listOutput: "",
15+
printOutput: "",
1416
bootstrapError: "",
1517
dirs: new Set<string>(),
1618
files: new Map<string, string>(),
@@ -35,6 +37,9 @@ vi.mock("./exec-file.js", () => ({
3537
if (call[0] === "list") {
3638
return { stdout: state.listOutput, stderr: "", code: 0 };
3739
}
40+
if (call[0] === "print") {
41+
return { stdout: state.printOutput, stderr: "", code: 0 };
42+
}
3843
if (call[0] === "bootstrap" && state.bootstrapError) {
3944
return { stdout: "", stderr: state.bootstrapError, code: 1 };
4045
}
@@ -71,6 +76,7 @@ vi.mock("node:fs/promises", async (importOriginal) => {
7176
beforeEach(() => {
7277
state.launchctlCalls.length = 0;
7378
state.listOutput = "";
79+
state.printOutput = "";
7480
state.bootstrapError = "";
7581
state.dirs.clear();
7682
state.files.clear();
@@ -179,6 +185,86 @@ describe("launchd install", () => {
179185
expect(plist).toContain(`<string>${tmpDir}</string>`);
180186
});
181187

188+
// Checks the generated plist for the KeepAlive/SuccessfulExit=false policy and a 5 second ThrottleInterval.
it("writes crash-only KeepAlive policy with throttle interval", async () => {
  const env = createDefaultLaunchdEnv();
  await installLaunchAgent({
    env,
    stdout: new PassThrough(),
    programArguments: defaultProgramArguments,
  });

  // The mocked fs layer records written files in `state.files`, keyed by path.
  const writtenPlist = state.files.get(resolveLaunchAgentPlistPath(env)) ?? "";
  const requiredFragments = [
    "<key>KeepAlive</key>",
    "<key>SuccessfulExit</key>",
    "<false/>",
    "<key>ThrottleInterval</key>",
    "<integer>5</integer>",
  ];
  for (const fragment of requiredFragments) {
    expect(writtenPlist).toContain(fragment);
  }
});
204+
205+
// Verifies the recorded launchctl calls contain bootout, bootstrap and kickstart, in that order.
it("restarts LaunchAgent with bootout-bootstrap-kickstart order", async () => {
  const env = createDefaultLaunchdEnv();
  await restartLaunchAgent({ env, stdout: new PassThrough() });

  const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
  const label = "ai.openclaw.gateway";
  const plistPath = resolveLaunchAgentPlistPath(env);
  const serviceTarget = `${domain}/${label}`;

  // Index of the first recorded launchctl invocation satisfying the predicate, or -1.
  const firstCallIndex = (matches: (call: string[]) => boolean): number =>
    state.launchctlCalls.findIndex(matches);

  const bootoutAt = firstCallIndex(
    ([verb, target]) => verb === "bootout" && target === serviceTarget,
  );
  const bootstrapAt = firstCallIndex(
    ([verb, target, file]) => verb === "bootstrap" && target === domain && file === plistPath,
  );
  const kickstartAt = firstCallIndex(
    ([verb, flag, target]) => verb === "kickstart" && flag === "-k" && target === serviceTarget,
  );

  for (const position of [bootoutAt, bootstrapAt, kickstartAt]) {
    expect(position).toBeGreaterThanOrEqual(0);
  }
  expect(bootoutAt).toBeLessThan(bootstrapAt);
  expect(bootstrapAt).toBeLessThan(kickstartAt);
});
231+
232+
// Simulates a running service (pid 4242) whose liveness probe succeeds once and then
// fails with ESRCH, and checks that bootout is recorded before bootstrap.
it("waits for previous launchd pid to exit before bootstrapping", async () => {
  const env = createDefaultLaunchdEnv();
  state.printOutput = ["state = running", "pid = 4242"].join("\n");

  // First probe: process still alive. Second probe: process gone.
  const killSpy = vi
    .spyOn(process, "kill")
    .mockImplementationOnce(() => true)
    .mockImplementationOnce(() => {
      const gone = new Error("no such process") as NodeJS.ErrnoException;
      gone.code = "ESRCH";
      throw gone;
    });

  vi.useFakeTimers();
  try {
    const pendingRestart = restartLaunchAgent({ env, stdout: new PassThrough() });
    // Advance fake time so the pending poll delay elapses without real waiting.
    await vi.advanceTimersByTimeAsync(250);
    await pendingRestart;

    expect(killSpy).toHaveBeenCalledWith(4242, 0);

    const domain = typeof process.getuid === "function" ? `gui/${process.getuid()}` : "gui/501";
    const label = "ai.openclaw.gateway";
    const serviceTarget = `${domain}/${label}`;
    const bootoutAt = state.launchctlCalls.findIndex(
      ([verb, target]) => verb === "bootout" && target === serviceTarget,
    );
    const bootstrapAt = state.launchctlCalls.findIndex(([verb]) => verb === "bootstrap");

    expect(bootoutAt).toBeGreaterThanOrEqual(0);
    expect(bootstrapAt).toBeGreaterThanOrEqual(0);
    expect(bootoutAt).toBeLessThan(bootstrapAt);
  } finally {
    vi.useRealTimers();
    killSpy.mockRestore();
  }
});
267+
182268
it("shows actionable guidance when launchctl gui domain does not support bootstrap", async () => {
183269
state.bootstrapError = "Bootstrap failed: 125: Domain does not support specified action";
184270
const env = createDefaultLaunchdEnv();

src/daemon/launchd.ts

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,34 @@ function isUnsupportedGuiDomain(detail: string): boolean {
331331
);
332332
}
333333

334+
// Upper bound, in milliseconds, on how long waitForPidExit keeps probing a pid.
const RESTART_PID_WAIT_TIMEOUT_MS = 10_000;
335+
// Pause, in milliseconds, between consecutive liveness probes in waitForPidExit.
const RESTART_PID_WAIT_INTERVAL_MS = 200;
336+
337+
/**
 * Suspends the caller until a `setTimeout` of `ms` milliseconds fires.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer callback runs.
 */
async function sleepMs(ms: number): Promise<void> {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
342+
343+
/**
 * Best-effort wait for a previously running process to go away.
 *
 * Liveness is probed with `process.kill(pid, 0)`: signal 0 sends nothing and
 * only reports whether the pid can be signalled. The function polls every
 * `RESTART_PID_WAIT_INTERVAL_MS` and gives up silently after
 * `RESTART_PID_WAIT_TIMEOUT_MS`, so callers proceed even if the process
 * never exits.
 *
 * @param pid - Process id to watch. Non-finite values and pids `<= 1` are ignored.
 * @returns Resolves when the probe throws (the pid is gone or cannot be
 *   probed) or when the timeout elapses. Probe errors never propagate.
 */
async function waitForPidExit(pid: number): Promise<void> {
  if (!Number.isFinite(pid) || pid <= 1) {
    return;
  }
  const deadline = Date.now() + RESTART_PID_WAIT_TIMEOUT_MS;
  while (Date.now() < deadline) {
    try {
      process.kill(pid, 0);
    } catch {
      // ESRCH: the pid no longer exists. EPERM: the pid is not ours to signal.
      // Any other errno means the probe cannot tell us more, so stop waiting
      // in every case rather than spin until the deadline.
      return;
    }
    await sleepMs(RESTART_PID_WAIT_INTERVAL_MS);
  }
}
361+
334362
export async function stopLaunchAgent({ stdout, env }: GatewayServiceControlArgs): Promise<void> {
335363
const domain = resolveGuiDomain();
336364
const label = resolveLaunchAgentLabel({ env });
@@ -418,11 +446,45 @@ export async function restartLaunchAgent({
418446
stdout,
419447
env,
420448
}: GatewayServiceControlArgs): Promise<void> {
449+
const serviceEnv = env ?? (process.env as GatewayServiceEnv);
421450
const domain = resolveGuiDomain();
422-
const label = resolveLaunchAgentLabel({ env });
423-
const res = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
424-
if (res.code !== 0) {
425-
throw new Error(`launchctl kickstart failed: ${res.stderr || res.stdout}`.trim());
451+
const label = resolveLaunchAgentLabel({ env: serviceEnv });
452+
const plistPath = resolveLaunchAgentPlistPath(serviceEnv);
453+
454+
const runtime = await execLaunchctl(["print", `${domain}/${label}`]);
455+
const previousPid =
456+
runtime.code === 0
457+
? parseLaunchctlPrint(runtime.stdout || runtime.stderr || "").pid
458+
: undefined;
459+
460+
const stop = await execLaunchctl(["bootout", `${domain}/${label}`]);
461+
if (stop.code !== 0 && !isLaunchctlNotLoaded(stop)) {
462+
throw new Error(`launchctl bootout failed: ${stop.stderr || stop.stdout}`.trim());
463+
}
464+
if (typeof previousPid === "number") {
465+
await waitForPidExit(previousPid);
466+
}
467+
468+
const boot = await execLaunchctl(["bootstrap", domain, plistPath]);
469+
if (boot.code !== 0) {
470+
const detail = (boot.stderr || boot.stdout).trim();
471+
if (isUnsupportedGuiDomain(detail)) {
472+
throw new Error(
473+
[
474+
`launchctl bootstrap failed: ${detail}`,
475+
`LaunchAgent restart requires a logged-in macOS GUI session for this user (${domain}).`,
476+
"This usually means you are running from SSH/headless context or as the wrong user (including sudo).",
477+
"Fix: sign in to the macOS desktop as the target user and rerun `openclaw gateway restart`.",
478+
"Headless deployments should use a dedicated logged-in user session or a custom LaunchDaemon (not shipped): https://docs.openclaw.ai/gateway",
479+
].join("\n"),
480+
);
481+
}
482+
throw new Error(`launchctl bootstrap failed: ${detail}`);
483+
}
484+
485+
const start = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
486+
if (start.code !== 0) {
487+
throw new Error(`launchctl kickstart failed: ${start.stderr || start.stdout}`.trim());
426488
}
427489
try {
428490
stdout.write(`${formatLine("Restarted LaunchAgent", `${domain}/${label}`)}\n`);

src/daemon/service-env.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,26 @@ describe("buildServiceEnvironment", () => {
309309
expect(env.OPENCLAW_LAUNCHD_LABEL).toBe("ai.openclaw.work");
310310
}
311311
});
312+
313+
// Proxy variables in either letter case should be present in the built service
// environment, with surrounding whitespace removed from the value.
it("forwards proxy environment variables for launchd/systemd runtime", () => {
  const serviceEnv = buildServiceEnvironment({
    port: 18789,
    env: {
      HOME: "/home/user",
      HTTP_PROXY: " http://proxy.local:7890 ",
      HTTPS_PROXY: "https://proxy.local:7890",
      NO_PROXY: "localhost,127.0.0.1",
      http_proxy: "http://proxy.local:7890",
      all_proxy: "socks5://proxy.local:1080",
    },
  });

  // Upper-case variants; HTTP_PROXY was supplied with padding.
  expect(serviceEnv.HTTP_PROXY).toBe("http://proxy.local:7890");
  expect(serviceEnv.HTTPS_PROXY).toBe("https://proxy.local:7890");
  expect(serviceEnv.NO_PROXY).toBe("localhost,127.0.0.1");
  // Lower-case variants are forwarded under their own names.
  expect(serviceEnv.http_proxy).toBe("http://proxy.local:7890");
  expect(serviceEnv.all_proxy).toBe("socks5://proxy.local:1080");
});
312332
});
313333

314334
describe("buildNodeServiceEnvironment", () => {
@@ -319,6 +339,19 @@ describe("buildNodeServiceEnvironment", () => {
319339
expect(env.HOME).toBe("/home/user");
320340
});
321341

342+
// Node service environments should carry proxy variables too, trimmed of surrounding whitespace.
it("forwards proxy environment variables for node services", () => {
  const nodeEnv = buildNodeServiceEnvironment({
    env: {
      HOME: "/home/user",
      HTTPS_PROXY: " https://proxy.local:7890 ",
      no_proxy: "localhost,127.0.0.1",
    },
  });

  // HTTPS_PROXY was supplied with padding; no_proxy is forwarded under its lower-case name.
  expect(nodeEnv.HTTPS_PROXY).toBe("https://proxy.local:7890");
  expect(nodeEnv.no_proxy).toBe("localhost,127.0.0.1");
});
354+
322355
it("forwards TMPDIR for node services", () => {
323356
const env = buildNodeServiceEnvironment({
324357
env: { HOME: "/home/user", TMPDIR: "/tmp/custom" },

0 commit comments

Comments
 (0)