Skip to content

Commit 39f7dbf

Browse files
committed
fix(cli): make gateway --force resilient to lsof EACCES
1 parent c03adfb commit 39f7dbf

File tree

3 files changed

+266
-16
lines changed

3 files changed

+266
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Docs: https://docs.openclaw.ai
6161
- Doctor/State integrity: ignore metadata-only slash routing sessions when checking recent missing transcripts so `openclaw doctor` no longer reports false-positive transcript-missing warnings for `*:slash:*` keys. (#27375) thanks @gumadeiras.
6262
- CLI/Gateway status: force local `gateway status` probe host to `127.0.0.1` for `bind=lan` so co-located probes do not trip non-loopback plaintext WebSocket checks. (#26997) thanks @chikko80.
6363
- CLI/Gateway auth: align `gateway run --auth` parsing/help text with supported gateway auth modes by accepting `none` and `trusted-proxy` (in addition to `token`/`password`) for CLI overrides. (#27469) thanks @s1korrrr.
64+
- CLI/Gateway `--force` in non-root Docker: recover from `lsof` permission failures (`EACCES`/`EPERM`) by falling back to `fuser` kill + probe-based port checks, so `openclaw gateway --force` works for default container `node` user flows. (#27941)
6465
- CLI/Daemon status TLS probe: use `wss://` and forward local TLS certificate fingerprint for TLS-enabled gateway daemon probes so `openclaw daemon status` works with `gateway.bind=lan` + `gateway.tls.enabled=true`. (#24234) thanks @liuy.
6566
- Podman/Default bind: change `run-openclaw-podman.sh` default gateway bind from `lan` to `loopback` and document explicit LAN opt-in with Control UI origin configuration. (#27491) thanks @robbyczgw-cla.
6667
- Daemon/macOS launchd: forward proxy env vars into supervised service environments, keep LaunchAgent `KeepAlive=true` semantics, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn.

src/cli/ports.ts

Lines changed: 176 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { execFileSync } from "node:child_process";
22
import { resolveLsofCommandSync } from "../infra/ports-lsof.js";
3+
import { tryListenOnPort } from "../infra/ports-probe.js";
34
import { sleep } from "../utils.js";
45

56
export type PortProcess = { pid: number; command?: string };
@@ -10,6 +11,132 @@ export type ForceFreePortResult = {
1011
escalatedToSigkill: boolean;
1112
};
1213

14+
/**
 * View of the value caught from a failed `execFileSync` call: the usual
 * errno fields plus the child's exit status and whatever output was captured.
 * All extra fields are optional because not every failure populates them.
 */
type ExecFileError = NodeJS.ErrnoException & {
  /** Exit status of the child process, when one was recorded. */
  status?: number | null;
  /** Captured standard error (string or raw bytes). */
  stderr?: string | Buffer;
  /** Captured standard output (string or raw bytes). */
  stdout?: string | Buffer;
  /** Underlying error, if the failure wraps another one. */
  cause?: unknown;
};

/**
 * Translates the supported Node signal names into the bare signal name that
 * is appended to `-` when building the `fuser -k` argument list.
 */
const FUSER_SIGNALS: Record<"SIGTERM" | "SIGKILL", string> = {
  SIGTERM: "TERM",
  SIGKILL: "KILL",
};
25+
26+
/**
 * Normalizes captured child-process output into a plain string.
 *
 * @param value - Output as captured from the child: a string, a Buffer, or nothing.
 * @returns The string itself, the Buffer decoded as UTF-8, or `""` for anything else.
 */
function readExecOutput(value: string | Buffer | undefined): string {
  if (value instanceof Buffer) {
    return value.toString("utf8");
  }
  return typeof value === "string" ? value : "";
}
35+
36+
/**
 * Builds an `Error` that carries an errno-style `code`.
 *
 * @param message - Human-readable message for the new error.
 * @param code - Errno code to attach (for example `"ENOENT"`).
 * @param cause - The original failure; it is attached as `cause` only when it
 *   is an `Error` instance, otherwise the cause is left `undefined`.
 * @returns The newly created error with `code` set.
 */
function withErrnoCode(message: string, code: string, cause: unknown): Error {
  const errorCause = cause instanceof Error ? cause : undefined;
  const wrapped = new Error(message, { cause: errorCause }) as Error & NodeJS.ErrnoException;
  wrapped.code = code;
  return wrapped;
}
42+
43+
/**
 * Extracts an errno-style code from an unknown thrown value.
 *
 * The value's own `code` wins; if that is missing, empty, or not a string,
 * the `code` of its `cause` (when the cause is an object) is used instead.
 *
 * @param err - Any caught value.
 * @returns A non-empty string code, or `undefined` when none is found.
 */
function getErrnoCode(err: unknown): string | undefined {
  // Reads a non-empty string `code` off an object-like value.
  const readCode = (candidate: unknown): string | undefined => {
    if (!candidate || typeof candidate !== "object") {
      return undefined;
    }
    const code = (candidate as { code?: unknown }).code;
    return typeof code === "string" && code.length > 0 ? code : undefined;
  };

  if (!err || typeof err !== "object") {
    return undefined;
  }
  return readCode(err) ?? readCode((err as { cause?: unknown }).cause);
}
60+
61+
/**
 * Decides whether an `lsof` failure can be worked around by the fuser-based
 * fallback instead of aborting.
 *
 * A failure is recoverable when its errno code (own or from its cause) is
 * `ENOENT`, `EACCES` or `EPERM`, or when its message mentions `lsof` followed
 * by a permission-style phrase.
 *
 * @param err - The value caught while running or interpreting `lsof`.
 * @returns `true` when the fallback path should be attempted.
 */
function isRecoverableLsofError(err: unknown): boolean {
  switch (getErrnoCode(err)) {
    case "ENOENT":
    case "EACCES":
    case "EPERM":
      return true;
    default:
      break;
  }

  // No usable errno code: fall back to sniffing the message text.
  const text = err instanceof Error ? err.message : String(err);
  const permissionPattern =
    /lsof.*(permission denied|not permitted|operation not permitted|eacces|eperm)/i;
  return permissionPattern.test(text);
}
71+
72+
/**
 * Extracts the unique process IDs from `fuser` output.
 *
 * Each non-empty line is scanned after its first `:` (so the leading
 * `<port>/tcp:` label is never mistaken for a PID), or in full when it has no
 * colon. Only whitespace-delimited tokens that look like a PID are accepted:
 * a run of digits, optionally followed by access letters (`1234c`) and/or a
 * `(user)` suffix (`1234(root)`). Digits embedded in other text, such as
 * `tcp6` or `/proc/77/fd/3` in a diagnostic message, are ignored; this matters
 * because callers may pass captured stderr through this parser.
 *
 * @param output - Raw text captured from `fuser` (stdout and/or stderr).
 * @returns Distinct positive PIDs in order of first appearance.
 */
function parseFuserPidList(output: string): number[] {
  if (!output) {
    return [];
  }
  const pidToken = /^(\d+)[a-zA-Z]*(?:\([^)]*\))?$/;
  const pids = new Set<number>();
  for (const rawLine of output.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) {
      continue;
    }
    const colonAt = line.indexOf(":");
    const pidRegion = colonAt >= 0 ? line.slice(colonAt + 1) : line;
    for (const token of pidRegion.split(/\s+/)) {
      const digits = pidToken.exec(token)?.[1];
      if (digits === undefined) {
        continue;
      }
      const pid = Number.parseInt(digits, 10);
      if (Number.isFinite(pid) && pid > 0) {
        pids.add(pid);
      }
    }
  }
  return [...pids];
}
93+
94+
/**
 * Signals every process holding `<port>/tcp` by running `fuser -k -<SIGNAL>`.
 *
 * @param port - TCP port whose holders should be signalled.
 * @param signal - Signal to deliver; translated through `FUSER_SIGNALS`.
 * @returns The processes whose PIDs could be parsed from fuser's output
 *   (command names are not available on this path).
 * @throws An error with code `ENOENT` when `fuser` cannot be found, an error
 *   with code `EACCES`/`EPERM` when running it is denied, or the original
 *   failure for anything else.
 */
function killPortWithFuser(port: number, signal: "SIGTERM" | "SIGKILL"): PortProcess[] {
  const toProcesses = (pids: number[]): PortProcess[] => pids.map((pid) => ({ pid }));
  const fuserArgs = ["-k", `-${FUSER_SIGNALS[signal]}`, `${port}/tcp`];

  try {
    const captured = execFileSync("fuser", fuserArgs, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    return toProcesses(parseFuserPidList(captured));
  } catch (err: unknown) {
    const failure = err as ExecFileError;
    const code = failure.code;

    if (failure.status === 1) {
      // fuser exits 1 if nothing matched; still report any PIDs it printed in
      // case the signal was actually delivered.
      const combined = [readExecOutput(failure.stdout), readExecOutput(failure.stderr)]
        .filter(Boolean)
        .join("\n");
      return toProcesses(parseFuserPidList(combined));
    }

    if (code === "ENOENT") {
      throw withErrnoCode(
        "fuser not found; required for --force when lsof is unavailable",
        "ENOENT",
        err,
      );
    }

    if (code === "EACCES" || code === "EPERM") {
      throw withErrnoCode("fuser permission denied while forcing gateway port", code, err);
    }

    throw err instanceof Error ? err : new Error(String(err));
  }
}
126+
127+
/**
 * Probes whether a port is currently in use by attempting an exclusive listen.
 *
 * @param port - Port to probe.
 * @returns `false` when the listen attempt succeeds, `true` when it fails
 *   with `EADDRINUSE`.
 * @throws Any other probe failure, wrapped in an `Error` if it was not one.
 */
async function isPortBusy(port: number): Promise<boolean> {
  try {
    await tryListenOnPort({ port, exclusive: true });
  } catch (err: unknown) {
    if ((err as NodeJS.ErrnoException).code === "EADDRINUSE") {
      return true;
    }
    // Anything other than "address in use" is a real probe failure.
    throw err instanceof Error ? err : new Error(String(err));
  }
  return false;
}
139+
13140
export function parseLsofOutput(output: string): PortProcess[] {
14141
const lines = output.split(/\r?\n/).filter(Boolean);
15142
const results: PortProcess[] = [];
@@ -38,12 +165,27 @@ export function listPortListeners(port: number): PortProcess[] {
38165
});
39166
return parseLsofOutput(out);
40167
} catch (err: unknown) {
41-
const status = (err as { status?: number }).status;
42-
const code = (err as { code?: string }).code;
168+
const execErr = err as ExecFileError;
169+
const status = execErr.status ?? undefined;
170+
const code = execErr.code;
43171
if (code === "ENOENT") {
44-
throw new Error("lsof not found; required for --force", { cause: err });
172+
throw withErrnoCode("lsof not found; required for --force", "ENOENT", err);
173+
}
174+
if (code === "EACCES" || code === "EPERM") {
175+
throw withErrnoCode("lsof permission denied while inspecting gateway port", code, err);
45176
}
46177
if (status === 1) {
178+
const stderr = readExecOutput(execErr.stderr).trim();
179+
if (
180+
stderr &&
181+
/permission denied|not permitted|operation not permitted|can't stat/i.test(stderr)
182+
) {
183+
throw withErrnoCode(
184+
`lsof permission denied while inspecting gateway port: ${stderr}`,
185+
"EACCES",
186+
err,
187+
);
188+
}
47189
return [];
48190
} // no listeners
49191
throw err instanceof Error ? err : new Error(String(err));
@@ -93,43 +235,65 @@ export async function forceFreePortAndWait(
93235
const intervalMs = Math.max(opts.intervalMs ?? 100, 1);
94236
const sigtermTimeoutMs = Math.min(Math.max(opts.sigtermTimeoutMs ?? 600, 0), timeoutMs);
95237

96-
const killed = forceFreePort(port);
97-
if (killed.length === 0) {
238+
let killed: PortProcess[] = [];
239+
let useFuserFallback = false;
240+
241+
try {
242+
killed = forceFreePort(port);
243+
} catch (err) {
244+
if (!isRecoverableLsofError(err)) {
245+
throw err;
246+
}
247+
useFuserFallback = true;
248+
killed = killPortWithFuser(port, "SIGTERM");
249+
}
250+
251+
const checkBusy = async (): Promise<boolean> =>
252+
useFuserFallback ? isPortBusy(port) : listPortListeners(port).length > 0;
253+
254+
if (!(await checkBusy())) {
98255
return { killed, waitedMs: 0, escalatedToSigkill: false };
99256
}
100257

101258
let waitedMs = 0;
102259
const triesSigterm = intervalMs > 0 ? Math.ceil(sigtermTimeoutMs / intervalMs) : 0;
103260
for (let i = 0; i < triesSigterm; i++) {
104-
if (listPortListeners(port).length === 0) {
261+
if (!(await checkBusy())) {
105262
return { killed, waitedMs, escalatedToSigkill: false };
106263
}
107264
await sleep(intervalMs);
108265
waitedMs += intervalMs;
109266
}
110267

111-
if (listPortListeners(port).length === 0) {
268+
if (!(await checkBusy())) {
112269
return { killed, waitedMs, escalatedToSigkill: false };
113270
}
114271

115-
const remaining = listPortListeners(port);
116-
killPids(remaining, "SIGKILL");
272+
if (useFuserFallback) {
273+
killPortWithFuser(port, "SIGKILL");
274+
} else {
275+
const remaining = listPortListeners(port);
276+
killPids(remaining, "SIGKILL");
277+
}
117278

118279
const remainingBudget = Math.max(timeoutMs - waitedMs, 0);
119280
const triesSigkill = intervalMs > 0 ? Math.ceil(remainingBudget / intervalMs) : 0;
120281
for (let i = 0; i < triesSigkill; i++) {
121-
if (listPortListeners(port).length === 0) {
282+
if (!(await checkBusy())) {
122283
return { killed, waitedMs, escalatedToSigkill: true };
123284
}
124285
await sleep(intervalMs);
125286
waitedMs += intervalMs;
126287
}
127288

128-
const still = listPortListeners(port);
129-
if (still.length === 0) {
289+
if (!(await checkBusy())) {
130290
return { killed, waitedMs, escalatedToSigkill: true };
131291
}
132292

293+
if (useFuserFallback) {
294+
throw new Error(`port ${port} still has listeners after --force (fuser fallback)`);
295+
}
296+
const still = listPortListeners(port);
133297
throw new Error(
134298
`port ${port} still has listeners after --force: ${still.map((p) => p.pid).join(", ")}`,
135299
);

src/cli/program.force.test.ts

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ vi.mock("node:child_process", async () => {
88
};
99
});
1010

11+
const tryListenOnPortMock = vi.hoisted(() => vi.fn());
12+
13+
vi.mock("../infra/ports-probe.js", () => ({
14+
tryListenOnPort: (...args: unknown[]) => tryListenOnPortMock(...args),
15+
}));
16+
1117
import { execFileSync } from "node:child_process";
1218
import {
1319
forceFreePort,
@@ -23,6 +29,7 @@ describe("gateway --force helpers", () => {
2329
beforeEach(() => {
2430
vi.clearAllMocks();
2531
originalKill = process.kill.bind(process);
32+
tryListenOnPortMock.mockReset();
2633
});
2734

2835
afterEach(() => {
@@ -80,11 +87,13 @@ describe("gateway --force helpers", () => {
8087
let call = 0;
8188
(execFileSync as unknown as Mock).mockImplementation(() => {
8289
call += 1;
83-
// 1st call: initial listeners to kill; 2nd call: still listed; 3rd call: gone.
90+
// 1st call: initial listeners to kill.
91+
// 2nd/3rd calls: still listed.
92+
// 4th call: gone.
8493
if (call === 1) {
8594
return ["p42", "cnode", ""].join("\n");
8695
}
87-
if (call === 2) {
96+
if (call === 2 || call === 3) {
8897
return ["p42", "cnode", ""].join("\n");
8998
}
9099
return "";
@@ -105,7 +114,7 @@ describe("gateway --force helpers", () => {
105114
expect(killMock).toHaveBeenCalledWith(42, "SIGTERM");
106115
expect(res.killed).toEqual<PortProcess[]>([{ pid: 42, command: "node" }]);
107116
expect(res.escalatedToSigkill).toBe(false);
108-
expect(res.waitedMs).toBeGreaterThan(0);
117+
expect(res.waitedMs).toBe(100);
109118

110119
vi.useRealTimers();
111120
});
@@ -116,7 +125,7 @@ describe("gateway --force helpers", () => {
116125
(execFileSync as unknown as Mock).mockImplementation(() => {
117126
call += 1;
118127
// 1st call: initial kill list; then keep showing until after SIGKILL.
119-
if (call <= 6) {
128+
if (call <= 7) {
120129
return ["p42", "cnode", ""].join("\n");
121130
}
122131
return "";
@@ -140,4 +149,80 @@ describe("gateway --force helpers", () => {
140149

141150
vi.useRealTimers();
142151
});
152+
153+
it("falls back to fuser when lsof is permission denied", async () => {
154+
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
155+
if (cmd.includes("lsof")) {
156+
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
157+
err.code = "EACCES";
158+
throw err;
159+
}
160+
return "18789/tcp: 4242\n";
161+
});
162+
tryListenOnPortMock.mockResolvedValue(undefined);
163+
164+
const result = await forceFreePortAndWait(18789, { timeoutMs: 500, intervalMs: 100 });
165+
166+
expect(result.escalatedToSigkill).toBe(false);
167+
expect(result.killed).toEqual<PortProcess[]>([{ pid: 4242 }]);
168+
expect(execFileSync).toHaveBeenCalledWith(
169+
"fuser",
170+
["-k", "-TERM", "18789/tcp"],
171+
expect.objectContaining({ encoding: "utf-8" }),
172+
);
173+
});
174+
175+
it("uses fuser SIGKILL escalation when port stays busy", async () => {
176+
vi.useFakeTimers();
177+
(execFileSync as unknown as Mock).mockImplementation((cmd: string, args: string[]) => {
178+
if (cmd.includes("lsof")) {
179+
const err = new Error("spawnSync lsof EACCES") as NodeJS.ErrnoException;
180+
err.code = "EACCES";
181+
throw err;
182+
}
183+
if (args.includes("-TERM")) {
184+
return "18789/tcp: 1337\n";
185+
}
186+
if (args.includes("-KILL")) {
187+
return "18789/tcp: 1337\n";
188+
}
189+
return "";
190+
});
191+
192+
const busyErr = Object.assign(new Error("in use"), { code: "EADDRINUSE" });
193+
tryListenOnPortMock
194+
.mockRejectedValueOnce(busyErr)
195+
.mockRejectedValueOnce(busyErr)
196+
.mockRejectedValueOnce(busyErr)
197+
.mockResolvedValueOnce(undefined);
198+
199+
const promise = forceFreePortAndWait(18789, {
200+
timeoutMs: 300,
201+
intervalMs: 100,
202+
sigtermTimeoutMs: 100,
203+
});
204+
await vi.runAllTimersAsync();
205+
const result = await promise;
206+
207+
expect(result.escalatedToSigkill).toBe(true);
208+
expect(result.waitedMs).toBe(100);
209+
expect(execFileSync).toHaveBeenCalledWith(
210+
"fuser",
211+
["-k", "-KILL", "18789/tcp"],
212+
expect.objectContaining({ encoding: "utf-8" }),
213+
);
214+
vi.useRealTimers();
215+
});
216+
217+
it("throws when lsof is unavailable and fuser is missing", async () => {
218+
(execFileSync as unknown as Mock).mockImplementation((cmd: string) => {
219+
const err = new Error(`spawnSync ${cmd} ENOENT`) as NodeJS.ErrnoException;
220+
err.code = "ENOENT";
221+
throw err;
222+
});
223+
224+
await expect(forceFreePortAndWait(18789, { timeoutMs: 200, intervalMs: 100 })).rejects.toThrow(
225+
/fuser not found/i,
226+
);
227+
});
143228
});

0 commit comments

Comments
 (0)