|
1 | 1 | import { spawnSync } from "node:child_process"; |
| 2 | +import { resolveGatewayPort } from "../config/paths.js"; |
2 | 3 | import { |
3 | 4 | resolveGatewayLaunchAgentLabel, |
4 | 5 | resolveGatewaySystemdServiceName, |
5 | 6 | } from "../daemon/constants.js"; |
6 | 7 | import { createSubsystemLogger } from "../logging/subsystem.js"; |
| 8 | +import { resolveLsofCommandSync } from "./ports-lsof.js"; |
7 | 9 |
|
8 | 10 | export type RestartAttempt = { |
9 | 11 | ok: boolean; |
@@ -283,10 +285,106 @@ function normalizeSystemdUnit(raw?: string, profile?: string): string { |
283 | 285 | return unit.endsWith(".service") ? unit : `${unit}.service`; |
284 | 286 | } |
285 | 287 |
|
/**
 * Find PIDs of gateway processes listening on the given TCP port using synchronous lsof.
 *
 * lsof is asked for machine-readable field output (`-Fpc`): each process record
 * starts with a `p<pid>` line, followed by a `c<command>` line. A PID is kept
 * only when its command name contains "openclaw" (case-insensitive).
 *
 * @param port TCP port to inspect. Values that are not an integer in 1..65535
 *   yield an empty result without spawning lsof.
 * @returns Unique PIDs of matching listeners, excluding the current process.
 *   Empty on Windows, when lsof fails or exits non-zero, or when nothing matches.
 */
export function findGatewayPidsOnPortSync(port: number): number[] {
  if (process.platform === "win32") {
    return [];
  }
  // Do not hand lsof a malformed `-iTCP:` selector (NaN, fractions, out-of-range values).
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    return [];
  }
  const lsof = resolveLsofCommandSync();
  const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
    encoding: "utf8",
    timeout: SPAWN_TIMEOUT_MS,
  });
  if (res.error || res.status !== 0) {
    return [];
  }

  // A Set keeps insertion order while guaranteeing each PID is reported once.
  const matches = new Set<number>();
  let currentPid: number | undefined;
  let currentCmd: string | undefined;
  // Commit the record collected so far if it looks like a gateway process.
  const flushRecord = (): void => {
    if (currentPid != null && currentCmd && currentCmd.toLowerCase().includes("openclaw")) {
      matches.add(currentPid);
    }
  };

  for (const line of res.stdout.split(/\r?\n/)) {
    if (line.startsWith("p")) {
      // A new `p` line closes the previous process record.
      flushRecord();
      const parsed = Number.parseInt(line.slice(1), 10);
      currentPid = Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
      currentCmd = undefined;
    } else if (line.startsWith("c")) {
      currentCmd = line.slice(1);
    }
    // Any other field lines (and blank lines) are irrelevant here.
  }
  // The last record has no following `p` line to trigger its flush.
  flushRecord();

  // Never report ourselves as a stale process.
  matches.delete(process.pid);
  return Array.from(matches);
}
| 324 | + |
/** How long to give processes to exit after SIGTERM before escalating, in milliseconds. */
const STALE_SIGTERM_WAIT_MS = 300;
/** How long to wait after SIGKILL before returning, in milliseconds. */
const STALE_SIGKILL_WAIT_MS = 200;

/**
 * Block the calling thread for roughly `ms` milliseconds.
 *
 * Uses `Atomics.wait` on a private buffer that nobody notifies, so the call
 * simply times out. This avoids spawning an external `sleep` binary.
 */
function waitForStaleExitSync(ms: number): void {
  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
}

/**
 * Synchronously terminate stale gateway processes.
 * Sends SIGTERM, waits briefly, then SIGKILL for survivors.
 *
 * @param pids Candidate process IDs. Entries that are not positive integers,
 *   or that equal the current process, are ignored: a PID of 0 or a negative
 *   PID would address a whole process group instead of a single process.
 * @returns The PIDs to which SIGTERM was successfully sent (whether or not
 *   SIGKILL was needed afterwards). Empty when nothing could be signalled.
 */
function terminateStaleProcessesSync(pids: number[]): number[] {
  const targets = pids.filter((pid) => Number.isInteger(pid) && pid > 0 && pid !== process.pid);
  if (targets.length === 0) {
    return [];
  }

  const killed: number[] = [];
  for (const pid of targets) {
    try {
      process.kill(pid, "SIGTERM");
      killed.push(pid);
    } catch {
      // Best-effort: the process is already gone (ESRCH) or we may not signal it (EPERM).
    }
  }
  if (killed.length === 0) {
    return killed;
  }

  waitForStaleExitSync(STALE_SIGTERM_WAIT_MS);

  let escalated = false;
  for (const pid of killed) {
    try {
      // Throws if the process already exited, so no separate liveness probe is needed.
      process.kill(pid, "SIGKILL");
      escalated = true;
    } catch {
      // already gone
    }
  }
  // Only pay for the second wait when something actually needed SIGKILL.
  if (escalated) {
    waitForStaleExitSync(STALE_SIGKILL_WAIT_MS);
  }
  return killed;
}
| 360 | + |
/**
 * Best-effort sweep of the gateway port ahead of a restart.
 *
 * Resolves the gateway port from the environment, looks up gateway processes
 * still listening on it and, if any are found, logs a warning and terminates
 * them. Any failure along the way is swallowed so the restart can proceed.
 *
 * @returns The PIDs reported by the termination step, or an empty array when
 *   nothing was found or an error occurred.
 */
function cleanStaleGatewayProcessesSync(): number[] {
  let terminated: number[] = [];
  try {
    const gatewayPort = resolveGatewayPort(undefined, process.env);
    const holders = findGatewayPidsOnPortSync(gatewayPort);
    if (holders.length > 0) {
      restartLog.warn(
        `killing ${holders.length} stale gateway process(es) before restart: ${holders.join(", ")}`,
      );
      terminated = terminateStaleProcessesSync(holders);
    }
  } catch {
    // Cleanup is opportunistic; never let it block the restart itself.
    terminated = [];
  }
  return terminated;
}
| 380 | + |
286 | 381 | export function triggerOpenClawRestart(): RestartAttempt { |
287 | 382 | if (process.env.VITEST || process.env.NODE_ENV === "test") { |
288 | 383 | return { ok: true, method: "supervisor", detail: "test mode" }; |
289 | 384 | } |
| 385 | + |
| 386 | + cleanStaleGatewayProcessesSync(); |
| 387 | + |
290 | 388 | const tried: string[] = []; |
291 | 389 | if (process.platform !== "darwin") { |
292 | 390 | if (process.platform === "linux") { |
|
0 commit comments