Skip to content

Commit b672756

Browse files
committed
fix(discord): serialize command-deploy cache persistence via in-process mutex
Codex follow-up on #77367 noted: re-read-before-write inside persistHashes isn't enough — two deployers running persistHashes in true parallel can both read the same snapshot before either writes, and the later rename overwrites the earlier writer's app-scoped entries. Add a module-level Map<storePath, Promise<void>> mutex and wrap the read-merge-write cycle in withCachePersistLock so concurrent persists for the same on-disk path serialize. In-process is sufficient because Discord deployers only run inside the gateway process. New regression test fires three deployers via Promise.all on the same tick and asserts all three application-scoped entries survive — pre-fix this race lost at least one entry.
1 parent 75f64f1 commit b672756

2 files changed

Lines changed: 96 additions & 9 deletions

File tree

extensions/discord/src/internal/command-deploy.test.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,4 +393,50 @@ describe("DiscordCommandDeployer cache scoping (multi-application)", () => {
393393
expect(restB.get).not.toHaveBeenCalled();
394394
expect(restB.post).not.toHaveBeenCalled();
395395
});
396+
397+
test("truly parallel deployers serialize cache writes via the per-path mutex (codex follow-up on #77367)", async () => {
398+
// Codex follow-up on PR #77367: re-read-before-write alone isn't enough
399+
// when two deployers run `persistHashes` in real parallel — both can read
400+
// the same snapshot before either writes. The in-process per-path mutex
401+
// around the read-merge-write cycle makes the operation atomic.
402+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-discord-multi-app-"));
403+
const hashStorePath = path.join(dir, "command-deploy-cache.json");
404+
const commands = [new StaticCommand("ping")];
405+
406+
// Run BOTH deploys with Promise.all on the SAME process tick — pre-fix,
407+
// both `persistHashes` calls would race on read-then-rename and one
408+
// writer's `app:<id>:...` entry would be lost.
409+
const restA = createRest();
410+
const restB = createRest();
411+
const restC = createRest();
412+
await Promise.all([
413+
new DiscordCommandDeployer({
414+
clientId: "app-default",
415+
commands,
416+
hashStorePath,
417+
rest: () => restA,
418+
}).deploy({ mode: "reconcile" }),
419+
new DiscordCommandDeployer({
420+
clientId: "app-secondary",
421+
commands,
422+
hashStorePath,
423+
rest: () => restB,
424+
}).deploy({ mode: "reconcile" }),
425+
new DiscordCommandDeployer({
426+
clientId: "app-tertiary",
427+
commands,
428+
hashStorePath,
429+
rest: () => restC,
430+
}).deploy({ mode: "reconcile" }),
431+
]);
432+
433+
const raw = await fs.readFile(hashStorePath, "utf8");
434+
const parsed = JSON.parse(raw) as { hashes: Record<string, string> };
435+
const keys = Object.keys(parsed.hashes);
436+
// All three apps' entries must survive — pre-fix, one or two would be
437+
// lost to the race.
438+
expect(keys).toContain("app:app-default:global:reconcile");
439+
expect(keys).toContain("app:app-secondary:global:reconcile");
440+
expect(keys).toContain("app:app-tertiary:global:reconcile");
441+
});
396442
});

extensions/discord/src/internal/command-deploy.ts

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,41 @@ export type DeployCommandOptions = {
2020

2121
type SerializedCommand = ReturnType<BaseCommand["serialize"]>;
2222

23+
/**
24+
* Per-`command-deploy-cache.json` path async mutex. `server-channels.ts` can
25+
* start several Discord deployers concurrently in the same Node.js process;
26+
* each one shares the same on-disk cache file. Without this lock, two
27+
* deployers can run `persistHashes` in parallel, both read the same on-disk
28+
* snapshot before either writes, and the later `rename` then overwrites the
29+
* earlier writer's entries — defeating the rate-limit cache.
30+
*
31+
* This is an in-process lock; cross-process serialization would need an OS
32+
* file lock. Discord deployers only run inside the gateway process, so an
33+
* in-process mutex is sufficient for the documented concurrency surface.
34+
*/
35+
const cachePersistLocks = new Map<string, Promise<void>>();
36+
37+
async function withCachePersistLock<T>(storePath: string, fn: () => Promise<T>): Promise<T> {
38+
const previous = cachePersistLocks.get(storePath) ?? Promise.resolve();
39+
let release: () => void = () => {};
40+
const next = new Promise<void>((resolve) => {
41+
release = resolve;
42+
});
43+
cachePersistLocks.set(
44+
storePath,
45+
previous.then(() => next),
46+
);
47+
try {
48+
await previous;
49+
return await fn();
50+
} finally {
51+
release();
52+
if (cachePersistLocks.get(storePath) === previous.then(() => next)) {
53+
cachePersistLocks.delete(storePath);
54+
}
55+
}
56+
}
57+
2358
export class DiscordCommandDeployer {
2459
private readonly hashes = new Map<string, string>();
2560
private hashesLoaded = false;
@@ -178,18 +213,24 @@ export class DiscordCommandDeployer {
178213
if (!storePath) {
179214
return;
180215
}
216+
// Serialize concurrent persists for the same on-disk path. The earlier
217+
// "re-read inside persistHashes" merge alone is not enough — two
218+
// deployers running `persistHashes` in true parallel would both read the
219+
// same snapshot before either writes, and the later `rename` would still
220+
// overwrite the earlier one's `app:<id>:...` entries. The mutex makes the
221+
// read-merge-write cycle atomic for in-process callers.
222+
await withCachePersistLock(storePath, async () => {
223+
await this.persistHashesLocked(storePath);
224+
});
225+
}
226+
227+
private async persistHashesLocked(storePath: string): Promise<void> {
181228
try {
182229
await fs.mkdir(path.dirname(storePath), { recursive: true });
183230
// Re-read the on-disk hashes immediately before writing and merge them
184-
// with our in-memory entries. Multiple Discord accounts on the same
185-
// gateway share `command-deploy-cache.json`, and `server-channels.ts`
186-
// can start their deployers concurrently. Without this re-read step,
187-
// two deployers that both load the old file and then both write would
188-
// each persist only their own scoped keys (`app:<id>:...`) and drop the
189-
// other deployer's keys, defeating the rate-limit protection the cache
190-
// was added for. Our in-memory entries always win on key collisions
191-
// (we just produced them); on-disk entries that we don't have in memory
192-
// are preserved as-is.
231+
// with our in-memory entries. Our in-memory entries always win on key
232+
// collisions (we just produced them); on-disk entries that we don't have
233+
// in memory are preserved as-is.
193234
const merged = new Map<string, string>();
194235
try {
195236
const raw = await fs.readFile(storePath, "utf8");

0 commit comments

Comments
 (0)