Skip to content

Commit 638efc7

Browse files
[fix]: refresh agent cache on action failure (#1472)
1 parent e021674 commit 638efc7

File tree

4 files changed

+235
-21
lines changed

4 files changed

+235
-21
lines changed

.changeset/khaki-llamas-hope.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
fix: agent cache not refreshed on action failure

packages/core/lib/v3/cache/AgentCache.ts

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ import type {
1717
AgentCacheDeps,
1818
} from "../types/private";
1919
import type {
20-
AvailableModel,
20+
Action,
2121
AgentResult,
2222
AgentStreamResult,
2323
AgentConfig,
2424
AgentExecuteOptionsBase,
25+
AvailableModel,
2526
Logger,
2627
} from "../types/public";
2728
import type { Page } from "../understudy/page";
@@ -188,7 +189,7 @@ export class AgentCache {
188189
},
189190
});
190191

191-
return await this.replayAgentCacheEntry(entry);
192+
return await this.replayAgentCacheEntry(context, entry);
192193
}
193194

194195
/**
@@ -495,14 +496,20 @@ export class AgentCache {
495496
}
496497

497498
private async replayAgentCacheEntry(
499+
context: AgentCacheContext,
498500
entry: CachedAgentEntry,
499501
): Promise<AgentResult | null> {
500502
const ctx = this.getContext();
501503
const handler = this.getActHandler();
502504
if (!ctx || !handler) return null;
503505
try {
506+
const updatedSteps: AgentReplayStep[] = [];
507+
let stepsChanged = false;
504508
for (const step of entry.steps ?? []) {
505-
await this.executeAgentReplayStep(step, ctx, handler);
509+
const replayedStep =
510+
(await this.executeAgentReplayStep(step, ctx, handler)) ?? step;
511+
stepsChanged ||= replayedStep !== step;
512+
updatedSteps.push(replayedStep);
506513
}
507514
const result = cloneForCache(entry.result);
508515
result.usage = {
@@ -517,6 +524,9 @@ export class AgentCache {
517524
cacheHit: true,
518525
cacheTimestamp: entry.timestamp,
519526
};
527+
if (stepsChanged) {
528+
await this.refreshAgentCacheEntry(context, entry, updatedSteps);
529+
}
520530
return result;
521531
} catch (err) {
522532
this.logger({
@@ -535,87 +545,112 @@ export class AgentCache {
535545
step: AgentReplayStep,
536546
ctx: V3Context,
537547
handler: ActHandler,
538-
): Promise<void> {
548+
): Promise<AgentReplayStep> {
539549
switch (step.type) {
540550
case "act":
541-
await this.replayAgentActStep(step as AgentReplayActStep, ctx, handler);
542-
return;
551+
return await this.replayAgentActStep(
552+
step as AgentReplayActStep,
553+
ctx,
554+
handler,
555+
);
543556
case "fillForm":
544-
await this.replayAgentFillFormStep(
557+
return await this.replayAgentFillFormStep(
545558
step as AgentReplayFillFormStep,
546559
ctx,
547560
handler,
548561
);
549-
return;
550562
case "goto":
551563
await this.replayAgentGotoStep(step as AgentReplayGotoStep, ctx);
552-
return;
564+
return step;
553565
case "scroll":
554566
await this.replayAgentScrollStep(step as AgentReplayScrollStep, ctx);
555-
return;
567+
return step;
556568
case "wait":
557569
await this.replayAgentWaitStep(step as AgentReplayWaitStep);
558-
return;
570+
return step;
559571
case "navback":
560572
await this.replayAgentNavBackStep(step as AgentReplayNavBackStep, ctx);
561-
return;
573+
return step;
562574
case "keys":
563575
await this.replayAgentKeysStep(step as AgentReplayKeysStep, ctx);
564-
return;
576+
return step;
565577
case "close":
566578
case "extract":
567579
case "screenshot":
568580
case "ariaTree":
569-
return;
581+
return step;
570582
default:
571583
this.logger({
572584
category: "cache",
573585
message: `agent cache skipping step type: ${step.type}`,
574586
level: 2,
575587
});
588+
return step;
576589
}
577590
}
578591

579592
private async replayAgentActStep(
580593
step: AgentReplayActStep,
581594
ctx: V3Context,
582595
handler: ActHandler,
583-
): Promise<void> {
596+
): Promise<AgentReplayActStep> {
584597
const actions = Array.isArray(step.actions) ? step.actions : [];
585598
if (actions.length > 0) {
586599
const page = await ctx.awaitActivePage();
600+
const updatedActions: Action[] = [];
587601
for (const action of actions) {
588-
await handler.takeDeterministicAction(
602+
const result = await handler.takeDeterministicAction(
589603
action,
590604
page,
591605
this.domSettleTimeoutMs,
592606
this.getDefaultLlmClient(),
593607
);
608+
if (result.success && Array.isArray(result.actions)) {
609+
updatedActions.push(...cloneForCache(result.actions));
610+
} else {
611+
updatedActions.push(cloneForCache(action));
612+
}
594613
}
595-
return;
614+
if (this.haveActionsChanged(actions, updatedActions)) {
615+
return { ...step, actions: updatedActions };
616+
}
617+
return step;
596618
}
597619
await this.act(step.instruction, { timeout: step.timeout });
620+
return step;
598621
}
599622

600623
private async replayAgentFillFormStep(
601624
step: AgentReplayFillFormStep,
602625
ctx: V3Context,
603626
handler: ActHandler,
604-
): Promise<void> {
627+
): Promise<AgentReplayFillFormStep> {
605628
const actions =
606629
Array.isArray(step.actions) && step.actions.length > 0
607630
? step.actions
608631
: (step.observeResults ?? []);
609-
if (!Array.isArray(actions) || actions.length === 0) return;
632+
if (!Array.isArray(actions) || actions.length === 0) {
633+
return step;
634+
}
610635
const page = await ctx.awaitActivePage();
636+
const updatedActions: Action[] = [];
611637
for (const action of actions) {
612-
await handler.takeDeterministicAction(
638+
const result = await handler.takeDeterministicAction(
613639
action,
614640
page,
615641
this.domSettleTimeoutMs,
616642
this.getDefaultLlmClient(),
617643
);
644+
if (result.success && Array.isArray(result.actions)) {
645+
updatedActions.push(...cloneForCache(result.actions));
646+
} else {
647+
updatedActions.push(cloneForCache(action));
648+
}
649+
}
650+
if (this.haveActionsChanged(actions, updatedActions)) {
651+
return { ...step, actions: updatedActions };
618652
}
653+
return step;
619654
}
620655

621656
private async replayAgentGotoStep(
@@ -681,4 +716,73 @@ export class AgentCache {
681716
}
682717
}
683718
}
719+
720+
/**
 * Reports whether a replayed action list differs from the cached one.
 *
 * The lists are considered unchanged only when they have the same length and
 * every pair of actions at the same index agrees on `selector`, `description`,
 * `method`, and each positional argument. A nullish `description` or `method`
 * is compared as an empty string, and a non-array `arguments` value is
 * compared as an empty list.
 *
 * @param original - Actions as stored in the cache entry.
 * @param updated - Actions collected while replaying.
 * @returns `true` as soon as any difference is found, otherwise `false`.
 */
private haveActionsChanged(original: Action[], updated: Action[]): boolean {
  if (original.length !== updated.length) return true;

  const argsOf = (action: Action) =>
    Array.isArray(action.arguments) ? action.arguments : [];

  // `entries()` visits every index (including holes), like an index loop.
  for (const [index, before] of original.entries()) {
    const after = updated[index];
    if (!before || !after) return true;

    const sameHeader =
      before.selector === after.selector &&
      (before.description ?? "") === (after.description ?? "") &&
      (before.method ?? "") === (after.method ?? "");
    if (!sameHeader) return true;

    const beforeArgs = argsOf(before);
    const afterArgs = argsOf(after);
    if (beforeArgs.length !== afterArgs.length) return true;

    // Arguments are compared by strict identity, position by position.
    for (const [position, value] of beforeArgs.entries()) {
      if (value !== afterArgs[position]) return true;
    }
  }

  return false;
}
752+
753+
/**
 * Writes a cache entry back to storage after replay produced different steps.
 *
 * The stored entry keeps every field of `entry` but replaces `steps` with a
 * cloned copy of `updatedSteps` and stamps `timestamp` with the current time
 * in ISO-8601 form. It is written under `agent-<cacheKey>.json`.
 *
 * Any error reported by the storage layer is logged as a failure. The write
 * result is not propagated to the caller; only a log line is emitted.
 *
 * @param context - Cache context supplying `cacheKey` and `instruction`.
 * @param entry - The entry that was just replayed.
 * @param updatedSteps - Replay steps, including any that changed during replay.
 */
private async refreshAgentCacheEntry(
  context: AgentCacheContext,
  entry: CachedAgentEntry,
  updatedSteps: AgentReplayStep[],
): Promise<void> {
  const updatedEntry: CachedAgentEntry = {
    ...entry,
    steps: cloneForCache(updatedSteps),
    timestamp: new Date().toISOString(),
  };
  const { error } = await this.storage.writeJson(
    `agent-${context.cacheKey}.json`,
    updatedEntry,
  );
  // Gate on `error` alone: requiring a path as well would report a failed
  // write as a successful update whenever the storage layer omits the path.
  if (error) {
    this.logger({
      category: "cache",
      message: "failed to update agent cache entry after self-heal",
      level: 0,
      auxiliary: {
        error: { value: String(error), type: "string" },
      },
    });
    return;
  }
  this.logger({
    category: "cache",
    message: "agent cache entry updated after self-heal",
    level: 2,
    auxiliary: {
      instruction: { value: context.instruction, type: "string" },
      steps: { value: String(updatedSteps.length), type: "string" },
    },
  });
}
684788
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import { test, expect } from "@playwright/test";
2+
import fs from "fs/promises";
3+
import path from "path";
4+
import { V3 } from "../v3";
5+
import { v3TestConfig } from "./v3.config";
6+
import type {
7+
AgentReplayActStep,
8+
AgentReplayFillFormStep,
9+
CachedAgentEntry,
10+
} from "../types/private/cache";
11+
12+
// End-to-end check: a cached agent run whose selector has been corrupted on
// disk must still replay successfully, and the cache file must be rewritten
// with the working selector and a new timestamp.
test.describe("Agent cache self-heal (e2e)", () => {
  let v3: V3;
  let cacheDir: string;

  // Every test gets its own cache directory inside the test's output dir and a
  // V3 instance with self-healing switched on.
  // eslint-disable-next-line no-empty-pattern
  test.beforeEach(async ({}, testInfo) => {
    const { outputDir } = testInfo;
    await fs.mkdir(outputDir, { recursive: true });
    cacheDir = await fs.mkdtemp(path.join(outputDir, "agent-cache-"));
    v3 = new V3({ ...v3TestConfig, cacheDir, selfHeal: true });
    await v3.init();
  });

  // Errors while closing are ignored so they do not mask the test outcome.
  test.afterEach(async () => {
    await v3?.close?.().catch(() => {});
  });

  test("replays heal corrupted selectors", async () => {
    test.setTimeout(120_000);

    const agent = v3.agent({
      model: "anthropic/claude-haiku-4-5-20251001",
    });
    const page = v3.context.pages()[0];
    const url =
      "https://browserbase.github.io/stagehand-eval-sites/sites/shadow-dom/";
    const instruction = "click the button";
    const brokenSelector = "xpath=/yeee";

    // Both runs start from the same freshly loaded page with fresh options.
    const loadPage = () => page.goto(url, { waitUntil: "networkidle" });
    const runAgent = () => agent.execute({ instruction, maxSteps: 20 });

    // First run: executes for real and populates the cache.
    await loadPage();
    const firstResult = await runAgent();
    expect(firstResult.success).toBe(true);

    const cachePath = await locateAgentCacheFile(cacheDir);
    const originalEntry = await readCacheEntry(cachePath);
    const originalActionStep = findFirstActionStep(originalEntry);
    expect(originalActionStep).toBeDefined();
    const cachedAction = originalActionStep?.actions?.[0];
    const originalSelector = cachedAction?.selector;
    expect(typeof originalSelector).toBe("string");

    // Corrupt the cached selector so the replay needs to self-heal.
    if (cachedAction) {
      cachedAction.selector = brokenSelector;
    }
    const corruptedJson = JSON.stringify(originalEntry, null, 2);
    await fs.writeFile(cachePath, corruptedJson, "utf8");

    // Second run should replay from cache, self-heal, and update the file.
    await loadPage();
    const replayResult = await runAgent();
    expect(replayResult.success).toBe(true);

    const healedEntry = await readCacheEntry(cachePath);
    const healedSelector =
      findFirstActionStep(healedEntry)?.actions?.[0]?.selector;
    expect(healedSelector).toBe(originalSelector);
    expect(healedSelector).not.toBe(brokenSelector);
    expect(healedEntry.timestamp).not.toBe(originalEntry.timestamp);
  });
});
76+
77+
/**
 * Polls a directory until it contains a file whose name starts with `agent-`.
 *
 * The directory is always listed at least once, so a zero timeout means
 * "check once and fail if nothing is there".
 *
 * @param cacheDir - Directory to scan.
 * @param timeoutMs - How long to keep polling before giving up (default 10 s).
 * @param pollIntervalMs - Delay between directory listings (default 200 ms).
 * @returns The path of the first matching file in the listing, joined onto
 *   `cacheDir`.
 * @throws Error when no matching file appears before the deadline; errors from
 *   listing the directory are propagated as-is.
 */
async function locateAgentCacheFile(
  cacheDir: string,
  timeoutMs = 10_000,
  pollIntervalMs = 200,
): Promise<string> {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const entries = await fs.readdir(cacheDir);
    // `find` yields `string | undefined`, so no non-null assertion is needed.
    const agentFile = entries.find((file) => file.startsWith("agent-"));
    if (agentFile !== undefined) {
      return path.join(cacheDir, agentFile);
    }
    if (Date.now() >= deadline) {
      break;
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
  throw new Error("Timed out waiting for agent cache entry to be written");
}
89+
90+
/**
 * Loads a cache file from disk and parses it as JSON.
 *
 * The parsed value is cast to `CachedAgentEntry` without any validation.
 *
 * @param cachePath - Path of the cache file to read.
 * @returns The parsed file contents.
 */
async function readCacheEntry(cachePath: string): Promise<CachedAgentEntry> {
  const contents = await fs.readFile(cachePath, { encoding: "utf8" });
  const parsed: unknown = JSON.parse(contents);
  return parsed as CachedAgentEntry;
}
94+
95+
type StepWithActions = AgentReplayActStep | AgentReplayFillFormStep;
96+
97+
function findFirstActionStep(
98+
entry: CachedAgentEntry,
99+
): StepWithActions | undefined {
100+
return entry.steps.find((step) => {
101+
const actions = (step as StepWithActions).actions;
102+
return Array.isArray(actions) && actions.length > 0;
103+
}) as StepWithActions | undefined;
104+
}

turbo.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@
8989
],
9090
"cache": false,
9191
"env": [
92-
"ANTHROPIC_API_KEY"
92+
"ANTHROPIC_API_KEY",
93+
"OPENAI_API_KEY"
9394
]
9495
},
9596
"e2e:bb": {

0 commit comments

Comments
 (0)