Skip to content

Commit 89db1e5

Browse files
authored
feat(cron): surface run diagnostics in status (#75928)
* feat(cron): surface run diagnostics in status
* docs: add cron diagnostics changelog
* fix(cron): preserve latest run diagnostics
* test(cron): update diagnostics regression deps
1 parent 8afc9ef commit 89db1e5

18 files changed

Lines changed: 828 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
4646
### Fixes
4747

4848
- Telegram: clean up tool-only draft previews after assistant message boundaries so transient `Surfacing...` tool-status bubbles do not linger when no matching final preview arrives. Thanks @BunsDev.
49+
- Cron: surface failed isolated-run diagnostics in `cron show`, status, and run history when requested tools are unavailable, so blocked cron runs report the actual tool-policy failure instead of a misleading green result. Fixes #75763. Thanks @RyanSandoval.
4950
- TUI/escape abort: track the in-flight runId after `chat.send` resolves so pressing Esc during the gap before the first gateway event aborts the run instead of repeatedly printing `no active run`. Fixes #1296. Thanks @Lukavyi and @romneyda.
5051
- TUI/render: stop the long-token sanitizer from injecting literal spaces inside inline code spans, fenced code blocks, table borders, and bare hyphenated/dotted identifiers, so copied package names, entity IDs, and shell line-continuations stay byte-for-byte intact while narrow-terminal protection still chunks unidentifiable long prose tokens. Fixes #48432, #39505. Thanks @DocOellerson, @xeusoc, @CCcassiusdjs, @akramcodez, @brokemac79, @romneyda.
5152
- Gateway/status: label Linux managed gateway services as `systemd user`, making status output explicit about the user-service scope instead of implying a system-level unit. Thanks @vincentkoc.

apps/macos/Sources/OpenClawProtocol/GatewayModels.swift

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4323,6 +4323,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43234323
public let status: AnyCodable?
43244324
public let error: String?
43254325
public let summary: String?
4326+
public let diagnostics: [String: AnyCodable]?
43264327
public let delivered: Bool?
43274328
public let deliverystatus: AnyCodable?
43284329
public let deliveryerror: String?
@@ -4344,6 +4345,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43444345
status: AnyCodable?,
43454346
error: String?,
43464347
summary: String?,
4348+
diagnostics: [String: AnyCodable]?,
43474349
delivered: Bool?,
43484350
deliverystatus: AnyCodable?,
43494351
deliveryerror: String?,
@@ -4364,6 +4366,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43644366
self.status = status
43654367
self.error = error
43664368
self.summary = summary
4369+
self.diagnostics = diagnostics
43674370
self.delivered = delivered
43684371
self.deliverystatus = deliverystatus
43694372
self.deliveryerror = deliveryerror
@@ -4386,6 +4389,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43864389
case status
43874390
case error
43884391
case summary
4392+
case diagnostics
43894393
case delivered
43904394
case deliverystatus = "deliveryStatus"
43914395
case deliveryerror = "deliveryError"

apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4323,6 +4323,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43234323
public let status: AnyCodable?
43244324
public let error: String?
43254325
public let summary: String?
4326+
public let diagnostics: [String: AnyCodable]?
43264327
public let delivered: Bool?
43274328
public let deliverystatus: AnyCodable?
43284329
public let deliveryerror: String?
@@ -4344,6 +4345,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43444345
status: AnyCodable?,
43454346
error: String?,
43464347
summary: String?,
4348+
diagnostics: [String: AnyCodable]?,
43474349
delivered: Bool?,
43484350
deliverystatus: AnyCodable?,
43494351
deliveryerror: String?,
@@ -4364,6 +4366,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43644366
self.status = status
43654367
self.error = error
43664368
self.summary = summary
4369+
self.diagnostics = diagnostics
43674370
self.delivered = delivered
43684371
self.deliverystatus = deliverystatus
43694372
self.deliveryerror = deliveryerror
@@ -4386,6 +4389,7 @@ public struct CronRunLogEntry: Codable, Sendable {
43864389
case status
43874390
case error
43884391
case summary
4392+
case diagnostics
43894393
case delivered
43904394
case deliverystatus = "deliveryStatus"
43914395
case deliveryerror = "deliveryError"

src/cli/cron-cli.test.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -497,8 +497,10 @@ describe("cron cli", () => {
497497
nextOffset: 200,
498498
};
499499
}
500+
const targetJob = createCronJob("target-job", "Target Job");
501+
targetJob.state.lastDiagnosticSummary = "exec stderr tail";
500502
return {
501-
jobs: [createCronJob("target-job", "Target Job")],
503+
jobs: [targetJob],
502504
hasMore: false,
503505
nextOffset: null,
504506
deliveryPreviews: {
@@ -527,6 +529,7 @@ describe("cron cli", () => {
527529
expect(defaultRuntime.log).toHaveBeenCalledWith(
528530
"delivery: announce -> telegram:-100 (resolved from last, main session)",
529531
);
532+
expect(defaultRuntime.log).toHaveBeenCalledWith("diagnostic: exec stderr tail");
530533
});
531534

532535
it("sends agent id on cron add", async () => {

src/cli/cron-cli/shared.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -380,4 +380,5 @@ export function printCronShow(
380380
runtime.log(`next: ${formatRelative(job.state.nextRunAtMs, Date.now())}`);
381381
runtime.log(`last: ${formatRelative(job.state.lastRunAtMs, Date.now())}`);
382382
runtime.log(`status: ${formatStatus(job)}`);
383+
runtime.log(`diagnostic: ${job.state.lastDiagnosticSummary ?? "-"}`);
383384
}

src/cron/isolated-agent/run.ts

Lines changed: 54 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,11 @@ import { stringifyRouteThreadId } from "../../plugin-sdk/channel-route.js";
1010
import { createLazyImportLoader } from "../../shared/lazy-promise.js";
1111
import { normalizeOptionalString } from "../../shared/string-coerce.js";
1212
import { resolveCronDeliveryPlan, type CronDeliveryPlan } from "../delivery-plan.js";
13+
import {
14+
createCronRunDiagnosticsFromAgentResult,
15+
createCronRunDiagnosticsFromError,
16+
mergeCronRunDiagnostics,
17+
} from "../run-diagnostics.js";
1318
import type {
1419
CronAgentExecutionStarted,
1520
CronDeliveryTrace,
@@ -566,7 +571,14 @@ async function prepareCronRunContext(params: {
566571
if (!resolvedModelSelection.ok) {
567572
return {
568573
ok: false,
569-
result: withRunSession({ status: "error", error: resolvedModelSelection.error }),
574+
result: withRunSession({
575+
status: "error",
576+
error: resolvedModelSelection.error,
577+
diagnostics: createCronRunDiagnosticsFromError(
578+
"cron-preflight",
579+
resolvedModelSelection.error,
580+
),
581+
}),
570582
};
571583
}
572584
let provider = resolvedModelSelection.provider;
@@ -586,6 +598,9 @@ async function prepareCronRunContext(params: {
586598
result: withRunSession({
587599
status: "skipped",
588600
error: preflight.reason,
601+
diagnostics: createCronRunDiagnosticsFromError("model-preflight", preflight.reason, {
602+
severity: "warn",
603+
}),
589604
provider,
590605
model,
591606
}),
@@ -862,7 +877,15 @@ async function finalizeCronRun(params: {
862877
await prepared.persistSessionEntry();
863878

864879
if (params.isAborted()) {
865-
return prepared.withRunSession({ status: "error", error: params.abortReason(), ...telemetry });
880+
return prepared.withRunSession({
881+
status: "error",
882+
error: params.abortReason(),
883+
diagnostics: mergeCronRunDiagnostics(
884+
createCronRunDiagnosticsFromAgentResult(finalRunResult, { finalStatus: "error" }),
885+
createCronRunDiagnosticsFromError("cron-setup", params.abortReason()),
886+
),
887+
...telemetry,
888+
});
866889
}
867890
let {
868891
summary,
@@ -882,6 +905,9 @@ async function finalizeCronRun(params: {
882905
await resolveCronChannelOutputPolicy(prepared.resolvedDelivery.channel)
883906
).preferFinalAssistantVisibleText,
884907
});
908+
const agentDiagnostics = createCronRunDiagnosticsFromAgentResult(finalRunResult, {
909+
finalStatus: hasFatalErrorPayload ? "error" : "ok",
910+
});
885911
const resolveRunOutcome = (result?: {
886912
delivered?: boolean;
887913
deliveryAttempted?: boolean;
@@ -897,6 +923,15 @@ async function finalizeCronRun(params: {
897923
delivered: result?.delivered,
898924
deliveryAttempted: result?.deliveryAttempted,
899925
delivery: result?.delivery,
926+
diagnostics: hasFatalErrorPayload
927+
? mergeCronRunDiagnostics(
928+
agentDiagnostics,
929+
createCronRunDiagnosticsFromError(
930+
"agent-run",
931+
embeddedRunError ?? "cron isolated run returned an error payload",
932+
),
933+
)
934+
: agentDiagnostics,
900935
...telemetry,
901936
});
902937
const failPendingPresentationWarningUnlessDelivered = (delivered?: boolean) => {
@@ -975,6 +1010,13 @@ async function finalizeCronRun(params: {
9751010
deliveryAttempted:
9761011
deliveryResult.result.deliveryAttempted ?? deliveryResult.deliveryAttempted,
9771012
delivery: deliveryTrace,
1013+
diagnostics: mergeCronRunDiagnostics(
1014+
agentDiagnostics,
1015+
deliveryResult.result.diagnostics,
1016+
deliveryResult.result.status === "error" && deliveryResult.result.error
1017+
? createCronRunDiagnosticsFromError("delivery", deliveryResult.result.error)
1018+
: undefined,
1019+
),
9781020
};
9791021
failPendingPresentationWarningUnlessDelivered(
9801022
resultWithDeliveryMeta.delivered ?? deliveryResult.delivered,
@@ -1066,7 +1108,11 @@ export async function runCronIsolatedAgentTurn(params: {
10661108
suppressExecNotifyOnExit: prepared.context.suppressExecNotifyOnExit,
10671109
});
10681110
if (isAborted()) {
1069-
return prepared.context.withRunSession({ status: "error", error: abortReason() });
1111+
return prepared.context.withRunSession({
1112+
status: "error",
1113+
error: abortReason(),
1114+
diagnostics: createCronRunDiagnosticsFromError("cron-setup", abortReason()),
1115+
});
10701116
}
10711117
return await finalizeCronRun({
10721118
prepared: prepared.context,
@@ -1075,6 +1121,10 @@ export async function runCronIsolatedAgentTurn(params: {
10751121
isAborted,
10761122
});
10771123
} catch (err) {
1078-
return prepared.context.withRunSession({ status: "error", error: String(err) });
1124+
return prepared.context.withRunSession({
1125+
status: "error",
1126+
error: String(err),
1127+
diagnostics: createCronRunDiagnosticsFromError("agent-run", err),
1128+
});
10791129
}
10801130
}

src/cron/run-diagnostics.test.ts

Lines changed: 171 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,171 @@
1+
import { describe, expect, it } from "vitest";
2+
import {
3+
createCronRunDiagnosticsFromAgentResult,
4+
createCronRunDiagnosticsFromError,
5+
mergeCronRunDiagnostics,
6+
normalizeCronRunDiagnostics,
7+
summarizeCronRunDiagnostics,
8+
} from "./run-diagnostics.js";
9+
10+
describe("cron run diagnostics", () => {
11+
it("normalizes and bounds diagnostic entries", () => {
12+
const diagnostics = normalizeCronRunDiagnostics({
13+
summary: "x".repeat(2_100),
14+
entries: Array.from({ length: 12 }, (_, i) => ({
15+
ts: i,
16+
source: "exec",
17+
severity: "error",
18+
message: i === 11 ? `secret sk-1234567890abcdef ${"a".repeat(1_100)}` : `entry ${i}`,
19+
})),
20+
});
21+
22+
expect(diagnostics?.entries).toHaveLength(10);
23+
expect(diagnostics?.entries[0]?.message).toBe("entry 2");
24+
expect(diagnostics?.entries.at(-1)?.message).toMatch(/$/);
25+
expect(diagnostics?.entries.at(-1)?.message).not.toContain("sk-1234567890abcdef");
26+
expect(diagnostics?.entries.at(-1)?.truncated).toBe(true);
27+
expect(diagnostics?.summary).toHaveLength(2_000);
28+
});
29+
30+
it("preserves later terminal diagnostics when capping entries", () => {
31+
const diagnostics = normalizeCronRunDiagnostics({
32+
entries: [
33+
...Array.from({ length: 10 }, (_, i) => ({
34+
ts: i,
35+
source: "tool",
36+
severity: "warn",
37+
message: `tool warning ${i}`,
38+
})),
39+
{
40+
ts: 11,
41+
source: "delivery",
42+
severity: "error",
43+
message: "delivery failed",
44+
},
45+
],
46+
});
47+
48+
expect(diagnostics?.entries).toHaveLength(10);
49+
expect(diagnostics?.entries.map((entry) => entry.message)).not.toContain("tool warning 0");
50+
expect(diagnostics?.entries.at(-1)).toMatchObject({
51+
source: "delivery",
52+
severity: "error",
53+
message: "delivery failed",
54+
});
55+
});
56+
57+
it("returns undefined for empty diagnostics", () => {
58+
expect(normalizeCronRunDiagnostics({ entries: [] })).toBeUndefined();
59+
expect(normalizeCronRunDiagnostics({ entries: [{ source: "exec" }] })).toBeUndefined();
60+
expect(summarizeCronRunDiagnostics(undefined)).toBeUndefined();
61+
});
62+
63+
it("creates diagnostics from errors and prefers the latest error summary", () => {
64+
const first = createCronRunDiagnosticsFromError("cron-preflight", "first failure", {
65+
nowMs: () => 100,
66+
});
67+
const second = createCronRunDiagnosticsFromError("delivery", new Error("delivery failed"), {
68+
nowMs: () => 200,
69+
});
70+
71+
const merged = mergeCronRunDiagnostics(first, second);
72+
expect(merged?.summary).toBe("delivery failed");
73+
expect(merged?.entries.map((entry) => entry.message)).toEqual([
74+
"first failure",
75+
"delivery failed",
76+
]);
77+
expect(summarizeCronRunDiagnostics(merged)).toBe("delivery failed");
78+
});
79+
80+
it("keeps a later delivery error summary ahead of an earlier warning", () => {
81+
const warning = normalizeCronRunDiagnostics({
82+
summary: "agent warning",
83+
entries: [{ ts: 100, source: "agent-run", severity: "warn", message: "agent warning" }],
84+
});
85+
const deliveryError = createCronRunDiagnosticsFromError("delivery", "delivery failed", {
86+
nowMs: () => 200,
87+
});
88+
89+
expect(mergeCronRunDiagnostics(warning, deliveryError)?.summary).toBe("delivery failed");
90+
});
91+
92+
it("extracts fatal agent result payloads and meta errors", () => {
93+
const diagnostics = createCronRunDiagnosticsFromAgentResult(
94+
{
95+
payloads: [
96+
{ text: "normal" },
97+
{ text: "tool stderr", isError: true, toolName: "shell" },
98+
{
99+
toolName: "exec",
100+
details: {
101+
status: "completed",
102+
exitCode: 2,
103+
aggregated: "stdout\nstderr failure",
104+
},
105+
},
106+
],
107+
meta: {
108+
error: { kind: "retry_limit", message: "retry limit exceeded" },
109+
failureSignal: { message: "SYSTEM_RUN_DENIED" },
110+
},
111+
},
112+
{ nowMs: () => 123 },
113+
);
114+
115+
expect(diagnostics?.entries.map((entry) => entry.message)).toEqual([
116+
"tool stderr",
117+
"stdout\nstderr failure",
118+
"retry limit exceeded",
119+
"SYSTEM_RUN_DENIED",
120+
]);
121+
expect(diagnostics?.entries[1]).toMatchObject({
122+
source: "exec",
123+
toolName: "exec",
124+
exitCode: 2,
125+
});
126+
});
127+
128+
it("does not capture harmless successful exec output", () => {
129+
const result = {
130+
payloads: [
131+
{
132+
toolName: "exec",
133+
details: {
134+
status: "completed",
135+
exitCode: 0,
136+
aggregated: "progress written to stderr",
137+
},
138+
},
139+
],
140+
};
141+
142+
expect(createCronRunDiagnosticsFromAgentResult(result)).toBeUndefined();
143+
expect(
144+
createCronRunDiagnosticsFromAgentResult(result, { finalStatus: "error" }),
145+
).toBeUndefined();
146+
});
147+
148+
it("captures silent failed exec details with a fallback message", () => {
149+
const diagnostics = createCronRunDiagnosticsFromAgentResult({
150+
payloads: [
151+
{
152+
toolName: "exec",
153+
details: {
154+
status: "completed",
155+
exitCode: 2,
156+
},
157+
},
158+
],
159+
});
160+
161+
expect(diagnostics?.entries).toEqual([
162+
expect.objectContaining({
163+
source: "exec",
164+
severity: "warn",
165+
message: "exec failed with exit code 2",
166+
toolName: "exec",
167+
exitCode: 2,
168+
}),
169+
]);
170+
});
171+
});

0 commit comments

Comments
 (0)