fix(sanitizer): thread errorKind through pipeline to prevent false positives#29031
fix(sanitizer): thread errorKind through pipeline to prevent false positives#29031dominicnunez wants to merge 1 commit intoopenclaw:mainfrom
Conversation
Greptile SummaryThis PR successfully addresses the false positive issue where assistant text mentioning errors was being incorrectly rewritten. The solution threads a structured Key Changes
ArchitectureThe refactor correctly eliminates false positives by:
Issue Found
Most code paths work correctly because:
But Confidence Score: 3/5
Last reviewed commit: 427404e |
| return false; | ||
| } | ||
|
|
||
| export function deriveErrorKind(rawErrorMessage: string): ErrorKind { | ||
| if (isCompactionFailureError(rawErrorMessage)) { | ||
| return "compaction_failure"; | ||
| } | ||
| if (isLikelyContextOverflowError(rawErrorMessage)) { | ||
| return "context_overflow"; | ||
| } | ||
| // Check overloaded before classifyFailoverReason, which conflates it with rate_limit. | ||
| if (isOverloadedErrorMessage(rawErrorMessage)) { | ||
| return "overloaded"; | ||
| } | ||
| if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) { | ||
| return "image_size"; | ||
| } | ||
| const failoverReason = classifyFailoverReason(rawErrorMessage); | ||
| const errorKindValues: ReadonlySet<string> = new Set<ErrorKind>([ | ||
| "billing", | ||
| "rate_limit", | ||
| "timeout", | ||
| "auth", | ||
| "context_overflow", |
There was a problem hiding this comment.
deriveErrorKind doesn't detect role ordering errors
The function checks for compaction_failure, context_overflow, overloaded, and image_size errors, but doesn't check for role ordering errors (e.g., "400 Incorrect role information" or "roles must alternate"). This means it returns "unknown" for these errors.
Impact: In sessions-helpers.ts (line 173), when deriveErrorKind is called on a role ordering error message, it returns "unknown". Then sanitizeUserFacingText is called with errorKind: "unknown", which doesn't match the "role_ordering" case in the switch statement. The error gets formatted as a generic HTTP error instead of the friendly "Message ordering conflict" message.
| return false; | |
| } | |
| export function deriveErrorKind(rawErrorMessage: string): ErrorKind { | |
| if (isCompactionFailureError(rawErrorMessage)) { | |
| return "compaction_failure"; | |
| } | |
| if (isLikelyContextOverflowError(rawErrorMessage)) { | |
| return "context_overflow"; | |
| } | |
| // Check overloaded before classifyFailoverReason, which conflates it with rate_limit. | |
| if (isOverloadedErrorMessage(rawErrorMessage)) { | |
| return "overloaded"; | |
| } | |
| if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) { | |
| return "image_size"; | |
| } | |
| const failoverReason = classifyFailoverReason(rawErrorMessage); | |
| const errorKindValues: ReadonlySet<string> = new Set<ErrorKind>([ | |
| "billing", | |
| "rate_limit", | |
| "timeout", | |
| "auth", | |
| "context_overflow", | |
| export function deriveErrorKind(rawErrorMessage: string): ErrorKind { | |
| if (isCompactionFailureError(rawErrorMessage)) { | |
| return "compaction_failure"; | |
| } | |
| if (isLikelyContextOverflowError(rawErrorMessage)) { | |
| return "context_overflow"; | |
| } | |
| // Check overloaded before classifyFailoverReason, which conflates it with rate_limit. | |
| if (isOverloadedErrorMessage(rawErrorMessage)) { | |
| return "overloaded"; | |
| } | |
| // Check role ordering errors | |
| if (/incorrect role information|roles must alternate|400.*role|"message".*role.*information/i.test(rawErrorMessage)) { | |
| return "role_ordering"; | |
| } | |
| if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) { | |
| return "image_size"; | |
| } | |
| const failoverReason = classifyFailoverReason(rawErrorMessage); | |
| const errorKindValues: ReadonlySet<string> = new Set<ErrorKind>([ | |
| "billing", "rate_limit", "timeout", "auth", "context_overflow", | |
| "overloaded", "format", "compaction_failure", "role_ordering", "image_size", "unknown", | |
| ]); | |
| if (failoverReason && failoverReason !== "unknown" && errorKindValues.has(failoverReason)) { | |
| return failoverReason as ErrorKind; | |
| } | |
| return "unknown"; | |
| } |
Prompt To Fix With AI
This is a comment left during a code review.
Path: src/agents/pi-embedded-helpers/errors.ts
Line: 879-902
Comment:
`deriveErrorKind` doesn't detect role ordering errors
The function checks for compaction_failure, context_overflow, overloaded, and image_size errors, but doesn't check for role ordering errors (e.g., "400 Incorrect role information" or "roles must alternate"). This means it returns "unknown" for these errors.
Impact: In `sessions-helpers.ts` (line 173), when `deriveErrorKind` is called on a role ordering error message, it returns "unknown". Then `sanitizeUserFacingText` is called with `errorKind: "unknown"`, which doesn't match the "role_ordering" case in the switch statement. The error gets formatted as a generic HTTP error instead of the friendly "Message ordering conflict" message.
```suggestion
export function deriveErrorKind(rawErrorMessage: string): ErrorKind {
if (isCompactionFailureError(rawErrorMessage)) {
return "compaction_failure";
}
if (isLikelyContextOverflowError(rawErrorMessage)) {
return "context_overflow";
}
// Check overloaded before classifyFailoverReason, which conflates it with rate_limit.
if (isOverloadedErrorMessage(rawErrorMessage)) {
return "overloaded";
}
// Check role ordering errors
if (/incorrect role information|roles must alternate|400.*role|"message".*role.*information/i.test(rawErrorMessage)) {
return "role_ordering";
}
if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) {
return "image_size";
}
const failoverReason = classifyFailoverReason(rawErrorMessage);
const errorKindValues: ReadonlySet<string> = new Set<ErrorKind>([
"billing", "rate_limit", "timeout", "auth", "context_overflow",
"overloaded", "format", "compaction_failure", "role_ordering", "image_size", "unknown",
]);
if (failoverReason && failoverReason !== "unknown" && errorKindValues.has(failoverReason)) {
return failoverReason as ErrorKind;
}
return "unknown";
}
```
How can I resolve this? If you propose a fix, please make it concise.There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: bf5c23f864
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) { | ||
| return "image_size"; | ||
| } | ||
| const failoverReason = classifyFailoverReason(rawErrorMessage); |
There was a problem hiding this comment.
Classify role-ordering errors before fallback mapping
deriveErrorKind never emits "role_ordering": it only checks compaction/context/overloaded/image-size, then falls back to classifyFailoverReason(rawErrorMessage), and that helper has no role-ordering branch. Because this commit also removed regex reclassification from sanitizeUserFacingText, an assistant error like "400 Incorrect role information" now resolves to errorKind: "unknown" and skips the friendly "Message ordering conflict" rewrite, so raw provider text can leak through in paths that rely on deriveErrorKind + sanitizeUserFacingText (for example extractAssistantText).
Useful? React with 👍 / 👎.
|
Addressed the review feedback from both Greptile and Codex — The remaining CI failure is a pre-existing |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: a428ac4a2d
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if (isImageDimensionErrorMessage(rawErrorMessage) || isImageSizeError(rawErrorMessage)) { | ||
| return "image_size"; | ||
| } | ||
| const failoverReason = classifyFailoverReason(rawErrorMessage); |
There was a problem hiding this comment.
Distinguish transient HTTP failures from timeouts
Using classifyFailoverReason(rawErrorMessage) here causes every transient HTTP 5xx/529 error to be classified as "timeout" (because that helper intentionally collapses transient transport failures into the failover bucket). After this commit, that errorKind is threaded into sanitizeUserFacingText, so messages like 500 Internal Server Error are now rewritten to "LLM request timed out." instead of preserving the HTTP/unavailable context, which regresses user-facing diagnostics in paths like assistant text extraction and reply normalization.
Useful? React with 👍 / 👎.
448b29a to
3323947
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 332394706c
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| defaultRuntime.error(`Embedded agent failed before reply: ${message}`); | ||
| const safeMessage = isTransientHttp | ||
| ? sanitizeUserFacingText(message, { errorContext: true }) | ||
| ? sanitizeUserFacingText(message, { errorContext: true, errorKind: "timeout" }) |
There was a problem hiding this comment.
Preserve transient HTTP details in pre-reply fallback
This path now hard-codes errorKind: "timeout" for every transient HTTP failure before a reply is produced, so errors like 500 Internal Server Error and 529 are rewritten to LLM request timed out. instead of preserving the actual HTTP status/context. In the ⚠️ Agent failed before reply: ... fallback path, that regression makes diagnostics misleading and hides actionable upstream failure details from operators.
Useful? React with 👍 / 👎.
|
Addressed the second Codex review (transient HTTP 5xx misclassification) in 448b29a → 3323947.
Transient HTTP errors like Note: the |
3323947 to
64c64e6
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 64c64e6e09
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if (isOverloadedErrorMessage(rawErrorMessage)) { | ||
| return "overloaded"; | ||
| } |
There was a problem hiding this comment.
Keep transient HTTP statuses out of overloaded mapping
deriveErrorKind now classifies "503 Service Unavailable" as "overloaded" because it checks isOverloadedErrorMessage before any transient-HTTP guard, and sanitizeUserFacingText then rewrites that to the generic overloaded copy. This is a regression from the previous behavior where HTTP-prefixed errors were preserved as HTTP 503: ..., so operators lose the actionable status code/context for transient upstream failures.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: b6a070cd6c
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
src/agents/pi-embedded-utils.ts
Outdated
| const errorKind = | ||
| errorContext && msg.errorMessage ? deriveErrorKind(msg.errorMessage) : undefined; |
There was a problem hiding this comment.
Derive kind from content when errorMessage is missing
errorContext is set whenever stopReason === "error", but errorKind is only derived from msg.errorMessage; with this commit’s regex removal in sanitizeUserFacingText, error replies that only carry the provider error in content (including legacy transcript entries with missing errorMessage) now skip role/context/billing rewrites and surface raw provider text instead. For example, a content-only "400 Incorrect role information" no longer maps to the friendly ordering-conflict message on this path.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 3369559144
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
src/agents/tools/sessions-helpers.ts
Outdated
| const errorKind = | ||
| errorContext && typeof errorMessage === "string" ? deriveErrorKind(errorMessage) : undefined; |
There was a problem hiding this comment.
Derive error kind when assistant errorMessage is missing
This path only calls deriveErrorKind when errorMessage is a string, but errorContext is also true for stopReason === "error"; after this commit removed regex reclassification from sanitizeUserFacingText, error turns that carry provider text only in content (e.g. legacy/history entries like "400 Incorrect role information") now bypass friendly rewrites and surface raw provider text in readLatestAssistantReply/sessions flows. Please derive errorKind from available error text when stopReason is error even if errorMessage is absent.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 74c04f75a2
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if (isTransientHttpError(rawErrorMessage)) { | ||
| return "unknown"; | ||
| } | ||
| if (isOverloadedErrorMessage(rawErrorMessage)) { | ||
| return "overloaded"; |
There was a problem hiding this comment.
Treat
HTTP 5xx: strings as transient before overloaded mapping
deriveErrorKind now relies on isTransientHttpError to keep transient 5xx diagnostics raw, but that helper only recognizes 503 ... (space form) and misses formatted strings like HTTP 503: Service Unavailable; those then fall through to isOverloadedErrorMessage and get rewritten to the generic overloaded copy. This shows up in the new content-fallback classification paths (extractAssistantText in src/agents/pi-embedded-utils.ts and src/agents/tools/sessions-helpers.ts) when legacy/error-history entries only contain formatted HTTP text, causing loss of actionable status details that the new guard was intended to preserve.
Useful? React with 👍 / 👎.
df4b377 to
3e973c8
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: c57b4a92df
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
src/agents/pi-embedded-utils.ts
Outdated
| const errorKind = | ||
| errorContext && msg.errorMessage ? deriveErrorKind(msg.errorMessage) : undefined; |
There was a problem hiding this comment.
Restrict errorKind override to real error turns
extractAssistantText now derives errorKind whenever errorMessage is present, even if the assistant turn was not an actual error (stopReason !== "error"). In that mixed-state case, sanitizeUserFacingText will replace normal assistant content with canned billing/rate-limit/etc text because the new errorKind switch ignores the extracted body once a kind is set. This regresses reply/session reads by hiding valid assistant output when providers attach stale error metadata; only derive/apply errorKind for true error turns (or after confirming the extracted text is itself an error payload).
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 20fa26d10a
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| failoverReason !== "auth_permanent" && | ||
| failoverReason !== "model_not_found" | ||
| ) { | ||
| return failoverReason; | ||
| } |
There was a problem hiding this comment.
Preserve API internal-error details instead of forcing timeout
deriveErrorKind now forwards most classifyFailoverReason outputs directly, including "timeout"; because classifyFailoverReason treats JSON API internal-server payloads (for example {"type":"error","error":{"type":"api_error","message":"Internal server error"}}) as timeout, downstream sanitization rewrites those errors to LLM request timed out. and drops the provider/type details that formatAssistantErrorText already extracted. This regresses diagnostics in the payload normalization paths that now consume errorKind.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 55157676ef
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| ? sanitizeUserFacingText(message, { errorContext: true }) | ||
| : message; | ||
| const trimmedMessage = safeMessage.replace(/\.\s*$/, ""); | ||
| const trimmedMessage = message.replace(/\.\s*$/, ""); |
There was a problem hiding this comment.
Sanitize transient HTTP fallback text before returning to users
This fallback now interpolates the raw exception string directly into ⚠️ Agent failed before reply: ..., so transient provider failures that include full HTML/error payloads (for example 521 <!doctype html>...) will be sent verbatim to end users after the retry path is exhausted. Before this change, transient errors were sanitized/formatted first, which avoided leaking noisy multi-line payloads and kept replies channel-safe; restoring formatted handling here would preserve HTTP context without dumping raw bodies.
Useful? React with 👍 / 👎.
5515767 to
5dbed9b
Compare
bmendonca3
left a comment
There was a problem hiding this comment.
Blocking issues before merge:
errorKindderivation is now too narrow for provider paths that populateerrorMessagewithoutstopReason === "error".- the transient HTTP fallback path regresses and now surfaces raw provider or network failure text again.
The sanitizer threading is moving in the right direction, but these two reply-surface regressions need to be fixed first.
src/agents/pi-embedded-utils.ts
Outdated
| return sanitizeUserFacingText(extracted, { errorContext }); | ||
| const isErrorTurn = msg.stopReason === "error"; | ||
| const errorContext = isErrorTurn || Boolean(msg.errorMessage?.trim()); | ||
| const errorKind = |
There was a problem hiding this comment.
This only derives errorKind when stopReason === "error", even though errorContext is also true when the provider filled errorMessage. That regresses the provider path where we have an error string but no stopReason, because those turns lose the specific rewrite and fall back to generic sanitization. I would derive the kind whenever errorContext is true, using errorMessage when present and extracted as the fallback.
| ? sanitizeUserFacingText(message, { errorContext: true }) | ||
| : message; | ||
| const trimmedMessage = safeMessage.replace(/\.\s*$/, ""); | ||
| const trimmedMessage = message.replace(/\.\s*$/, ""); |
There was a problem hiding this comment.
This drops the previous transient-HTTP sanitization and now echoes the raw provider or network failure text back to the user after retry exhaustion. That is exactly the class of message we usually scrub, because it tends to contain transport noise or provider-internal phrasing. Please keep the old sanitizeUserFacingText(message, { errorContext: true }) path for transient HTTP failures before building the fallback text.
abcf03b to
9d39e5e
Compare
744def4 to
40df5b0
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 40df5b0647
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| case "billing": | ||
| return BILLING_ERROR_USER_MESSAGE; |
There was a problem hiding this comment.
Preserve provider/model context in billing error payloads
When buildEmbeddedRunPayloads() has already formatted a billing failure with formatBillingErrorMessage(provider, model), the later sanitizer pass still sees errorKind: "billing" and unconditionally replaces that text with the generic BILLING_ERROR_USER_MESSAGE. On the normal reply-delivery paths (normalizeReplyPayload and the streaming normalization in agent-runner-execution.ts), a payload like ⚠️ Anthropic (claude-3-5-sonnet) returned a billing error… is therefore downgraded to provider-agnostic copy, so users/operators lose the specific provider/model that actually ran out of credits.
Useful? React with 👍 / 👎.
40df5b0 to
a2179c7
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: a2179c780b
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| const effectiveErrorSource = lastAssistantErrored | ||
| ? params.lastAssistant?.errorMessage?.trim() || fallbackRawAnswerText.trim() || undefined | ||
| : undefined; |
There was a problem hiding this comment.
Inspect content when
errorMessage is only a wrapper
If an error turn carries a generic wrapper like Request failed in errorMessage but the detailed provider payload lives in content, effectiveErrorSource never inspects that content. deriveErrorKind() then stays unknown, formatAssistantErrorText() formats only the wrapper, and buildEmbeddedRunPayloads() can emit Request failed plus raw provider text instead of the friendly rewrite the old content-based sanitizer produced (for example 400 Incorrect role information no longer becomes the ordering-conflict message). This affects any provider or legacy transcript shape that stores the actionable error only in the assistant body.
Useful? React with 👍 / 👎.
| const trimmedErrorMessage = msg.errorMessage?.trim(); | ||
| const errorSource = errorContext ? trimmedErrorMessage || extracted.trim() : undefined; | ||
| const errorKind = errorSource ? deriveErrorKind(errorSource) : undefined; |
There was a problem hiding this comment.
Inspect error content before trusting transcript
errorMessage
This new trimmedErrorMessage || extracted.trim() precedence regresses transcript/session reads when stopReason === "error", errorMessage is generic, and the real provider error is only in content. In that case errorKind becomes unknown, so sanitizeUserFacingText() no longer applies the structured role-ordering/context/billing rewrites and commands that read the latest assistant reply now surface raw provider text; the same pattern is repeated in sessions-helpers.ts, so session-history reads regress too.
Useful? React with 👍 / 👎.
a2179c7 to
e3c357a
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: e3c357ae3b
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| case "auth": | ||
| case "auth_permanent": | ||
| return AUTH_ERROR_USER_MESSAGE; |
There was a problem hiding this comment.
Preserve actionable scope details for structured auth errors
Because extractAssistantText, sessions-helpers, and normalizeReplyPayload now thread errorKind, an auth payload like 401 {"error":{"message":"Missing scopes: model.request"}} reaches this branch and is collapsed to the generic authentication copy. Before this commit, the same text flowed through formatRawAssistantErrorForUi, which preserved the missing-scope detail (see the existing TUI expectations for Missing scopes: model.request), so users/operators now lose the exact permission they need to add.
Useful? React with 👍 / 👎.
| case "image_size": | ||
| return IMAGE_SIZE_USER_MESSAGE; | ||
| case "model_not_found": | ||
| return MODEL_NOT_FOUND_USER_MESSAGE; |
There was a problem hiding this comment.
Keep model-specific guidance in model_not_found replies
This generic rewrite now discards the concrete model identifier and any recovery hint carried by the original error text. For example, provider-catalog errors like Unknown model: openai/gpt-5.3-codex-spark ... Use openai-codex/gpt-5.3-codex-spark. will now be reduced to The selected model is not available, even though the original message told the user exactly which replacement to use. Since the new pipeline derives errorKind before sanitizing assistant/session replies, that guidance is no longer surfaced on those paths.
Useful? React with 👍 / 👎.
Summary
ErrorKindtype andderiveErrorKind()to classify errors at the source (runner payload layer) instead of regex-matching text content insanitizeUserFacingTexterrorKindthrough the sanitizer pipeline sosanitizeUserFacingTextuses aswitchon the caller-provided kind rather than regex matching the text bodyProblem
sanitizeUserFacingTextused regex patterns to detect error types in text content. This caused false positives: if an assistant reply mentioned a billing error (e.g. "check whether we need to re-enable billing") or context overflow, it would be silently rewritten to a canned error message like "billing error: please upgrade your plan".Solution
The caller now classifies the error once via
deriveErrorKind()at the point where the raw error message is available, and passes theerrorKindthrough tosanitizeUserFacingText. The sanitizer uses a structuredswitchonerrorKindinstead of regex matching, eliminating false positives entirely.Test plan
sanitizeuserfacingtext-errorkind.test.ts— tests structured errorKind dispatch in sanitizeUserFacingTextpayloads.errorkind.test.ts— tests deriveErrorKind derivation in buildEmbeddedRunPayloadssanitizeuserfacingtext.test.tsto pass errorKind where neededvitest run)extraparams.test.tsunrelated to this PR)🤖 AI-assisted (Claude Code). I understand the code and have verified the changes are correct.