Skip to content

Commit 32ee2f0

Browse files
fix(feishu): break infinite typing-indicator retry loop on rate-limit / quota errors (#28494) thanks @guoqunabc
Verified: - pnpm build - pnpm check - pnpm test:macmini Co-authored-by: guoqunabc <[email protected]> Co-authored-by: Tak Hoffman <[email protected]>
1 parent 0e755ad commit 32ee2f0

File tree

3 files changed

+263
-5
lines changed

3 files changed

+263
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
1515

1616
### Fixes
1717

18+
- Feishu/Typing backoff: re-throw Feishu typing add/remove rate-limit and quota errors (`429`, `99991400`, `99991403`) and detect SDK non-throwing backoff responses so the typing keepalive circuit breaker can stop retries instead of looping indefinitely. (#28494)
1819
- Feishu/Probe status caching: cache successful `probeFeishu()` bot-info results for 10 minutes (bounded cache with per-account keying) to reduce repeated status/onboarding probe API calls, while bypassing cache for failures and exceptions. (#28907) Thanks @Glucksberg.
1920
- Feishu/Opus media send type: send `.opus` attachments with `msg_type: "audio"` (instead of `"media"`) so Feishu voice messages deliver correctly while `.mp4` remains `msg_type: "media"` and documents remain `msg_type: "file"`. (#28269) Thanks @Glucksberg.
2021
- Feishu/Local media sends: propagate `mediaLocalRoots` through Feishu outbound media sending into `loadWebMedia` so local path attachments work with post-CVE local-root enforcement. (#27884) Thanks @joelnishanth.
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import { describe, expect, it } from "vitest";
2+
import { isFeishuBackoffError, getBackoffCodeFromResponse, FeishuBackoffError } from "./typing.js";
3+
4+
describe("isFeishuBackoffError", () => {
  // One row per case: the test title, the value handed to isFeishuBackoffError,
  // and the verdict it must return. Rows are kept in their original order.
  const scenarios: Array<{ title: string; input: unknown; expected: boolean }> = [
    {
      title: "returns true for HTTP 429 (AxiosError shape)",
      input: { response: { status: 429, data: {} } },
      expected: true,
    },
    {
      title: "returns true for Feishu quota exceeded code 99991403",
      input: { response: { status: 200, data: { code: 99991403 } } },
      expected: true,
    },
    {
      title: "returns true for Feishu rate limit code 99991400",
      input: { response: { status: 200, data: { code: 99991400 } } },
      expected: true,
    },
    {
      title: "returns true for SDK error with code 429",
      input: { code: 429, message: "too many requests" },
      expected: true,
    },
    {
      title: "returns true for SDK error with top-level code 99991403",
      input: { code: 99991403, message: "quota exceeded" },
      expected: true,
    },
    {
      title: "returns false for other HTTP errors (e.g. 500)",
      input: { response: { status: 500, data: {} } },
      expected: false,
    },
    {
      title: "returns false for non-rate-limit Feishu codes",
      input: { response: { status: 200, data: { code: 99991401 } } },
      expected: false,
    },
    {
      title: "returns false for generic Error",
      input: new Error("network timeout"),
      expected: false,
    },
    { title: "returns false for null", input: null, expected: false },
    { title: "returns false for undefined", input: undefined, expected: false },
    { title: "returns false for string", input: "429", expected: false },
    {
      title: "returns true for 429 even without data",
      input: { response: { status: 429 } },
      expected: true,
    },
  ];

  for (const { title, input, expected } of scenarios) {
    it(title, () => {
      expect(isFeishuBackoffError(input)).toBe(expected);
    });
  }
});
61+
62+
describe("getBackoffCodeFromResponse", () => {
  // Responses whose `code` is a backoff code: the helper must echo that code back.
  const backoffResponses = [
    {
      title: "returns backoff code for response with quota exceeded code",
      response: { code: 99991403, msg: "quota exceeded", data: null },
    },
    {
      title: "returns backoff code for response with rate limit code",
      response: { code: 99991400, msg: "rate limit", data: null },
    },
    {
      title: "returns backoff code for response with code 429",
      response: { code: 429, msg: "too many requests", data: null },
    },
  ];

  for (const { title, response } of backoffResponses) {
    it(title, () => {
      expect(getBackoffCodeFromResponse(response)).toBe(response.code);
    });
  }

  // Everything else (success, unrelated error codes, non-objects, missing
  // `code`) must yield undefined.
  const ignoredResponses: Array<{ title: string; response: unknown }> = [
    {
      title: "returns undefined for successful response (code 0)",
      response: { code: 0, msg: "success", data: { reaction_id: "r1" } },
    },
    {
      title: "returns undefined for other error codes",
      response: { code: 99991401, msg: "other error", data: null },
    },
    { title: "returns undefined for null", response: null },
    { title: "returns undefined for undefined", response: undefined },
    {
      title: "returns undefined for response without code field",
      response: { data: { reaction_id: "r1" } },
    },
  ];

  for (const { title, response } of ignoredResponses) {
    it(title, () => {
      expect(getBackoffCodeFromResponse(response)).toBeUndefined();
    });
  }
});
101+
102+
describe("FeishuBackoffError", () => {
  // Codes exercised by this suite.
  const QUOTA_EXCEEDED = 99991403;
  const RATE_LIMITED = 99991400;

  it("is detected by isFeishuBackoffError via .code property", () => {
    const quotaError = new FeishuBackoffError(QUOTA_EXCEEDED);
    expect(isFeishuBackoffError(quotaError)).toBe(true);
  });

  it("is detected for rate limit code 99991400", () => {
    const rateLimitError = new FeishuBackoffError(RATE_LIMITED);
    expect(isFeishuBackoffError(rateLimitError)).toBe(true);
  });

  it("has correct name and message", () => {
    const quotaError = new FeishuBackoffError(QUOTA_EXCEEDED);
    expect(quotaError.name).toBe("FeishuBackoffError");
    expect(quotaError.message).toBe("Feishu API backoff: code 99991403");
    expect(quotaError.code).toBe(99991403);
  });

  it("is an instance of Error", () => {
    const quotaError = new FeishuBackoffError(QUOTA_EXCEEDED);
    expect(quotaError instanceof Error).toBe(true);
  });

  it("survives catch-and-rethrow pattern", () => {
    // Mirrors the catch blocks in addTypingIndicator/removeTypingIndicator:
    // a backoff error thrown inside the try must be re-thrown by the inner
    // catch rather than swallowed like any other error.
    const runGuarded = (): unknown => {
      try {
        try {
          throw new FeishuBackoffError(QUOTA_EXCEEDED);
        } catch (inner) {
          if (isFeishuBackoffError(inner)) {
            throw inner; // re-thrown — this is the fix
          }
          // a plain Error would end up here and be silently swallowed
          return "swallowed";
        }
      } catch (outer) {
        return outer;
      }
    };

    expect(runGuarded()).toBeInstanceOf(FeishuBackoffError);
  });
});

extensions/feishu/src/typing.ts

Lines changed: 118 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,97 @@ import { createFeishuClient } from "./client.js";
77
// Full list: https://github.com/go-lark/lark/blob/main/emoji.go
88
const TYPING_EMOJI = "Typing"; // Typing indicator emoji
99

10+
/**
11+
* Feishu API error codes that indicate the caller should back off.
12+
* These must propagate to the typing circuit breaker so the keepalive loop
13+
* can trip and stop retrying.
14+
*
15+
* - 99991400: Rate limit (too many requests per second)
16+
* - 99991403: Monthly API call quota exceeded
17+
* - 429: Standard HTTP 429 returned as a Feishu SDK error code
18+
*
19+
* @see https://open.feishu.cn/document/server-docs/api-call-guide/generic-error-code
20+
*/
21+
const FEISHU_BACKOFF_CODES = new Set<number>([
  99991400, // rate limit
  99991403, // quota exceeded
  429, // HTTP 429 surfaced as an SDK error code
]);
22+
23+
/**
24+
* Custom error class for Feishu backoff conditions detected from non-throwing
25+
* SDK responses. Carries a numeric `.code` so that `isFeishuBackoffError()`
26+
* recognises it when the error is caught downstream.
27+
*/
28+
export class FeishuBackoffError extends Error {
29+
code: number;
30+
constructor(code: number) {
31+
super(`Feishu API backoff: code ${code}`);
32+
this.name = "FeishuBackoffError";
33+
this.code = code;
34+
}
35+
}
36+
1037
/** Handle describing a typing-indicator reaction placed on a message. */
export type TypingIndicatorState = {
  /** ID of the message the reaction was added to. */
  messageId: string;
  /** ID of the created reaction, or `null` when none was obtained. */
  reactionId: string | null;
};
1441

1542
/**
16-
* Add a typing indicator (reaction) to a message
43+
* Check whether an error represents a rate-limit or quota-exceeded condition
44+
* from the Feishu API that should stop the typing keepalive loop.
45+
*
46+
* Handles two shapes:
47+
* 1. AxiosError with `response.status` and `response.data.code`
48+
* 2. Feishu SDK error with a top-level `code` property
49+
*/
50+
export function isFeishuBackoffError(err: unknown): boolean {
  // Primitives, null and undefined cannot carry a status or code.
  if (err === null || typeof err !== "object") {
    return false;
  }

  // View the value through both supported shapes at once; every field is
  // optional, so each one is checked before use.
  const candidate = err as {
    code?: number;
    response?: { status?: number; data?: { code?: number } };
  };

  // AxiosError shape: an HTTP 429 status, or a backoff code in the body.
  const httpResponse = candidate.response;
  if (httpResponse) {
    if (httpResponse.status === 429) {
      return true;
    }
    const bodyCode = httpResponse.data?.code;
    if (typeof bodyCode === "number" && FEISHU_BACKOFF_CODES.has(bodyCode)) {
      return true;
    }
  }

  // Feishu SDK error shape: a backoff code directly on the error object.
  const sdkCode = candidate.code;
  return typeof sdkCode === "number" && FEISHU_BACKOFF_CODES.has(sdkCode);
}
74+
75+
/**
 * Extract a backoff code from a Feishu SDK response object, if it has one.
 *
 * The SDK can resolve normally (no throw) while the response body carries an
 * API-level error code, so callers inspect the resolved value with this helper
 * and throw themselves when a backoff code is present. See codex review on
 * #28157.
 *
 * @param response - Value resolved by an SDK call; any type is accepted.
 * @returns The top-level numeric `code` when it is one of
 *   `FEISHU_BACKOFF_CODES`; otherwise `undefined` (non-objects, `null`, a
 *   missing or non-numeric `code`, or any other code).
 */
export function getBackoffCodeFromResponse(response: unknown): number | undefined {
  if (response === null || typeof response !== "object") {
    return undefined;
  }

  const { code: apiCode } = response as { code?: number };
  const signalsBackoff = typeof apiCode === "number" && FEISHU_BACKOFF_CODES.has(apiCode);
  return signalsBackoff ? apiCode : undefined;
}
92+
93+
/**
94+
* Add a typing indicator (reaction) to a message.
95+
*
96+
* Rate-limit and quota errors are re-thrown so the circuit breaker in
97+
* `createTypingCallbacks` (typing-start-guard) can trip and stop the
98+
* keepalive loop. See #28062.
99+
*
100+
* Also checks for backoff codes in non-throwing SDK responses (#28157).
17101
*/
18102
export async function addTypingIndicator(params: {
19103
cfg: ClawdbotConfig;
@@ -36,18 +120,34 @@ export async function addTypingIndicator(params: {
36120
},
37121
});
38122

123+
// Feishu SDK may return a normal response with an API-level error code
124+
// instead of throwing. Detect backoff codes and throw to trip the breaker.
125+
const backoffCode = getBackoffCodeFromResponse(response);
126+
if (backoffCode !== undefined) {
127+
console.log(
128+
`[feishu] typing indicator response contains backoff code ${backoffCode}, stopping keepalive`,
129+
);
130+
throw new FeishuBackoffError(backoffCode);
131+
}
132+
39133
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK response type
40134
const reactionId = (response as any)?.data?.reaction_id ?? null;
41135
return { messageId, reactionId };
42136
} catch (err) {
43-
// Silently fail - typing indicator is not critical
137+
if (isFeishuBackoffError(err)) {
138+
console.log(`[feishu] typing indicator hit rate-limit/quota, stopping keepalive`);
139+
throw err;
140+
}
141+
// Silently fail for other non-critical errors (e.g. message deleted, permission issues)
44142
console.log(`[feishu] failed to add typing indicator: ${err}`);
45143
return { messageId, reactionId: null };
46144
}
47145
}
48146

49147
/**
50-
* Remove a typing indicator (reaction) from a message
148+
* Remove a typing indicator (reaction) from a message.
149+
*
150+
* Rate-limit and quota errors are re-thrown for the same reason as above.
51151
*/
52152
export async function removeTypingIndicator(params: {
53153
cfg: ClawdbotConfig;
@@ -67,14 +167,27 @@ export async function removeTypingIndicator(params: {
67167
const client = createFeishuClient(account);
68168

69169
try {
70-
await client.im.messageReaction.delete({
170+
const result = await client.im.messageReaction.delete({
71171
path: {
72172
message_id: state.messageId,
73173
reaction_id: state.reactionId,
74174
},
75175
});
176+
177+
// Check for backoff codes in non-throwing SDK responses
178+
const backoffCode = getBackoffCodeFromResponse(result);
179+
if (backoffCode !== undefined) {
180+
console.log(
181+
`[feishu] typing indicator removal response contains backoff code ${backoffCode}, stopping keepalive`,
182+
);
183+
throw new FeishuBackoffError(backoffCode);
184+
}
76185
} catch (err) {
77-
// Silently fail - cleanup is not critical
186+
if (isFeishuBackoffError(err)) {
187+
console.log(`[feishu] typing indicator removal hit rate-limit/quota, stopping keepalive`);
188+
throw err;
189+
}
190+
// Silently fail for other non-critical errors
78191
console.log(`[feishu] failed to remove typing indicator: ${err}`);
79192
}
80193
}

0 commit comments

Comments
 (0)