Skip to content

Commit ff9b19d

Browse files
committed
fix(providers): require strong-token co-occurrence for broad Chinese aliases
Codex flagged that 分析 / 应用 / 演示 over-trigger via plain substring match — e.g. '分析这段文本' matches dashboard, 'Web 应用' matches mobile-mock, '演示一下功能' matches deck. Split each trigger group into strong + weak; broad aliases live in weak and never fire on their own (extractGroupIds now scans only the strong list). Also adds 数据 / 统计 as strong dashboard tokens. Adds regression tests for the 3 false positives plus 3 true-positive checks (数据看板 / iPhone app / PPT).
1 parent a94e84b commit ff9b19d

2 files changed

Lines changed: 129 additions & 63 deletions

File tree

packages/providers/src/skill-injector.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,4 +435,39 @@ describe('matchSkillsToPrompt()', () => {
435435
const matched = matchSkillsToPrompt(all, '做一个移动端原型');
436436
expect(matched.map((s) => s.id)).toContain('mobile-mock');
437437
});
438+
439+
// Regression: broad Chinese aliases 分析 / 应用 / 演示 must not fire their
440+
// group on their own. They are demoted to `weak` so substring hits inside
441+
// unrelated text (analyse this paragraph, web 应用, 演示一下功能) no longer
442+
// over-trigger dashboard / mobile-mock / pitch-deck.
443+
it('does NOT match dashboard for generic 分析 prompt — "分析这段文本"', () => {
444+
const matched = matchSkillsToPrompt(all, '分析这段文本');
445+
expect(matched.map((s) => s.id)).not.toContain('data-viz-recharts');
446+
});
447+
448+
it('does NOT match mobile-mock for generic 应用 prompt — "Web 应用程序员"', () => {
449+
const matched = matchSkillsToPrompt(all, 'Web 应用程序员');
450+
expect(matched.map((s) => s.id)).not.toContain('mobile-mock');
451+
});
452+
453+
it('does NOT match pitch-deck for generic 演示 prompt — "演示一下功能"', () => {
454+
const matched = matchSkillsToPrompt(all, '演示一下功能');
455+
expect(matched.map((s) => s.id)).not.toContain('pitch-deck');
456+
});
457+
458+
// True positives for the same buckets must still resolve via strong tokens.
459+
it('still matches dashboard for "数据看板"', () => {
460+
const matched = matchSkillsToPrompt(all, '做一个数据看板');
461+
expect(matched.map((s) => s.id)).toContain('data-viz-recharts');
462+
});
463+
464+
it('still matches mobile-mock for English "iPhone app"', () => {
465+
const matched = matchSkillsToPrompt(all, 'design an iPhone app');
466+
expect(matched.map((s) => s.id)).toContain('mobile-mock');
467+
});
468+
469+
it('still matches pitch-deck for "做一个 PPT"', () => {
470+
const matched = matchSkillsToPrompt(all, '帮我做一个 PPT');
471+
expect(matched.map((s) => s.id)).toContain('pitch-deck');
472+
});
438473
});

packages/providers/src/skill-injector.ts

Lines changed: 94 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ export function injectSkillsIntoMessages(
131131
* equality), so a Chinese prompt like "数据看板" can resolve to the same bucket
132132
* as an English description like "dashboards, analytics".
133133
*
134+
* Each group splits its vocabulary into `strong` and `weak`:
135+
* - `strong` tokens fire the group on their own.
136+
* - `weak` tokens never fire the group by themselves; they only count when a `strong` from the same group co-occurs in
137+
* the same text. This tier holds the broad Chinese aliases (分析 / 应用 /
138+
* 演示) that would otherwise over-trigger via plain substring match — e.g.
139+
* "分析这段文本" matching dashboard, "Web 应用" matching mobile-mock,
140+
* "演示一下功能" matching deck.
141+
*
134142
* Add new entries to an existing group when extending vocabulary for the same
135143
* concept; add a new group only when introducing a skill whose intent isn't
136144
* covered by an existing concept.
@@ -139,89 +147,112 @@ export function injectSkillsIntoMessages(
139147
* would force-match every skill on every call.
140148
*
141149
* Note on bare 'app': accepted false-positive risk (matches "apple",
142-
* "application"). Kept because it is the highest-frequency bridge between
143-
* Chinese prompts ("做个 App") and the mobile-mock description ("app screen").
150+
* "application"). Kept in `strong` because it is the highest-frequency bridge
151+
* between Chinese prompts ("做个 App") and the mobile-mock description
152+
* ("app screen").
144153
*/
145-
const SKILL_TRIGGER_GROUPS: readonly (readonly string[])[] = [
154+
type TriggerGroup = { strong: readonly string[]; weak: readonly string[] };
155+
156+
const SKILL_TRIGGER_GROUPS: readonly TriggerGroup[] = [
146157
// dashboard / data
147-
[
148-
'dashboard',
149-
'chart',
150-
'graph',
151-
'analytics',
152-
'kpi',
153-
'metric',
154-
'data viz',
155-
'data-driven',
156-
'recharts',
157-
'仪表盘',
158-
'看板',
159-
'数据看板',
160-
'数据图',
161-
'图表',
162-
'分析',
163-
],
158+
{
159+
strong: [
160+
'dashboard',
161+
'chart',
162+
'graph',
163+
'analytics',
164+
'kpi',
165+
'metric',
166+
'data viz',
167+
'data-driven',
168+
'recharts',
169+
'仪表盘',
170+
'看板',
171+
'数据看板',
172+
'数据图',
173+
'图表',
174+
'数据',
175+
'统计',
176+
],
177+
weak: ['分析'],
178+
},
164179
// landing / web
165-
[
166-
'landing',
167-
'homepage',
168-
'hero',
169-
'web page',
170-
'website',
171-
'落地页',
172-
'官网',
173-
'首页',
174-
'主页',
175-
'网页',
176-
'宣传页',
177-
],
180+
{
181+
strong: [
182+
'landing',
183+
'homepage',
184+
'hero',
185+
'web page',
186+
'website',
187+
'落地页',
188+
'官网',
189+
'首页',
190+
'主页',
191+
'网页',
192+
'宣传页',
193+
],
194+
weak: [],
195+
},
178196
// mobile / app — kept strictly mobile-specific. Generic words like 'prototype'
179197
// and '原型' do NOT belong here: bucketing them into mobile false-fires
180198
// mobile-mock for "landing page prototype" / "落地页的原型". They also can't
181199
// safely live in UI-broad, because mobile-mock's own description hits UI-broad
182200
// via 'screen', so the cross-bucket intersection would still false-fire.
183201
// The mobile-mock description still lands in this bucket via mobile/app/phone,
184202
// so dropping the generic prototype tokens costs no recall on real mobile prompts.
185-
[
186-
'mobile',
187-
'phone',
188-
'app screen',
189-
'app',
190-
'ios',
191-
'iphone',
192-
'android',
193-
'移动端',
194-
'移动应用',
195-
'手机',
196-
'app设计',
197-
'应用',
198-
],
203+
{
204+
strong: [
205+
'mobile',
206+
'phone',
207+
'app screen',
208+
'app',
209+
'ios',
210+
'iphone',
211+
'android',
212+
'移动端',
213+
'移动应用',
214+
'手机',
215+
'app设计',
216+
],
217+
weak: ['应用'],
218+
},
199219
// slides / deck
200-
[
201-
'deck',
202-
'slide',
203-
'slides',
204-
'presentation',
205-
'pitch',
206-
'keynote',
207-
'幻灯片',
208-
'演示文稿',
209-
'ppt',
210-
'路演',
211-
'提案',
212-
'演示',
213-
],
220+
{
221+
strong: [
222+
'deck',
223+
'slide',
224+
'slides',
225+
'presentation',
226+
'pitch',
227+
'keynote',
228+
'幻灯片',
229+
'演示文稿',
230+
'ppt',
231+
'路演',
232+
'提案',
233+
],
234+
weak: ['演示'],
235+
},
214236
// UI broad
215-
['ui', 'interface', 'screen', '界面', '原型图', '设计稿'],
237+
{
238+
strong: ['ui', 'interface', 'screen', '界面', '原型图', '设计稿'],
239+
weak: [],
240+
},
216241
] as const;
217242

243+
// A group fires when any `strong` token is present. `weak` tokens never fire
244+
// alone — they were demoted from strong because their substring match
245+
// over-triggered in unrelated prompts (e.g. 应用 hitting "Web 应用",
246+
// 演示 hitting "演示一下功能"). When a strong from the same group is also
247+
// present, the weak token would count too, but the group has already fired on the
248+
// strong hit, so the rule collapses to: strong-only firing.
218249
function extractGroupIds(text: string): Set<number> {
219250
const lower = text.toLowerCase();
220251
const hits = new Set<number>();
221252
for (let i = 0; i < SKILL_TRIGGER_GROUPS.length; i++) {
222253
const group = SKILL_TRIGGER_GROUPS[i];
223254
if (!group) continue;
224-
for (const kw of group) {
255+
for (const kw of group.strong) {
225256
if (lower.includes(kw)) {
226257
hits.add(i);
227258
break;

0 commit comments

Comments
 (0)