fix(providers): require strong-token co-occurrence for broad Chinese aliases

hqhq1025 · hqhq1025 · commit ff9b19d899eb · 2026-04-19T15:47:26.000+08:00
Codex flagged that 分析 / 应用 / 演示 over-trigger via plain substring match —
e.g. '分析这段文本' matches dashboard, 'Web 应用' matches mobile-mock,
'演示一下功能' matches deck. Split each trigger group into strong + weak;
broad aliases live in weak and never fire on their own. Also adds 数据 /
统计 as strong dashboard tokens. Adds regression tests for the 3 false
positives plus 3 true-positive checks (数据看板 / iPhone app / PPT).
diff --git a/packages/providers/src/skill-injector.test.ts b/packages/providers/src/skill-injector.test.ts
@@ -435,4 +435,39 @@ describe('matchSkillsToPrompt()', () => {
     const matched = matchSkillsToPrompt(all, '做一个移动端原型');
     expect(matched.map((s) => s.id)).toContain('mobile-mock');
   });
+
+  // Regression: broad Chinese aliases 分析 / 应用 / 演示 must not fire their
+  // group on their own. They are demoted to `weak` so substring hits inside
+  // unrelated text (分析这段文本, Web 应用程序员, 演示一下功能) no longer
+  // over-trigger data-viz-recharts / mobile-mock / pitch-deck.
+  it('does NOT match dashboard for generic 分析 prompt — "分析这段文本"', () => {
+    const matched = matchSkillsToPrompt(all, '分析这段文本');
+    expect(matched.map((s) => s.id)).not.toContain('data-viz-recharts');
+  });
+
+  it('does NOT match mobile-mock for generic 应用 prompt — "Web 应用程序员"', () => {
+    const matched = matchSkillsToPrompt(all, 'Web 应用程序员');
+    expect(matched.map((s) => s.id)).not.toContain('mobile-mock');
+  });
+
+  it('does NOT match pitch-deck for generic 演示 prompt — "演示一下功能"', () => {
+    const matched = matchSkillsToPrompt(all, '演示一下功能');
+    expect(matched.map((s) => s.id)).not.toContain('pitch-deck');
+  });
+
+  // True positives for the same buckets must still resolve via strong tokens.
+  it('still matches dashboard for "数据看板"', () => {
+    const matched = matchSkillsToPrompt(all, '做一个数据看板');
+    expect(matched.map((s) => s.id)).toContain('data-viz-recharts');
+  });
+
+  it('still matches mobile-mock for English "iPhone app"', () => {
+    const matched = matchSkillsToPrompt(all, 'design an iPhone app');
+    expect(matched.map((s) => s.id)).toContain('mobile-mock');
+  });
+
+  it('still matches pitch-deck for "做一个 PPT"', () => {
+    const matched = matchSkillsToPrompt(all, '帮我做一个 PPT');
+    expect(matched.map((s) => s.id)).toContain('pitch-deck');
+  });
 });
diff --git a/packages/providers/src/skill-injector.ts b/packages/providers/src/skill-injector.ts
@@ -131,6 +131,14 @@ export function injectSkillsIntoMessages(
  * equality), so a Chinese prompt like "数据看板" can resolve to the same bucket
  * as an English description like "dashboards, analytics".
  *
+ * Each group splits its vocabulary into `strong` and `weak`:
+ *  - `strong` tokens fire the group on their own.
+ *  - `weak` tokens never fire a group by themselves; the matcher loop reads
+ *    only `strong`, so `weak` records demoted aliases rather than driving
+ *    matches. It holds broad Chinese aliases (分析 / 应用 / 演示) that
+ *    over-trigger via plain substring match — e.g. "分析这段文本" matching
+ *    dashboard, "Web 应用" matching mobile-mock, "演示一下功能" matching deck.
+ *
  * Add new entries to an existing group when extending vocabulary for the same
  * concept; add a new group only when introducing a skill whose intent isn't
  * covered by an existing concept.
@@ -139,89 +147,112 @@ export function injectSkillsIntoMessages(
  * would force-match every skill on every call.
  *
  * Note on bare 'app': accepted false-positive risk (matches "apple",
- * "application"). Kept because it is the highest-frequency bridge between
- * Chinese prompts ("做个 App") and the mobile-mock description ("app screen").
+ * "application"). Kept in `strong` because it is the highest-frequency bridge
+ * between Chinese prompts ("做个 App") and the mobile-mock description
+ * ("app screen").
  */
-const SKILL_TRIGGER_GROUPS: readonly (readonly string[])[] = [
+type TriggerGroup = { strong: readonly string[]; weak: readonly string[] };
+
+const SKILL_TRIGGER_GROUPS: readonly TriggerGroup[] = [
   // dashboard / data
-  [
-    'dashboard',
-    'chart',
-    'graph',
-    'analytics',
-    'kpi',
-    'metric',
-    'data viz',
-    'data-driven',
-    'recharts',
-    '仪表盘',
-    '看板',
-    '数据看板',
-    '数据图',
-    '图表',
-    '分析',
-  ],
+  {
+    strong: [
+      'dashboard',
+      'chart',
+      'graph',
+      'analytics',
+      'kpi',
+      'metric',
+      'data viz',
+      'data-driven',
+      'recharts',
+      '仪表盘',
+      '看板',
+      '数据看板',
+      '数据图',
+      '图表',
+      '数据',
+      '统计',
+    ],
+    weak: ['分析'],
+  },
   // landing / web
-  [
-    'landing',
-    'homepage',
-    'hero',
-    'web page',
-    'website',
-    '落地页',
-    '官网',
-    '首页',
-    '主页',
-    '网页',
-    '宣传页',
-  ],
+  {
+    strong: [
+      'landing',
+      'homepage',
+      'hero',
+      'web page',
+      'website',
+      '落地页',
+      '官网',
+      '首页',
+      '主页',
+      '网页',
+      '宣传页',
+    ],
+    weak: [],
+  },
   // mobile / app — kept strictly mobile-specific. Generic words like 'prototype'
   // and '原型' do NOT belong here: bucketing them into mobile false-fires
   // mobile-mock for "landing page prototype" / "落地页的原型". They also can't
   // safely live in UI-broad, because mobile-mock's own description hits UI-broad
   // via 'screen', so the cross-bucket intersection would still false-fire.
   // The mobile-mock description still lands in this bucket via mobile/app/phone,
   // so dropping the generic prototype tokens costs no recall on real mobile prompts.
-  [
-    'mobile',
-    'phone',
-    'app screen',
-    'app',
-    'ios',
-    'iphone',
-    'android',
-    '移动端',
-    '移动应用',
-    '手机',
-    'app设计',
-    '应用',
-  ],
+  {
+    strong: [
+      'mobile',
+      'phone',
+      'app screen',
+      'app',
+      'ios',
+      'iphone',
+      'android',
+      '移动端',
+      '移动应用',
+      '手机',
+      'app设计',
+    ],
+    weak: ['应用'],
+  },
   // slides / deck
-  [
-    'deck',
-    'slide',
-    'slides',
-    'presentation',
-    'pitch',
-    'keynote',
-    '幻灯片',
-    '演示文稿',
-    'ppt',
-    '路演',
-    '提案',
-    '演示',
-  ],
+  {
+    strong: [
+      'deck',
+      'slide',
+      'slides',
+      'presentation',
+      'pitch',
+      'keynote',
+      '幻灯片',
+      '演示文稿',
+      'ppt',
+      '路演',
+      '提案',
+    ],
+    weak: ['演示'],
+  },
   // UI broad
-  ['ui', 'interface', 'screen', '界面', '原型图', '设计稿'],
+  {
+    strong: ['ui', 'interface', 'screen', '界面', '原型图', '设计稿'],
+    weak: [],
+  },
 ] as const;
 
+// A group fires when any `strong` token is present. `weak` tokens never fire
+// alone — they were demoted from strong because their substring match
+// over-triggered in unrelated prompts (e.g. 应用 hitting "Web 应用",
+// 演示 hitting "演示一下功能"). A weak token could only ever count alongside
+// a strong hit from the same group, and that hit already fires the group, so
+// the rule collapses to strong-only matching: the loop below skips `weak`.
 function extractGroupIds(text: string): Set<number> {
   const lower = text.toLowerCase();
   const hits = new Set<number>();
   for (let i = 0; i < SKILL_TRIGGER_GROUPS.length; i++) {
     const group = SKILL_TRIGGER_GROUPS[i];
     if (!group) continue;
-    for (const kw of group) {
+    for (const kw of group.strong) {
       if (lower.includes(kw)) {
         hits.add(i);
         break;