@@ -131,6 +131,14 @@ export function injectSkillsIntoMessages(
131131 * equality), so a Chinese prompt like "数据看板" can resolve to the same bucket
132132 * as an English description like "dashboards, analytics".
133133 *
134+ * Each group splits its vocabulary into `strong` and `weak`:
135+ * - `strong` tokens fire the group on their own.
136+ * - `weak` tokens never fire the group alone; they only count when a `strong`
137+ * from the same group co-occurs, so in practice matching is strong-only.
138+ * They hold broad Chinese aliases (分析 / 应用 / 演示) demoted because plain
139+ * substring match over-triggered — e.g. "分析这段文本" matching dashboard,
140+ * "Web 应用" matching mobile-mock, "演示一下功能" matching deck.
141+ *
134142 * Add new entries to an existing group when extending vocabulary for the same
135143 * concept; add a new group only when introducing a skill whose intent isn't
136144 * covered by an existing concept.
@@ -139,89 +147,112 @@ export function injectSkillsIntoMessages(
139147 * would force-match every skill on every call.
140148 *
141149 * Note on bare 'app': accepted false-positive risk (matches "apple",
142- * "application"). Kept because it is the highest-frequency bridge between
143- * Chinese prompts ("做个 App") and the mobile-mock description ("app screen").
150+ * "application"). Kept in `strong` because it is the highest-frequency bridge
151+ * between Chinese prompts ("做个 App") and the mobile-mock description
152+ * ("app screen").
144153 */
145- const SKILL_TRIGGER_GROUPS : readonly ( readonly string [ ] ) [ ] = [
154+ type TriggerGroup = { strong : readonly string [ ] ; weak : readonly string [ ] } ;
155+
156+ const SKILL_TRIGGER_GROUPS : readonly TriggerGroup [ ] = [
146157 // dashboard / data
147- [
148- 'dashboard' ,
149- 'chart' ,
150- 'graph' ,
151- 'analytics' ,
152- 'kpi' ,
153- 'metric' ,
154- 'data viz' ,
155- 'data-driven' ,
156- 'recharts' ,
157- '仪表盘' ,
158- '看板' ,
159- '数据看板' ,
160- '数据图' ,
161- '图表' ,
162- '分析' ,
163- ] ,
158+ {
159+ strong : [
160+ 'dashboard' ,
161+ 'chart' ,
162+ 'graph' ,
163+ 'analytics' ,
164+ 'kpi' ,
165+ 'metric' ,
166+ 'data viz' ,
167+ 'data-driven' ,
168+ 'recharts' ,
169+ '仪表盘' ,
170+ '看板' ,
171+ '数据看板' ,
172+ '数据图' ,
173+ '图表' ,
174+ '数据' ,
175+ '统计' ,
176+ ] ,
177+ weak : [ '分析' ] ,
178+ } ,
164179 // landing / web
165- [
166- 'landing' ,
167- 'homepage' ,
168- 'hero' ,
169- 'web page' ,
170- 'website' ,
171- '落地页' ,
172- '官网' ,
173- '首页' ,
174- '主页' ,
175- '网页' ,
176- '宣传页' ,
177- ] ,
180+ {
181+ strong : [
182+ 'landing' ,
183+ 'homepage' ,
184+ 'hero' ,
185+ 'web page' ,
186+ 'website' ,
187+ '落地页' ,
188+ '官网' ,
189+ '首页' ,
190+ '主页' ,
191+ '网页' ,
192+ '宣传页' ,
193+ ] ,
194+ weak : [ ] ,
195+ } ,
178196 // mobile / app — kept strictly mobile-specific. Generic words like 'prototype'
179197 // and '原型' do NOT belong here: bucketing them into mobile false-fires
180198 // mobile-mock for "landing page prototype" / "落地页的原型". They also can't
181199 // safely live in UI-broad, because mobile-mock's own description hits UI-broad
182200 // via 'screen', so the cross-bucket intersection would still false-fire.
183201 // The mobile-mock description still lands in this bucket via mobile/app/phone,
184202 // so dropping the generic prototype tokens costs no recall on real mobile prompts.
185- [
186- 'mobile' ,
187- 'phone' ,
188- 'app screen' ,
189- 'app' ,
190- 'ios' ,
191- 'iphone' ,
192- 'android' ,
193- '移动端' ,
194- '移动应用' ,
195- '手机' ,
196- 'app设计' ,
197- '应用' ,
198- ] ,
203+ {
204+ strong : [
205+ 'mobile' ,
206+ 'phone' ,
207+ 'app screen' ,
208+ 'app' ,
209+ 'ios' ,
210+ 'iphone' ,
211+ 'android' ,
212+ '移动端' ,
213+ '移动应用' ,
214+ '手机' ,
215+ 'app设计' ,
216+ ] ,
217+ weak : [ '应用' ] ,
218+ } ,
199219 // slides / deck
200- [
201- 'deck' ,
202- 'slide' ,
203- 'slides' ,
204- 'presentation' ,
205- 'pitch' ,
206- 'keynote' ,
207- '幻灯片' ,
208- '演示文稿' ,
209- 'ppt' ,
210- '路演' ,
211- '提案' ,
212- '演示' ,
213- ] ,
220+ {
221+ strong : [
222+ 'deck' ,
223+ 'slide' ,
224+ 'slides' ,
225+ 'presentation' ,
226+ 'pitch' ,
227+ 'keynote' ,
228+ '幻灯片' ,
229+ '演示文稿' ,
230+ 'ppt' ,
231+ '路演' ,
232+ '提案' ,
233+ ] ,
234+ weak : [ '演示' ] ,
235+ } ,
214236 // UI broad
215- [ 'ui' , 'interface' , 'screen' , '界面' , '原型图' , '设计稿' ] ,
237+ {
238+ strong : [ 'ui' , 'interface' , 'screen' , '界面' , '原型图' , '设计稿' ] ,
239+ weak : [ ] ,
240+ } ,
216241] as const ;
217242
243+ // A group fires when any `strong` token is present. `weak` tokens never fire
244+ // alone — they were demoted from strong because their substring match
245+ // over-triggered in unrelated prompts (e.g. 应用 hitting "Web 应用",
246+ // 演示 hitting "演示一下功能"). A weak token would only count alongside a
247+ // strong hit from the same group, and that strong hit already fires the
248+ // group on its own, so the rule collapses to: strong-only firing.
218249function extractGroupIds ( text : string ) : Set < number > {
219250 const lower = text . toLowerCase ( ) ;
220251 const hits = new Set < number > ( ) ;
221252 for ( let i = 0 ; i < SKILL_TRIGGER_GROUPS . length ; i ++ ) {
222253 const group = SKILL_TRIGGER_GROUPS [ i ] ;
223254 if ( ! group ) continue ;
224- for ( const kw of group ) {
255+ for ( const kw of group . strong ) {
225256 if ( lower . includes ( kw ) ) {
226257 hits . add ( i ) ;
227258 break ;
0 commit comments