Skip to content

Commit 028ac5a

Browse files
♻️ refactor(lint): Update language family sets and improve severity logic for accuracy
1 parent 9d76bce commit 028ac5a

File tree

2 files changed

+27
-16
lines changed

2 files changed

+27
-16
lines changed

packages/lobe-i18n/src/commands/Lint/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ export const HYPHENATED_LANGUAGE_MAP: { [key: string]: string } = {
176176
export const ARABIC_SIMILAR_LANGUAGES = new Set(['ar', 'fa', 'ur', 'ku']);
177177

178178
export const LANGUAGE_FAMILIES = {
179-
EAST_ASIAN: new Set(['zh', 'ja', 'ko', 'vi', 'th', 'my', 'km', 'lo', 'mn', 'bo', 'ug']),
179+
EAST_ASIAN: new Set(['ja', 'ko', 'vi', 'th', 'my', 'km', 'lo', 'mn', 'bo', 'ug']),
180180
GERMANIC: new Set(['de', 'nl', 'da', 'no', 'sv', 'is', 'fo', 'af']),
181181
MALAY: new Set(['id', 'ms']),
182182
ROMANCE: new Set(['es', 'pt', 'ca', 'it', 'fr', 'ro', 'gl']),

packages/lobe-i18n/src/commands/Lint/utils/languageValidation.ts

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -63,32 +63,43 @@ export function determineSeverity(
6363
mainLanguage: string,
6464
text: string,
6565
): 'error' | 'warning' {
66-
// 对于短文本(<=3),置信度低于0.7时只报 warning
67-
let severity: 'error' | 'warning' =
68-
textLength <= 3
69-
? confidence > CONFIDENCE_THRESHOLDS.SHORT_TEXT
70-
? 'error'
71-
: 'warning'
72-
: confidence > CONFIDENCE_THRESHOLDS.LONG_TEXT
73-
? 'error'
74-
: 'warning';
75-
76-
// 如果是东亚语言和其他语系的混用,判定为 error
66+
// 默认为 warning
67+
let severity: 'error' | 'warning' = 'warning';
68+
69+
// 规则1:对于长文本,如果置信度足够高,则为 error
70+
if (textLength > 3 && confidence > CONFIDENCE_THRESHOLDS.LONG_TEXT) {
71+
severity = 'error';
72+
}
73+
74+
// 规则2:对于短文本,如果置信度足够高,则为 error
75+
if (textLength <= 3 && confidence > CONFIDENCE_THRESHOLDS.SHORT_TEXT) {
76+
severity = 'error';
77+
}
78+
79+
// 规则3:检测到东亚语言和其他语系混用,则为 error
7780
const isDetectedEastAsian = LANGUAGE_FAMILIES.EAST_ASIAN.has(detectedLanguage);
7881
const isMainEastAsian = LANGUAGE_FAMILIES.EAST_ASIAN.has(mainLanguage);
79-
8082
if (isDetectedEastAsian !== isMainEastAsian && confidence > 0.5) {
81-
return 'error';
83+
severity = 'error';
84+
}
85+
86+
// 规则4:如果主语言和检测语言不属于同一个语系,则为 error
87+
const mainFamily = Object.values(LANGUAGE_FAMILIES).find((family) => family.has(mainLanguage));
88+
const detectedFamily = Object.values(LANGUAGE_FAMILIES).find((family) =>
89+
family.has(detectedLanguage),
90+
);
91+
if (mainFamily && detectedFamily && mainFamily !== detectedFamily && confidence > 0.6) {
92+
severity = 'error';
8293
}
8394

84-
// 检测明显英文内容的逻辑
95+
// 规则5:检测到明显英文内容,则为 error
8596
if (
8697
detectedLanguage === 'en' &&
8798
mainLanguage !== 'en' &&
8899
containsEnglishTerms(text) &&
89100
confidence > 0.5
90101
) {
91-
severity = 'error'; // 明显英文内容直接设为error
102+
severity = 'error';
92103
}
93104

94105
return severity;

0 commit comments

Comments
 (0)