Skip to content

Commit b631b1a

Browse files
committed
add Kana, CJK, and Hangul scripts
1 parent c67931a commit b631b1a

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

src/mission.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,18 @@ pub const UBF_AFRICAN: u64 = 0x0000_0000_ffe0_0000;
130130
/// #[allow(dead_code)]
131131
pub const UBF_COMMON: u64 = 0x0000_0000_ffff_fffc;
132132
/// Unicode-block-filter:
133+
/// Kana: (U+3000..U+4000).
134+
#[allow(dead_code)]
135+
pub const UBF_KANA: u64 = 0x0000_0008_0000_0000;
136+
/// Unicode-block-filter:
137+
/// CJK: (U+4000..U+A000).
138+
#[allow(dead_code)]
139+
pub const UBF_CJK: u64 = 0x0000_03f0_0000_0000;
140+
/// Unicode-block-filter:
141+
/// Hangul: (U+B000..U+E000).
142+
#[allow(dead_code)]
143+
pub const UBF_HANGUL: u64 = 0x0000_3800_0000_0000;
144+
/// Unicode-block-filter:
133145
/// Kana: (U+3000..), CJK: (U+4000..), Asian: (U+A000..), Hangul: (U+B000..U+E000).
134146
#[allow(dead_code)]
135147
pub const UBF_ASIAN: u64 = 0x0000_3ffc_0000_0000;
@@ -150,7 +162,7 @@ pub const UBF_UNCOMMON: u64 = 0x000f_0000_0000_0000;
150162
/// The array is defined as `(key, value)` tuples.
151163
/// For value see chapter *Codepage layout* in
152164
/// [UTF-8 - Wikipedia](https://en.wikipedia.org/wiki/UTF-8)
153-
pub const UNICODE_BLOCK_FILTER_ALIASSE: [([u8; 12], u64, [u8; 25]); 14] = [
165+
pub const UNICODE_BLOCK_FILTER_ALIASSE: [([u8; 12], u64, [u8; 25]); 18] = [
154166
(
155167
*b"default ",
156168
UBF_ALL & !UBF_INVALID,
@@ -178,18 +190,18 @@ pub const UNICODE_BLOCK_FILTER_ALIASSE: [([u8; 12], u64, [u8; 25]); 14] = [
178190
UBF_ARMENIAN,
179191
*b"Armenian ",
180192
),
181-
(
182-
*b"hebrew ",
183-
UBF_HEBREW,
184-
*b"Hebrew ",
185-
),
193+
(*b"hebrew ", UBF_HEBREW, *b"Hebrew "),
186194
(
187195
*b"arabic ",
188196
UBF_ARABIC | UBF_SYRIAC,
189197
*b"Arabic+Syriac ",
190198
),
191199
(*b"common ", UBF_COMMON, *b"all 2-byte-UFT-8 "),
192200
(*b"african ", UBF_AFRICAN, *b"all in U+540..U+800 "),
201+
(*b"kana ", UBF_KANA, *b"Kana: U+3000..U+4000 "),
202+
(*b"cjk ", UBF_CJK, *b"CJK : U+4000..U+A000 "),
203+
(*b"hangul ", UBF_HANGUL, *b"Hangul: U+B000..U+E000 "),
204+
(*b"asian ", UBF_ASIAN, *b"all in U+3000..U+E000 "),
193205
// All but Asian (U+3000..U+E000), useful for UTF-16 scans.
194206
(
195207
*b"all-asian ",

0 commit comments

Comments
 (0)