@@ -130,6 +130,18 @@ pub const UBF_AFRICAN: u64 = 0x0000_0000_ffe0_0000;
130130/// #[allow(dead_code)]
131131pub const UBF_COMMON : u64 = 0x0000_0000_ffff_fffc ;
/// Unicode-block-filter:
/// Kana: (U+3000..U+4000).
///
/// Exactly one bit of the filter word is set: bit 35.
#[allow(dead_code)]
pub const UBF_KANA: u64 = 1 << 35;
/// Unicode-block-filter:
/// CJK: (U+4000..U+A000).
///
/// The mask sets the six contiguous bits 36 through 41. It does not
/// include bit 35, which is the bit `UBF_KANA` uses for U+3000..U+4000,
/// so the CJK range starts at U+4000 and not at U+3000.
#[allow(dead_code)]
pub const UBF_CJK: u64 = 0x0000_03f0_0000_0000;
/// Unicode-block-filter:
/// Hangul: (U+B000..U+E000).
///
/// The mask sets the three contiguous bits 43 through 45.
#[allow(dead_code)]
pub const UBF_HANGUL: u64 = 0x0000_3800_0000_0000;
/// Unicode-block-filter:
/// Kana: (U+3000..), CJK: (U+4000..), Asian: (U+A000..), Hangul: (U+B000..U+E000).
///
/// The mask sets the twelve contiguous bits 34 through 45.
// NOTE(review): bit 34 sits one position below the bit used by `UBF_KANA`
// (bit 35, documented as U+3000..U+4000), while the range documented here
// starts at U+3000 - confirm that including bit 34 is intended.
#[allow(dead_code)]
pub const UBF_ASIAN: u64 = 0x3ffc << 32;
@@ -150,7 +162,7 @@ pub const UBF_UNCOMMON: u64 = 0x000f_0000_0000_0000;
150162/// The array is defined as `(key, value, description)` tuples.
151163/// For value see chapter *Codepage layout* in
152164/// [UTF-8 - Wikipedia](https://en.wikipedia.org/wiki/UTF-8)
153- pub const UNICODE_BLOCK_FILTER_ALIASSE : [ ( [ u8 ; 12 ] , u64 , [ u8 ; 25 ] ) ; 14 ] = [
165+ pub const UNICODE_BLOCK_FILTER_ALIASSE : [ ( [ u8 ; 12 ] , u64 , [ u8 ; 25 ] ) ; 18 ] = [
154166 (
155167 * b"default " ,
156168 UBF_ALL & !UBF_INVALID ,
@@ -178,18 +190,18 @@ pub const UNICODE_BLOCK_FILTER_ALIASSE: [([u8; 12], u64, [u8; 25]); 14] = [
178190 UBF_ARMENIAN ,
179191 * b"Armenian " ,
180192 ) ,
181- (
182- * b"hebrew " ,
183- UBF_HEBREW ,
184- * b"Hebrew " ,
185- ) ,
193+ ( * b"hebrew " , UBF_HEBREW , * b"Hebrew " ) ,
186194 (
187195 * b"arabic " ,
188196 UBF_ARABIC | UBF_SYRIAC ,
189197 * b"Arabic+Syriac " ,
190198 ) ,
191199 ( * b"common " , UBF_COMMON , * b"all 2-byte-UFT-8 " ) ,
192200 ( * b"african " , UBF_AFRICAN , * b"all in U+540..U+800 " ) ,
201+ ( * b"kana " , UBF_KANA , * b"Kana: U+3000..U+4000 " ) ,
202+ ( * b"cjk " , UBF_CJK , * b"CJK : U+4000..U+A000 " ) ,
203+ ( * b"hangul " , UBF_HANGUL , * b"Hangul: U+B000..U+E000 " ) ,
204+ ( * b"asian " , UBF_ASIAN , * b"all in U+3000..U+E000 " ) ,
193205 // All but Asian (U+3000..U+E000), useful for UTF-16 scans.
194206 (
195207 * b"all-asian " ,
0 commit comments