Skip to content

Commit da626ef

Browse files
authored
Merge pull request #49 from Jules-Bertholet/syriac-abbreviation-mark
Mark U+070F and U+A8FA as zero width
2 parents 47bac32 + 3742586 commit da626ef

File tree

4 files changed

+49
-9
lines changed

4 files changed

+49
-9
lines changed

scripts/unicode.py

+15
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,21 @@ def load_zero_widths() -> "list[bool]":
241241
# width 2. Therefore, we treat it as having width 2.
242242
zw_map[0x115F] = False
243243

244+
# Syriac abbreviation mark:
245+
# Zero-width `Prepended_Concatenation_Mark`
246+
zw_map[0x070F] = True
247+
248+
# Some Arabic `Prepended_Concatenation_Mark`s
249+
# https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G27820
250+
zw_map[0x0605] = True
251+
zw_map[0x0890] = True
252+
zw_map[0x0891] = True
253+
zw_map[0x08E2] = True
254+
255+
# U+A8FA DEVANAGARI CARET
256+
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
257+
zw_map[0xA8FA] = True
258+
244259
return zw_map
245260

246261

src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@
5757
//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
5858
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
5959
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
60+
//! - The following [`Prepended_Concatenation_Mark`]s:
61+
//! - [`'\u{0605}'` NUMBER MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0605),
62+
//! - [`'\u{070F}'` SYRIAC ABBREVIATION MARK](https://util.unicode.org/UnicodeJsps/character.jsp?a=070F),
63+
//! - [`'\u{0890}'` POUND MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0890),
64+
//! - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
65+
//! - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
66+
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
6067
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
6168
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
6269
//! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
@@ -68,6 +75,7 @@
6875
//! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
6976
//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
7077
//! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
78+
//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
7179
//!
7280
//! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
7381
//! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4

src/tables.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -320,18 +320,18 @@ pub mod charwidth {
320320
0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
321321
0x55, 0x55, 0x55, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
322322
0x10, 0x41, 0x10, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
323-
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
323+
0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
324324
0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
325325
0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x14, 0x00, 0x14, 0x04,
326-
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
326+
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
327327
0x55, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
328328
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
329329
0x55, 0x05, 0x00, 0x00, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
330330
0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
331331
0x10, 0x00, 0x00, 0x01, 0x01, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
332332
0x55, 0x01, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
333-
0x55, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
334-
0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
333+
0x50, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
334+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
335335
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x45, 0x54, 0x01,
336336
0x00, 0x54, 0x51, 0x01, 0x00, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
337337
0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
@@ -438,7 +438,7 @@ pub mod charwidth {
438438
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
439439
0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x45, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
440440
0x41, 0x55, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
441-
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
441+
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
442442
0x55, 0x55, 0x05, 0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x50, 0x55,
443443
0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0x40, 0x55, 0x55, 0x55, 0x55,
444444
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x05, 0x50, 0x50, 0x55, 0x55, 0x55, 0x55,

tests/tests.rs

+21-4
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,22 @@ fn test_jamo() {
9999

100100
#[test]
101101
fn test_prepended_concatenation_marks() {
102-
assert_eq!('\u{0600}'.width(), Some(1));
103-
assert_eq!('\u{070F}'.width(), Some(1));
104-
assert_eq!('\u{08E2}'.width(), Some(1));
105-
assert_eq!('\u{110BD}'.width(), Some(1));
102+
for c in [
103+
'\u{0600}',
104+
'\u{0601}',
105+
'\u{0602}',
106+
'\u{0603}',
107+
'\u{0604}',
108+
'\u{06DD}',
109+
'\u{110BD}',
110+
'\u{110CD}',
111+
] {
112+
assert_eq!(c.width(), Some(1), "{c:?} should have width 1");
113+
}
114+
115+
for c in ['\u{0605}', '\u{070F}', '\u{0890}', '\u{0891}', '\u{08E2}'] {
116+
assert_eq!(c.width(), Some(0), "{c:?} should have width 0");
117+
}
106118
}
107119

108120
#[test]
@@ -131,6 +143,11 @@ fn test_marks() {
131143
assert_eq!('\u{09BE}'.width(), Some(0));
132144
}
133145

146+
#[test]
147+
fn test_devanagari_caret() {
148+
assert_eq!('\u{A8FA}'.width(), Some(0));
149+
}
150+
134151
#[test]
135152
fn test_canonical_equivalence() {
136153
let norm_file = BufReader::new(

0 commit comments

Comments
 (0)