Skip to content

Commit 3742586

Browse files
Mark more Prepended_Concatenation_Marks as non-advancing
1 parent 3b56f6d commit 3742586

File tree

4 files changed

+34
-14
lines changed

4 files changed

+34
-14
lines changed

scripts/unicode.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,17 @@ def load_zero_widths() -> "list[bool]":
241241
# width 2. Therefore, we treat it as having width 2.
242242
zw_map[0x115F] = False
243243

244-
# Syriac abbreviation mark
245-
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
244+
# Syriac abbreviation mark:
245+
# Zero-width `Prepended_Concatenation_Mark`
246246
zw_map[0x070F] = True
247247

248+
# Some Arabic `Prepended_Concatenation_Mark`s
249+
# https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G27820
250+
zw_map[0x0605] = True
251+
zw_map[0x0890] = True
252+
zw_map[0x0891] = True
253+
zw_map[0x08E2] = True
254+
248255
# U+A8FA DEVANAGARI CARET
249256
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
250257
zw_map[0xA8FA] = True

src/lib.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@
5757
//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
5858
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
5959
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
60-
//! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
60+
//! - The following [`Prepended_Concatenation_Mark`]s:
61+
//! - [`'\u{0605}'` NUMBER MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0605),
62+
//! - [`'\u{070F}'` SYRIAC ABBREVIATION MARK](https://util.unicode.org/UnicodeJsps/character.jsp?a=070F),
63+
//! - [`'\u{0890}'` POUND MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0890),
64+
//! - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
65+
//! - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
6166
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
6267
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
6368
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
@@ -70,6 +75,7 @@
7075
//! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
7176
//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
7277
//! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
78+
//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
7379
//!
7480
//! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
7581
//! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4
@@ -80,7 +86,6 @@
8086
//!
8187
//! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
8288
//!
83-
//! [Syriac]: https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G13006
8489
//! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
8590
//!
8691
//! ## Canonical equivalence

src/tables.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ pub mod charwidth {
320320
0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
321321
0x55, 0x55, 0x55, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
322322
0x10, 0x41, 0x10, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
323-
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
323+
0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
324324
0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
325325
0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x14, 0x00, 0x14, 0x04,
326326
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
@@ -330,8 +330,8 @@ pub mod charwidth {
330330
0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
331331
0x10, 0x00, 0x00, 0x01, 0x01, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
332332
0x55, 0x01, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
333-
0x55, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
334-
0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
333+
0x50, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
334+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
335335
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x45, 0x54, 0x01,
336336
0x00, 0x54, 0x51, 0x01, 0x00, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
337337
0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,

tests/tests.rs

+15-7
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,22 @@ fn test_jamo() {
9999

100100
#[test]
101101
fn test_prepended_concatenation_marks() {
102-
assert_eq!('\u{0600}'.width(), Some(1));
103-
assert_eq!('\u{08E2}'.width(), Some(1));
104-
assert_eq!('\u{110BD}'.width(), Some(1));
105-
}
102+
for c in [
103+
'\u{0600}',
104+
'\u{0601}',
105+
'\u{0602}',
106+
'\u{0603}',
107+
'\u{0604}',
108+
'\u{06DD}',
109+
'\u{110BD}',
110+
'\u{110CD}',
111+
] {
112+
assert_eq!(c.width(), Some(1), "{c:?} should have width 1");
113+
}
106114

107-
#[test]
108-
fn test_syriac_abbreviation_mark() {
109-
assert_eq!('\u{070F}'.width(), Some(0));
115+
for c in ['\u{0605}', '\u{070F}', '\u{0890}', '\u{0891}', '\u{08E2}'] {
116+
assert_eq!(c.width(), Some(0), "{c:?} should have width 0");
117+
}
110118
}
111119

112120
#[test]

0 commit comments

Comments
 (0)