Skip to content

Commit afd2e6b

Browse files
Re-enable Unicode ZWJ handling code that had been disabled pending an ICU update (#4035)
1 parent 3efd356 commit afd2e6b

File tree

1 file changed: +16 -16 lines changed

1 file changed

+16
-16
lines changed

third_party/txt/src/minikin/GraphemeBreak.cpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,7 @@ bool GraphemeBreak::isGraphemeBreak(const float* advances,
112112
return false;
113113
}
114114
// Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
115-
// TODO(abarth): Add U_GCB_ZWJ once we update ICU.
116-
if (p2 == U_GCB_EXTEND || /* p2 == U_GCB_ZWJ || */ p2 == U_GCB_SPACING_MARK ||
115+
if (p2 == U_GCB_EXTEND || p2 == U_GCB_ZWJ || p2 == U_GCB_SPACING_MARK ||
117116
p1 == U_GCB_PREPEND) {
118117
return false;
119118
}
@@ -157,25 +156,26 @@ bool GraphemeBreak::isGraphemeBreak(const float* advances,
157156
}
158157
}
159158

160-
// TODO(abarth): Enablet his code once we update ICU.
161159
// Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG)
162160
// We try to make emoji sequences with ZWJ a single grapheme cluster, but only
163161
// if they actually merge to one cluster. So we are more relaxed than the UAX
164162
// #29 rules in accepting any emoji character after the ZWJ, but are tighter
165163
// in that we only treat it as one cluster if a ligature is actually formed
166-
// and we also require the character before the ZWJ to also be an emoji. if
167-
// (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) {
168-
// // look at character before ZWJ to see that both can participate in an
169-
// emoji zwj sequence uint32_t c0 = 0; size_t offset_backback =
170-
// offset_back; U16_PREV(buf, start, offset_backback, c0); if (c0 ==
171-
// 0xFE0F && offset_backback > start) {
172-
// // skip over emoji variation selector
173-
// U16_PREV(buf, start, offset_backback, c0);
174-
// }
175-
// if (isEmoji(c0)) {
176-
// return false;
177-
// }
178-
// }
164+
// and we also require the character before the ZWJ to also be an emoji.
165+
if (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) {
166+
// look at character before ZWJ to see that both can participate in an
167+
// emoji zwj sequence
168+
uint32_t c0 = 0;
169+
size_t offset_backback = offset_back;
170+
U16_PREV(buf, start, offset_backback, c0);
171+
if (c0 == 0xFE0F && offset_backback > start) {
172+
// skip over emoji variation selector
173+
U16_PREV(buf, start, offset_backback, c0);
174+
}
175+
if (isEmoji(c0)) {
176+
return false;
177+
}
178+
}
179179

180180
// Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases.
181181
// sot (RI RI)* RI x RI

0 commit comments

Comments
 (0)