@@ -112,8 +112,7 @@ bool GraphemeBreak::isGraphemeBreak(const float* advances,
112112 return false ;
113113 }
114114 // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
115- // TODO(abarth): Add U_GCB_ZWJ once we update ICU.
116- if (p2 == U_GCB_EXTEND || /* p2 == U_GCB_ZWJ || */ p2 == U_GCB_SPACING_MARK ||
115+ if (p2 == U_GCB_EXTEND || p2 == U_GCB_ZWJ || p2 == U_GCB_SPACING_MARK ||
117116 p1 == U_GCB_PREPEND) {
118117 return false ;
119118 }
@@ -157,25 +156,26 @@ bool GraphemeBreak::isGraphemeBreak(const float* advances,
157156 }
158157 }
159158
160- // TODO(abarth): Enablet his code once we update ICU.
161159 // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG)
162160 // We try to make emoji sequences with ZWJ a single grapheme cluster, but only
163161 // if they actually merge to one cluster. So we are more relaxed than the UAX
164162 // #29 rules in accepting any emoji character after the ZWJ, but are tighter
165163 // in that we only treat it as one cluster if a ligature is actually formed
166- // and we also require the character before the ZWJ to also be an emoji. if
167- // (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) {
168- // // look at character before ZWJ to see that both can participate in an
169- // emoji zwj sequence uint32_t c0 = 0; size_t offset_backback =
170- // offset_back; U16_PREV(buf, start, offset_backback, c0); if (c0 ==
171- // 0xFE0F && offset_backback > start) {
172- // // skip over emoji variation selector
173- // U16_PREV(buf, start, offset_backback, c0);
174- // }
175- // if (isEmoji(c0)) {
176- // return false;
177- // }
178- // }
164+ // and we also require the character before the ZWJ to also be an emoji.
165+ if (p1 == U_GCB_ZWJ && isEmoji (c2) && offset_back > start) {
166+ // look at character before ZWJ to see that both can participate in an
167+ // emoji zwj sequence
168+ uint32_t c0 = 0 ;
169+ size_t offset_backback = offset_back;
170+ U16_PREV (buf, start, offset_backback, c0);
171+ if (c0 == 0xFE0F && offset_backback > start) {
172+ // skip over emoji variation selector
173+ U16_PREV (buf, start, offset_backback, c0);
174+ }
175+ if (isEmoji (c0)) {
176+ return false ;
177+ }
178+ }
179179
180180 // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases.
181181 // sot (RI RI)* RI x RI
0 commit comments