2525
2626#define MAXUTF 0x7FFFFFFFu
2727
28+
29+ #define MSGInvalid "invalid UTF-8 code"
30+
2831/*
2932** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
3033*/
@@ -35,7 +38,8 @@ typedef unsigned long utfint;
3538#endif
3639
3740
38- #define iscont (p ) ((*(p) & 0xC0) == 0x80)
41+ #define iscont (c ) (((c) & 0xC0) == 0x80)
42+ #define iscontp (p ) iscont(*(p))
3943
4044
4145/* from strlib */
@@ -65,7 +69,7 @@ static const char *utf8_decode (const char *s, utfint *val, int strict) {
6569 int count = 0 ; /* to count number of continuation bytes */
6670 for (; c & 0x40 ; c <<= 1 ) { /* while it needs continuation bytes... */
6771 unsigned int cc = (unsigned char )s [++ count ]; /* read next byte */
68- if ((cc & 0xC0 ) != 0x80 ) /* not a continuation byte? */
72+ if (! iscont (cc ) ) /* not a continuation byte? */
6973 return NULL ; /* invalid byte sequence */
7074 res = (res << 6 ) | (cc & 0x3F ); /* add lower 6 bits from cont. byte */
7175 }
@@ -140,7 +144,7 @@ static int codepoint (lua_State *L) {
140144 utfint code ;
141145 s = utf8_decode (s , & code , !lax );
142146 if (s == NULL )
143- return luaL_error (L , "invalid UTF-8 code" );
147+ return luaL_error (L , MSGInvalid );
144148 lua_pushinteger (L , code );
145149 n ++ ;
146150 }
@@ -190,16 +194,16 @@ static int byteoffset (lua_State *L) {
190194 "position out of bounds" );
191195 if (n == 0 ) {
192196 /* find beginning of current byte sequence */
193- while (posi > 0 && iscont (s + posi )) posi -- ;
197+ while (posi > 0 && iscontp (s + posi )) posi -- ;
194198 }
195199 else {
196- if (iscont (s + posi ))
200+ if (iscontp (s + posi ))
197201 return luaL_error (L , "initial position is a continuation byte" );
198202 if (n < 0 ) {
199203 while (n < 0 && posi > 0 ) { /* move back */
200204 do { /* find beginning of previous character */
201205 posi -- ;
202- } while (posi > 0 && iscont (s + posi ));
206+ } while (posi > 0 && iscontp (s + posi ));
203207 n ++ ;
204208 }
205209 }
@@ -208,7 +212,7 @@ static int byteoffset (lua_State *L) {
208212 while (n > 0 && posi < (lua_Integer )len ) {
209213 do { /* find beginning of next character */
210214 posi ++ ;
211- } while (iscont (s + posi )); /* (cannot pass final '\0') */
215+ } while (iscontp (s + posi )); /* (cannot pass final '\0') */
212216 n -- ;
213217 }
214218 }
@@ -226,15 +230,15 @@ static int iter_aux (lua_State *L, int strict) {
226230 const char * s = luaL_checklstring (L , 1 , & len );
227231 lua_Unsigned n = (lua_Unsigned )lua_tointeger (L , 2 );
228232 if (n < len ) {
229- while (iscont (s + n )) n ++ ; /* skip continuation bytes */
233+ while (iscontp (s + n )) n ++ ; /* go to next character */
230234 }
231235 if (n >= len ) /* (also handles original 'n' being negative) */
232236 return 0 ; /* no more codepoints */
233237 else {
234238 utfint code ;
235239 const char * next = utf8_decode (s + n , & code , strict );
236- if (next == NULL )
237- return luaL_error (L , "invalid UTF-8 code" );
240+ if (next == NULL || iscontp ( next ) )
241+ return luaL_error (L , MSGInvalid );
238242 lua_pushinteger (L , n + 1 );
239243 lua_pushinteger (L , code );
240244 return 2 ;
@@ -253,7 +257,8 @@ static int iter_auxlax (lua_State *L) {
253257
254258static int iter_codes (lua_State * L ) {
255259 int lax = lua_toboolean (L , 2 );
256- luaL_checkstring (L , 1 );
260+ const char * s = luaL_checkstring (L , 1 );
261+ luaL_argcheck (L , !iscontp (s ), 1 , MSGInvalid );
257262 lua_pushcfunction (L , lax ? iter_auxlax : iter_auxstrict );
258263 lua_pushvalue (L , 1 );
259264 lua_pushinteger (L , 0 );
0 commit comments