Skip to content

Commit 8e75967

Browse files
authored
Fix issues with scoped flags (#192)
- Scoped ASCII/Unicode flags (`(?a:pattern)`/`(?u:pattern)`) should be respected for Unicode properties in `bre`.
- Fix issues with disabled scoped flags.
- Regex will allow global flags in scoped groups, but they still apply globally.
- Regex allows disabling flags in non-scoped groups.
1 parent 0df8e77 commit 8e75967

File tree

4 files changed

+63
-42
lines changed

4 files changed

+63
-42
lines changed

backrefs/_bre_parse.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@
3737
_CURLY_BRACKETS = frozenset(('{', '}'))
3838
_PROPERTY_STRIP = frozenset((' ', '-', '_'))
3939
_PROPERTY = _WORD | _DIGIT | _PROPERTY_STRIP
40-
_GLOBAL_FLAGS = frozenset(('a', 'u', 'L'))
41-
_SCOPED_FLAGS = frozenset(('i', 'm', 's', 'u', 'x'))
40+
_SCOPED_FLAGS_POSITIVE = frozenset(('a', 'i', 'L', 'm', 's', 'u', 'x'))
41+
_SCOPED_FLAGS_NEGATIVE = frozenset(('i', 'm', 's', 'x'))
42+
_SCOPED_END = frozenset((':', ')'))
4243

4344
_CURLY_BRACKETS_ORD = frozenset((0x7b, 0x7d))
4445

@@ -360,51 +361,47 @@ def get_comments(self, i: _util.StringIter) -> str | None:
360361

361362
return ''.join(value)
362363

363-
def get_flags(self, i: _util.StringIter, scoped: bool = False) -> str | None:
364+
def get_flags(self, i: _util.StringIter) -> tuple[str | None, bool]:
364365
"""Get flags."""
365366

366367
index = i.index
367368
value = ['(']
368369
toggle = False
369-
end = ':' if scoped else ')'
370+
smells_scoped = False
370371
try:
371372
c = next(i)
372373
if c != '?':
373374
i.rewind(1)
374-
return None
375+
return None, False
375376
value.append(c)
376377
c = next(i)
377-
while c != end:
378+
while c not in _SCOPED_END:
378379
if toggle:
379-
if c not in _SCOPED_FLAGS:
380+
if c not in _SCOPED_FLAGS_NEGATIVE:
380381
raise ValueError('Bad scope')
381-
toggle = False
382-
elif scoped and c == '-':
382+
elif c == '-':
383+
smells_scoped = True
383384
toggle = True
384-
elif scoped and c in _GLOBAL_FLAGS:
385-
raise ValueError("Bad flag")
386-
elif c not in _GLOBAL_FLAGS and c not in _SCOPED_FLAGS:
385+
elif c not in _SCOPED_FLAGS_POSITIVE:
387386
raise ValueError("Bad flag")
388387
value.append(c)
389388
c = next(i)
389+
if smells_scoped and c != ':':
390+
raise ValueError("Bad flag")
391+
elif c == ':':
392+
smells_scoped = True
390393
value.append(c)
391394
except Exception:
392395
i.rewind(i.index - index)
393396
value = []
394397

395-
return ''.join(value) if value else None
398+
return ''.join(value) if value else None, smells_scoped
396399

397400
def subgroup(self, t: str, i: _util.StringIter) -> list[str]:
398401
"""Handle parenthesis."""
399402

400403
current = [] # type: list[str]
401404

402-
# (?flags)
403-
flags = self.get_flags(i)
404-
if flags:
405-
self.flags(flags[2:-1])
406-
return [flags]
407-
408405
# (?#comment)
409406
comments = self.get_comments(i)
410407
if comments:
@@ -413,11 +410,13 @@ def subgroup(self, t: str, i: _util.StringIter) -> list[str]:
413410
verbose = self.verbose
414411
unicode_flag = self.unicode
415412

416-
# (?flags:pattern)
417-
flags = self.get_flags(i, True)
413+
# (?flags:pattern) or (?flags)
414+
flags, scoped = self.get_flags(i)
418415
if flags: # pragma: no cover
419416
t = flags
420-
self.flags(flags[2:-1], scoped=True)
417+
self.flags(flags[2:-1], scoped=scoped)
418+
if not scoped:
419+
return [flags]
421420

422421
current = []
423422
try:

backrefs/_bregex_parse.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030
_CURLY_BRACKETS = frozenset(('{', '}'))
3131
_PROPERTY_STRIP = frozenset((' ', '-', '_'))
3232
_PROPERTY = _WORD | _DIGIT | _PROPERTY_STRIP
33-
_GLOBAL_FLAGS = frozenset(('L', 'a', 'b', 'e', 'r', 'u', 'p'))
34-
_SCOPED_FLAGS = frozenset(('i', 'm', 's', 'f', 'w', 'x'))
33+
_GLOBAL_FLAGS = frozenset(('b', 'e', 'p', 'r', 'u'))
34+
_SCOPED_FLAGS = frozenset(('a', 'f', 'i', 'L', 'm', 's', 'u', 'w', 'x'))
3535
_VERSIONS = frozenset(('0', '1'))
36+
_SCOPED_END = frozenset((':', ')'))
3637

3738
_CURLY_BRACKETS_ORD = frozenset((0x7b, 0x7d))
3839

@@ -260,66 +261,65 @@ def get_comments(self, i: _util.StringIter) -> str | None:
260261

261262
return ''.join(value) if value else None
262263

263-
def get_flags(self, i: _util.StringIter, version0: bool, scoped: bool = False) -> str | None:
264+
def get_flags(self, i: _util.StringIter, version0: bool) -> tuple[str | None, bool]:
264265
"""Get flags."""
265266

266267
index = i.index
267268
value = ['(']
268269
version = False
269270
toggle = False
270-
end = ':' if scoped else ')'
271+
smells_scoped = False
271272
try:
272273
c = next(i)
273274
if c != '?':
274275
i.rewind(1)
275-
return None
276+
return None, False
276277
value.append(c)
277278
c = next(i)
278-
while c != end:
279+
while c not in _SCOPED_END:
279280
if toggle:
280281
if c not in _SCOPED_FLAGS:
281282
raise ValueError('Bad scope')
282-
toggle = False
283-
elif (not version0 or scoped) and c == '-':
284-
toggle = True
285283
elif version:
286284
if c not in _VERSIONS:
287285
raise ValueError('Bad version')
288286
version = False
287+
elif c == '-':
288+
toggle = True
289289
elif c == 'V':
290290
version = True
291-
elif c not in _GLOBAL_FLAGS and c not in _SCOPED_FLAGS:
291+
elif c not in _SCOPED_FLAGS and c not in _GLOBAL_FLAGS:
292292
raise ValueError("Bad flag")
293293
value.append(c)
294294
c = next(i)
295+
if c == ':':
296+
smells_scoped = True
297+
295298
value.append(c)
296299
except Exception:
297300
i.rewind(i.index - index)
298301
value = []
299302

300-
return ''.join(value) if value else None
303+
return ''.join(value) if value else None, smells_scoped
301304

302305
def subgroup(self, t: str, i: _util.StringIter) -> list[str]:
303306
"""Handle parenthesis."""
304307

305-
# (?flags)
306-
flags = self.get_flags(i, self.version == _regex.V0)
307-
if flags:
308-
self.flags(flags[2:-1])
309-
return [flags]
310-
311308
# (?#comment)
312309
comments = self.get_comments(i)
313310
if comments:
314311
return [comments]
315312

316313
verbose = self.verbose
317314

318-
# (?flags:pattern)
319-
flags = self.get_flags(i, (self.version == _regex.V0), True)
315+
# (?flags:pattern) or (?flags)
316+
# "scoped" only refers to verbose
317+
flags, scoped = self.get_flags(i, self.version == _regex.V0)
320318
if flags:
321319
t = flags
322-
self.flags(flags[2:-1], scoped=True)
320+
self.flags(flags[2:-1], scoped=scoped)
321+
if not scoped:
322+
return [flags]
323323

324324
current = [] # type: list[str]
325325
try:

docs/src/markdown/about/changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
`[[:xdigit:]]`, and `[[:punct:]]`. To explicitly use standard Unicode rules for these compatibility properties, use
88
the Unicode property form instead: `[\p{Alnum}]`, `[\p{Digit}]`, `[\p{Punct}]`, or `[\p{XDigit}]`. This has changed
99
to ensure no confusion for users expecting compatible POSIX style character class properties.
10+
- **FIX**: Scoped ASCII/Unicode flags (`(?a:pattern)`/`(?u:pattern)`) are now respected for Unicode properties in
11+
`bre`.
12+
- **FIX**: Fix issues with disabled scoped flags.
1013

1114
## 5.9
1215

tests/test_bre.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
PY39_PLUS = (3, 9) <= sys.version_info
1313
PY311_PLUS = (3, 11) <= sys.version_info
14+
PY313_PLUS = (3, 13) <= sys.version_info
1415

1516
if PY311_PLUS:
1617
import re._constants as _constants
@@ -21,6 +22,14 @@
2122
class TestSearchTemplate(unittest.TestCase):
2223
"""Search template tests."""
2324

25+
def test_inline_unicode(self):
26+
"""Test inline Unicode/ASCII cases."""
27+
28+
self.assertTrue(bre.match(r'\p{N}', '\uff19', flags=bre.ASCII) is None)
29+
self.assertTrue(bre.match(r'(?u:\p{N})', '\uff19', flags=bre.ASCII) is not None)
30+
self.assertTrue(bre.match(r'\p{N}', '\uff19', flags=bre.UNICODE) is not None)
31+
self.assertTrue(bre.match(r'(?a:\p{N})', '\uff19', flags=bre.UNICODE) is None)
32+
2433
def test_custom_binary_properties(self):
2534
"""Test new custom binary properties."""
2635

@@ -1986,6 +1995,16 @@ def test_dont_case_special_refs(self):
19861995
class TestExceptions(unittest.TestCase):
19871996
"""Test Exceptions."""
19881997

1998+
def test_bad_flag(self):
1999+
"""Test bad flag."""
2000+
2001+
if PY313_PLUS:
2002+
with self.assertRaises(re.PatternError):
2003+
bre.compile(r'(?-i)')
2004+
else:
2005+
with self.assertRaises(re.error):
2006+
bre.compile(r'(?-i)')
2007+
19892008
def test_format_existing_group_no_match_with_index(self):
19902009
"""Test format group with no match and attempt at indexing."""
19912010

0 commit comments

Comments
 (0)