Skip to content

Commit 147946b

Browse files
committed
Use ast instead of eval for string extraction
This is safer (nothing is actually executed) and also lets us parse f-strings. Closes #769 (supersedes it). Refs #715 (doesn't add an error yet, but no longer crashes on f-strings).
1 parent 05df10f commit 147946b

File tree

2 files changed

+53
-9
lines changed

2 files changed

+53
-9
lines changed

babel/messages/extract.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
:copyright: (c) 2013-2022 by the Babel Team.
1616
:license: BSD, see LICENSE for more details.
1717
"""
18-
18+
import ast
1919
import os
2020
from os.path import relpath
2121
import sys
@@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
487487
if nested:
488488
funcname = value
489489
elif tok == STRING:
490-
# Unwrap quotes in a safe manner, maintaining the string's
491-
# encoding
492-
# https://sourceforge.net/tracker/?func=detail&atid=355470&
493-
# aid=617979&group_id=5470
494-
code = compile('# coding=%s\n%s' % (str(encoding), value),
495-
'<string>', 'eval', future_flags)
496-
value = eval(code, {'__builtins__': {}}, {})
497-
buf.append(value)
490+
val = _parse_python_string(value, encoding, future_flags)
491+
if val is not None:
492+
buf.append(val)
498493
elif tok == OP and value == ',':
499494
if buf:
500495
messages.append(''.join(buf))
@@ -516,6 +511,28 @@ def extract_python(fileobj, keywords, comment_tags, options):
516511
funcname = value
517512

518513

514+
def _parse_python_string(value, encoding, future_flags):
515+
# Unwrap quotes in a safe manner, maintaining the string's encoding
516+
# https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
517+
code = compile(
518+
f'# coding={str(encoding)}\n{value}',
519+
'<string>',
520+
'eval',
521+
ast.PyCF_ONLY_AST | future_flags,
522+
)
523+
if isinstance(code, ast.Expression):
524+
body = code.body
525+
if isinstance(body, ast.Str):
526+
return body.s
527+
if isinstance(body, ast.JoinedStr): # f-string
528+
if all(isinstance(node, ast.Str) for node in body.values):
529+
return ''.join(node.s for node in body.values)
530+
if all(isinstance(node, ast.Constant) for node in body.values):
531+
return ''.join(str(node.value) for node in body.values)
532+
# TODO: we could raise an error or warning when not all nodes are constants
533+
return None
534+
535+
519536
def extract_javascript(fileobj, keywords, comment_tags, options):
520537
"""Extract messages from JavaScript source code.
521538

tests/messages/test_extract.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,30 @@ def test_future(self):
528528
messages = list(extract.extract('python', buf,
529529
extract.DEFAULT_KEYWORDS, [], {}))
530530
assert messages[0][1] == u'\xa0'
531+
532+
def test_f_strings(self):
    """f-strings built only from constant parts are extracted; ones with placeholders are not."""
    source = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
f'flying shark'
'... hello'
)
t4 = _(f'spameggs {t1}') # should not be extracted
""")
    results = extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    # Position 1 of each extracted entry holds the message text.
    extracted = [entry[1] for entry in results]
    assert extracted == [
        u'foobar',
        u'spameggsfeast',
        u'spameggskerroshampurilainen',
        u'whoa! a flying shark... hello',
    ]
549+
550+
def test_f_strings_non_utf8(self):
    """Constant f-strings in a source declared as latin-1 are joined into one message."""
    source = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
    results = extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    # Position 1 of each extracted entry holds the message text.
    extracted = [entry[1] for entry in results]
    assert extracted == [u'åäöÅÄÖ']

0 commit comments

Comments
 (0)