Skip to content

Commit 5b8d2c3

Browse files
committed
Issue #8478: Untokenizer.compat now processes the first token from iterator input.
Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
1 parent 58edfd9 commit 5b8d2c3

File tree

3 files changed

+27
-13
lines changed

3 files changed

+27
-13
lines changed

Lib/test/test_tokenize.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,19 @@ def test_bad_input_order(self):
11651165
'start (1,3) precedes previous end (2,2)')
11661166
self.assertRaises(ValueError, u.add_whitespace, (2,1))
11671167

1168+
def test_iter_compat(self):
1169+
u = Untokenizer()
1170+
token = (NAME, 'Hello')
1171+
tokens = [(ENCODING, 'utf-8'), token]
1172+
u.compat(token, iter([]))
1173+
self.assertEqual(u.tokens, ["Hello "])
1174+
u = Untokenizer()
1175+
self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1176+
u = Untokenizer()
1177+
self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1178+
self.assertEqual(u.encoding, 'utf-8')
1179+
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1180+
11681181

11691182
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
11701183

Lib/tokenize.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@
2525
'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
2626
'Michael Foord')
2727
import builtins
28-
import re
29-
import sys
30-
from token import *
3128
from codecs import lookup, BOM_UTF8
3229
import collections
3330
from io import TextIOWrapper
31+
from itertools import chain
32+
import re
33+
import sys
34+
from token import *
35+
3436
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
3537
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
3638

@@ -237,9 +239,10 @@ def add_whitespace(self, start):
237239
self.tokens.append(" " * col_offset)
238240

239241
def untokenize(self, iterable):
240-
for t in iterable:
242+
it = iter(iterable)
243+
for t in it:
241244
if len(t) == 2:
242-
self.compat(t, iterable)
245+
self.compat(t, it)
243246
break
244247
tok_type, token, start, end, line = t
245248
if tok_type == ENCODING:
@@ -254,17 +257,12 @@ def untokenize(self, iterable):
254257
return "".join(self.tokens)
255258

256259
def compat(self, token, iterable):
257-
startline = False
258260
indents = []
259261
toks_append = self.tokens.append
260-
toknum, tokval = token
261-
262-
if toknum in (NAME, NUMBER):
263-
tokval += ' '
264-
if toknum in (NEWLINE, NL):
265-
startline = True
262+
startline = token[0] in (NEWLINE, NL)
266263
prevstring = False
267-
for tok in iterable:
264+
265+
for tok in chain([token], iterable):
268266
toknum, tokval = tok[:2]
269267
if toknum == ENCODING:
270268
self.encoding = tokval

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ Library
2626
- Issue #17671: Fixed a crash when use non-initialized io.BufferedRWPair.
2727
Based on patch by Stephen Tu.
2828

29+
- Issue #8478: Untokenizer.compat now processes the first token from iterator input.
30+
Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
31+
2932
- Issue #20594: Avoid name clash with the libc function posix_close.
3033

3134
- Issue #19856: shutil.move() failed to move a directory to other directory

0 commit comments

Comments
 (0)