Skip to content

Commit 2cba6b8

Browse files
authored
bpo-29240: readline now ignores the UTF-8 Mode (#5145)
Add new functions ignoring the UTF-8 Mode:

* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()

Modify the readline module to use these functions.

Re-enable test_readline.test_nonascii().
1 parent f80c0ca commit 2cba6b8

File tree

6 files changed

+125
-42
lines changed

6 files changed

+125
-42
lines changed

Include/fileutils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
2424
const char *s,
2525
Py_ssize_t size,
2626
size_t *p_wlen);
27+
28+
PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
29+
const char *arg,
30+
size_t *size);
31+
32+
PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
33+
const wchar_t *text,
34+
size_t *error_pos);
2735
#endif
2836

2937
#ifndef Py_LIMITED_API

Include/unicodeobject.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1810,6 +1810,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
18101810
PyObject *unicode,
18111811
const char *errors
18121812
);
1813+
1814+
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
1815+
const char *str,
1816+
Py_ssize_t len,
1817+
const char *errors);
1818+
1819+
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
1820+
PyObject *unicode,
1821+
const char *errors
1822+
);
18131823
#endif
18141824

18151825
/* --- File system encoding ---------------------------------------------- */

Lib/test/test_readline.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,6 @@ def test_auto_history_disabled(self):
152152
output = run_pty(self.auto_history_script.format(False))
153153
self.assertIn(b"History length: 0\r\n", output)
154154

155-
@unittest.skipIf(True,
156-
"FIXME: test broken by bpo-29240")
157155
def test_nonascii(self):
158156
try:
159157
readline.add_history("\xEB\xEF")

Modules/readline.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,14 @@ static PyModuleDef readlinemodule;
132132
static PyObject *
133133
encode(PyObject *b)
134134
{
135-
return PyUnicode_EncodeLocale(b, "surrogateescape");
135+
return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
136136
}
137137

138138
static PyObject *
139139
decode(const char *s)
140140
{
141-
return PyUnicode_DecodeLocale(s, "surrogateescape");
141+
return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s),
142+
"surrogateescape");
142143
}
143144

144145

Objects/unicodeobject.c

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3395,8 +3395,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
33953395
}
33963396
}
33973397

3398-
PyObject *
3399-
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
3398+
static PyObject *
3399+
unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
34003400
{
34013401
Py_ssize_t wlen, wlen2;
34023402
wchar_t *wstr;
@@ -3423,7 +3423,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
34233423
/* "surrogateescape" error handler */
34243424
char *str;
34253425

3426-
str = Py_EncodeLocale(wstr, &error_pos);
3426+
if (current_locale) {
3427+
str = _Py_EncodeCurrentLocale(wstr, &error_pos);
3428+
}
3429+
else {
3430+
str = Py_EncodeLocale(wstr, &error_pos);
3431+
}
34273432
if (str == NULL) {
34283433
if (error_pos == (size_t)-1) {
34293434
PyErr_NoMemory();
@@ -3437,7 +3442,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
34373442
PyMem_Free(wstr);
34383443

34393444
bytes = PyBytes_FromString(str);
3440-
PyMem_Free(str);
3445+
if (current_locale) {
3446+
PyMem_RawFree(str);
3447+
}
3448+
else {
3449+
PyMem_Free(str);
3450+
}
34413451
}
34423452
else {
34433453
/* strict mode */
@@ -3502,6 +3512,18 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
35023512
return NULL;
35033513
}
35043514

3515+
PyObject *
3516+
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
3517+
{
3518+
return unicode_encode_locale(unicode, errors, 0);
3519+
}
3520+
3521+
PyObject *
3522+
_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
3523+
{
3524+
return unicode_encode_locale(unicode, errors, 1);
3525+
}
3526+
35053527
PyObject *
35063528
PyUnicode_EncodeFSDefault(PyObject *unicode)
35073529
{
@@ -3524,7 +3546,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
35243546
Py_FileSystemDefaultEncodeErrors);
35253547
}
35263548
else {
3527-
return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
3549+
return unicode_encode_locale(unicode,
3550+
Py_FileSystemDefaultEncodeErrors, 0);
35283551
}
35293552
#endif
35303553
}
@@ -3695,9 +3718,9 @@ mbstowcs_errorpos(const char *str, size_t len)
36953718
return 0;
36963719
}
36973720

3698-
PyObject*
3699-
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
3700-
const char *errors)
3721+
static PyObject*
3722+
unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
3723+
int current_locale)
37013724
{
37023725
wchar_t smallbuf[256];
37033726
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
@@ -3719,7 +3742,12 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
37193742

37203743
if (surrogateescape) {
37213744
/* "surrogateescape" error handler */
3722-
wstr = Py_DecodeLocale(str, &wlen);
3745+
if (current_locale) {
3746+
wstr = _Py_DecodeCurrentLocale(str, &wlen);
3747+
}
3748+
else {
3749+
wstr = Py_DecodeLocale(str, &wlen);
3750+
}
37233751
if (wstr == NULL) {
37243752
if (wlen == (size_t)-1)
37253753
PyErr_NoMemory();
@@ -3794,11 +3822,25 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
37943822
return NULL;
37953823
}
37963824

3825+
PyObject*
3826+
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
3827+
const char *errors)
3828+
{
3829+
return unicode_decode_locale(str, len, errors, 0);
3830+
}
3831+
3832+
PyObject*
3833+
_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
3834+
const char *errors)
3835+
{
3836+
return unicode_decode_locale(str, len, errors, 1);
3837+
}
3838+
37973839
PyObject*
37983840
PyUnicode_DecodeLocale(const char *str, const char *errors)
37993841
{
38003842
Py_ssize_t size = (Py_ssize_t)strlen(str);
3801-
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
3843+
return unicode_decode_locale(str, size, errors, 0);
38023844
}
38033845

38043846

Python/fileutils.c

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
263263

264264
#if !defined(__APPLE__) && !defined(__ANDROID__)
265265
static wchar_t*
266-
decode_locale(const char* arg, size_t *size)
266+
decode_current_locale(const char* arg, size_t *size)
267267
{
268268
wchar_t *res;
269269
size_t argsize;
@@ -380,32 +380,13 @@ decode_locale(const char* arg, size_t *size)
380380
#endif
381381

382382

383-
/* Decode a byte string from the locale encoding with the
384-
surrogateescape error handler: undecodable bytes are decoded as characters
385-
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
386-
character, escape the bytes using the surrogateescape error handler instead
387-
of decoding them.
388-
389-
Return a pointer to a newly allocated wide character string, use
390-
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
391-
wide characters excluding the null character into *size
392-
393-
Return NULL on decoding error or memory allocation error. If *size* is not
394-
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
395-
decoding error.
396-
397-
Decoding errors should never happen, unless there is a bug in the C
398-
library.
399-
400-
Use the Py_EncodeLocale() function to encode the character string back to a
401-
byte string. */
402-
wchar_t*
403-
Py_DecodeLocale(const char* arg, size_t *size)
383+
static wchar_t*
384+
decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
404385
{
405386
#if defined(__APPLE__) || defined(__ANDROID__)
406387
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
407388
#else
408-
if (Py_UTF8Mode == 1) {
389+
if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
409390
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
410391
}
411392

@@ -426,11 +407,45 @@ Py_DecodeLocale(const char* arg, size_t *size)
426407
}
427408
#endif
428409

429-
return decode_locale(arg, size);
410+
return decode_current_locale(arg, size);
430411
#endif /* __APPLE__ or __ANDROID__ */
431412
}
432413

433414

415+
/* Decode a byte string from the locale encoding with the
416+
surrogateescape error handler: undecodable bytes are decoded as characters
417+
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
418+
character, escape the bytes using the surrogateescape error handler instead
419+
of decoding them.
420+
421+
Return a pointer to a newly allocated wide character string, use
422+
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
423+
wide characters excluding the null character into *size
424+
425+
Return NULL on decoding error or memory allocation error. If *size* is not
426+
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
427+
decoding error.
428+
429+
Decoding errors should never happen, unless there is a bug in the C
430+
library.
431+
432+
Use the Py_EncodeLocale() function to encode the character string back to a
433+
byte string. */
434+
wchar_t*
435+
Py_DecodeLocale(const char* arg, size_t *size)
436+
{
437+
return decode_locale(arg, size, 0);
438+
}
439+
440+
441+
/* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */
442+
wchar_t*
443+
_Py_DecodeCurrentLocale(const char* arg, size_t *size)
444+
{
445+
return decode_locale(arg, size, 1);
446+
}
447+
448+
434449
#if !defined(__APPLE__) && !defined(__ANDROID__)
435450
static char*
436451
encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
@@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
508523
#endif
509524

510525
static char*
511-
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
526+
encode_locale(const wchar_t *text, size_t *error_pos,
527+
int raw_malloc, int ignore_utf8_mode)
512528
{
513529
#if defined(__APPLE__) || defined(__ANDROID__)
514530
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
515531
#else /* __APPLE__ */
516-
if (Py_UTF8Mode == 1) {
532+
if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
517533
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
518534
}
519535

@@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
544560
char*
545561
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
546562
{
547-
return encode_locale(text, error_pos, 0);
563+
return encode_locale(text, error_pos, 0, 0);
548564
}
549565

550566

@@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
553569
char*
554570
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
555571
{
556-
return encode_locale(text, error_pos, 1);
572+
return encode_locale(text, error_pos, 1, 0);
573+
}
574+
575+
576+
/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode */
577+
char*
578+
_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
579+
{
580+
return encode_locale(text, error_pos, 1, 1);
557581
}
558582

559583

0 commit comments

Comments
 (0)