Skip to content

Commit f3170cc

Browse files
author
Victor Stinner
committed
Use locale encoding if Py_FileSystemDefaultEncoding is not set
* PyUnicode_EncodeFSDefault(), PyUnicode_DecodeFSDefaultAndSize() and PyUnicode_DecodeFSDefault() use the locale encoding instead of UTF-8 if Py_FileSystemDefaultEncoding is NULL
* redecode_filenames() functions and _Py_code_object_list (issue #9630) are no longer needed: remove them
1 parent 6a4aff1 commit f3170cc

File tree

8 files changed

+48
-297
lines changed

8 files changed

+48
-297
lines changed

Doc/c-api/unicode.rst

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
415415
Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the
416416
``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
417417
418-
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
418+
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
419+
locale encoding.
419420
420421
.. versionchanged:: 3.2
421422
Use ``'strict'`` error handler on Windows.
@@ -426,7 +427,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
426427
Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding`
427428
and the ``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
428429
429-
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
430+
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
431+
locale encoding.
430432
431433
Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
432434
@@ -440,7 +442,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
440442
``'surrogateescape'`` error handler, or ``'strict'`` on Windows, and return
441443
:class:`bytes`.
442444
443-
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
445+
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
446+
locale encoding.
444447
445448
.. versionadded:: 3.2
446449

Include/code.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ PyAPI_DATA(PyTypeObject) PyCode_Type;
7272
PyAPI_FUNC(PyCodeObject *) PyCode_New(
7373
int, int, int, int, int, PyObject *, PyObject *,
7474
PyObject *, PyObject *, PyObject *, PyObject *,
75-
PyObject *, PyObject *, int, PyObject *);
75+
PyObject *, PyObject *, int, PyObject *);
7676
/* same as struct above */
7777

7878
/* Creates a new empty code object with the specified source location. */
@@ -99,13 +99,6 @@ PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co,
9999
PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
100100
PyObject *names, PyObject *lineno_obj);
101101

102-
/* List of weak references to all code objects. The list is used by
103-
initfsencoding() to redecode code filenames at startup if the filesystem
104-
encoding changes. At initfsencoding() exit, the list is set to NULL and it
105-
is no more used. */
106-
107-
extern PyObject *_Py_code_object_list;
108-
109102
#ifdef __cplusplus
110103
}
111104
#endif

Include/unicodeobject.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,8 @@ PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
11931193
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
11941194
and the "surrogateescape" error handler.
11951195
1196-
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
1196+
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1197+
encoding.
11971198
11981199
Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
11991200
*/
@@ -1205,7 +1206,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
12051206
/* Decode a string using Py_FileSystemDefaultEncoding
12061207
and the "surrogateescape" error handler.
12071208
1208-
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
1209+
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1210+
encoding.
12091211
*/
12101212

12111213
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
@@ -1216,7 +1218,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
12161218
/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
12171219
"surrogateescape" error handler, and return bytes.
12181220
1219-
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
1221+
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1222+
encoding.
12201223
*/
12211224

12221225
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.2 Beta 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Use locale encoding instead of UTF-8 to encode and decode filenames if
14+
Py_FileSystemDefaultEncoding is not set.
15+
1316
- Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead the file
1417
descriptor.
1518

Objects/codeobject.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#define NAME_CHARS \
66
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
77

8-
PyObject *_Py_code_object_list = NULL;
9-
108
/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
119

1210
static int
@@ -111,17 +109,6 @@ PyCode_New(int argcount, int kwonlyargcount,
111109
co->co_lnotab = lnotab;
112110
co->co_zombieframe = NULL;
113111
co->co_weakreflist = NULL;
114-
115-
if (_Py_code_object_list != NULL) {
116-
int err;
117-
PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL);
118-
if (ref == NULL)
119-
goto error;
120-
err = PyList_Append(_Py_code_object_list, ref);
121-
Py_DECREF(ref);
122-
if (err)
123-
goto error;
124-
}
125112
}
126113
return co;
127114

Objects/object.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,10 +1604,6 @@ _Py_ReadyTypes(void)
16041604
if (PyType_Ready(&PyCode_Type) < 0)
16051605
Py_FatalError("Can't initialize code type");
16061606

1607-
_Py_code_object_list = PyList_New(0);
1608-
if (_Py_code_object_list == NULL)
1609-
Py_FatalError("Can't initialize code type");
1610-
16111607
if (PyType_Ready(&PyFrame_Type) < 0)
16121608
Py_FatalError("Can't initialize frame type");
16131609

Objects/unicodeobject.c

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,11 +1597,22 @@ PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
15971597
"surrogateescape");
15981598
}
15991599
else {
1600-
/* if you change the default encoding, update also
1601-
PyUnicode_DecodeFSDefaultAndSize() and redecode_filenames() */
1602-
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
1603-
PyUnicode_GET_SIZE(unicode),
1604-
"surrogateescape");
1600+
/* locale encoding with surrogateescape */
1601+
wchar_t *wchar;
1602+
char *bytes;
1603+
PyObject *bytes_obj;
1604+
1605+
wchar = PyUnicode_AsWideCharString(unicode, NULL);
1606+
if (wchar == NULL)
1607+
return NULL;
1608+
bytes = _Py_wchar2char(wchar);
1609+
PyMem_Free(wchar);
1610+
if (bytes == NULL)
1611+
return NULL;
1612+
1613+
bytes_obj = PyBytes_FromString(bytes);
1614+
PyMem_Free(bytes);
1615+
return bytes_obj;
16051616
}
16061617
}
16071618

@@ -1769,9 +1780,22 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
17691780
"surrogateescape");
17701781
}
17711782
else {
1772-
/* if you change the default encoding, update also
1773-
PyUnicode_EncodeFSDefault() and redecode_filenames() */
1774-
return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
1783+
/* locale encoding with surrogateescape */
1784+
wchar_t *wchar;
1785+
PyObject *unicode;
1786+
1787+
if (s[size] != '\0' || size != strlen(s)) {
1788+
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
1789+
return NULL;
1790+
}
1791+
1792+
wchar = _Py_char2wchar(s);
1793+
if (wchar == NULL)
1794+
return NULL;
1795+
1796+
unicode = PyUnicode_FromWideChar(wchar, -1);
1797+
PyMem_Free(wchar);
1798+
return unicode;
17751799
}
17761800
}
17771801

0 commit comments

Comments
 (0)