Skip to content

Commit 65ef742

Browse files
authored
bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972) (GH-8974)
* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C" locale. * On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE locale is "C". * test_utf8_mode.test_cmd_line() also checks that the command line arguments are decoded from UTF-8 when the UTF-8 Mode is enabled with the POSIX locale or the C locale. (cherry picked from commit 5cb2589)
1 parent e3f2082 commit 65ef742

File tree

5 files changed

+39
-13
lines changed

5 files changed

+39
-13
lines changed

Lib/test/test_utf8_mode.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313

1414
MS_WINDOWS = (sys.platform == 'win32')
15+
POSIX_LOCALES = ('C', 'POSIX')
1516

1617

1718
class UTF8ModeTests(unittest.TestCase):
@@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase):
2324

2425
def posix_locale(self):
2526
loc = locale.setlocale(locale.LC_CTYPE, None)
26-
return (loc == 'C')
27+
return (loc in POSIX_LOCALES)
2728

2829
def get_output(self, *args, failure=False, **kw):
2930
kw = dict(self.DEFAULT_ENV, **kw)
@@ -39,8 +40,10 @@ def get_output(self, *args, failure=False, **kw):
3940
def test_posix_locale(self):
4041
code = 'import sys; print(sys.flags.utf8_mode)'
4142

42-
out = self.get_output('-c', code, LC_ALL='C')
43-
self.assertEqual(out, '1')
43+
for loc in POSIX_LOCALES:
44+
with self.subTest(LC_ALL=loc):
45+
out = self.get_output('-c', code, LC_ALL=loc)
46+
self.assertEqual(out, '1')
4447

4548
def test_xoption(self):
4649
code = 'import sys; print(sys.flags.utf8_mode)'
@@ -201,8 +204,10 @@ def test_locale_getpreferredencoding(self):
201204
out = self.get_output('-X', 'utf8', '-c', code)
202205
self.assertEqual(out, 'UTF-8 UTF-8')
203206

204-
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
205-
self.assertEqual(out, 'UTF-8 UTF-8')
207+
for loc in POSIX_LOCALES:
208+
with self.subTest(LC_ALL=loc):
209+
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
210+
self.assertEqual(out, 'UTF-8 UTF-8')
206211

207212
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
208213
def test_cmd_line(self):
@@ -217,11 +222,17 @@ def check(utf8_opt, expected, **kw):
217222
self.assertEqual(args, ascii(expected), out)
218223

219224
check('utf8', [arg_utf8])
225+
for loc in POSIX_LOCALES:
226+
with self.subTest(LC_ALL=loc):
227+
check('utf8', [arg_utf8], LC_ALL=loc)
228+
220229
if sys.platform == 'darwin' or support.is_android:
221230
c_arg = arg_utf8
222231
else:
223232
c_arg = arg_ascii
224-
check('utf8=0', [c_arg], LC_ALL='C')
233+
for loc in POSIX_LOCALES:
234+
with self.subTest(LC_ALL=loc):
235+
check('utf8=0', [c_arg], LC_ALL=loc)
225236

226237
def test_optim_level(self):
227238
# CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C"
2+
locale.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force the
2+
ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE
3+
locale is "C".

Modules/main.c

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,15 +2102,25 @@ pymain_read_conf(_PyMain *pymain, _Py_CommandLineDetails *cmdline)
21022102
static void
21032103
config_init_locale(_PyCoreConfig *config)
21042104
{
2105-
if (_Py_LegacyLocaleDetected()) {
2106-
/* POSIX locale: enable C locale coercion and UTF-8 Mode */
2107-
if (config->utf8_mode < 0) {
2108-
config->utf8_mode = 1;
2109-
}
2110-
if (config->coerce_c_locale < 0) {
2105+
if (config->coerce_c_locale < 0) {
2106+
/* The C locale enables the C locale coercion (PEP 538) */
2107+
if (_Py_LegacyLocaleDetected()) {
21112108
config->coerce_c_locale = 1;
21122109
}
21132110
}
2111+
2112+
#ifndef MS_WINDOWS
2113+
if (config->utf8_mode < 0) {
2114+
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
2115+
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
2116+
if (ctype_loc != NULL
2117+
&& (strcmp(ctype_loc, "C") == 0
2118+
|| strcmp(ctype_loc, "POSIX") == 0))
2119+
{
2120+
config->utf8_mode = 1;
2121+
}
2122+
}
2123+
#endif
21142124
}
21152125

21162126

Python/fileutils.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ check_force_ascii(void)
128128
loc = setlocale(LC_CTYPE, NULL);
129129
if (loc == NULL)
130130
goto error;
131-
if (strcmp(loc, "C") != 0) {
131+
if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
132132
/* the LC_CTYPE locale is different than C */
133133
return 0;
134134
}

0 commit comments

Comments
 (0)