Skip to content

Commit dfe0dc7

Browse files
authored
bpo-34485: Add _PyCoreConfig.stdio_encoding (GH-8881)
* Add stdio_encoding and stdio_errors fields to _PyCoreConfig. * Add unit tests on stdio_encoding and stdio_errors.
1 parent 177d921 commit dfe0dc7

File tree

6 files changed

+265
-135
lines changed

6 files changed

+265
-135
lines changed

Include/coreconfig.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,18 @@ typedef struct {
203203
If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */
204204
int buffered_stdio;
205205

206+
/* Encoding of sys.stdin, sys.stdout and sys.stderr.
207+
Value set from PYTHONIOENCODING environment variable and
208+
Py_SetStandardStreamEncoding() function.
209+
See also 'stdio_errors' attribute. */
210+
char *stdio_encoding;
211+
212+
/* Error handler of sys.stdin and sys.stdout.
213+
Value set from PYTHONIOENCODING environment variable and
214+
Py_SetStandardStreamEncoding() function.
215+
See also 'stdio_encoding' attribute. */
216+
char *stdio_errors;
217+
206218
#ifdef MS_WINDOWS
207219
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
208220
encoding for the filesystem encoding.

Include/pylifecycle.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
179179
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
180180
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
181181
#endif
182+
#ifdef Py_BUILD_CORE
183+
PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
184+
#endif
182185

183186
#ifdef __cplusplus
184187
}

Lib/test/test_embed.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,13 +288,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
288288
'quiet': 0,
289289
'user_site_directory': 1,
290290
'buffered_stdio': 1,
291+
# None means that check_config() gets the expected encoding at runtime
292+
'stdio_encoding': None,
293+
'stdio_errors': None,
291294

292295
'_install_importlib': 1,
293296
'_check_hash_pycs_mode': 'default',
294297
'_frozen': 0,
295298
}
296299

300+
def get_stdio_encoding(self, env):
    """Return [encoding, errors] of sys.stdout as seen by a child interpreter.

    A fresh interpreter (sys.executable) is started with the environment
    *env* and prints its sys.stdout.encoding and sys.stdout.errors; the
    two whitespace-separated fields of that output are returned as a list.
    Raise Exception if the child exits with a non-zero status.
    """
    script = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
    # stderr is merged into stdout so that a failure message carries
    # whatever the child interpreter reported.
    result = subprocess.run((sys.executable, '-c', script),
                            env=env,
                            text=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    if result.returncode != 0:
        raise Exception(f"failed to get the stdio encoding: stdout={result.stdout!r}")
    return result.stdout.rstrip().split()
310+
297311
def check_config(self, testname, expected):
312+
expected = dict(self.DEFAULT_CONFIG, **expected)
313+
298314
env = dict(os.environ)
299315
for key in list(env):
300316
if key.startswith('PYTHON'):
@@ -303,13 +319,19 @@ def check_config(self, testname, expected):
303319
# on the current locale
304320
env['PYTHONCOERCECLOCALE'] = '0'
305321
env['PYTHONUTF8'] = '0'
306-
out, err = self.run_embedded_interpreter(testname, env=env)
307-
# Ignore err
308322

309-
expected = dict(self.DEFAULT_CONFIG, **expected)
323+
if expected['stdio_encoding'] is None or expected['stdio_errors'] is None:
324+
res = self.get_stdio_encoding(env)
325+
if expected['stdio_encoding'] is None:
326+
expected['stdio_encoding'] = res[0]
327+
if expected['stdio_errors'] is None:
328+
expected['stdio_errors'] = res[1]
310329
for key, value in expected.items():
311330
expected[key] = str(value)
312331

332+
out, err = self.run_embedded_interpreter(testname, env=env)
333+
# Ignore err
334+
313335
config = {}
314336
for line in out.splitlines():
315337
key, value = line.split(' = ', 1)
@@ -331,7 +353,11 @@ def test_init_global_config(self):
331353
'verbose': 1,
332354
'quiet': 1,
333355
'buffered_stdio': 0,
356+
334357
'utf8_mode': 1,
358+
'stdio_encoding': 'utf-8',
359+
'stdio_errors': 'surrogateescape',
360+
335361
'user_site_directory': 0,
336362
'_frozen': 1,
337363
}
@@ -350,6 +376,8 @@ def test_init_from_config(self):
350376
'malloc_stats': 1,
351377

352378
'utf8_mode': 1,
379+
'stdio_encoding': 'iso8859-1',
380+
'stdio_errors': 'replace',
353381

354382
'pycache_prefix': 'conf_pycache_prefix',
355383
'program_name': './conf_program_name',
@@ -387,6 +415,8 @@ def test_init_env(self):
387415
'write_bytecode': 0,
388416
'verbose': 1,
389417
'buffered_stdio': 0,
418+
'stdio_encoding': 'iso8859-1',
419+
'stdio_errors': 'replace',
390420
'user_site_directory': 0,
391421
'faulthandler': 1,
392422
'dev_mode': 1,

Programs/_testembed.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ dump_config(void)
374374
printf("user_site_directory = %i\n", config->user_site_directory);
375375
printf("buffered_stdio = %i\n", config->buffered_stdio);
376376
ASSERT_EQUAL(config->buffered_stdio, !Py_UnbufferedStdioFlag);
377+
printf("stdio_encoding = %s\n", config->stdio_encoding);
378+
printf("stdio_errors = %s\n", config->stdio_errors);
377379

378380
/* FIXME: test legacy_windows_fs_encoding */
379381
/* FIXME: test legacy_windows_stdio */
@@ -532,6 +534,11 @@ static int test_init_from_config(void)
532534
Py_UnbufferedStdioFlag = 0;
533535
config.buffered_stdio = 0;
534536

537+
putenv("PYTHONIOENCODING=cp424");
538+
Py_SetStandardStreamEncoding("ascii", "ignore");
539+
config.stdio_encoding = "iso8859-1";
540+
config.stdio_errors = "replace";
541+
535542
putenv("PYTHONNOUSERSITE=");
536543
Py_NoUserSiteDirectory = 0;
537544
config.user_site_directory = 0;
@@ -569,6 +576,7 @@ static void test_init_env_putenvs(void)
569576
putenv("PYTHONNOUSERSITE=1");
570577
putenv("PYTHONFAULTHANDLER=1");
571578
putenv("PYTHONDEVMODE=1");
579+
putenv("PYTHONIOENCODING=iso8859-1:replace");
572580
/* FIXME: test PYTHONWARNINGS */
573581
/* FIXME: test PYTHONEXECUTABLE */
574582
/* FIXME: test PYTHONHOME */

Python/coreconfig.c

Lines changed: 179 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#include "Python.h"
22
#include "internal/pystate.h"
33
#include <locale.h>
4+
#ifdef HAVE_LANGINFO_H
5+
# include <langinfo.h>
6+
#endif
47

58

69
#define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list)
8992
* mechanism that attempts to figure out an appropriate IO encoding
9093
*/
9194

92-
char *_Py_StandardStreamEncoding = NULL;
93-
char *_Py_StandardStreamErrors = NULL;
95+
static char *_Py_StandardStreamEncoding = NULL;
96+
static char *_Py_StandardStreamErrors = NULL;
9497

9598
int
9699
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
@@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
205208
CLEAR(config->dll_path);
206209
#endif
207210
CLEAR(config->base_exec_prefix);
211+
212+
CLEAR(config->stdio_encoding);
213+
CLEAR(config->stdio_errors);
208214
#undef CLEAR
209215
#undef CLEAR_WSTRLIST
210216
}
@@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
216222
_PyCoreConfig_Clear(config);
217223

218224
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
225+
#define COPY_STR_ATTR(ATTR) \
226+
do { \
227+
if (config2->ATTR != NULL) { \
228+
config->ATTR = _PyMem_RawStrdup(config2->ATTR); \
229+
if (config->ATTR == NULL) { \
230+
return -1; \
231+
} \
232+
} \
233+
} while (0)
219234
#define COPY_WSTR_ATTR(ATTR) \
220235
do { \
221236
if (config2->ATTR != NULL) { \
@@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
287302
COPY_ATTR(quiet);
288303
COPY_ATTR(user_site_directory);
289304
COPY_ATTR(buffered_stdio);
305+
COPY_STR_ATTR(stdio_encoding);
306+
COPY_STR_ATTR(stdio_errors);
290307
#ifdef MS_WINDOWS
291308
COPY_ATTR(legacy_windows_fs_encoding);
292309
COPY_ATTR(legacy_windows_stdio);
@@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config)
932949
}
933950

934951

952+
static const char *
953+
get_stdio_errors(const _PyCoreConfig *config)
954+
{
955+
#ifndef MS_WINDOWS
956+
const char *loc = setlocale(LC_CTYPE, NULL);
957+
if (loc != NULL) {
958+
/* surrogateescape is the default in the legacy C and POSIX locales */
959+
if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
960+
return "surrogateescape";
961+
}
962+
963+
#ifdef PY_COERCE_C_LOCALE
964+
/* surrogateescape is the default in locale coercion target locales */
965+
if (_Py_IsLocaleCoercionTarget(loc)) {
966+
return "surrogateescape";
967+
}
968+
#endif
969+
}
970+
971+
return "strict";
972+
#else
973+
/* On Windows, always use surrogateescape by default */
974+
return "surrogateescape";
975+
#endif
976+
}
977+
978+
979+
_PyInitError
980+
_Py_get_locale_encoding(char **locale_encoding)
981+
{
982+
#ifdef MS_WINDOWS
983+
char encoding[20];
984+
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
985+
#elif defined(__ANDROID__)
986+
const char *encoding = "UTF-8";
987+
#else
988+
const char *encoding = nl_langinfo(CODESET);
989+
if (!encoding || encoding[0] == '\0') {
990+
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
991+
"nl_langinfo(CODESET) failed");
992+
}
993+
#endif
994+
*locale_encoding = _PyMem_RawStrdup(encoding);
995+
if (*locale_encoding == NULL) {
996+
return _Py_INIT_NO_MEMORY();
997+
}
998+
return _Py_INIT_OK();
999+
}
1000+
1001+
1002+
static _PyInitError
1003+
config_init_stdio_encoding(_PyCoreConfig *config)
1004+
{
1005+
/* If Py_SetStandardStreamEncoding() have been called, use these
1006+
parameters. */
1007+
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
1008+
config->stdio_encoding = _PyMem_RawStrdup(_Py_StandardStreamEncoding);
1009+
if (config->stdio_encoding == NULL) {
1010+
return _Py_INIT_NO_MEMORY();
1011+
}
1012+
}
1013+
1014+
if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
1015+
config->stdio_errors = _PyMem_RawStrdup(_Py_StandardStreamErrors);
1016+
if (config->stdio_errors == NULL) {
1017+
return _Py_INIT_NO_MEMORY();
1018+
}
1019+
}
1020+
1021+
if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
1022+
return _Py_INIT_OK();
1023+
}
1024+
1025+
/* PYTHONIOENCODING environment variable */
1026+
const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
1027+
if (opt) {
1028+
char *pythonioencoding = _PyMem_RawStrdup(opt);
1029+
if (pythonioencoding == NULL) {
1030+
return _Py_INIT_NO_MEMORY();
1031+
}
1032+
1033+
char *err = strchr(pythonioencoding, ':');
1034+
if (err) {
1035+
*err = '\0';
1036+
err++;
1037+
if (!err[0]) {
1038+
err = NULL;
1039+
}
1040+
}
1041+
1042+
/* Does PYTHONIOENCODING contain an encoding? */
1043+
if (pythonioencoding[0]) {
1044+
if (config->stdio_encoding == NULL) {
1045+
config->stdio_encoding = _PyMem_RawStrdup(pythonioencoding);
1046+
if (config->stdio_encoding == NULL) {
1047+
PyMem_RawFree(pythonioencoding);
1048+
return _Py_INIT_NO_MEMORY();
1049+
}
1050+
}
1051+
1052+
/* If the encoding is set but not the error handler,
1053+
use "strict" error handler by default.
1054+
PYTHONIOENCODING=latin1 behaves as
1055+
PYTHONIOENCODING=latin1:strict. */
1056+
if (!err) {
1057+
err = "strict";
1058+
}
1059+
}
1060+
1061+
if (config->stdio_errors == NULL && err != NULL) {
1062+
config->stdio_errors = _PyMem_RawStrdup(err);
1063+
if (config->stdio_errors == NULL) {
1064+
PyMem_RawFree(pythonioencoding);
1065+
return _Py_INIT_NO_MEMORY();
1066+
}
1067+
}
1068+
1069+
PyMem_RawFree(pythonioencoding);
1070+
}
1071+
1072+
/* UTF-8 Mode uses UTF-8/surrogateescape */
1073+
if (config->utf8_mode) {
1074+
if (config->stdio_encoding == NULL) {
1075+
config->stdio_encoding = _PyMem_RawStrdup("utf-8");
1076+
if (config->stdio_encoding == NULL) {
1077+
return _Py_INIT_NO_MEMORY();
1078+
}
1079+
}
1080+
if (config->stdio_errors == NULL) {
1081+
config->stdio_errors = _PyMem_RawStrdup("surrogateescape");
1082+
if (config->stdio_errors == NULL) {
1083+
return _Py_INIT_NO_MEMORY();
1084+
}
1085+
}
1086+
}
1087+
1088+
/* Choose the default error handler based on the current locale. */
1089+
if (config->stdio_encoding == NULL) {
1090+
_PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
1091+
if (_Py_INIT_FAILED(err)) {
1092+
return err;
1093+
}
1094+
}
1095+
if (config->stdio_errors == NULL) {
1096+
const char *errors = get_stdio_errors(config);
1097+
config->stdio_errors = _PyMem_RawStrdup(errors);
1098+
if (config->stdio_errors == NULL) {
1099+
return _Py_INIT_NO_MEMORY();
1100+
}
1101+
}
1102+
1103+
return _Py_INIT_OK();
1104+
}
1105+
1106+
9351107
/* Read configuration settings from standard locations
9361108
*
9371109
* This function doesn't make any changes to the interpreter state - it
@@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
10441216
config->argc = 0;
10451217
}
10461218

1219+
err = config_init_stdio_encoding(config);
1220+
if (_Py_INIT_FAILED(err)) {
1221+
return err;
1222+
}
1223+
10471224
assert(config->coerce_c_locale >= 0);
10481225
assert(config->use_environment >= 0);
10491226

0 commit comments

Comments
 (0)