11#include "Python.h"
22#include "internal/pystate.h"
33#include <locale.h>
4+ #ifdef HAVE_LANGINFO_H
5+ # include <langinfo.h>
6+ #endif
47
58
69#define DECODE_LOCALE_ERR (NAME , LEN ) \
@@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list)
8992 * mechanism that attempts to figure out an appropriate IO encoding
9093 */
9194
92- char * _Py_StandardStreamEncoding = NULL ;
93- char * _Py_StandardStreamErrors = NULL ;
95+ static char * _Py_StandardStreamEncoding = NULL ;
96+ static char * _Py_StandardStreamErrors = NULL ;
9497
9598int
9699Py_SetStandardStreamEncoding (const char * encoding , const char * errors )
@@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
205208 CLEAR (config -> dll_path );
206209#endif
207210 CLEAR (config -> base_exec_prefix );
211+
212+ CLEAR (config -> stdio_encoding );
213+ CLEAR (config -> stdio_errors );
208214#undef CLEAR
209215#undef CLEAR_WSTRLIST
210216}
@@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
216222 _PyCoreConfig_Clear (config );
217223
218224#define COPY_ATTR (ATTR ) config->ATTR = config2->ATTR
225+ #define COPY_STR_ATTR (ATTR ) \
226+ do { \
227+ if (config2->ATTR != NULL) { \
228+ config->ATTR = _PyMem_RawStrdup(config2->ATTR); \
229+ if (config->ATTR == NULL) { \
230+ return -1; \
231+ } \
232+ } \
233+ } while (0)
219234#define COPY_WSTR_ATTR (ATTR ) \
220235 do { \
221236 if (config2->ATTR != NULL) { \
@@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
287302 COPY_ATTR (quiet );
288303 COPY_ATTR (user_site_directory );
289304 COPY_ATTR (buffered_stdio );
305+ COPY_STR_ATTR (stdio_encoding );
306+ COPY_STR_ATTR (stdio_errors );
290307#ifdef MS_WINDOWS
291308 COPY_ATTR (legacy_windows_fs_encoding );
292309 COPY_ATTR (legacy_windows_stdio );
@@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config)
932949}
933950
934951
952+ static const char *
953+ get_stdio_errors (const _PyCoreConfig * config )
954+ {
955+ #ifndef MS_WINDOWS
956+ const char * loc = setlocale (LC_CTYPE , NULL );
957+ if (loc != NULL ) {
958+ /* surrogateescape is the default in the legacy C and POSIX locales */
959+ if (strcmp (loc , "C" ) == 0 || strcmp (loc , "POSIX" ) == 0 ) {
960+ return "surrogateescape" ;
961+ }
962+
963+ #ifdef PY_COERCE_C_LOCALE
964+ /* surrogateescape is the default in locale coercion target locales */
965+ if (_Py_IsLocaleCoercionTarget (loc )) {
966+ return "surrogateescape" ;
967+ }
968+ #endif
969+ }
970+
971+ return "strict" ;
972+ #else
973+ /* On Windows, always use surrogateescape by default */
974+ return "surrogateescape" ;
975+ #endif
976+ }
977+
978+
979+ _PyInitError
980+ _Py_get_locale_encoding (char * * locale_encoding )
981+ {
982+ #ifdef MS_WINDOWS
983+ char encoding [20 ];
984+ PyOS_snprintf (encoding , sizeof (encoding ), "cp%d" , GetACP ());
985+ #elif defined(__ANDROID__ )
986+ const char * encoding = "UTF-8" ;
987+ #else
988+ const char * encoding = nl_langinfo (CODESET );
989+ if (!encoding || encoding [0 ] == '\0' ) {
990+ return _Py_INIT_USER_ERR ("failed to get the locale encoding: "
991+ "nl_langinfo(CODESET) failed" );
992+ }
993+ #endif
994+ * locale_encoding = _PyMem_RawStrdup (encoding );
995+ if (* locale_encoding == NULL ) {
996+ return _Py_INIT_NO_MEMORY ();
997+ }
998+ return _Py_INIT_OK ();
999+ }
1000+
1001+
1002+ static _PyInitError
1003+ config_init_stdio_encoding (_PyCoreConfig * config )
1004+ {
1005+ /* If Py_SetStandardStreamEncoding() have been called, use these
1006+ parameters. */
1007+ if (config -> stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL ) {
1008+ config -> stdio_encoding = _PyMem_RawStrdup (_Py_StandardStreamEncoding );
1009+ if (config -> stdio_encoding == NULL ) {
1010+ return _Py_INIT_NO_MEMORY ();
1011+ }
1012+ }
1013+
1014+ if (config -> stdio_errors == NULL && _Py_StandardStreamErrors != NULL ) {
1015+ config -> stdio_errors = _PyMem_RawStrdup (_Py_StandardStreamErrors );
1016+ if (config -> stdio_errors == NULL ) {
1017+ return _Py_INIT_NO_MEMORY ();
1018+ }
1019+ }
1020+
1021+ if (config -> stdio_encoding != NULL && config -> stdio_errors != NULL ) {
1022+ return _Py_INIT_OK ();
1023+ }
1024+
1025+ /* PYTHONIOENCODING environment variable */
1026+ const char * opt = _PyCoreConfig_GetEnv (config , "PYTHONIOENCODING" );
1027+ if (opt ) {
1028+ char * pythonioencoding = _PyMem_RawStrdup (opt );
1029+ if (pythonioencoding == NULL ) {
1030+ return _Py_INIT_NO_MEMORY ();
1031+ }
1032+
1033+ char * err = strchr (pythonioencoding , ':' );
1034+ if (err ) {
1035+ * err = '\0' ;
1036+ err ++ ;
1037+ if (!err [0 ]) {
1038+ err = NULL ;
1039+ }
1040+ }
1041+
1042+ /* Does PYTHONIOENCODING contain an encoding? */
1043+ if (pythonioencoding [0 ]) {
1044+ if (config -> stdio_encoding == NULL ) {
1045+ config -> stdio_encoding = _PyMem_RawStrdup (pythonioencoding );
1046+ if (config -> stdio_encoding == NULL ) {
1047+ PyMem_RawFree (pythonioencoding );
1048+ return _Py_INIT_NO_MEMORY ();
1049+ }
1050+ }
1051+
1052+ /* If the encoding is set but not the error handler,
1053+ use "strict" error handler by default.
1054+ PYTHONIOENCODING=latin1 behaves as
1055+ PYTHONIOENCODING=latin1:strict. */
1056+ if (!err ) {
1057+ err = "strict" ;
1058+ }
1059+ }
1060+
1061+ if (config -> stdio_errors == NULL && err != NULL ) {
1062+ config -> stdio_errors = _PyMem_RawStrdup (err );
1063+ if (config -> stdio_errors == NULL ) {
1064+ PyMem_RawFree (pythonioencoding );
1065+ return _Py_INIT_NO_MEMORY ();
1066+ }
1067+ }
1068+
1069+ PyMem_RawFree (pythonioencoding );
1070+ }
1071+
1072+ /* UTF-8 Mode uses UTF-8/surrogateescape */
1073+ if (config -> utf8_mode ) {
1074+ if (config -> stdio_encoding == NULL ) {
1075+ config -> stdio_encoding = _PyMem_RawStrdup ("utf-8" );
1076+ if (config -> stdio_encoding == NULL ) {
1077+ return _Py_INIT_NO_MEMORY ();
1078+ }
1079+ }
1080+ if (config -> stdio_errors == NULL ) {
1081+ config -> stdio_errors = _PyMem_RawStrdup ("surrogateescape" );
1082+ if (config -> stdio_errors == NULL ) {
1083+ return _Py_INIT_NO_MEMORY ();
1084+ }
1085+ }
1086+ }
1087+
1088+ /* Choose the default error handler based on the current locale. */
1089+ if (config -> stdio_encoding == NULL ) {
1090+ _PyInitError err = _Py_get_locale_encoding (& config -> stdio_encoding );
1091+ if (_Py_INIT_FAILED (err )) {
1092+ return err ;
1093+ }
1094+ }
1095+ if (config -> stdio_errors == NULL ) {
1096+ const char * errors = get_stdio_errors (config );
1097+ config -> stdio_errors = _PyMem_RawStrdup (errors );
1098+ if (config -> stdio_errors == NULL ) {
1099+ return _Py_INIT_NO_MEMORY ();
1100+ }
1101+ }
1102+
1103+ return _Py_INIT_OK ();
1104+ }
1105+
1106+
9351107/* Read configuration settings from standard locations
9361108 *
9371109 * This function doesn't make any changes to the interpreter state - it
@@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
10441216 config -> argc = 0 ;
10451217 }
10461218
1219+ err = config_init_stdio_encoding (config );
1220+ if (_Py_INIT_FAILED (err )) {
1221+ return err ;
1222+ }
1223+
10471224 assert (config -> coerce_c_locale >= 0 );
10481225 assert (config -> use_environment >= 0 );
10491226
0 commit comments