Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

bpo-36775: Add _Py_FORCE_UTF8_FS_ENCODING macro #13056

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Include/pyport.h
Original file line number Diff line number Diff line change
Expand Up @@ -819,4 +819,14 @@ extern _invalid_parameter_handler _Py_silent_invalid_parameter_handler;
# error "Py_TRACE_REFS ABI is not compatible with release and debug ABI"
#endif

#if defined(__ANDROID__) || defined(__VXWORKS__)
/* Ignore the locale encoding: force UTF-8 */
# define _Py_FORCE_UTF8_LOCALE
#endif

#if defined(_Py_FORCE_UTF8_LOCALE) || defined(__APPLE__)
/* Use UTF-8 as filesystem encoding */
# define _Py_FORCE_UTF8_FS_ENCODING
#endif

#endif /* Py_PYPORT_H */
4 changes: 2 additions & 2 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3506,7 +3506,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__)
#ifdef _Py_FORCE_UTF8_FS_ENCODING
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
#else
/* Bootstrap check: if the filesystem codec is implemented in Python, we
Expand Down Expand Up @@ -3730,7 +3730,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__)
#ifdef _Py_FORCE_UTF8_FS_ENCODING
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
#else
/* Bootstrap check: if the filesystem codec is implemented in Python, we
Expand Down
72 changes: 27 additions & 45 deletions Python/coreconfig.c
Original file line number Diff line number Diff line change
Expand Up @@ -1313,7 +1313,7 @@ config_get_locale_encoding(char **locale_encoding)
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
#elif defined(__ANDROID__) || defined(__VXWORKS__)
#elif defined(_Py_FORCE_UTF8_LOCALE)
const char *encoding = "UTF-8";
#else
const char *encoding = nl_langinfo(CODESET);
Expand Down Expand Up @@ -1450,81 +1450,63 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
{
_PyInitError err;

#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
if (config->filesystem_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"mbcs");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->filesystem_errors == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_errors,
"replace");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
}

/* Windows defaults to utf-8/surrogatepass (PEP 529).

Note: UTF-8 Mode takes the same code path and the Legacy Windows FS
encoding has priority over UTF-8 Mode. */
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
#else

if (config->filesystem_errors == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_errors,
"surrogatepass");
if (_Py_INIT_FAILED(err)) {
return err;
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"mbcs");
}
}
#else
if (config->filesystem_encoding == NULL) {
else
#endif
if (preconfig->utf8_mode) {
/* UTF-8 Mode uses utf-8/surrogateescape */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
/* errors defaults to surrogateescape above */
}
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"ascii");
}
#endif
else {
/* macOS and Android use UTF-8,
other platforms use the locale encoding. */
#if defined(__APPLE__) || defined(__ANDROID__)
#ifdef MS_WINDOWS
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
#else
err = config_get_locale_encoding(&config->filesystem_encoding);
#endif
}
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */

if (_Py_INIT_FAILED(err)) {
return err;
}
}

if (config->filesystem_errors == NULL) {
/* by default, use the "surrogateescape" error handler */
err = _PyCoreConfig_SetString(&config->filesystem_errors,
"surrogateescape");
const char *errors;
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
errors = "replace";
}
else {
errors = "surrogatepass";
}
#else
errors = "surrogateescape";
#endif
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) {
return err;
}
}
#endif
return _Py_INIT_OK();
}

Expand Down
16 changes: 8 additions & 8 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ _Py_device_encoding(int fd)
Py_RETURN_NONE;
}

#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)

#define USE_FORCE_ASCII

Expand Down Expand Up @@ -309,7 +309,7 @@ _Py_ResetForceASCII(void)
{
/* nothing to do */
}
#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */


#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
Expand Down Expand Up @@ -536,15 +536,15 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
int current_locale, _Py_error_handler errors)
{
if (current_locale) {
#if defined(__ANDROID__) || defined(__VXWORKS__)
#ifdef _Py_FORCE_UTF8_LOCALE
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors);
#else
return decode_current_locale(arg, wstr, wlen, reason, errors);
#endif
}

#if defined(__APPLE__) || defined(__ANDROID__) || defined(__VXWORKS__)
#ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors);
#else
Expand All @@ -569,7 +569,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
#endif

return decode_current_locale(arg, wstr, wlen, reason, errors);
#endif /* __APPLE__ or __ANDROID__ or __VXWORKS__ */
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
}


Expand Down Expand Up @@ -727,7 +727,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
int raw_malloc, int current_locale, _Py_error_handler errors)
{
if (current_locale) {
#ifdef __ANDROID__
#ifdef _Py_FORCE_UTF8_LOCALE
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors);
#else
Expand All @@ -736,7 +736,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
#endif
}

#if defined(__APPLE__) || defined(__ANDROID__)
#ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors);
#else
Expand All @@ -762,7 +762,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,

return encode_current_locale(text, str, error_pos, reason,
raw_malloc, errors);
#endif /* __APPLE__ or __ANDROID__ */
#endif /* _Py_FORCE_UTF8_FS_ENCODING */
}

static char*
Expand Down
3 changes: 1 addition & 2 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,7 @@ _Py_CoerceLegacyLocale(int warn)
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
defined(HAVE_LANGINFO_H) && defined(CODESET)
#if !defined(_Py_FORCE_UTF8_LOCALE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {
Expand Down