Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1b57967

Browse files
author
Victor Stinner
committed
Issue #13560: Locale codec functions use the classic "errors" parameter
instead of surrogateescape, so it would be possible to support more error handlers later.
1 parent ab59594 commit 1b57967

6 files changed

Lines changed: 49 additions & 17 deletions

File tree

Include/unicodeobject.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
16081608
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
16091609
const char *str,
16101610
Py_ssize_t len,
1611-
int surrogateescape);
1611+
const char *errors);
16121612

16131613
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
16141614
length using strlen(). */
16151615

16161616
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
16171617
const char *str,
1618-
int surrogateescape);
1618+
const char *errors);
16191619

16201620
/* Encode a Unicode object to the current locale encoding. The encoder is
16211621
strict if *surrogateescape* is equal to zero, otherwise the
@@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
16241624

16251625
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
16261626
PyObject *unicode,
1627-
int surrogateescape
1627+
const char *errors
16281628
);
16291629

16301630
/* --- File system encoding ---------------------------------------------- */

Modules/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv)
495495
/* Use utf-8 on Mac OS X */
496496
unicode = PyUnicode_FromString(p);
497497
#else
498-
unicode = PyUnicode_DecodeLocale(p, 1);
498+
unicode = PyUnicode_DecodeLocale(p, "surrogateescape");
499499
#endif
500500
if (unicode == NULL) {
501501
/* ignore errors */

Modules/posixmodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args)
78917891
"strerror() argument out of range");
78927892
return NULL;
78937893
}
7894-
return PyUnicode_DecodeLocale(message, 1);
7894+
return PyUnicode_DecodeLocale(message, "surrogateescape");
78957895
}
78967896

78977897

Modules/timemodule.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
486486
fmt = format;
487487
#else
488488
/* Convert the unicode string to an ascii one */
489-
format = PyUnicode_EncodeLocale(format_arg, 1);
489+
format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
490490
if (format == NULL)
491491
return NULL;
492492
fmt = PyBytes_AS_STRING(format);
@@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args)
532532
#ifdef HAVE_WCSFTIME
533533
ret = PyUnicode_FromWideChar(outbuf, buflen);
534534
#else
535-
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1);
535+
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen,
536+
"surrogateescape");
536537
#endif
537538
PyMem_Free(outbuf);
538539
break;
@@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) {
764765
#endif /* PYOS_OS2 */
765766
#endif
766767
PyModule_AddIntConstant(m, "daylight", daylight);
767-
otz0 = PyUnicode_DecodeLocale(tzname[0], 1);
768-
otz1 = PyUnicode_DecodeLocale(tzname[1], 1);
768+
otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
769+
otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
769770
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
770771
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
771772
#ifdef HAVE_STRUCT_TM_TM_ZONE

Objects/unicodeobject.c

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr)
31253125
return 0;
31263126
}
31273127

3128+
static int
3129+
locale_error_handler(const char *errors, int *surrogateescape)
3130+
{
3131+
if (errors == NULL) {
3132+
*surrogateescape = 0;
3133+
return 0;
3134+
}
3135+
3136+
if (strcmp(errors, "strict") == 0) {
3137+
*surrogateescape = 0;
3138+
return 0;
3139+
}
3140+
if (strcmp(errors, "surrogateescape") == 0) {
3141+
*surrogateescape = 1;
3142+
return 0;
3143+
}
3144+
PyErr_Format(PyExc_ValueError,
3145+
"only 'strict' and 'surrogateescape' error handlers "
3146+
"are supported, not '%s'",
3147+
errors);
3148+
return -1;
3149+
}
3150+
31283151
PyObject *
3129-
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
3152+
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
31303153
{
31313154
Py_ssize_t wlen, wlen2;
31323155
wchar_t *wstr;
@@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
31353158
PyObject *reason;
31363159
PyObject *exc;
31373160
size_t error_pos;
3161+
int surrogateescape;
3162+
3163+
if (locale_error_handler(errors, &surrogateescape) < 0)
3164+
return NULL;
31383165

31393166
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
31403167
if (wstr == NULL)
@@ -3198,7 +3225,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
31983225
Py_XDECREF(bytes);
31993226

32003227
if (errmsg != NULL)
3201-
reason = PyUnicode_DecodeLocale(errmsg, 1);
3228+
reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
32023229
else
32033230
reason = PyUnicode_FromString(
32043231
"wcstombs() encountered an unencodable "
@@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
32433270
"surrogateescape");
32443271
}
32453272
else {
3246-
return PyUnicode_EncodeLocale(unicode, 1);
3273+
return PyUnicode_EncodeLocale(unicode, "surrogateescape");
32473274
}
32483275
#endif
32493276
}
@@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
33513378

33523379
PyObject*
33533380
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
3354-
int surrogateescape)
3381+
const char *errors)
33553382
{
33563383
wchar_t smallbuf[256];
33573384
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
33583385
wchar_t *wstr;
33593386
size_t wlen, wlen2;
33603387
PyObject *unicode;
3388+
int surrogateescape;
3389+
3390+
if (locale_error_handler(errors, &surrogateescape) < 0)
3391+
return NULL;
33613392

33623393
if (str[len] != '\0' || len != strlen(str)) {
33633394
PyErr_SetString(PyExc_TypeError, "embedded null character");
@@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
34193450
}
34203451

34213452
PyObject*
3422-
PyUnicode_DecodeLocale(const char *str, int surrogateescape)
3453+
PyUnicode_DecodeLocale(const char *str, const char *errors)
34233454
{
34243455
Py_ssize_t size = (Py_ssize_t)strlen(str);
3425-
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
3456+
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
34263457
}
34273458

34283459

@@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
34563487
"surrogateescape");
34573488
}
34583489
else {
3459-
return PyUnicode_DecodeLocaleAndSize(s, size, 1);
3490+
return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape");
34603491
}
34613492
#endif
34623493
}

Python/errors.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
355355
#ifndef MS_WINDOWS
356356
if (i != 0) {
357357
char *s = strerror(i);
358-
message = PyUnicode_DecodeLocale(s, 1);
358+
message = PyUnicode_DecodeLocale(s, "surrogateescape");
359359
}
360360
else {
361361
/* Sometimes errno didn't get set */

0 commit comments

Comments
 (0)