Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 43c5778

Browse files
committed
Rename utf8b error handler to surrogateescape.
1 parent e0a2b72 commit 43c5778

9 files changed

Lines changed: 30 additions & 30 deletions

File tree

Doc/library/codecs.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ and implemented by all standard Python codecs:
322322
| ``'backslashreplace'`` | Replace with backslashed escape sequences |
323323
| | (only for encoding). |
324324
+-------------------------+-----------------------------------------------+
325-
| ``'utf8b'`` | Replace byte with surrogate U+DCxx. |
325+
| ``'surrogateescape'`` | Replace byte with surrogate U+DCxx. |
326326
+-------------------------+-----------------------------------------------+
327327

328328
In addition, the following error handlers are specific to a single codec:
@@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
335335
+-------------------+---------+-------------------------------------------+
336336

337337
.. versionadded:: 3.1
338-
The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
338+
The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
339339

340340
The set of allowed values can be extended via :meth:`register_error`.
341341

Doc/library/os.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).
6464

6565
.. versionchanged:: 3.1
6666
On some systems, conversion using the file system encoding may
67-
fail. In this case, Python uses the ``utf8b`` encoding error
68-
handler, which means that undecodable bytes are replaced by a
67+
fail. In this case, Python uses the ``surrogateescape`` encoding
68+
error handler, which means that undecodable bytes are replaced by a
6969
Unicode character U+DCxx on decoding, and these are again
7070
translated to the original byte on encoding.
7171

Lib/test/test_codecs.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,32 +1521,32 @@ def test_unicode_escape(self):
15211521
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
15221522
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
15231523

1524-
class Utf8bTest(unittest.TestCase):
1524+
class SurrogateEscapeTest(unittest.TestCase):
15251525

15261526
def test_utf8(self):
15271527
# Bad byte
1528-
self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
1528+
self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
15291529
"foo\udc80bar")
1530-
self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
1530+
self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
15311531
b"foo\x80bar")
15321532
# bad-utf-8 encoded surrogate
1533-
self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
1533+
self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
15341534
"\udced\udcb0\udc80")
1535-
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
1535+
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
15361536
b"\xed\xb0\x80")
15371537

15381538
def test_ascii(self):
15391539
# bad byte
1540-
self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
1540+
self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
15411541
"foo\udc80bar")
1542-
self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
1542+
self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
15431543
b"foo\x80bar")
15441544

15451545
def test_charmap(self):
15461546
# bad byte: \xa5 is unmapped in iso-8859-3
1547-
self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
1547+
self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
15481548
"foo\udca5bar")
1549-
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
1549+
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
15501550
b"foo\xa5bar")
15511551

15521552

@@ -1576,7 +1576,7 @@ def test_main():
15761576
CharmapTest,
15771577
WithStmtTest,
15781578
TypesTest,
1579-
Utf8bTest,
1579+
SurrogateEscapeTest,
15801580
)
15811581

15821582

Lib/test/test_os.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -708,13 +708,13 @@ def setUp(self):
708708
self.fsencoding = sys.getfilesystemencoding()
709709
sys.setfilesystemencoding("utf-8")
710710
self.dir = support.TESTFN
711-
self.bdir = self.dir.encode("utf-8", "utf8b")
711+
self.bdir = self.dir.encode("utf-8", "surrogateescape")
712712
os.mkdir(self.dir)
713713
self.unicodefn = []
714714
for fn in self.filenames:
715715
f = open(os.path.join(self.bdir, fn), "w")
716716
f.close()
717-
self.unicodefn.append(fn.decode("utf-8", "utf8b"))
717+
self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))
718718

719719
def tearDown(self):
720720
shutil.rmtree(self.dir)

Modules/_io/fileio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
245245
return -1;
246246

247247
stringobj = PyUnicode_AsEncodedString(
248-
u, Py_FileSystemDefaultEncoding, "utf8b");
248+
u, Py_FileSystemDefaultEncoding, "surrogateescape");
249249
Py_DECREF(u);
250250
if (stringobj == NULL)
251251
return -1;

Modules/posixmodule.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -494,13 +494,13 @@ convertenviron(void)
494494
if (p == NULL)
495495
continue;
496496
k = PyUnicode_Decode(*e, (int)(p-*e),
497-
Py_FileSystemDefaultEncoding, "utf8b");
497+
Py_FileSystemDefaultEncoding, "surrogateescape");
498498
if (k == NULL) {
499499
PyErr_Clear();
500500
continue;
501501
}
502502
v = PyUnicode_Decode(p+1, strlen(p+1),
503-
Py_FileSystemDefaultEncoding, "utf8b");
503+
Py_FileSystemDefaultEncoding, "surrogateescape");
504504
if (v == NULL) {
505505
PyErr_Clear();
506506
Py_DECREF(k);
@@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)
21672167
return posix_error();
21682168
if (use_bytes)
21692169
return PyBytes_FromStringAndSize(buf, strlen(buf));
2170-
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
2170+
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");
21712171
}
21722172

21732173
PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)
25132513

25142514
w = PyUnicode_FromEncodedObject(v,
25152515
Py_FileSystemDefaultEncoding,
2516-
"utf8b");
2516+
"surrogateescape");
25172517
Py_DECREF(v);
25182518
if (w != NULL)
25192519
v = w;
@@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)
46954695

46964696
w = PyUnicode_FromEncodedObject(v,
46974697
Py_FileSystemDefaultEncoding,
4698-
"utf8b");
4698+
"surrogateescape");
46994699
if (w != NULL) {
47004700
Py_DECREF(v);
47014701
v = w;

Modules/python.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ char2wchar(char* arg)
4242
return res;
4343
PyMem_Free(res);
4444
}
45-
/* Conversion failed. Fall back to escaping with utf8b. */
45+
/* Conversion failed. Fall back to escaping with surrogateescape. */
4646
#ifdef HAVE_MBRTOWC
4747
/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
4848

Objects/unicodeobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
15491549
return 0;
15501550
output = PyUnicode_AsEncodedObject(arg,
15511551
Py_FileSystemDefaultEncoding,
1552-
"utf8b");
1552+
"surrogateescape");
15531553
Py_DECREF(arg);
15541554
if (!output)
15551555
return 0;

Python/codecs.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
830830
}
831831

832832
static PyObject *
833-
PyCodec_UTF8bErrors(PyObject *exc)
833+
PyCodec_SurrogateEscapeErrors(PyObject *exc)
834834
{
835835
PyObject *restuple;
836836
PyObject *object;
@@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
940940
return PyCodec_SurrogatePassErrors(exc);
941941
}
942942

943-
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
943+
static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
944944
{
945-
return PyCodec_UTF8bErrors(exc);
945+
return PyCodec_SurrogateEscapeErrors(exc);
946946
}
947947

948948
static int _PyCodecRegistry_Init(void)
@@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)
10011001
}
10021002
},
10031003
{
1004-
"utf8b",
1004+
"surrogateescape",
10051005
{
1006-
"utf8b",
1007-
utf8b_errors,
1006+
"surrogateescape",
1007+
surrogateescape_errors,
10081008
METH_O
10091009
}
10101010
}

0 commit comments

Comments
 (0)