Rename utf8b error handler to surrogateescape.

loewis · loewis · commit 43c57785d331 · 2009-05-10T08:15:24.000Z
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
@@ -322,7 +322,7 @@ and implemented by all standard Python codecs:
 | ``'backslashreplace'``  | Replace with backslashed escape sequences     |
 |                         | (only for encoding).                          |
 +-------------------------+-----------------------------------------------+
-| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           |
+| ``'surrogateescape'``   | Replace byte with surrogate U+DCxx.           |
 +-------------------------+-----------------------------------------------+
 
 In addition, the following error handlers are specific to a single codec:
@@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
 +-------------------+---------+-------------------------------------------+
 
 .. versionadded:: 3.1
-   The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
+   The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
 
 The set of allowed values can be extended via :meth:`register_error`.
 
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
@@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).
 
 .. versionchanged:: 3.1
    On some systems, conversion using the file system encoding may
-   fail. In this case, Python uses the ``utf8b`` encoding error
-   handler, which means that undecodable bytes are replaced by a
+   fail. In this case, Python uses the ``surrogateescape`` encoding
+   error handler, which means that undecodable bytes are replaced by a
    Unicode character U+DCxx on decoding, and these are again
    translated to the original byte on encoding.
 
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -1521,32 +1521,32 @@ def test_unicode_escape(self):
         self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
         self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
 
-class Utf8bTest(unittest.TestCase):
+class SurrogateEscapeTest(unittest.TestCase):
 
     def test_utf8(self):
         # Bad byte
-        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
                          "foo\udc80bar")
-        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+        self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
                          b"foo\x80bar")
         # bad-utf-8 encoded surrogate
-        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
                          "\udced\udcb0\udc80")
-        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
                          b"\xed\xb0\x80")
 
     def test_ascii(self):
         # bad byte
-        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
                          "foo\udc80bar")
-        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+        self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
                          b"foo\x80bar")
 
     def test_charmap(self):
         # bad byte: \xa5 is unmapped in iso-8859-3
-        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
                          "foo\udca5bar")
-        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                          b"foo\xa5bar")
 
 
@@ -1576,7 +1576,7 @@ def test_main():
         CharmapTest,
         WithStmtTest,
         TypesTest,
-        Utf8bTest,
+        SurrogateEscapeTest,
     )
 
 
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
@@ -708,13 +708,13 @@ def setUp(self):
             self.fsencoding = sys.getfilesystemencoding()
             sys.setfilesystemencoding("utf-8")
             self.dir = support.TESTFN
-            self.bdir = self.dir.encode("utf-8", "utf8b")
+            self.bdir = self.dir.encode("utf-8", "surrogateescape")
             os.mkdir(self.dir)
             self.unicodefn = []
             for fn in self.filenames:
                 f = open(os.path.join(self.bdir, fn), "w")
                 f.close()
-                self.unicodefn.append(fn.decode("utf-8", "utf8b"))
+                self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))
 
         def tearDown(self):
             shutil.rmtree(self.dir)
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
 				return -1;
 
 			stringobj = PyUnicode_AsEncodedString(
-				u, Py_FileSystemDefaultEncoding, "utf8b");
+				u, Py_FileSystemDefaultEncoding, "surrogateescape");
 			Py_DECREF(u);
 			if (stringobj == NULL)
 				return -1;
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
@@ -494,13 +494,13 @@ convertenviron(void)
 		if (p == NULL)
 			continue;
 		k = PyUnicode_Decode(*e, (int)(p-*e),
-				     Py_FileSystemDefaultEncoding, "utf8b");
+				     Py_FileSystemDefaultEncoding, "surrogateescape");
 		if (k == NULL) {
 			PyErr_Clear();
 			continue;
 		}
 		v = PyUnicode_Decode(p+1, strlen(p+1),
-				     Py_FileSystemDefaultEncoding, "utf8b");
+				     Py_FileSystemDefaultEncoding, "surrogateescape");
 		if (v == NULL) {
 			PyErr_Clear();
 			Py_DECREF(k);
@@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)
 		return posix_error();
 	if (use_bytes)
 		return PyBytes_FromStringAndSize(buf, strlen(buf));
-	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
+	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");
 }
 
 PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)
 
 			w = PyUnicode_FromEncodedObject(v,
 					Py_FileSystemDefaultEncoding,
-					"utf8b");
+					"surrogateescape");
 			Py_DECREF(v);
 			if (w != NULL)
 				v = w;
@@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)
 
 		w = PyUnicode_FromEncodedObject(v,
 				Py_FileSystemDefaultEncoding,
-				"utf8b");
+				"surrogateescape");
 		if (w != NULL) {
 			Py_DECREF(v);
 			v = w;
diff --git a/Modules/python.c b/Modules/python.c
@@ -42,7 +42,7 @@ char2wchar(char* arg)
 			return res;
 		PyMem_Free(res);
 	}
-	/* Conversion failed. Fall back to escaping with utf8b. */
+	/* Conversion failed. Fall back to escaping with surrogateescape. */
 #ifdef HAVE_MBRTOWC
 	/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
 	
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
             return 0;
         output = PyUnicode_AsEncodedObject(arg, 
                                            Py_FileSystemDefaultEncoding,
-                                           "utf8b");
+                                           "surrogateescape");
         Py_DECREF(arg);
         if (!output)
             return 0;
diff --git a/Python/codecs.c b/Python/codecs.c
@@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
 }
 
 static PyObject *
-PyCodec_UTF8bErrors(PyObject *exc)
+PyCodec_SurrogateEscapeErrors(PyObject *exc)
 {
     PyObject *restuple;
     PyObject *object;
@@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
     return PyCodec_SurrogatePassErrors(exc);
 }
 
-static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
 {
-    return PyCodec_UTF8bErrors(exc);
+    return PyCodec_SurrogateEscapeErrors(exc);
 }
 
 static int _PyCodecRegistry_Init(void)
@@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)
 	    }
 	},
 	{
-	    "utf8b",
+	    "surrogateescape",
 	    {
-		"utf8b",
-		utf8b_errors,
+		"surrogateescape",
+		surrogateescape_errors,
 		METH_O
 	    }
 	}

Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ char2wchar(char* arg)`
`42`	`42`	`return res;`
`43`	`43`	`PyMem_Free(res);`
`44`	`44`	`}`
`45`		`- /* Conversion failed. Fall back to escaping with utf8b. */`
	`45`	`+ /* Conversion failed. Fall back to escaping with surrogateescape. */`
`46`	`46`	`#ifdef HAVE_MBRTOWC`
`47`	`47`	`/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */`
`48`	`48`
Original file line number	Diff line number	Diff line change
`@@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)`
`830`	`830`	`}`
`831`	`831`
`832`	`832`	`static PyObject *`
`833`		`-PyCodec_UTF8bErrors(PyObject *exc)`
	`833`	`+PyCodec_SurrogateEscapeErrors(PyObject *exc)`
`834`	`834`	`{`
`835`	`835`	`PyObject *restuple;`
`836`	`836`	`PyObject *object;`
`@@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)`
`940`	`940`	`return PyCodec_SurrogatePassErrors(exc);`
`941`	`941`	`}`
`942`	`942`
`943`		`-static PyObject *utf8b_errors(PyObject *self, PyObject *exc)`
	`943`	`+static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)`
`944`	`944`	`{`
`945`		`- return PyCodec_UTF8bErrors(exc);`
	`945`	`+ return PyCodec_SurrogateEscapeErrors(exc);`
`946`	`946`	`}`
`947`	`947`
`948`	`948`	`static int _PyCodecRegistry_Init(void)`
`@@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)`
`1001`	`1001`	`}`
`1002`	`1002`	`},`
`1003`	`1003`	`{`
`1004`		`- "utf8b",`
	`1004`	`+ "surrogateescape",`
`1005`	`1005`	`{`
`1006`		`- "utf8b",`
`1007`		`- utf8b_errors,`
	`1006`	`+ "surrogateescape",`
	`1007`	`+ surrogateescape_errors,`
`1008`	`1008`	`METH_O`
`1009`	`1009`	`}`
`1010`	`1010`	`}`