Add a test that checks the basic functionality of every encoding.

doerwalter · doerwalter · commit ee1d24703ffc · 2004-12-29T16:04:38.000Z
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -549,6 +549,182 @@ def test_readlines(self):
         f = self.reader(self.stream)
         self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
 
+all_unicode_encodings = [ # codec names that BasicUnicodeTest.test_basics loops over
+    "ascii",
+    "base64_codec",
+    "big5",
+    "big5hkscs",
+    "charmap",
+    "cp037",
+    "cp1006",
+    "cp1026",
+    "cp1140",
+    "cp1250",
+    "cp1251",
+    "cp1252",
+    "cp1253",
+    "cp1254",
+    "cp1255",
+    "cp1256",
+    "cp1257",
+    "cp1258",
+    "cp424",
+    "cp437",
+    "cp500",
+    "cp737",
+    "cp775",
+    "cp850",
+    "cp852",
+    "cp855",
+    "cp856",
+    "cp857",
+    "cp860",
+    "cp861",
+    "cp862",
+    "cp863",
+    "cp864",
+    "cp865",
+    "cp866",
+    "cp869",
+    "cp874",
+    "cp875",
+    "cp932",
+    "cp949",
+    "cp950",
+    "euc_jis_2004",
+    "euc_jisx0213",
+    "euc_jp",
+    "euc_kr",
+    "gb18030",
+    "gb2312",
+    "gbk",
+    "hex_codec",
+    "hp_roman8",
+    "hz",
+    "idna",
+    "iso2022_jp",
+    "iso2022_jp_1",
+    "iso2022_jp_2",
+    "iso2022_jp_2004",
+    "iso2022_jp_3",
+    "iso2022_jp_ext",
+    "iso2022_kr",
+    "iso8859_1",
+    "iso8859_10",
+    "iso8859_11",
+    "iso8859_13",
+    "iso8859_14",
+    "iso8859_15",
+    "iso8859_16",
+    "iso8859_2",
+    "iso8859_3",
+    "iso8859_4",
+    "iso8859_5",
+    "iso8859_6",
+    "iso8859_7",
+    "iso8859_8",
+    "iso8859_9",
+    "johab",
+    "koi8_r",
+    "koi8_u",
+    "latin_1",
+    "mac_cyrillic",
+    "mac_greek",
+    "mac_iceland",
+    "mac_latin2",
+    "mac_roman",
+    "mac_turkish",
+    "palmos",
+    "ptcp154",
+    "punycode",
+    "raw_unicode_escape",
+    "rot_13",
+    "shift_jis",
+    "shift_jis_2004",
+    "shift_jisx0213",
+    "tis_620",
+    "unicode_escape",
+    "unicode_internal",
+    "utf_16",
+    "utf_16_be",
+    "utf_16_le",
+    "utf_7",
+    "utf_8",
+]
+
+if hasattr(codecs, "mbcs_encode"): # test mbcs only when this codecs module exposes it
+    all_unicode_encodings.append("mbcs")
+
+# The following encodings work only with str, not unicode (used by BasicStrTest)
+all_string_encodings = [
+    "quopri_codec",
+    "string_escape",
+    "uu_codec",
+]
+
+# The following encoding is not tested, because it's not supposed
+# to work:
+#    "undefined"
+
+# The following encodings don't work in stateful mode (stream check is skipped)
+broken_unicode_with_streams = [
+    "base64_codec",
+    "hex_codec",
+    "punycode",
+    "unicode_internal"
+]
+
+try:
+    import bz2 # availability probe: bz2_codec is only listed if this succeeds
+except ImportError:
+    pass # no bz2 module: bz2_codec stays out of the tested encodings
+else:
+    all_unicode_encodings.append("bz2_codec")
+    broken_unicode_with_streams.append("bz2_codec")
+
+try:
+    import zlib # availability probe: zlib_codec is only listed if this succeeds
+except ImportError:
+    pass # no zlib module: zlib_codec stays out of the tested encodings
+else:
+    all_unicode_encodings.append("zlib_codec")
+    broken_unicode_with_streams.append("zlib_codec")
+
+class BasicUnicodeTest(unittest.TestCase):
+    def test_basics(self):
+        s = u"abc123" # all codecs should be able to encode these
+        for encoding in all_unicode_encodings:
+            (bytes, size) = codecs.getencoder(encoding)(s)
+            if encoding != "unicode_internal":
+                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
+            (chars, size) = codecs.getdecoder(encoding)(bytes)
+            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
+            if encoding not in broken_unicode_with_streams:
+                # check stream reader/writer
+                q = Queue()
+                writer = codecs.getwriter(encoding)(q)
+                encodedresult = ""
+                for c in s:
+                    writer.write(c)
+                    encodedresult += q.read()
+                q = Queue()
+                reader = codecs.getreader(encoding)(q)
+                decodedresult = u""
+                for c in encodedresult:
+                    q.write(c)
+                    decodedresult += reader.read()
+                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
+
+class BasicStrTest(unittest.TestCase):
+    def test_basics(self):
+        s = "abc123"
+        for encoding in all_string_encodings:
+            (bytes, size) = codecs.getencoder(encoding)(s)
+            self.assertEqual(size, len(s))
+            (chars, size) = codecs.getdecoder(encoding)(bytes)
+            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
 def test_main():
     test_support.run_unittest(
         UTF16Test,
@@ -561,7 +737,9 @@ def test_main():
         NameprepTest,
         CodecTest,
         CodecsModuleTest,
-        StreamReaderTest
+        StreamReaderTest,
+        BasicUnicodeTest,
+        BasicStrTest
     )