From 6816eb3af3573bea081e2d18591ddf85a1cf9a85 Mon Sep 17 00:00:00 2001
From: Srinivas Reddy Thatiparthy <thatiparthysreenivas@gmail.com>
Date: Thu, 25 Jun 2020 22:10:25 +0530
Subject: [PATCH 1/4] bpo-41115: Convert UnicodeError to UnicodeEncodeError|
 UnicodeDecodeError in idna.py, utf_16.py, utf_32.py, punycode.py,
 undefined.py modules.

---
 Lib/encodings/idna.py                         | 22 +++++++++----------
 Lib/encodings/punycode.py                     | 21 ++++++++++--------
 Lib/encodings/undefined.py                    | 22 +++++++++----------
 Lib/encodings/utf_16.py                       |  6 ++---
 Lib/encodings/utf_32.py                       |  4 ++--
 Lib/test/test_codecs.py                       |  4 ++--
 .../2020-06-26-06-03-42.bpo-41115.NTjUWO.rst  |  2 ++
 7 files changed, 43 insertions(+), 38 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst

diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index ea4058512fe366..962afa82309f19 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -63,14 +63,14 @@ def ToASCII(label):
     try:
         # Step 1: try ASCII
         label = label.encode("ascii")
-    except UnicodeError:
+    except UnicodeEncodeError:
         pass
     else:
         # Skip to step 3: UseSTD3ASCIIRules is false, so
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeError("label empty or too long")
+        raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -79,17 +79,17 @@ def ToASCII(label):
     # Step 4: try ASCII
     try:
         label = label.encode("ascii")
-    except UnicodeError:
+    except UnicodeEncodeError:
         pass
     else:
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeError("label empty or too long")
+        raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
 
     # Step 5: Check ACE prefix
     if label.startswith(sace_prefix):
-        raise UnicodeError("Label starts with ACE prefix")
+        raise UnicodeEncodeError("ascii", label, 0, len(label), "Label starts with ACE prefix")
 
     # Step 6: Encode with PUNYCODE
     label = label.encode("punycode")
@@ -100,7 +100,7 @@ def ToASCII(label):
     # Step 8: Check size
     if 0 < len(label) < 64:
         return label
-    raise UnicodeError("label empty or too long")
+    raise UnicodeEncodeError("punycode", label, 0, len(label), "label empty or too long")
 
 def ToUnicode(label):
     # Step 1: Check for ASCII
@@ -110,7 +110,7 @@ def ToUnicode(label):
         try:
             label = label.encode("ascii")
             pure_ascii = True
-        except UnicodeError:
+        except UnicodeEncodeError:
             pure_ascii = False
     if not pure_ascii:
         # Step 2: Perform nameprep
@@ -118,8 +118,8 @@ def ToUnicode(label):
         # It doesn't say this, but apparently, it should be ASCII now
         try:
             label = label.encode("ascii")
-        except UnicodeError:
-            raise UnicodeError("Invalid character in IDN label")
+        except UnicodeEncodeError:
+            raise UnicodeEncodeError("ascii", label, 0, len(label), "Invalid character in IDN label")
     # Step 3: Check for ACE prefix
     if not label.startswith(ace_prefix):
         return str(label, "ascii")
@@ -162,9 +162,9 @@ def encode(self, input, errors='strict'):
             labels = result.split(b'.')
             for label in labels[:-1]:
                 if not (0 < len(label) < 64):
-                    raise UnicodeError("label empty or too long")
+                    raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
             if len(labels[-1]) >= 64:
-                raise UnicodeError("label too long")
+                raise UnicodeEncodeError("ascii", labels[-1], 0, len(labels[-1]), "label too long")
             return result, len(input)
 
         result = bytearray()
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 1c5726447077b1..0054f47efba8a5 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -1,4 +1,4 @@
-""" Codec for the Punicode encoding, as specified in RFC 3492
+""" Codec for the Punycode encoding, as specified in RFC 3492
 
 Written by Martin v. Löwis.
 """
@@ -74,7 +74,9 @@ def T(j, bias):
     if res > 26: return 26
     return res
 
+
 digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
+
 def generate_generalized_integer(N, bias):
     """3.3 Generalized variable-length integers"""
     result = bytearray()
@@ -111,7 +113,7 @@ def generate_integers(baselen, deltas):
     for points, delta in enumerate(deltas):
         s = generate_generalized_integer(delta, bias)
         result.extend(s)
-        bias = adapt(delta, points==0, baselen+points+1)
+        bias = adapt(delta, points == 0, baselen+points+1)
     return bytes(result)
 
 def punycode_encode(text):
@@ -134,7 +136,8 @@ def decode_generalized_number(extended, extpos, bias, errors):
             char = ord(extended[extpos])
         except IndexError:
             if errors == "strict":
-                raise UnicodeError("incomplete punicode string")
+                raise UnicodeDecodeError("punycode", bytes(extended[extpos], "utf-8"), extpos, extpos+1,
+                                         "incomplete punycode string")
             return extpos + 1, None
         extpos += 1
         if 0x41 <= char <= 0x5A: # A-Z
@@ -142,8 +145,8 @@ def decode_generalized_number(extended, extpos, bias, errors):
         elif 0x30 <= char <= 0x39:
             digit = char - 22 # 0x30-26
         elif errors == "strict":
-            raise UnicodeError("Invalid extended code point '%s'"
-                               % extended[extpos-1])
+            raise UnicodeDecodeError("punycode", bytes(extended[extpos-1], "utf-8"), extpos-1, extpos,
+                                     "Invalid extended code point '%s'" % extended[extpos-1])
         else:
             return extpos, None
         t = T(j, bias)
@@ -171,7 +174,7 @@ def insertion_sort(base, extended, errors):
         char += pos // (len(base) + 1)
         if char > 0x10FFFF:
             if errors == "strict":
-                raise UnicodeError("Invalid character U+%x" % char)
+                raise UnicodeDecodeError("punycode", bytes(char, "utf-8"), 0, len(char), "Invalid character U+%x" % char)
             char = ord('?')
         pos = pos % (len(base) + 1)
         base = base[:pos] + chr(char) + base[pos:]
@@ -217,13 +220,13 @@ def decode(self, input, final=False):
             raise UnicodeError("Unsupported error handling "+self.errors)
         return punycode_decode(input, self.errors)
 
-class StreamWriter(Codec,codecs.StreamWriter):
+class StreamWriter(Codec, codecs.StreamWriter):
     pass
 
-class StreamReader(Codec,codecs.StreamReader):
+class StreamReader(Codec, codecs.StreamReader):
     pass
 
-### encodings module API
+# encodings module API
 
 def getregentry():
     return codecs.CodecInfo(
diff --git a/Lib/encodings/undefined.py b/Lib/encodings/undefined.py
index 4690288355c710..269a06c8160fe9 100644
--- a/Lib/encodings/undefined.py
+++ b/Lib/encodings/undefined.py
@@ -1,8 +1,8 @@
 """ Python 'undefined' Codec
 
-    This codec will always raise a ValueError exception when being
-    used. It is intended for use by the site.py file to switch off
-    automatic string to Unicode coercion.
+    This codec always raises UnicodeEncodeError (when encoding) or
+    UnicodeDecodeError (when decoding). It is intended for use by the
+    site.py file to switch off automatic string to Unicode coercion.
 
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 
@@ -15,24 +15,24 @@
 
 class Codec(codecs.Codec):
 
-    def encode(self,input,errors='strict'):
-        raise UnicodeError("undefined encoding")
+    def encode(self, input, errors='strict'):
+        raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")
 
-    def decode(self,input,errors='strict'):
-        raise UnicodeError("undefined encoding")
+    def decode(self, input, errors='strict'):
+        raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")
 
 class IncrementalEncoder(codecs.IncrementalEncoder):
     def encode(self, input, final=False):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")
 
 class IncrementalDecoder(codecs.IncrementalDecoder):
     def decode(self, input, final=False):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")
 
-class StreamWriter(Codec,codecs.StreamWriter):
+class StreamWriter(Codec, codecs.StreamWriter):
     pass
 
-class StreamReader(Codec,codecs.StreamReader):
+class StreamReader(Codec, codecs.StreamReader):
     pass
 
 ### encodings module API
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index c61248242be8c7..4e641bd03aef38 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final):
             elif byteorder == 1:
                 self.decoder = codecs.utf_16_be_decode
             elif consumed >= 2:
-                raise UnicodeError("UTF-16 stream does not start with BOM")
+                raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM")
             return (output, consumed)
         return self.decoder(input, self.errors, final)
 
@@ -137,8 +137,8 @@ def decode(self, input, errors='strict'):
             self.decode = codecs.utf_16_le_decode
         elif byteorder == 1:
             self.decode = codecs.utf_16_be_decode
-        elif consumed>=2:
-            raise UnicodeError("UTF-16 stream does not start with BOM")
+        elif consumed >= 2:
+            raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM")
         return (object, consumed)
 
 ### encodings module API
diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
index cdf84d14129a62..c4c1e2ccfa5279 100644
--- a/Lib/encodings/utf_32.py
+++ b/Lib/encodings/utf_32.py
@@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final):
             elif byteorder == 1:
                 self.decoder = codecs.utf_32_be_decode
             elif consumed >= 4:
-                raise UnicodeError("UTF-32 stream does not start with BOM")
+                raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
             return (output, consumed)
         return self.decoder(input, self.errors, final)
 
@@ -133,7 +133,7 @@ def decode(self, input, errors='strict'):
         elif byteorder == 1:
             self.decode = codecs.utf_32_be_decode
         elif consumed>=4:
-            raise UnicodeError("UTF-32 stream does not start with BOM")
+            raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
         return (object, consumed)
 
 ### encodings module API
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 54a3520802a4f3..9f856dcee3598d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1334,13 +1334,13 @@ def test_decode(self):
 
     def test_decode_invalid(self):
         testcases = [
-            (b"xn--w&", "strict", UnicodeError()),
+            (b"xn--w&", "strict", UnicodeDecodeError("punycode", b"xn--w&", 0, 0, "")),
             (b"xn--w&", "ignore", "xn-"),
         ]
         for puny, errors, expected in testcases:
             with self.subTest(puny=puny, errors=errors):
                 if isinstance(expected, Exception):
-                    self.assertRaises(UnicodeError, puny.decode, "punycode", errors)
+                    self.assertRaises(UnicodeDecodeError, puny.decode, "punycode", errors)
                 else:
                     self.assertEqual(puny.decode("punycode", errors), expected)
 
diff --git a/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst b/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst
new file mode 100644
index 00000000000000..bf2203e0a1ba4e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst
@@ -0,0 +1,2 @@
+Convert :exc: UnicodeError to :exc: UnicodeEncodeError or :exc: UnicodeDecodeError where appropriate.
+Patch by Srinivas Reddy Thatiparthy
\ No newline at end of file

From 0d2420741eb75f062b869a4dc289f263257045a8 Mon Sep 17 00:00:00 2001
From: Srinivas Reddy Thatiparthy <thatiparthysreenivas@gmail.com>
Date: Fri, 26 Jun 2020 11:57:01 +0530
Subject: [PATCH 2/4] bpo-41115: Convert label to str(label)

---
 Lib/encodings/idna.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 962afa82309f19..14485a269a8093 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -70,7 +70,7 @@ def ToASCII(label):
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
+        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -85,11 +85,11 @@ def ToASCII(label):
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
+        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
 
     # Step 5: Check ACE prefix
     if label.startswith(sace_prefix):
-        raise UnicodeEncodeError("ascii", label, 0, len(label), "Label starts with ACE prefix")
+        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "Label starts with ACE prefix")
 
     # Step 6: Encode with PUNYCODE
     label = label.encode("punycode")
@@ -100,7 +100,7 @@ def ToASCII(label):
     # Step 8: Check size
     if 0 < len(label) < 64:
         return label
-    raise UnicodeEncodeError("punycode", label, 0, len(label), "label empty or too long")
+    raise UnicodeEncodeError("punycode", str(label), 0, len(label), "label empty or too long")
 
 def ToUnicode(label):
     # Step 1: Check for ASCII
@@ -162,9 +162,9 @@ def encode(self, input, errors='strict'):
             labels = result.split(b'.')
             for label in labels[:-1]:
                 if not (0 < len(label) < 64):
-                    raise UnicodeEncodeError("ascii", label, 0, len(label), "label empty or too long")
+                    raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
             if len(labels[-1]) >= 64:
-                raise UnicodeEncodeError("ascii", labels[-1], 0, len(labels[-1]), "label too long")
+                raise UnicodeEncodeError("ascii", str(labels[-1]), 0, len(labels[-1]), "label too long")
             return result, len(input)
 
         result = bytearray()

From dd44d595e98c806e176176e5c58d0ced38153d38 Mon Sep 17 00:00:00 2001
From: Srinivas Reddy Thatiparthy <thatiparthysreenivas@gmail.com>
Date: Fri, 26 Jun 2020 12:15:53 +0530
Subject: [PATCH 3/4] bpo-41115: Fix build failures in idna.py

---
 Lib/encodings/idna.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 14485a269a8093..bf20a3974c54b3 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -162,9 +162,11 @@ def encode(self, input, errors='strict'):
             labels = result.split(b'.')
             for label in labels[:-1]:
                 if not (0 < len(label) < 64):
-                    raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
+                    raise UnicodeEncodeError("ascii", label.decode('ascii'), 0, len(label.decode('ascii')),
+                                             "label empty or too long")
             if len(labels[-1]) >= 64:
-                raise UnicodeEncodeError("ascii", str(labels[-1]), 0, len(labels[-1]), "label too long")
+                raise UnicodeEncodeError("ascii", labels[-1].decode('ascii'), 0, len(labels[-1].decode('ascii')),
+                                         "label too long")
             return result, len(input)
 
         result = bytearray()

From 64778d2d025344a1dfba5cb039f81dae0be34577 Mon Sep 17 00:00:00 2001
From: Srinivas Reddy Thatiparthy <thatiparthysreenivas@gmail.com>
Date: Fri, 26 Jun 2020 12:39:06 +0530
Subject: [PATCH 4/4] bpo-41115: Fix doc failure

---
 Lib/encodings/idna.py                              | 14 +++++++++-----
 .../2020-06-26-06-03-42.bpo-41115.NTjUWO.rst       |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index bf20a3974c54b3..dc70838069431c 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -70,7 +70,8 @@ def ToASCII(label):
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
+        raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
+                                 "label empty or too long")
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -85,7 +86,8 @@ def ToASCII(label):
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label empty or too long")
+        raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
+                                 "label empty or too long")
 
     # Step 5: Check ACE prefix
     if label.startswith(sace_prefix):
@@ -98,9 +100,10 @@ def ToASCII(label):
     label = ace_prefix + label
 
     # Step 8: Check size
-    if 0 < len(label) < 64:
+    if len(label) < 64:
         return label
-    raise UnicodeEncodeError("punycode", str(label), 0, len(label), "label empty or too long")
+    raise UnicodeEncodeError("punycode", label.decode("punycode"), 0,
+                             len(label.decode("punycode")), "label too long")
 
 def ToUnicode(label):
     # Step 1: Check for ASCII
@@ -119,7 +122,8 @@ def ToUnicode(label):
         try:
             label = label.encode("ascii")
         except UnicodeEncodeError:
-            raise UnicodeEncodeError("ascii", label, 0, len(label), "Invalid character in IDN label")
+            raise UnicodeEncodeError("ascii", label, 0, len(label),
+                                     "Invalid character in IDN label")
     # Step 3: Check for ACE prefix
     if not label.startswith(ace_prefix):
         return str(label, "ascii")
diff --git a/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst b/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst
index bf2203e0a1ba4e..9827897d709b68 100644
--- a/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst
+++ b/Misc/NEWS.d/next/Library/2020-06-26-06-03-42.bpo-41115.NTjUWO.rst
@@ -1,2 +1,2 @@
-Convert :exc: UnicodeError to :exc: UnicodeEncodeError or :exc: UnicodeDecodeError where appropriate.
+Convert :exc:`UnicodeError` to :exc:`UnicodeEncodeError` or :exc:`UnicodeDecodeError` where appropriate.
 Patch by Srinivas Reddy Thatiparthy
\ No newline at end of file