Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

gh-85287: Convert UnicodeError to UnicodeEncodeError | UnicodeDecodeError #21165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions Lib/encodings/idna.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,15 @@ def ToASCII(label):
try:
# Step 1: try ASCII
label = label.encode("ascii")
except UnicodeError:
except UnicodeEncodeError:
pass
else:
# Skip to step 3: UseSTD3ASCIIRules is false, so
# Skip to step 8.
if 0 < len(label) < 64:
return label
raise UnicodeError("label empty or too long")
raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
"label empty or too long")

# Step 2: nameprep
label = nameprep(label)
Expand All @@ -79,17 +80,18 @@ def ToASCII(label):
# Step 4: try ASCII
try:
label = label.encode("ascii")
except UnicodeError:
except UnicodeEncodeError:
pass
else:
# Skip to step 8.
if 0 < len(label) < 64:
return label
raise UnicodeError("label empty or too long")
raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
"label empty or too long")

# Step 5: Check ACE prefix
if label.startswith(sace_prefix):
raise UnicodeError("Label starts with ACE prefix")
raise UnicodeEncodeError("ascii", str(label), 0, len(label), "Label starts with ACE prefix")

# Step 6: Encode with PUNYCODE
label = label.encode("punycode")
Expand All @@ -98,9 +100,10 @@ def ToASCII(label):
label = ace_prefix + label

# Step 8: Check size
if 0 < len(label) < 64:
if len(label) < 64:
return label
raise UnicodeError("label empty or too long")
raise UnicodeEncodeError("punycode", label.decode("punycode"), 0,
len(label.decode("punycode")), "label too long")

def ToUnicode(label):
# Step 1: Check for ASCII
Expand All @@ -110,16 +113,17 @@ def ToUnicode(label):
try:
label = label.encode("ascii")
pure_ascii = True
except UnicodeError:
except UnicodeEncodeError:
pure_ascii = False
if not pure_ascii:
# Step 2: Perform nameprep
label = nameprep(label)
# It doesn't say this, but apparently, it should be ASCII now
try:
label = label.encode("ascii")
except UnicodeError:
raise UnicodeError("Invalid character in IDN label")
except UnicodeEncodeError:
raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
"Invalid character in IDN label")
# Step 3: Check for ACE prefix
if not label.startswith(ace_prefix):
return str(label, "ascii")
Expand Down Expand Up @@ -162,9 +166,11 @@ def encode(self, input, errors='strict'):
labels = result.split(b'.')
for label in labels[:-1]:
if not (0 < len(label) < 64):
raise UnicodeError("label empty or too long")
raise UnicodeEncodeError("ascii", label.decode('ascii'), 0, len(label.decode('ascii')),
"label empty or too long")
if len(labels[-1]) >= 64:
raise UnicodeError("label too long")
raise UnicodeEncodeError("ascii", labels[-1].decode('ascii'), 0, len(labels[-1].decode('ascii')),
"label too long")
return result, len(input)

result = bytearray()
Expand Down
21 changes: 12 additions & 9 deletions Lib/encodings/punycode.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Codec for the Punicode encoding, as specified in RFC 3492
""" Codec for the Punycode encoding, as specified in RFC 3492
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch


Written by Martin v. Löwis.
"""
Expand Down Expand Up @@ -74,7 +74,9 @@ def T(j, bias):
if res > 26: return 26
return res


digits = b"abcdefghijklmnopqrstuvwxyz0123456789"

def generate_generalized_integer(N, bias):
"""3.3 Generalized variable-length integers"""
result = bytearray()
Expand Down Expand Up @@ -111,7 +113,7 @@ def generate_integers(baselen, deltas):
for points, delta in enumerate(deltas):
s = generate_generalized_integer(delta, bias)
result.extend(s)
bias = adapt(delta, points==0, baselen+points+1)
bias = adapt(delta, points == 0, baselen+points+1)
return bytes(result)

def punycode_encode(text):
Expand All @@ -134,16 +136,17 @@ def decode_generalized_number(extended, extpos, bias, errors):
char = ord(extended[extpos])
except IndexError:
if errors == "strict":
raise UnicodeError("incomplete punicode string")
raise UnicodeDecodeError("punycode", bytes(extended[extpos], "utf-8"), extpos, extpos+1,
"incomplete punycode string")
return extpos + 1, None
extpos += 1
if 0x41 <= char <= 0x5A: # A-Z
digit = char - 0x41
elif 0x30 <= char <= 0x39:
digit = char - 22 # 0x30-26
elif errors == "strict":
raise UnicodeError("Invalid extended code point '%s'"
% extended[extpos-1])
raise UnicodeDecodeError("punycode", bytes(extended[extpos-1], "utf-8"), extpos-1, extpos,
"Invalid extended code point '%s'" % extended[extpos-1])
else:
return extpos, None
t = T(j, bias)
Expand Down Expand Up @@ -171,7 +174,7 @@ def insertion_sort(base, extended, errors):
char += pos // (len(base) + 1)
if char > 0x10FFFF:
if errors == "strict":
raise UnicodeError("Invalid character U+%x" % char)
raise UnicodeDecodeError("punycode", bytes(char, "utf-8"), 0, len(char), "Invalid character U+%x" % char)
char = ord('?')
pos = pos % (len(base) + 1)
base = base[:pos] + chr(char) + base[pos:]
Expand Down Expand Up @@ -217,13 +220,13 @@ def decode(self, input, final=False):
raise UnicodeError("Unsupported error handling "+self.errors)
return punycode_decode(input, self.errors)

class StreamWriter(Codec,codecs.StreamWriter):
class StreamWriter(Codec, codecs.StreamWriter):
pass

class StreamReader(Codec,codecs.StreamReader):
class StreamReader(Codec, codecs.StreamReader):
pass

### encodings module API
# encodings module API

def getregentry():
return codecs.CodecInfo(
Expand Down
22 changes: 11 additions & 11 deletions Lib/encodings/undefined.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
""" Python 'undefined' Codec

This codec will always raise a ValueError exception when being
used. It is intended for use by the site.py file to switch off
automatic string to Unicode coercion.
This codec will always raise a UnicodeEncodeError | UnicodeDecodeError
exception when being used. It is intended for use by the site.py file
to switch off automatic string to Unicode coercion.

Written by Marc-Andre Lemburg ([email protected]).

Expand All @@ -15,24 +15,24 @@

class Codec(codecs.Codec):

def encode(self,input,errors='strict'):
raise UnicodeError("undefined encoding")
def encode(self, input, errors='strict'):
raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")

def decode(self,input,errors='strict'):
raise UnicodeError("undefined encoding")
def decode(self, input, errors='strict'):
raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")

class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
raise UnicodeError("undefined encoding")
raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")

class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
raise UnicodeError("undefined encoding")
raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")

class StreamWriter(Codec,codecs.StreamWriter):
class StreamWriter(Codec, codecs.StreamWriter):
pass

class StreamReader(Codec,codecs.StreamReader):
class StreamReader(Codec, codecs.StreamReader):
pass

### encodings module API
Expand Down
6 changes: 3 additions & 3 deletions Lib/encodings/utf_16.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final):
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
raise UnicodeDecodeError("utc-16", input, 0, 0, "UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)

Expand Down Expand Up @@ -137,8 +137,8 @@ def decode(self, input, errors='strict'):
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError("UTF-16 stream does not start with BOM")
elif consumed >= 2:
raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM")
return (object, consumed)

### encodings module API
Expand Down
4 changes: 2 additions & 2 deletions Lib/encodings/utf_32.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final):
elif byteorder == 1:
self.decoder = codecs.utf_32_be_decode
elif consumed >= 4:
raise UnicodeError("UTF-32 stream does not start with BOM")
raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)

Expand Down Expand Up @@ -133,7 +133,7 @@ def decode(self, input, errors='strict'):
elif byteorder == 1:
self.decode = codecs.utf_32_be_decode
elif consumed>=4:
raise UnicodeError("UTF-32 stream does not start with BOM")
raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
return (object, consumed)

### encodings module API
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,13 +1334,13 @@ def test_decode(self):

def test_decode_invalid(self):
testcases = [
(b"xn--w&", "strict", UnicodeError()),
(b"xn--w&", "strict", UnicodeDecodeError("punycode", b"xn--w&", 0, 0, "")),
(b"xn--w&", "ignore", "xn-"),
]
for puny, errors, expected in testcases:
with self.subTest(puny=puny, errors=errors):
if isinstance(expected, Exception):
self.assertRaises(UnicodeError, puny.decode, "punycode", errors)
self.assertRaises(UnicodeDecodeError, puny.decode, "punycode", errors)
else:
self.assertEqual(puny.decode("punycode", errors), expected)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Convert :exc:`UnicodeError` to :exc:`UnicodeEncodeError` or :exc:`UnicodeDecodeError` where appropriate.
Patch by Srinivas Reddy Thatiparthy.