Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0ac30f8

Browse files
committed
Enhance the punycode decoder so that it can decode
unicode objects. Fix the idna codec and the tests.
1 parent 1f05a3b commit 0ac30f8

3 files changed

Lines changed: 136 additions & 130 deletions

File tree

Lib/encodings/idna.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
88

99
# IDNA section 5
10-
ace_prefix = "xn--"
10+
ace_prefix = b"xn--"
11+
sace_prefix = "xn--"
1112

1213
# This assumes query strings, so AllowUnassigned is true
1314
def nameprep(label):
@@ -87,7 +88,7 @@ def ToASCII(label):
8788
raise UnicodeError("label empty or too long")
8889

8990
# Step 5: Check ACE prefix
90-
if label.startswith(ace_prefix):
91+
if label.startswith(sace_prefix):
9192
raise UnicodeError("Label starts with ACE prefix")
9293

9394
# Step 6: Encode with PUNYCODE
@@ -134,7 +135,7 @@ def ToUnicode(label):
134135

135136
# Step 7: Compare the result of step 6 with the one of step 3
136137
# label2 will already be in lower case.
137-
if label.lower() != label2:
138+
if str(label, "ascii").lower() != str(label2, "ascii"):
138139
raise UnicodeError("IDNA does not round-trip", label, label2)
139140

140141
# Step 8: return the result of step 5
@@ -143,7 +144,7 @@ def ToUnicode(label):
143144
### Codec APIs
144145

145146
class Codec(codecs.Codec):
146-
def encode(self,input,errors='strict'):
147+
def encode(self, input, errors='strict'):
147148

148149
if errors != 'strict':
149150
# IDNA is quite clear that implementations must be strict
@@ -152,19 +153,21 @@ def encode(self,input,errors='strict'):
152153
if not input:
153154
return b"", 0
154155

155-
result = []
156+
result = b""
156157
labels = dots.split(input)
157-
if labels and len(labels[-1])==0:
158+
if labels and not labels[-1]:
158159
trailing_dot = b'.'
159160
del labels[-1]
160161
else:
161162
trailing_dot = b''
162163
for label in labels:
163-
result.append(ToASCII(label))
164-
# Join with U+002E
165-
return b".".join(result)+trailing_dot, len(input)
164+
if result:
165+
# Join with U+002E
166+
result.extend(b'.')
167+
result.extend(ToASCII(label))
168+
return result+trailing_dot, len(input)
166169

167-
def decode(self,input,errors='strict'):
170+
def decode(self, input, errors='strict'):
168171

169172
if errors != 'strict':
170173
raise UnicodeError("Unsupported error handling "+errors)
@@ -199,30 +202,31 @@ def _buffer_encode(self, input, errors, final):
199202
raise UnicodeError("unsupported error handling "+errors)
200203

201204
if not input:
202-
return ("", 0)
205+
return (b'', 0)
203206

204207
labels = dots.split(input)
205-
trailing_dot = ''
208+
trailing_dot = b''
206209
if labels:
207210
if not labels[-1]:
208-
trailing_dot = '.'
211+
trailing_dot = b'.'
209212
del labels[-1]
210213
elif not final:
211214
# Keep potentially unfinished label until the next call
212215
del labels[-1]
213216
if labels:
214-
trailing_dot = '.'
217+
trailing_dot = b'.'
215218

216-
result = []
219+
result = b""
217220
size = 0
218221
for label in labels:
219-
result.append(ToASCII(label))
220222
if size:
223+
# Join with U+002E
224+
result.extend(b'.')
221225
size += 1
226+
result.extend(ToASCII(label))
222227
size += len(label)
223228

224-
# Join with U+002E
225-
result = ".".join(result) + trailing_dot
229+
result += trailing_dot
226230
size += len(trailing_dot)
227231
return (result, size)
228232

@@ -239,8 +243,7 @@ def _buffer_decode(self, input, errors, final):
239243
labels = dots.split(input)
240244
else:
241245
# Must be ASCII string
242-
input = str(input)
243-
str(input, "ascii")
246+
input = str(input, "ascii")
244247
labels = input.split(".")
245248

246249
trailing_dot = ''

Lib/encodings/punycode.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
181181
return base
182182

183183
def punycode_decode(text, errors):
184+
if isinstance(text, str):
185+
text = text.encode("ascii")
184186
pos = text.rfind(b"-")
185187
if pos == -1:
186188
base = ""
@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
194196

195197
class Codec(codecs.Codec):
196198

197-
def encode(self,input,errors='strict'):
199+
def encode(self, input, errors='strict'):
198200
res = punycode_encode(input)
199201
return res, len(input)
200202

201-
def decode(self,input,errors='strict'):
203+
def decode(self, input, errors='strict'):
202204
if errors not in ('strict', 'replace', 'ignore'):
203205
raise UnicodeError, "Unsupported error handling "+errors
204206
res = punycode_decode(input, errors)

0 commit comments

Comments
 (0)