77dots = re .compile ("[\u002E \u3002 \uFF0E \uFF61 ]" )
88
99# IDNA section 5
10- ace_prefix = "xn--"
10+ ace_prefix = b"xn--"
11+ sace_prefix = "xn--"
1112
1213# This assumes query strings, so AllowUnassigned is true
1314def nameprep (label ):
@@ -87,7 +88,7 @@ def ToASCII(label):
8788 raise UnicodeError ("label empty or too long" )
8889
8990 # Step 5: Check ACE prefix
90- if label .startswith (ace_prefix ):
91+ if label .startswith (sace_prefix ):
9192 raise UnicodeError ("Label starts with ACE prefix" )
9293
9394 # Step 6: Encode with PUNYCODE
@@ -134,7 +135,7 @@ def ToUnicode(label):
134135
135136 # Step 7: Compare the result of step 6 with the one of step 3
136137 # label2 will already be in lower case.
137- if label .lower () != label2 :
138+ if str ( label , "ascii" ) .lower () != str ( label2 , "ascii" ) :
138139 raise UnicodeError ("IDNA does not round-trip" , label , label2 )
139140
140141 # Step 8: return the result of step 5
@@ -143,7 +144,7 @@ def ToUnicode(label):
143144### Codec APIs
144145
145146class Codec (codecs .Codec ):
146- def encode (self ,input ,errors = 'strict' ):
147+ def encode (self , input , errors = 'strict' ):
147148
148149 if errors != 'strict' :
149150 # IDNA is quite clear that implementations must be strict
@@ -152,19 +153,21 @@ def encode(self,input,errors='strict'):
152153 if not input :
153154 return b"" , 0
154155
155- result = []
156+ result = b""
156157 labels = dots .split (input )
157- if labels and len ( labels [- 1 ]) == 0 :
158+ if labels and not labels [- 1 ]:
158159 trailing_dot = b'.'
159160 del labels [- 1 ]
160161 else :
161162 trailing_dot = b''
162163 for label in labels :
163- result .append (ToASCII (label ))
164- # Join with U+002E
165- return b"." .join (result )+ trailing_dot , len (input )
164+ if result :
165+ # Join with U+002E
166+ result .extend (b'.' )
167+ result .extend (ToASCII (label ))
168+ return result + trailing_dot , len (input )
166169
167- def decode (self ,input ,errors = 'strict' ):
170+ def decode (self , input , errors = 'strict' ):
168171
169172 if errors != 'strict' :
170173 raise UnicodeError ("Unsupported error handling " + errors )
@@ -199,30 +202,31 @@ def _buffer_encode(self, input, errors, final):
199202 raise UnicodeError ("unsupported error handling " + errors )
200203
201204 if not input :
202- return ("" , 0 )
205+ return (b'' , 0 )
203206
204207 labels = dots .split (input )
205- trailing_dot = ''
208+ trailing_dot = b ''
206209 if labels :
207210 if not labels [- 1 ]:
208- trailing_dot = '.'
211+ trailing_dot = b '.'
209212 del labels [- 1 ]
210213 elif not final :
211214 # Keep potentially unfinished label until the next call
212215 del labels [- 1 ]
213216 if labels :
214- trailing_dot = '.'
217+ trailing_dot = b '.'
215218
216- result = []
219+ result = b""
217220 size = 0
218221 for label in labels :
219- result .append (ToASCII (label ))
220222 if size :
223+ # Join with U+002E
224+ result .extend (b'.' )
221225 size += 1
226+ result .extend (ToASCII (label ))
222227 size += len (label )
223228
224- # Join with U+002E
225- result = "." .join (result ) + trailing_dot
229+ result += trailing_dot
226230 size += len (trailing_dot )
227231 return (result , size )
228232
@@ -239,8 +243,7 @@ def _buffer_decode(self, input, errors, final):
239243 labels = dots .split (input )
240244 else :
241245 # Must be ASCII string
242- input = str (input )
243- str (input , "ascii" )
246+ input = str (input , "ascii" )
244247 labels = input .split ("." )
245248
246249 trailing_dot = ''
0 commit comments