Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit abd15ee

Browse files
committed
Yet another problem with unicode - some HTML pages cannot be decoded because they contain undecodable characters.
This causes a UnicodeDecodeError to be raised deep inside Python. It only happens if the xrds location is not found before some undecodable unicode character is reached. - Catch UnicodeDecodeError when searching for yadis - Update the check of whether yadis was used - if the xrds location is None, it was not - Added tests, updated the previous unicode test with a comment
1 parent 4ed2deb commit abd15ee

File tree

5 files changed

+69
-4
lines changed

5 files changed

+69
-4
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
22
<html>
33
<head>
4-
<title param="ěščřžýáíé &raquo;">Identity Page for Smoker</title>
4+
<title param="ěščřžýáíé &raquo;">Title with param that needs decoding</title>
55
</head>
66
<body>
7-
<p>foo</p>
7+
<p>This page can be properly decoded and everything will be fine</p>
88
</body>
99
</html>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2+
<html>
3+
<head>
4+
<title param="¿¿¿¿¿ýáíé &raquo;">Title with param which raises UnicodeError</title>
5+
<meta http-equiv="X-XRDS-Location" content="http://someuser.unittest/xrds" />
6+
</head>
7+
<body>
8+
<p>
9+
weird sign Å to prevent successful decoding
10+
</p>
11+
<p>
12+
This page can not be properly decoded so its content will be passed to HTML parser
13+
encoded but title raises UnicodeError because x-xrds-location is not found on time
14+
</p>
15+
</body>
16+
</html>
17+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2+
<html>
3+
<head>
4+
<meta http-equiv="X-XRDS-Location" content="http://someuser.unittest/xrds" />
5+
<title param="¿¿¿¿¿ýáíé &raquo;">Title with param which raises UnicodeError</title>
6+
</head>
7+
<body>
8+
<p>
9+
weird sign Å to prevent successful decoding
10+
</p>
11+
<p>
12+
This page can not be properly decoded so its content will be passed to HTML parser
13+
encoded but service will be found because x-xrds-location is found on time
14+
</p>
15+
</body>
16+
</html>
17+

openid/test/test_discover.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,11 +250,36 @@ def test_404(self):
250250
discover.discover, self.id_url + '/404')
251251

252252
def test_unicode(self):
253+
"""
254+
Check page with unicode and HTML entities
255+
"""
253256
self._discover(
254257
content_type='text/html;charset=utf-8',
255258
data=readDataFile('unicode.html'),
256259
expected_services=0)
257260

261+
def test_unicode_undecodable_html(self):
262+
"""
263+
Check page with unicode and HTML entities that can not be decoded
264+
"""
265+
data = readDataFile('unicode2.html')
266+
self.failUnlessRaises(UnicodeDecodeError, data.decode, 'utf-8')
267+
self._discover(content_type='text/html;charset=utf-8',
268+
data=data, expected_services=0)
269+
270+
def test_unicode_undecodable_html2(self):
271+
"""
272+
Check page with unicode and HTML entities that can not be decoded
273+
but xrds document is found before it matters
274+
"""
275+
self.documents[self.id_url + 'xrds'] = (
276+
'application/xrds+xml', readDataFile('yadis_idp.xml'))
277+
278+
data = readDataFile('unicode3.html')
279+
self.failUnlessRaises(UnicodeDecodeError, data.decode, 'utf-8')
280+
self._discover(content_type='text/html;charset=utf-8',
281+
data=data, expected_services=1)
282+
258283
def test_noOpenID(self):
259284
services = self._discover(content_type='text/plain',
260285
data="junk",

openid/yadis/discover.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def __init__(self, request_uri):
4545

4646
def usedYadisLocation(self):
4747
"""Was the Yadis protocol's indirection used?"""
48+
if self.xrds_uri is None:
49+
return False
4850
return self.normalized_uri != self.xrds_uri
4951

5052
def isXRDS(self):
@@ -131,18 +133,22 @@ def whereIsYadis(resp):
131133
content_type = content_type or ''
132134
encoding = content_type.rsplit(';', 1)
133135
if len(encoding) == 2 and encoding[1].strip().startswith('charset='):
134-
encoding = encoding[1].split('=', 1)[1]
136+
encoding = encoding[1].split('=', 1)[1].strip()
135137
else:
136138
encoding = 'UTF-8'
137139

138140
try:
139141
content = resp.body.decode(encoding)
140142
except UnicodeError:
143+
# Keep the encoded version in case the yadis location can be found before an undecodable character breaks parsing.
144+
# Any resulting errors are caught below.
141145
content = resp.body
142146

143147
try:
144148
yadis_loc = findHTMLMeta(StringIO(content))
145-
except MetaNotFound:
149+
except (MetaNotFound, UnicodeError):
150+
# UnicodeError: Response body could not be decoded and the xrds location
151+
# could not be found before the undecodable content was reached.
146152
pass
147153

148154
return yadis_loc

0 commit comments

Comments
 (0)