Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 42b0c2f

Browse files
committed
Merged revisions 83209 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines

  Fix Issue6325 - robotparser to honor urls with query strings.
........
1 parent b8f96c1 commit 42b0c2f

2 files changed

Lines changed: 15 additions & 2 deletions

File tree

Lib/test/test_robotparser.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,17 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
205205
RobotTest(13, doc, good, bad, agent="googlebot")
206206

207207

208+
# 14. For issue #6325 (query string support)
209+
doc = """
210+
User-agent: *
211+
Disallow: /some/path?name=value
212+
"""
213+
214+
good = ['/some/path']
215+
bad = ['/some/path?name=value']
216+
217+
RobotTest(14, doc, good, bad)
218+
208219

209220
class NetworkTestCase(unittest.TestCase):
210221

Lib/urllib/robotparser.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,10 @@ def can_fetch(self, useragent, url):
129129
return True
130130
# search for given user agent matches
131131
# the first match counts
132-
url = urllib.parse.quote(
133-
urllib.parse.urlparse(urllib.parse.unquote(url))[2])
132+
parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
133+
url = urllib.parse.urlunparse(('','',parsed_url.path,
134+
parsed_url.params,parsed_url.query, parsed_url.fragment))
135+
url = urllib.parse.quote(url)
134136
if not url:
135137
url = "/"
136138
for entry in self.entries:

0 commit comments

Comments
 (0)