Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 64acb5c

Browse files
committed
Samuel L. Bayer:
- same trick with "import wcnew; webchecker = wcnew" as above
- updated readhtml() method to handle pair representation; used new name suppression infrastructure from wcnew.py to suppress processing name anchors

[And untabified --GvR]
1 parent a894640 commit 64acb5c

1 file changed

Lines changed: 12 additions & 4 deletions

File tree

Tools/webchecker/websucker.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import urllib
1111
import getopt
1212

13-
import webchecker
13+
import wcnew
14+
15+
webchecker = wcnew
1416

1517
# Extract real version number if necessary
1618
if __version__[0] == '$':
@@ -45,14 +47,20 @@ def main():
4547
class Sucker(webchecker.Checker):
4648

4749
checkext = 0
50+
nonames = 1
51+
52+
# SAM 11/13/99: in general, URLs are now URL pairs.
53+
# Since we've suppressed name anchor checking,
54+
# we can ignore the second dimension.
4855

49-
def readhtml(self, url):
56+
def readhtml(self, url_pair):
57+
url = url_pair[0]
5058
text = None
5159
path = self.savefilename(url)
5260
try:
5361
f = open(path, "rb")
5462
except IOError:
55-
f = self.openpage(url)
63+
f = self.openpage(url_pair)
5664
if f:
5765
info = f.info()
5866
nurl = f.geturl()
@@ -89,7 +97,7 @@ def savefilename(self, url):
8997
host, port = urllib.splitnport(host)
9098
host = string.lower(host)
9199
if not path or path[-1] == "/":
92-
path = path + "index.html"
100+
path = path + "index.html"
93101
if os.sep != "/":
94102
path = string.join(string.split(path, "/"), os.sep)
95103
path = os.path.join(host, path)

0 commit comments

Comments
 (0)