3939-m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
4040-q -- quiet operation (also suppresses external links report)
4141-v -- verbose operation; repeating -v will increase verbosity
42+ -t root -- specify root dir which should be treated as internal (can repeat)
43+ -a -- don't check name anchors
4244
4345Command line arguments:
4446
6163import string
6264from Tkinter import *
6365import tktools
64- import webchecker
66+ import wcnew
6567import random
6668
69+ webchecker = wcnew
70+
6771# Override some for a weaker platform
6872if sys .platform == 'mac' :
6973 webchecker .DEFROOT = "http://grail.cnri.reston.va.us/"
7276
7377def main ():
7478 try :
75- opts , args = getopt .getopt (sys .argv [1 :], 'm:qv ' )
79+ opts , args = getopt .getopt (sys .argv [1 :], 't:m:qva ' )
7680 except getopt .error , msg :
7781 sys .stdout = sys .stderr
7882 print msg
7983 print __doc__ % vars (webchecker )
8084 sys .exit (2 )
85+ webchecker .verbose = webchecker .VERBOSE
86+ webchecker .nonames = webchecker .NONAMES
87+ webchecker .maxpage = webchecker .MAXPAGE
88+ extra_roots = []
8189 for o , a in opts :
8290 if o == '-m' :
8391 webchecker .maxpage = string .atoi (a )
8492 if o == '-q' :
8593 webchecker .verbose = 0
8694 if o == '-v' :
8795 webchecker .verbose = webchecker .verbose + 1
96+ if o == '-t' :
97+ extra_roots .append (a )
98+ if o == '-a' :
99+ webchecker .nonames = not webchecker .nonames
88100 root = Tk (className = 'Webchecker' )
89101 root .protocol ("WM_DELETE_WINDOW" , root .quit )
90102 c = CheckerWindow (root )
103+ c .setflags (verbose = webchecker .verbose , maxpage = webchecker .maxpage ,
104+ nonames = webchecker .nonames )
91105 if args :
92106 for arg in args [:- 1 ]:
93107 c .addroot (arg )
94108 c .suggestroot (args [- 1 ])
109+ # Usually conditioned on whether external links
110+ # will be checked, but since that's not a command
111+ # line option, just toss them in.
112+ for url_root in extra_roots :
113+ # Make sure it's terminated by a slash,
114+ # so that addroot doesn't discard the last
115+ # directory component.
116+ if url_root [- 1 ] != "/" :
117+ url_root = url_root + "/"
118+ c .addroot (url_root , add_to_do = 0 )
95119 root .mainloop ()
96120
97121
@@ -139,11 +163,12 @@ def __init__(self, parent, root=webchecker.DEFROOT):
139163 self .__checking .pack (side = TOP , fill = X )
140164 self .__mp = mp = MultiPanel (parent )
141165 sys .stdout = self .__log = LogPanel (mp , "Log" )
142- self .__todo = ListPanel (mp , "To check" , self .showinfo )
143- self .__done = ListPanel (mp , "Checked" , self .showinfo )
144- self .__bad = ListPanel (mp , "Bad links" , self .showinfo )
145- self .__errors = ListPanel (mp , "Pages w/ bad links" , self .showinfo )
166+ self .__todo = ListPanel (mp , "To check" , self , self .showinfo )
167+ self .__done = ListPanel (mp , "Checked" , self , self .showinfo )
168+ self .__bad = ListPanel (mp , "Bad links" , self , self .showinfo )
169+ self .__errors = ListPanel (mp , "Pages w/ bad links" , self , self .showinfo )
146170 self .__details = LogPanel (mp , "Details" )
171+ self .root_seed = None
147172 webchecker .Checker .__init__ (self )
148173 if root :
149174 root = string .strip (str (root ))
@@ -155,11 +180,14 @@ def reset(self):
155180 webchecker .Checker .reset (self )
156181 for p in self .__todo , self .__done , self .__bad , self .__errors :
157182 p .clear ()
183+ if self .root_seed :
184+ self .suggestroot (self .root_seed )
158185
159186 def suggestroot (self , root ):
160187 self .__rootentry .delete (0 , END )
161188 self .__rootentry .insert (END , root )
162189 self .__rootentry .select_range (0 , END )
190+ self .root_seed = root
163191
164192 def enterroot (self , event = None ):
165193 root = self .__rootentry .get ()
@@ -221,7 +249,7 @@ def dosomething(self):
221249 self .__todo .list .select_set (i )
222250 self .__todo .list .yview (i )
223251 url = self .__todo .items [i ]
224- self .__checking .config (text = "Checking " + url )
252+ self .__checking .config (text = "Checking " + self . format_url ( url ) )
225253 self .__parent .update ()
226254 self .dopage (url )
227255 else :
@@ -232,7 +260,7 @@ def dosomething(self):
232260 def showinfo (self , url ):
233261 d = self .__details
234262 d .clear ()
235- d .put ("URL: %s\n " % url )
263+ d .put ("URL: %s\n " % self . format_url ( url ) )
236264 if self .bad .has_key (url ):
237265 d .put ("Error: %s\n " % str (self .bad [url ]))
238266 if url in self .roots :
@@ -246,18 +274,18 @@ def showinfo(self, url):
246274 else :
247275 d .put ("Status: unknown (!)\n " )
248276 o = []
249- if self .errors .has_key (url ):
277+ if ( not url [ 1 ]) and self .errors .has_key (url [ 0 ] ):
250278 d .put ("Bad links from this page:\n " )
251- for triple in self .errors [url ]:
279+ for triple in self .errors [url [ 0 ] ]:
252280 link , rawlink , msg = triple
253- d .put (" HREF %s" % link )
254- if link != rawlink : d .put (" (%s)" % rawlink )
281+ d .put (" HREF %s" % self . format_url ( link ) )
282+ if self . format_url ( link ) != rawlink : d .put (" (%s)" % rawlink )
255283 d .put ("\n " )
256284 d .put (" error %s\n " % str (msg ))
257285 self .__mp .showpanel ("Details" )
258286 for source , rawlink in o :
259287 d .put ("Origin: %s" % source )
260- if rawlink != url :
288+ if rawlink != self . format_url ( url ) :
261289 d .put (" (%s)" % rawlink )
262290 d .put ("\n " )
263291 d .text .yview ("1.0" )
@@ -288,7 +316,7 @@ def markdone(self, url):
288316
289317 def seterror (self , url , triple ):
290318 webchecker .Checker .seterror (self , url , triple )
291- self .__errors .insert (url )
319+ self .__errors .insert (( url , '' ) )
292320 self .newstatus ()
293321
294322 def newstatus (self ):
@@ -301,10 +329,11 @@ def update_checkext(self):
301329
302330class ListPanel :
303331
304- def __init__ (self , mp , name , showinfo = None ):
332+ def __init__ (self , mp , name , checker , showinfo = None ):
305333 self .mp = mp
306334 self .name = name
307335 self .showinfo = showinfo
336+ self .checker = checker
308337 self .panel = mp .addpanel (name )
309338 self .list , self .frame = tktools .make_list_box (
310339 self .panel , width = 60 , height = 5 )
@@ -321,7 +350,7 @@ def clear(self):
321350 def doubleclick (self , event ):
322351 l = self .selectedindices ()
323352 if l :
324- self .showinfo (self .list . get ( l [0 ]) )
353+ self .showinfo (self .items [ l [0 ]] )
325354
326355 def selectedindices (self ):
327356 l = self .list .curselection ()
@@ -334,7 +363,7 @@ def insert(self, url):
334363 self .mp .showpanel (self .name )
335364 # (I tried sorting alphabetically, but the display is too jumpy)
336365 i = len (self .items )
337- self .list .insert (i , url )
366+ self .list .insert (i , self . checker . format_url ( url ) )
338367 self .list .yview (i )
339368 self .items .insert (i , url )
340369
0 commit comments