1- """Parse a Python file and retrieve classes and methods.
1+ """Parse a Python module and describe its classes and methods.
22
3- Parse enough of a Python file to recognize class and method
4- definitions and to find out the superclasses of a class.
3+ Parse enough of a Python file to recognize imports and class and
4+ method definitions, and to find out the superclasses of a class.
55
66The interface consists of a single function:
7- readmodule_ex(module [, path[, inpackage]])
8- module is the name of a Python module, path is an optional list of
9- directories where the module is to be searched. If present, path is
10- prepended to the system search path sys.path. (inpackage is used
11- internally to search for a submodule of a package.)
12- The return value is a dictionary. The keys of the dictionary are
13- the names of the classes defined in the module (including classes
14- that are defined via the from XXX import YYY construct). The values
15- are class instances of the class Class defined here.
7+ readmodule_ex(module [, path])
8+ where module is the name of a Python module, and path is an optional
9+ list of directories in which to search for the module. If present,
10+ path is prepended to the system search path sys.path. The return
11+ value is a dictionary. The keys of the dictionary are the names of
12+ the classes and top-level functions defined in the module (including
13+ names brought in via the from XXX import YYY construct). The values
14+ are instances of the classes Class and Function defined here. One
15+ special key/value pair is present for packages: the key '__path__'
16+ has a list as its value, which contains the package search path.
1617
1718A class is described by the class Class in this module. Instances
1819of this class have the following instance variables:
3637 name -- the name of the class
3738 file -- the file in which the class was defined
3839 lineno -- the line in the file on which the class statement occurred
39-
40-
41- BUGS
42- - Nested classes and functions can confuse it.
43-
44- PACKAGE CAVEAT
45- - When you call readmodule_ex for a package, dict['__path__'] is a
46- list, which may confuse older class browsers. (readmodule filters
47- these out though.)
4840"""
4941
5042import sys
5143import imp
5244import tokenize # Python tokenizer
53- from token import NAME
45+ from token import NAME , DEDENT , NEWLINE
5446
5547__all__ = ["readmodule" , "readmodule_ex" , "Class" , "Function" ]
5648
@@ -86,14 +78,14 @@ def readmodule(module, path=[]):
8678 Call readmodule_ex() and then only keep Class objects from the
8779 resulting dictionary.'''
8880
89- dict = readmodule_ex (module , path )
81+ dict = _readmodule (module , path )
9082 res = {}
9183 for key , value in dict .items ():
9284 if isinstance (value , Class ):
9385 res [key ] = value
9486 return res
9587
96- def readmodule_ex (module , path = [], inpackage = None ):
88+ def readmodule_ex (module , path = []):
9789 '''Read a module file and return a dictionary of classes.
9890
9991 Search for MODULE in PATH and sys.path, read and parse the
@@ -105,7 +97,10 @@ def readmodule_ex(module, path=[], inpackage=None):
10597 package search path; otherwise, we are searching for a top-level
10698 module, and PATH is combined with sys.path.
10799 '''
100+ return _readmodule (module , path )
108101
102+ def _readmodule (module , path , inpackage = None ):
103+ '''Do the hard work for readmodule[_ex].'''
109104 # Compute the full module name (prepending inpackage if set)
110105 if inpackage :
111106 fullmodule = "%s.%s" % (inpackage , module )
@@ -129,10 +124,10 @@ def readmodule_ex(module, path=[], inpackage=None):
129124 if i >= 0 :
130125 package = module [:i ]
131126 submodule = module [i + 1 :]
132- parent = readmodule_ex (package , path , inpackage )
127+ parent = _readmodule (package , path , inpackage )
133128 if inpackage :
134129 package = "%s.%s" % (inpackage , package )
135- return readmodule_ex (submodule , parent ['__path__' ], package )
130+ return _readmodule (submodule , parent ['__path__' ], package )
136131
137132 # Search the path for the module
138133 f = None
@@ -150,36 +145,42 @@ def readmodule_ex(module, path=[], inpackage=None):
150145 f .close ()
151146 return dict
152147
153- classstack = [] # stack of (class, indent) pairs
148+ stack = [] # stack of (class, indent) pairs
154149
155150 g = tokenize .generate_tokens (f .readline )
156151 try :
157152 for tokentype , token , start , end , line in g :
158- if token == 'def' :
153+ if tokentype == DEDENT :
154+ lineno , thisindent = start
155+ # close nested classes and defs
156+ while stack and stack [- 1 ][1 ] >= thisindent :
157+ del stack [- 1 ]
158+ elif token == 'def' :
159159 lineno , thisindent = start
160+ # close previous nested classes and defs
161+ while stack and stack [- 1 ][1 ] >= thisindent :
162+ del stack [- 1 ]
160163 tokentype , meth_name , start , end , line = g .next ()
161164 if tokentype != NAME :
162165 continue # Syntax error
163- # close all classes indented at least as much
164- while classstack and \
165- classstack [- 1 ][1 ] >= thisindent :
166- del classstack [- 1 ]
167- if classstack :
168- # it's a class method
169- cur_class = classstack [- 1 ][0 ]
170- cur_class ._addmethod (meth_name , lineno )
166+ if stack :
167+ cur_class = stack [- 1 ][0 ]
168+ if isinstance (cur_class , Class ):
169+ # it's a method
170+ cur_class ._addmethod (meth_name , lineno )
171+ # else it's a nested def
171172 else :
172173 # it's a function
173174 dict [meth_name ] = Function (module , meth_name , file , lineno )
175+ stack .append ((None , thisindent )) # Marker for nested fns
174176 elif token == 'class' :
175177 lineno , thisindent = start
178+ # close previous nested classes and defs
179+ while stack and stack [- 1 ][1 ] >= thisindent :
180+ del stack [- 1 ]
176181 tokentype , class_name , start , end , line = g .next ()
177182 if tokentype != NAME :
178183 continue # Syntax error
179- # close all classes indented at least as much
180- while classstack and \
181- classstack [- 1 ][1 ] >= thisindent :
182- del classstack [- 1 ]
183184 # parse what follows the class name
184185 tokentype , token , start , end , line = g .next ()
185186 inherit = None
@@ -208,6 +209,7 @@ def readmodule_ex(module, path=[], inpackage=None):
208209 if c in d :
209210 n = d [c ]
210211 names .append (n )
212+ super = []
211213 if token == '(' :
212214 level += 1
213215 elif token == ')' :
@@ -220,20 +222,21 @@ def readmodule_ex(module, path=[], inpackage=None):
220222 super .append (token )
221223 inherit = names
222224 cur_class = Class (module , class_name , inherit , file , lineno )
223- dict [class_name ] = cur_class
224- classstack .append ((cur_class , thisindent ))
225+ if not stack :
226+ dict [class_name ] = cur_class
227+ stack .append ((cur_class , thisindent ))
225228 elif token == 'import' and start [1 ] == 0 :
226229 modules = _getnamelist (g )
227230 for mod , mod2 in modules :
228231 try :
229232 # Recursively read the imported module
230233 if not inpackage :
231- readmodule_ex (mod , path )
234+ _readmodule (mod , path )
232235 else :
233236 try :
234- readmodule_ex (mod , path , inpackage )
237+ _readmodule (mod , path , inpackage )
235238 except ImportError :
236- readmodule_ex (mod )
239+ _readmodule (mod , [] )
237240 except :
238241 # If we can't find or parse the imported module,
239242 # too bad -- don't die here.
@@ -245,7 +248,7 @@ def readmodule_ex(module, path=[], inpackage=None):
245248 names = _getnamelist (g )
246249 try :
247250 # Recursively read the imported module
248- d = readmodule_ex (mod , path , inpackage )
251+ d = _readmodule (mod , path , inpackage )
249252 except :
250253 # If we can't find or parse the imported module,
251254 # too bad -- don't die here.
@@ -256,11 +259,9 @@ def readmodule_ex(module, path=[], inpackage=None):
256259 if n in d :
257260 dict [n2 or n ] = d [n ]
258261 elif n == '*' :
259- # only add a name if not already there (to mimic
260- # what Python does internally) also don't add
261- # names that start with _
262+ # don't add names that start with _
262263 for n in d :
263- if n [0 ] != '_' and not n in dict :
264+ if n [0 ] != '_' :
264265 dict [n ] = d [n ]
265266 except StopIteration :
266267 pass
@@ -306,3 +307,32 @@ def _getname(g):
306307 break
307308 parts .append (token )
308309 return ("." .join (parts ), token )
310+
def _main():
    """Command-line test driver: print a module's classes and functions.

    sys.argv[1] is either a module name or the path of a source file.
    If it names an existing path, the directory part becomes the search
    path and the base name (minus a trailing ".py", matched without
    regard to case) becomes the module name; otherwise the argument is
    used as the module name with an empty extra search path.

    The dictionary returned by readmodule_ex() is printed in order of
    line number: one "class" line per Class followed by its methods,
    and one "def" line per Function.  Exits with status 2 and a usage
    message when no argument is given.
    """
    import os
    if len(sys.argv) < 2:
        # Report misuse instead of dying with an IndexError traceback.
        sys.stderr.write("usage: %s module-or-file\n" % sys.argv[0])
        sys.exit(2)
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    descriptors = readmodule_ex(mod, path)
    # Values without a lineno attribute (such as the '__path__' list that
    # is present for packages) are given line 0 and therefore sort first.
    objs = list(descriptors.values())
    objs.sort(key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            # obj.methods maps method name -> line number; list the
            # methods in source order.
            methods = list(obj.methods.items())
            methods.sort(key=lambda item: item[1])
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))


if __name__ == "__main__":
    _main()
0 commit comments