Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0a6f954

Browse files
committed
Another big update, fixing all known bugs related to nesting functions
and classes. Also add a mini main program that dumps the results for a given file or module.
1 parent 4b2030f commit 0a6f954

1 file changed

Lines changed: 80 additions & 50 deletions

File tree

Lib/pyclbr.py

Lines changed: 80 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,19 @@
1-
"""Parse a Python file and retrieve classes and methods.
1+
"""Parse a Python module and describe its classes and methods.
22
3-
Parse enough of a Python file to recognize class and method
4-
definitions and to find out the superclasses of a class.
3+
Parse enough of a Python file to recognize imports and class and
4+
method definitions, and to find out the superclasses of a class.
55
66
The interface consists of a single function:
7-
readmodule_ex(module [, path[, inpackage]])
8-
module is the name of a Python module, path is an optional list of
9-
directories where the module is to be searched. If present, path is
10-
prepended to the system search path sys.path. (inpackage is used
11-
internally to search for a submodule of a package.)
12-
The return value is a dictionary. The keys of the dictionary are
13-
the names of the classes defined in the module (including classes
14-
that are defined via the from XXX import YYY construct). The values
15-
are class instances of the class Class defined here.
7+
readmodule_ex(module [, path])
8+
where module is the name of a Python module, and path is an optional
9+
list of directories where the module is to be searched. If present,
10+
path is prepended to the system search path sys.path. The return
11+
value is a dictionary. The keys of the dictionary are the names of
12+
the classes defined in the module (including classes that are defined
13+
via the from XXX import YYY construct). The values are class
14+
instances of the class Class defined here. One special key/value pair
15+
is present for packages: the key '__path__' has a list as its value
16+
which contains the package search path.
1617
1718
A class is described by the class Class in this module. Instances
1819
of this class have the following instance variables:
@@ -36,21 +37,12 @@
3637
name -- the name of the class
3738
file -- the file in which the class was defined
3839
lineno -- the line in the file on which the class statement occurred
39-
40-
41-
BUGS
42-
- Nested classes and functions can confuse it.
43-
44-
PACKAGE CAVEAT
45-
- When you call readmodule_ex for a package, dict['__path__'] is a
46-
list, which may confuse older class browsers. (readmodule filters
47-
these out though.)
4840
"""
4941

5042
import sys
5143
import imp
5244
import tokenize # Python tokenizer
53-
from token import NAME
45+
from token import NAME, DEDENT, NEWLINE
5446

5547
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
5648

@@ -86,14 +78,14 @@ def readmodule(module, path=[]):
8678
Call readmodule_ex() and then only keep Class objects from the
8779
resulting dictionary.'''
8880

89-
dict = readmodule_ex(module, path)
81+
dict = _readmodule(module, path)
9082
res = {}
9183
for key, value in dict.items():
9284
if isinstance(value, Class):
9385
res[key] = value
9486
return res
9587

96-
def readmodule_ex(module, path=[], inpackage=None):
88+
def readmodule_ex(module, path=[]):
9789
'''Read a module file and return a dictionary of classes.
9890
9991
Search for MODULE in PATH and sys.path, read and parse the
@@ -105,7 +97,10 @@ def readmodule_ex(module, path=[], inpackage=None):
10597
package search path; otherwise, we are searching for a top-level
10698
module, and PATH is combined with sys.path.
10799
'''
100+
return _readmodule(module, path)
108101

102+
def _readmodule(module, path, inpackage=None):
103+
'''Do the hard work for readmodule[_ex].'''
109104
# Compute the full module name (prepending inpackage if set)
110105
if inpackage:
111106
fullmodule = "%s.%s" % (inpackage, module)
@@ -129,10 +124,10 @@ def readmodule_ex(module, path=[], inpackage=None):
129124
if i >= 0:
130125
package = module[:i]
131126
submodule = module[i+1:]
132-
parent = readmodule_ex(package, path, inpackage)
127+
parent = _readmodule(package, path, inpackage)
133128
if inpackage:
134129
package = "%s.%s" % (inpackage, package)
135-
return readmodule_ex(submodule, parent['__path__'], package)
130+
return _readmodule(submodule, parent['__path__'], package)
136131

137132
# Search the path for the module
138133
f = None
@@ -150,36 +145,42 @@ def readmodule_ex(module, path=[], inpackage=None):
150145
f.close()
151146
return dict
152147

153-
classstack = [] # stack of (class, indent) pairs
148+
stack = [] # stack of (class, indent) pairs
154149

155150
g = tokenize.generate_tokens(f.readline)
156151
try:
157152
for tokentype, token, start, end, line in g:
158-
if token == 'def':
153+
if tokentype == DEDENT:
154+
lineno, thisindent = start
155+
# close nested classes and defs
156+
while stack and stack[-1][1] >= thisindent:
157+
del stack[-1]
158+
elif token == 'def':
159159
lineno, thisindent = start
160+
# close previous nested classes and defs
161+
while stack and stack[-1][1] >= thisindent:
162+
del stack[-1]
160163
tokentype, meth_name, start, end, line = g.next()
161164
if tokentype != NAME:
162165
continue # Syntax error
163-
# close all classes indented at least as much
164-
while classstack and \
165-
classstack[-1][1] >= thisindent:
166-
del classstack[-1]
167-
if classstack:
168-
# it's a class method
169-
cur_class = classstack[-1][0]
170-
cur_class._addmethod(meth_name, lineno)
166+
if stack:
167+
cur_class = stack[-1][0]
168+
if isinstance(cur_class, Class):
169+
# it's a method
170+
cur_class._addmethod(meth_name, lineno)
171+
# else it's a nested def
171172
else:
172173
# it's a function
173174
dict[meth_name] = Function(module, meth_name, file, lineno)
175+
stack.append((None, thisindent)) # Marker for nested fns
174176
elif token == 'class':
175177
lineno, thisindent = start
178+
# close previous nested classes and defs
179+
while stack and stack[-1][1] >= thisindent:
180+
del stack[-1]
176181
tokentype, class_name, start, end, line = g.next()
177182
if tokentype != NAME:
178183
continue # Syntax error
179-
# close all classes indented at least as much
180-
while classstack and \
181-
classstack[-1][1] >= thisindent:
182-
del classstack[-1]
183184
# parse what follows the class name
184185
tokentype, token, start, end, line = g.next()
185186
inherit = None
@@ -208,6 +209,7 @@ def readmodule_ex(module, path=[], inpackage=None):
208209
if c in d:
209210
n = d[c]
210211
names.append(n)
212+
super = []
211213
if token == '(':
212214
level += 1
213215
elif token == ')':
@@ -220,20 +222,21 @@ def readmodule_ex(module, path=[], inpackage=None):
220222
super.append(token)
221223
inherit = names
222224
cur_class = Class(module, class_name, inherit, file, lineno)
223-
dict[class_name] = cur_class
224-
classstack.append((cur_class, thisindent))
225+
if not stack:
226+
dict[class_name] = cur_class
227+
stack.append((cur_class, thisindent))
225228
elif token == 'import' and start[1] == 0:
226229
modules = _getnamelist(g)
227230
for mod, mod2 in modules:
228231
try:
229232
# Recursively read the imported module
230233
if not inpackage:
231-
readmodule_ex(mod, path)
234+
_readmodule(mod, path)
232235
else:
233236
try:
234-
readmodule_ex(mod, path, inpackage)
237+
_readmodule(mod, path, inpackage)
235238
except ImportError:
236-
readmodule_ex(mod)
239+
_readmodule(mod, [])
237240
except:
238241
# If we can't find or parse the imported module,
239242
# too bad -- don't die here.
@@ -245,7 +248,7 @@ def readmodule_ex(module, path=[], inpackage=None):
245248
names = _getnamelist(g)
246249
try:
247250
# Recursively read the imported module
248-
d = readmodule_ex(mod, path, inpackage)
251+
d = _readmodule(mod, path, inpackage)
249252
except:
250253
# If we can't find or parse the imported module,
251254
# too bad -- don't die here.
@@ -256,11 +259,9 @@ def readmodule_ex(module, path=[], inpackage=None):
256259
if n in d:
257260
dict[n2 or n] = d[n]
258261
elif n == '*':
259-
# only add a name if not already there (to mimic
260-
# what Python does internally) also don't add
261-
# names that start with _
262+
# don't add names that start with _
262263
for n in d:
263-
if n[0] != '_' and not n in dict:
264+
if n[0] != '_':
264265
dict[n] = d[n]
265266
except StopIteration:
266267
pass
@@ -306,3 +307,32 @@ def _getname(g):
306307
break
307308
parts.append(token)
308309
return (".".join(parts), token)
310+
311+
def _main():
312+
# Main program for testing.
313+
import os
314+
mod = sys.argv[1]
315+
if os.path.exists(mod):
316+
path = [os.path.dirname(mod)]
317+
mod = os.path.basename(mod)
318+
if mod.lower().endswith(".py"):
319+
mod = mod[:-3]
320+
else:
321+
path = []
322+
dict = readmodule_ex(mod, path)
323+
objs = dict.values()
324+
objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
325+
getattr(b, 'lineno', 0)))
326+
for obj in objs:
327+
if isinstance(obj, Class):
328+
print "class", obj.name, obj.super, obj.lineno
329+
methods = obj.methods.items()
330+
methods.sort(lambda a, b: cmp(a[1], b[1]))
331+
for name, lineno in methods:
332+
if name != "__path__":
333+
print " def", name, lineno
334+
elif isinstance(obj, Function):
335+
print "def", obj.name, obj.lineno
336+
337+
if __name__ == "__main__":
338+
_main()

0 commit comments

Comments
 (0)