22
33# This script counts the lines of code in various types of source files.
44# It has options to print summaries at various levels.
5+ #
6+ # It can be used with Python versions 2 and 3.
57
68# todo:
79# - possibly specify/override/add files/dirs to be ignored on command line
@@ -21,7 +23,7 @@ import argparse
2123# - A list of matching file names
2224# - The delimiter defining a comment in a single line
2325# - The delimiters defining the start and end of a block comment
24- # - indication if file contains code or other info (1=code, 0=other)
26+ # - Indication if file contains code or other info (1=code, 0=other)
2527# Similar to cloc there are a few theoretical problems:
2628# 1. If a quoted string contains comment delimiters, they are recognized
2729# as comment delimiters. In principle some regexes could be defined to replace
@@ -30,18 +32,19 @@ import argparse
3032# 2. A regex like '""".*"""' is greedy, thus a line like """some1"""some2"""
3133# is fully matched. In practice such lines are not used.
3234# See cnt.py-new for an attempt at solving these issues.
33- types = [ ('C++' , ['cc' , 'tcc' , 'hcc' , 'cpp' , 'cxx' ], None , [], '//' , '/\ *' , '\ */' , 1 ),
34- ('C++Hdr' , ['h' , 'hpp' , 'hxx' ], None , [], '//' , '/\ *' , '\ */' , 1 ),
35- ('C' , ['c' ], None , [], '//' , '/\ *' , '\ */' , 1 ),
36- ('Cuda' , ['cu' ], None , [], '//' , '/\ *' , '\ */' , 1 ),
37- ('OpenCL' , ['cl' ], None , [], '//' , '/\ *' , '\ */' , 1 ),
35+ types = [ ('C++' , ['cc' , 'tcc' , 'hcc' , 'cpp' , 'cxx' ], None , [], '//' , '/*' , '*/' , 1 ),
36+ ('C++Hdr' , ['h' , 'hpp' , 'hxx' ], None , [], '//' , '/*' , '*/' , 1 ),
37+ ('C' , ['c' ], None , [], '//' , '/*' , '*/' , 1 ),
38+ ('Cuda' , ['cu' ], None , [], '//' , '/*' , '*/' , 1 ),
39+ ('OpenCL' , ['cl' ], None , [], '//' , '/*' , '*/' , 1 ),
3840 ('Fortran' , ['f' , 'for' ], None , [], '*' , '' , '' , 1 ),
3941 ('Assembly' , ['m' , 'S' ], None , [], '' , '' , '' , 1 ),
4042 ('Lisp' , ['lisp' ], None , [], '' , '' , '' , 1 ),
4143 ('SQL' , ['sql' ], None , [], '--' , '' , '' , 1 ),
42- ('Flex' , ['l' , 'll' ], None , [], '//' , '/\*' , '\*/' , 1 ),
43- ('Bison' , ['y' , 'yy' ], None , [], '//' , '/\*' , '\*/' , 1 ),
44- ('Python' , ['py' , 'python' ], None , [], '#' , '"""' , '"""' , 1 ),
44+ ('TaQL' , ['taql' ], None , [], '#' , '' , '' , 1 ),
45+ ('Flex' , ['l' , 'll' ], None , [], '//' , '/*' , '*/' , 1 ),
46+ ('Bison' , ['y' , 'yy' ], None , [], '//' , '/*' , '*/' , 1 ),
47+ ('Python' , ['py' , 'python' , 'python3' ], None , [], '#' , '"""' , '"""' , 1 ),
4548 ('Perl' , ['pl' , 'perl' ], None , [], '#' , '' , '' , 1 ),
4649 ('test-run' , ['run' ], None , [], '' ,'' ,'' , 0 ),
4750 ('test-in' , ['in' ], re .compile ('in_.*' ), [], '' ,'' ,'' , 0 ),
@@ -66,18 +69,18 @@ types = [ ('C++', ['cc', 'tcc', 'hcc', 'cpp', 'cxx'], None, [], '//', '/\*', '\*
6669
def showTypes(verbose):
    """Print every entry of the module-level ``types`` table on stdout.

    For each type the name, the code flag and the file name extensions are
    printed.  If *verbose* is true, the extension pattern, the explicit file
    names and the comment delimiters are printed as well (when defined).

    Each output line is formatted into a single string before printing.
    ``print(a, b)`` only gives ``a b`` under Python 3; under Python 2
    (without ``print_function``) it prints the repr of a tuple, so the
    two-argument form is avoided to keep the output identical on both.
    """
    def show(label, value):
        # Same text as Python 3's print(label, value): one separating space.
        print('%s %s' % (label, value))

    for (name, exts, extre, filenms, comm, scomm, ecomm, ctyp) in types:
        print('%-24s code=%d' % (name, ctyp))
        show(' file name extensions: ', exts)
        # NOTE(review): the detail lines below are taken to be nested under
        # the verbose check (help text says "full info with -v") -- confirm.
        if not verbose:
            continue
        if extre is not None:
            show(' extension pattern: ', extre.pattern)
        if len(filenms) > 0:
            show(' file names: ', filenms)
        if len(comm) > 0:
            show(' comment marker: ', comm)
        if len(scomm) > 0:
            show(' start comment block: ', scomm)
            show(' end comment block: ', ecomm)
8184
8285
8386# Define regex for a line containing an alphanumeric character
@@ -96,14 +99,27 @@ def is_textfile(filename):
9699 CHUNKSIZE = 4096
97100 while 1 :
98101 chunk = fin .read (CHUNKSIZE )
99- if '\0 ' in chunk :
100- return False
102+ if sys .version_info .major == 2 :
103+ if '\0 ' in chunk :
104+ return False
105+ else :
106+ if 0 in chunk :
107+ return False
101108 if len (chunk ) < CHUNKSIZE :
102109 break
103110 finally :
104111 fin .close ()
105112 return True
106113
def add_escape(str):
    """Return *str* with all regex metacharacters backslash-escaped.

    The result can be concatenated into a larger regular expression so that
    a comment delimiter such as '/*' or '(*' is matched literally.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    that existing callers are not affected.
    """
    # re.escape handles every metacharacter (also '(', ')', '+', '?', '^',
    # '$', '{', '}' and the backslash itself), where a hand-written list of
    # characters would let some of them through and produce a wrong or
    # invalid pattern.
    return re.escape(str)
122+
107123
108124# Return tuple with nr of files, nr of lines, nr of code lines,
109125# nr of comment lines, nr of blank lines, and nr of header lines.
@@ -122,14 +138,16 @@ def countcodecomm (filename, linecomm, scomm='', ecomm='', basic=False):
122138 skipHeader = not basic
123139 blockComm = False
124140 if len (scomm ) > 0 :
125- reComm2a = re .compile ('\s*' + scomm + '\s*' + ecomm + '\s*' )
126- reComm2b = re .compile ('\s*' + scomm + '(.*)' + ecomm + '\s*' )
127- reSComm = re .compile (scomm )
128- reEComm = re .compile (ecomm )
129- reTillSComm = re .compile ('.*' + scomm + '\s*' )
130- reTillEComm = re .compile ('.*' + ecomm + '\s*' )
131- reFromSComm = re .compile ('\s*' + scomm + '.*' )
132- reFromEComm = re .compile ('\s*' + ecomm + '.*' )
141+ scomm_esc = add_escape (scomm )
142+ ecomm_esc = add_escape (scomm )
143+ reComm2a = re .compile ('\s*' + scomm_esc + '\s*' + ecomm_esc + '\s*' )
144+ reComm2b = re .compile ('\s*' + scomm_esc + '(.*)' + ecomm_esc + '\s*' )
145+ reSComm = re .compile (scomm_esc )
146+ reEComm = re .compile (ecomm_esc )
147+ reTillSComm = re .compile ('.*' + scomm_esc + '\s*' )
148+ reTillEComm = re .compile ('.*' + ecomm_esc + '\s*' )
149+ reFromSComm = re .compile ('\s*' + scomm_esc + '.*' )
150+ reFromEComm = re .compile ('\s*' + ecomm_esc + '.*' )
133151 # Loop over all lines in the file.
134152 for line in f :
135153 nline += 1
@@ -257,7 +275,7 @@ def countother(filename, basic, usecode):
257275 return ('unknown' , 0 , (1 ,0 ,0 ,0 ,0 ,0 ))
258276 return ('unknown' , 0 , (1 ,nline ,0 ,0 ,nblank ,0 ))
259277
260- def countfiles (dirname , test , basic , ccperc , verbose , printlevel , level , usecode , dosum ):
278+ def countfiles (dirname , test , basic , ccperc , verbose , printlevel , level , usecode , dosum , warn_unknown ):
261279 sums = [{}, {}]
262280 for t in types :
263281 sums [0 ][t [0 ]] = [0 ,0 ,0 ,0 ,0 ,0 ]
@@ -279,7 +297,7 @@ def countfiles(dirname, test, basic, ccperc, verbose, printlevel, level, usecode
279297 # skip symlinks because casacore contains symlink to itself
280298 continue
281299 elif stat .S_ISDIR (mode ):
282- cnts = countfiles (ffile , test , basic , ccperc , verbose , printlevel , level + 1 , usecode , dosum )
300+ cnts = countfiles (ffile , test , basic , ccperc , verbose , printlevel , level + 1 , usecode , dosum , warn_unknown )
283301 for j in [0 ,1 ]:
284302 for t in types :
285303 for i in range (len (sums [j ][t [0 ]])):
@@ -308,7 +326,8 @@ def countfiles(dirname, test, basic, ccperc, verbose, printlevel, level, usecode
308326 for i in range (len (cnt )):
309327 sums [inx ][type ][i ] += cnt [i ]
310328 if type == 'unknown' :
311- sys .stderr .write ('Unknown type: %s\n ' % ffile )
329+ if warn_unknown :
330+ sys .stderr .write ('Unknown type: %s\n ' % ffile )
312331 elif verbose :
313332 sys .stderr .write ('** %s\n ' % ffile )
314333 printCount (sys .stderr , type , cnt , ccperc );
@@ -336,10 +355,10 @@ def countfiles(dirname, test, basic, ccperc, verbose, printlevel, level, usecode
336355 return sums
337356
def testit():
    """Run countcodecomm on the fixed sample files and print each result.

    Every entry in the table is the positional argument list of one call:
    file name, line comment marker and, optionally, the block comment start
    and end delimiters followed by the 'basic' flag.
    """
    calls = (
        ('/Users/diepen/testcnt1', '#'),
        ('/Users/diepen/testcnt2', '#', '"""', '"""'),
        ('/Users/diepen/testcnt1', '#', '', '', False),
        ('/Users/diepen/testcnt2', '#', '"""', '"""', False),
    )
    for args in calls:
        print(countcodecomm(*args))
343362
344363
345364if __name__ == '__main__' :
@@ -350,6 +369,7 @@ if __name__ == '__main__':
350369 parser .add_argument ('-s' , '--sum' , help = 'only calculate and print the sum of all file types' , action = 'store_true' )
351370 parser .add_argument ('-l' , '--limitperc' , help = 'limit to the nr of code and comment lines to determine percentages' , action = 'store_true' )
352371 parser .add_argument ('-p' , '--printlevel' , type = int , default = 0 , help = 'first directory level to print (default 0 (=top))' )
372+ parser .add_argument ('-w' , '--warn_unknown' , help = 'warn if a file with an unknown type is found' , action = 'store_true' )
353373 parser .add_argument ('-d' , '--displaytypes' , help = 'display the currently recognized file types (full info with -v)' , action = 'store_true' )
354374 parser .add_argument ('-t' , '--testinclude' , help = 'do not count test directories separately' , action = 'store_true' )
355375 parser .add_argument ('-v' , '--verbose' , help = 'print count for each source file' , action = 'store_true' )
@@ -358,26 +378,26 @@ if __name__ == '__main__':
358378 if len (sys .argv ) == 1 :
359379 #print 'Testing the script ...'
360380 #testit()
361- print ''
362- print 'countcode counts per known source file type the number of source lines in the'
363- print ' files in the given directory and recursively in its subdirectories.'
364- print 'It supports many file types. The type is recognized from the file name extension'
365- print ' or the shebang script type. Use -s to see all supported types.'
366- print 'The following line types are counted:'
367- print ' code: pure code lines)'
368- print ' comment: pure comment lines'
369- print ' blank: empty lines or lines containing whitespace only'
370- print ' header: the copyright header (leading comment lines)'
371- print ' other: all other lines (e.g., single {, /*, etc.)'
372- print 'Unless -b is given, a pure code or comment line has to contain an alphanumeric'
373- print ' character; e.g., a single } does not count as code line.'
374- print 'It calculates the percentage of code and comment lines in the total number of'
375- print ' lines or (if -l is given) in the sum of code and comment lines.'
376- print 'Unless -t is given, files in test directories are counted separately.'
377- print 'Normal output is written on stdout; verbose on stderr.'
378- print 'Files with an unknown type are reported on stderr.'
379- print 'Note that -bt should give about the same results as a tool like cloc.'
380- print ''
381+ print ( '' )
382+ print ( 'countcode counts per known source file type the number of source lines in the' )
383+ print ( ' files in the given directory and recursively in its subdirectories.' )
384+ print ( 'It supports many file types. The type is recognized from the file name extension' )
385+ print ( ' or the shebang script type. Use -s to see all supported types.' )
386+ print ( 'The following line types are counted:' )
387+ print ( ' code: pure code lines)' )
388+ print ( ' comment: pure comment lines' )
389+ print ( ' blank: empty lines or lines containing whitespace only' )
390+ print ( ' header: the copyright header (leading comment lines)' )
391+ print ( ' other: all other lines (e.g., single {, /*, etc.)' )
392+ print ( 'Unless -b is given, a pure code or comment line has to contain an alphanumeric' )
393+ print ( ' character; e.g., a single } does not count as code line.' )
394+ print ( 'It calculates the percentage of code and comment lines in the total number of' )
395+ print ( ' lines or (if -l is given) in the sum of code and comment lines.' )
396+ print ( 'Unless -t is given, files in test directories are counted separately.' )
397+ print ( 'Normal output is written on stdout; verbose on stderr.' )
398+ print ( 'Files with an unknown type are reported on stderr.' )
399+ print ( 'Note that -bt should give about the same results as a tool like cloc.' )
400+ print ( '' )
381401 parser .parse_args (['-h' ])
382402 else :
383403 values = parser .parse_args (sys .argv [1 :])
@@ -391,6 +411,6 @@ if __name__ == '__main__':
391411 dirname = dirname [:- 1 ]
392412 sys .stdout .write ('%s Count %s test=%d basic=%d limitperc=%d code=%d\n ' % (time .ctime (),dirname ,test ,values .basic ,values .limitperc ,values .code ))
393413 printHeader ()
394- countfiles (dirname , test , values .basic , values .limitperc , values .verbose , values .printlevel , 0 , values .code , values .sum )
414+ countfiles (dirname , test , values .basic , values .limitperc , values .verbose , values .printlevel , 0 , values .code , values .sum , values . warn_unknown )
395415 printHeader ()
396416 sys .stdout .write ('%s Count %s test=%d basic=%d limitperc=%d code=%d\n ' % (time .ctime (),dirname ,test ,values .basic ,values .limitperc ,values .code ))
0 commit comments