44import shutil
55import settings
66import utils
7+ import packages as pack
8+ import frameworks as fr
79
810"""
911This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -28,14 +30,7 @@ def append_csv_dict_item(list, dictionary, key):
2830 list .append (None )
2931
3032
31- def increment_dict_item (value , dictionary , key ):
32- """Increments the value of the dictionary[key] by value."""
33- if key not in dictionary :
34- dictionary [key ] = 0
35- dictionary [key ] += int (value )
36-
37-
38- def collect_package_stats (packages , cwes , filter ):
33+ def collect_package_stats (packages : pack .PackageCollection , cwes , filter ):
3934 """
4035 Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (1) matches
4136 packages to frameworks, or (2) matches packages that were previously not processed.
@@ -48,20 +43,21 @@ def collect_package_stats(packages, cwes, filter):
4843 framework_cwes = {}
4944 processed_packages = set ()
5045
51- for package in packages :
46+ for package in packages .get_packages ():
47+ package : pack .Package = package
5248 if filter (package ):
5349 processed_packages .add (package )
54- sources += int ( packages [ package ][ "kind" ]. get ("source:remote" , 0 ) )
55- steps += int ( packages [ package ][ "part" ]. get ("summary" , 0 ) )
56- sinks += int ( packages [ package ][ "part" ]. get ("sink" , 0 ) )
50+ sources += package . get_kind_count ("source:remote" )
51+ steps += package . get_part_count ("summary" )
52+ sinks += package . get_part_count ("sink" )
5753
5854 for cwe in cwes :
5955 sink = "sink:" + cwes [cwe ]["sink" ]
60- if sink in packages [package ]["kind" ]:
56+ count = package .get_kind_count (sink )
57+ if count > 0 :
6158 if cwe not in framework_cwes :
6259 framework_cwes [cwe ] = 0
63- framework_cwes [cwe ] += int (
64- packages [package ]["kind" ][sink ])
60+ framework_cwes [cwe ] += count
6561
6662 return sources , steps , sinks , framework_cwes , processed_packages
6763
@@ -137,37 +133,12 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
137133 utils .run_codeql_query (config .ql_path , db , ql_output )
138134 shutil .rmtree (db )
139135
140- packages = {}
141- parts = set ()
142- kinds = set ()
143-
144- # Read the generated CSV file, and collect package statistics.
145- with open (ql_output ) as csvfile :
146- reader = csv .reader (csvfile )
147- for row in reader :
148- # row: "android.util",1,"remote","source",16
149- package = row [0 ]
150- if package not in packages :
151- packages [package ] = {
152- "count" : row [1 ],
153- # part: "summary", "sink", or "source"
154- "part" : {},
155- # kind: "source:remote", "sink:create-file", ...
156- "kind" : {}
157- }
158-
159- part = row [3 ]
160- parts .add (part )
161- increment_dict_item (row [4 ], packages [package ]["part" ], part )
162-
163- kind = part + ":" + row [2 ]
164- kinds .add (kind )
165- increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
136+ packages = pack .PackageCollection (ql_output )
166137
167138 os .remove (ql_output )
168139
169- parts = sorted ( parts )
170- kinds = sorted ( kinds )
140+ parts = packages . get_parts ( )
141+ kinds = packages . get_kinds ( )
171142
172143 # Write the denormalized package statistics to a CSV file.
173144 with open (output_csv .format (language = lang ), 'w' , newline = '' ) as csvfile :
@@ -179,44 +150,21 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
179150
180151 csvwriter .writerow (headers )
181152
182- for package in sorted (packages ):
183- row = [package ]
153+ for package in packages .get_packages ():
154+ package : pack .Package = package
155+ row = [package .name ]
184156 for part in parts :
185- append_csv_dict_item (row , packages [ package ][ " part" ], part )
157+ append_csv_number (row , package . get_part_count ( part ) )
186158 for kind in kinds :
187- append_csv_dict_item (row , packages [ package ][ " kind" ], kind )
159+ append_csv_number (row , package . get_kind_count ( kind ) )
188160 csvwriter .writerow (row )
189161
190162 # Read the additional framework data, such as URL and friendly name
191- frameworks = {}
192-
193- with open (input_framework_csv .format (language = lang )) as csvfile :
194- reader = csv .reader (csvfile )
195- next (reader )
196- for row in reader :
197- # row: Hibernate,https://hibernate.org/,org.hibernate
198- framwork = row [0 ]
199- if framwork not in frameworks :
200- frameworks [framwork ] = {
201- "package" : row [2 ],
202- "url" : row [1 ]
203- }
163+ frameworks = fr .FrameworkCollection (
164+ input_framework_csv .format (language = lang ))
204165
205166 # Read the additional CWE data
206- cwes = {}
207-
208- with open (input_cwe_sink_csv .format (language = lang )) as csvfile :
209- reader = csv .reader (csvfile )
210- next (reader )
211- for row in reader :
212- # row: CWE-89,sql,SQL injection
213- cwe = row [0 ]
214- if cwe not in cwes :
215- cwes [cwe ] = {
216- "sink" : row [1 ],
217- "label" : row [2 ]
218- }
219-
167+ cwes = utils .read_cwes (input_cwe_sink_csv .format (language = lang ))
220168 sorted_cwes = sorted (cwes )
221169
222170 with open (output_rst .format (language = lang ), 'w' , newline = '' ) as rst_file :
@@ -246,34 +194,24 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
246194
247195 processed_packages = set ()
248196
249- all_package_patterns = set (
250- (frameworks [fr ]["package" ] for fr in frameworks ))
251-
252197 # Write a row for each framework.
253- for framework in sorted (frameworks ):
198+ for framework in frameworks .get_frameworks ():
199+ framework : fr .Framework = framework
254200 row = []
255201
256202 # Add the framework name to the row
257- if not frameworks [ framework ][ " url" ] :
258- row .append (row_prefix + framework )
203+ if not framework . url :
204+ row .append (row_prefix + framework . name )
259205 else :
260206 row .append (
261- row_prefix + "`" + framework + " <" + frameworks [ framework ][ " url" ] + ">`_" )
207+ row_prefix + "`" + framework . name + " <" + framework . url + ">`_" )
262208
263209 # Add the package name to the row
264- row .append ("``" + frameworks [framework ]["package" ] + "``" )
265-
266- current_package_pattern = frameworks [framework ]["package" ]
210+ row .append ("``" + framework .package_pattern + "``" )
267211
268212 # Collect statistics on the current framework
269- # current_package_pattern is either full name, such as "org.hibernate", or a prefix, such as "java.*"
270- # Package patterns might overlap, in case of 'org.apache.commons.io' and 'org.apache.*', the statistics for
271- # the latter will not include the statistics for the former.
272- def package_match (package_name , pattern ): return (pattern .endswith (
273- "*" ) and package_name .startswith (pattern [:- 1 ])) or (not pattern .endswith ("*" ) and pattern == package_name )
274-
275213 def collect_framework (): return collect_package_stats (
276- packages , cwes , lambda p : package_match ( p , current_package_pattern ) and all ( len ( current_package_pattern ) >= len ( pattern ) or not package_match ( p , pattern ) for pattern in all_package_patterns ))
214+ packages , cwes , frameworks . get_package_filter ( framework ))
277215
278216 row , f_processed_packages = add_package_stats_to_row (
279217 row , sorted_cwes , collect_framework )
@@ -290,8 +228,8 @@ def collect_others(): return collect_package_stats(
290228 row , other_packages = add_package_stats_to_row (
291229 row , sorted_cwes , collect_others )
292230
293- row [1 ] = ", " .join ("``{0}``" .format (p )
294- for p in sorted (other_packages ))
231+ row [1 ] = ", " .join ("``{0}``" .format (p . name )
232+ for p in sorted (other_packages , key = lambda x : x . name ))
295233
296234 csvwriter .writerow (row )
297235
0 commit comments