66
77"""
88This script runs the CSV coverage report QL query, and transforms it to a more readable format.
9- """
9+ There are two main outputs: (i) a CSV file containing the coverage data, and (ii) an RST page containing the coverage
10+ data.
11+ """
1012
1113
1214def subprocess_run (cmd ):
@@ -48,8 +50,20 @@ def append_csv_dict_item(list, dictionary, key):
4850 list .append (None )
4951
5052
51- def collect_package_stats (packages , filter ):
52- """Collects coverage statistics for packages matching the given filter."""
def increment_dict_item(value, dictionary, key):
    """
    Increments the value of dictionary[key] by value.

    `value` is converted with `int()`, so both integers and numeric strings (for example a CSV cell) are accepted. A
    missing `key` is treated as having the value 0. The dictionary is modified in place and nothing is returned.

    Raises whatever `int(value)` raises (for example `ValueError` for a non-numeric string); in that case the dictionary
    is left untouched.
    """
    # Convert before touching the dictionary, so that a failed conversion does not leave a spurious `key: 0` entry behind.
    amount = int(value)
    dictionary[key] = dictionary.get(key, 0) + amount
58+
59+
60+ def collect_package_stats (packages , cwes , filter ):
61+ """
62+ Collects coverage statistics for packages matching the given filter. `filter` is a `lambda` that for example (i) matches
63+ packages to frameworks, or (ii) matches packages that were previously not processed.
64+
65+ The returned statistics are used to generate a single row in a CSV file.
66+ """
5367 sources = 0
5468 steps = 0
5569 sinks = 0
@@ -75,7 +89,11 @@ def collect_package_stats(packages, filter):
7589
7690
7791def add_package_stats_to_row (row , sorted_cwes , collect ):
78- """ Adds collected statistic to the row. """
92+ """
93+ Adds the collected statistics to the row. `collect` is a `lambda` that returns the statistics, for example, for (i) individual
94+ frameworks, (ii) left-out frameworks summarized in the 'Others' row, or (iii) all frameworks summarized in the 'Totals'
95+ row.
96+ """
7997 sources , steps , sinks , framework_cwes , processed_packages = collect ()
8098
8199 append_csv_number (row , sources )
@@ -112,11 +130,19 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
112130 "java" , "Java" , ".java" , prefix + "java/ql/src/meta/frameworks/Coverage.ql" )
113131]
114132
115- with open ("flow-model-coverage.rst" , 'w' ) as rst_file :
133+ # The names of input and output files. The placeholder {language} is replaced with the language name.
134+ output_rst = "flow-model-coverage.rst"
135+ output_rst_csv = "rst-csv-flow-model-coverage-{language}.csv"
136+ output_ql_csv = "output-{language}.csv"
137+ output_csv = "csv-flow-model-coverage-{language}.csv"
138+ input_framework_csv = prefix + "misc/scripts/frameworks-{language}.csv"
139+ input_cwe_sink_csv = prefix + "misc/scripts/cwe-sink-{language}.csv"
140+
141+ with open (output_rst , 'w' ) as rst_file :
116142 for config in configs :
117143 lang = config .lang
118144 db = "empty-" + lang
119- ql_output = "output-" + lang + ".csv"
145+ ql_output = output_ql_csv . format ( language = lang )
120146 create_empty_database (lang , config .ext , db )
121147 run_codeql_query (config .ql_path , db , ql_output )
122148
@@ -128,36 +154,37 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
128154 with open (ql_output ) as csvfile :
129155 reader = csv .reader (csvfile )
130156 for row in reader :
157+ # row: "android.util",1,"remote","source",16
131158 package = row [0 ]
132159 if package not in packages :
133160 packages [package ] = {
134161 "count" : row [1 ],
162+ # part: "summary", "sink", or "source"
135163 "part" : {},
164+ # kind: "source:remote", "sink:create-file", ...
136165 "kind" : {}
137166 }
167+
138168 part = row [3 ]
139169 parts .add (part )
140- if part not in packages [package ]["part" ]:
141- packages [package ]["part" ][part ] = 0
142- packages [package ]["part" ][part ] += int (row [4 ])
170+ increment_dict_item (row [4 ], packages [package ]["part" ], part )
171+
143172 kind = part + ":" + row [2 ]
144173 kinds .add (kind )
145- if kind not in packages [package ]["kind" ]:
146- packages [package ]["kind" ][kind ] = 0
147- packages [package ]["kind" ][kind ] += int (row [4 ])
174+ increment_dict_item (row [4 ], packages [package ]["kind" ], kind )
175+
176+ parts = sorted (parts )
177+ kinds = sorted (kinds )
148178
149179 # Write the denormalized package statistics to a CSV file.
150- with open ("csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
180+ with open (output_csv . format ( language = lang ) , 'w' , newline = '' ) as csvfile :
151181 csvwriter = csv .writer (csvfile )
152182
153- parts = sorted (parts )
154- kinds = sorted (kinds )
155-
156- columns = ["package" ]
157- columns .extend (parts )
158- columns .extend (kinds )
183+ headers = ["package" ]
184+ headers .extend (parts )
185+ headers .extend (kinds )
159186
160- csvwriter .writerow (columns )
187+ csvwriter .writerow (headers )
161188
162189 for package in sorted (packages ):
163190 row = [package ]
@@ -170,10 +197,11 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
170197 # Read the additional framework data, such as URL, friendly name
171198 frameworks = {}
172199
173- with open (prefix + "misc/scripts/frameworks-" + lang + ".csv" ) as csvfile :
200+ with open (input_framework_csv . format ( language = lang ) ) as csvfile :
174201 reader = csv .reader (csvfile )
175202 next (reader )
176203 for row in reader :
204+ # row: Hibernate,https://hibernate.org/,org.hibernate
177205 framwork = row [0 ]
178206 if framwork not in frameworks :
179207 frameworks [framwork ] = {
@@ -184,18 +212,21 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
184212 # Read the additional CWE data
185213 cwes = {}
186214
187- with open (prefix + "misc/scripts/cwe-sink-" + lang + ".csv" ) as csvfile :
215+ with open (input_cwe_sink_csv . format ( language = lang ) ) as csvfile :
188216 reader = csv .reader (csvfile )
189217 next (reader )
190218 for row in reader :
219+ # row: CWE-89,sql,SQL injection
191220 cwe = row [0 ]
192221 if cwe not in cwes :
193222 cwes [cwe ] = {
194223 "sink" : row [1 ],
195224 "label" : row [2 ]
196225 }
197226
198- file_name = "rst-csv-flow-model-coverage-" + lang + ".csv"
227+ sorted_cwes = sorted (cwes )
228+
229+ file_name = output_rst_csv .format (language = lang )
199230
200231 rst_file .write (
201232 config .capitalized_lang + " framework & library support\n " )
@@ -210,17 +241,23 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
210241 with open (file_name , 'w' , newline = '' ) as csvfile :
211242 csvwriter = csv .writer (csvfile )
212243
213- columns = ["Framework / library" , "package" ,
214- "remote flow sources" , "taint & value steps" , "sinks (total)" ]
215- for cwe in sorted (cwes ):
216- columns .append ("`" + cwe + "` :sub:`" +
217- cwes [cwe ]["label" ] + "`" )
218- csvwriter .writerow (columns )
244+ # Write CSV header.
245+ headers = ["Framework / library" ,
246+ "Package" ,
247+ "Remote flow sources" ,
248+ "Taint & value steps" ,
249+ "Sinks (total)" ]
250+ for cwe in sorted_cwes :
251+ headers .append (
252+ "`{0}` :sub:`{1}`" .format (cwe , cwes [cwe ]["label" ]))
253+ csvwriter .writerow (headers )
219254
220255 processed_packages = set ()
221256
257+ # Write a row for each framework.
222258 for framework in sorted (frameworks ):
223259 row = []
260+
224261 # Add the framework name to the row
225262 if not frameworks [framework ]["url" ]:
226263 row .append (framework )
@@ -234,12 +271,12 @@ def __init__(self, lang, capitalized_lang, ext, ql_path):
234271 prefix = frameworks [framework ]["package" ]
235272
236273 # Collect statistics on the current framework
274+ # The package name is either a full name, such as "org.hibernate", or a prefix, such as "java.*"
237275 def collect_framework (): return collect_package_stats (
238- packages ,
239- lambda p : (prefix .endswith ("*" ) and p .startswith (prefix [:- 1 ])) or (not prefix .endswith ("*" ) and prefix == p ))
276+ packages , cwes , lambda p : (prefix .endswith ("*" ) and p .startswith (prefix [:- 1 ])) or (not prefix .endswith ("*" ) and prefix == p ))
240277
241278 row , f_processed_packages = add_package_stats_to_row (
242- row , sorted ( cwes ) , collect_framework )
279+ row , sorted_cwes , collect_framework )
243280
244281 csvwriter .writerow (row )
245282 processed_packages .update (f_processed_packages )
@@ -248,11 +285,10 @@ def collect_framework(): return collect_package_stats(
248285 row = ["Others" , None ]
249286
250287 def collect_others (): return collect_package_stats (
251- packages ,
252- lambda p : p not in processed_packages )
288+ packages , cwes , lambda p : p not in processed_packages )
253289
254290 row , other_packages = add_package_stats_to_row (
255- row , sorted ( cwes ) , collect_others )
291+ row , sorted_cwes , collect_others )
256292
257293 row [1 ] = ", " .join ("``{0}``" .format (p )
258294 for p in sorted (other_packages ))
@@ -262,11 +298,9 @@ def collect_others(): return collect_package_stats(
262298 # Collect statistics on all packages
263299 row = ["Totals" , None ]
264300
265- def collect_total (): return collect_package_stats (
266- packages ,
267- lambda p : True )
301+ def collect_total (): return collect_package_stats (packages , cwes , lambda p : True )
268302
269303 row , _ = add_package_stats_to_row (
270- row , sorted ( cwes ) , collect_total )
304+ row , sorted_cwes , collect_total )
271305
272306 csvwriter .writerow (row )
0 commit comments