@@ -32,6 +32,62 @@ def run_codeql_query(query, database, output):
3232 "--format=csv" , "--no-titles" , "--output" , output ])
3333
3434
def append_csv_number(list, value):
    """Append a positive number to a CSV row, or an empty cell otherwise.

    Args:
        list: The row (a mutable sequence) being built; it is modified in
            place. The name shadows the builtin but is kept so that existing
            callers are unaffected.
        value: The number to append.

    Returns:
        None. ``value`` is appended when it is greater than 0; otherwise
        ``None`` is appended, so zero and negative counts end up as ``None``
        placeholders rather than as numbers.
    """
    list.append(value if value > 0 else None)
41+
42+
def append_csv_dict_item(list, dictionary, key):
    """Append ``dictionary[key]`` to a CSV row, or ``None`` if the key is absent.

    Args:
        list: The row (a mutable sequence) being built; it is modified in
            place. The name shadows the builtin but is kept so that existing
            callers are unaffected.
        dictionary: Mapping to look the value up in.
        key: Key whose value should be appended.

    Returns:
        None. Exactly one element is always appended, so the columns of the
        row stay aligned even when the key is missing.
    """
    # dict.get returns None for a missing key, which is exactly the
    # placeholder wanted here; falsy values that are present (e.g. 0) are kept.
    list.append(dictionary.get(key))
49+
50+
def collect_package_stats(packages, filter, cwe_sinks=None):
    """Collect coverage statistics for packages matching the given filter.

    Args:
        packages: Mapping of package name to a dict with two sub-dicts,
            ``"kind"`` and ``"part"``, each mapping a name to a count.
        filter: Predicate called with a package name; only packages for which
            it returns a truthy value are counted. The name shadows the
            builtin but is kept so that existing callers are unaffected.
        cwe_sinks: Optional mapping of CWE id to a dict that has a ``"sink"``
            entry naming the sink kind (without the ``"sink:"`` prefix). When
            omitted, the module-level ``cwes`` table is used, which is what
            this function relied on implicitly before the parameter existed.

    Returns:
        A tuple ``(sources, steps, sinks, framework_cwes, processed_packages)``:
        the summed ``"source:remote"`` kind count, the summed ``"summary"`` and
        ``"sink"`` part counts, a dict of CWE id to summed sink count (only
        CWEs with at least one matching entry are present), and the set of
        package names that passed the filter.
    """
    sources = 0
    steps = 0
    sinks = 0
    framework_cwes = {}
    processed_packages = set()

    for package in packages:
        if not filter(package):
            continue

        if cwe_sinks is None:
            # Resolve the fallback only once a package actually matches, so
            # the global is looked up no earlier than it was originally.
            cwe_sinks = cwes

        processed_packages.add(package)
        kind_counts = packages[package]["kind"]
        part_counts = packages[package]["part"]

        # Counts are passed through int(), so numeric strings are accepted.
        sources += int(kind_counts.get("source:remote", 0))
        steps += int(part_counts.get("summary", 0))
        sinks += int(part_counts.get("sink", 0))

        for cwe, details in cwe_sinks.items():
            sink = "sink:" + details["sink"]
            if sink in kind_counts:
                framework_cwes[cwe] = (
                    framework_cwes.get(cwe, 0) + int(kind_counts[sink]))

    return sources, steps, sinks, framework_cwes, processed_packages
75+
76+
def add_package_stats_to_row(row, sorted_cwes, collect):
    """Add collected statistics to the row.

    Args:
        row: The CSV row (a list) to extend; it is modified in place.
        sorted_cwes: CWE ids in the order their columns should appear.
        collect: Zero-argument callable returning the tuple
            ``(sources, steps, sinks, framework_cwes, processed_packages)``.

    Returns:
        A tuple ``(row, processed_packages)``: the same row object, extended
        with the three totals followed by one cell per entry of
        ``sorted_cwes``, and the set of packages reported by ``collect``.
    """
    sources, steps, sinks, framework_cwes, processed_packages = collect()

    # Totals that are not greater than 0 are stored as None by the helper.
    for count in (sources, steps, sinks):
        append_csv_number(row, count)

    # One cell per CWE column; CWEs without data get a None placeholder.
    for cwe in sorted_cwes:
        append_csv_dict_item(row, framework_cwes, cwe)

    return row, processed_packages
89+
90+
3591class LanguageConfig :
3692 def __init__ (self , lang , ext , ql_path ):
3793 self .lang = lang
@@ -61,13 +117,14 @@ def __init__(self, lang, ext, ql_path):
61117 query_path = config .ql_path
62118 db = "empty-" + lang
63119 ql_output = "output-" + lang + ".csv"
64- create_empty_database (lang , ext , db )
120+ # create_empty_database(lang, ext, db)
65121 run_codeql_query (query_path , db , ql_output )
66122
67123 packages = {}
68124 parts = set ()
69125 kinds = set ()
70126
127+ # Read the generated CSV file, and collect package statistics.
71128 with open (ql_output ) as csvfile :
72129 reader = csv .reader (csvfile )
73130 for row in reader :
@@ -89,6 +146,7 @@ def __init__(self, lang, ext, ql_path):
89146 packages [package ]["kind" ][kind ] = 0
90147 packages [package ]["kind" ][kind ] += int (row [4 ])
91148
149+ # Write the denormalized package statistics to a CSV file.
92150 with open ("csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
93151 csvwriter = csv .writer (csvfile )
94152
@@ -104,13 +162,97 @@ def __init__(self, lang, ext, ql_path):
104162 for package in sorted (packages ):
105163 row = [package ]
106164 for part in parts :
107- if part in packages [package ]["part" ]:
108- row .append (packages [package ]["part" ][part ])
109- else :
110- row .append (None )
165+ append_csv_dict_item (row , packages [package ]["part" ], part )
111166 for kind in kinds :
112- if kind in packages [package ]["kind" ]:
113- row .append (packages [package ]["kind" ][kind ])
114- else :
115- row .append (None )
167+ append_csv_dict_item (row , packages [package ]["kind" ], kind )
116168 csvwriter .writerow (row )
169+
170+ # Read the additional framework data, such as URL, friendly name
171+ frameworks = {}
172+
173+ with open (prefix + "misc/scripts/frameworks-" + lang + ".csv" ) as csvfile :
174+ reader = csv .reader (csvfile )
175+ next (reader )
176+ for row in reader :
177+ framwork = row [0 ]
178+ if framwork not in frameworks :
179+ frameworks [framwork ] = {
180+ "package" : row [2 ],
181+ "url" : row [1 ]
182+ }
183+
184+ # Read the additional CWE data
185+ cwes = {}
186+
187+ with open (prefix + "misc/scripts/cwe-sink-" + lang + ".csv" ) as csvfile :
188+ reader = csv .reader (csvfile )
189+ next (reader )
190+ for row in reader :
191+ cwe = row [0 ]
192+ if cwe not in cwes :
193+ cwes [cwe ] = {
194+ "sink" : row [1 ],
195+ "label" : row [2 ]
196+ }
197+
198+ with open ("rst-csv-flow-model-coverage-" + lang + ".csv" , 'w' , newline = '' ) as csvfile :
199+ csvwriter = csv .writer (csvfile )
200+
201+ columns = ["Framework / library" , "package" ,
202+ "remote flow sources" , "taint & value steps" , "sinks (total)" ]
203+ for cwe in sorted (cwes ):
204+ columns .append ("`" + cwe + "` :sub:`" + cwes [cwe ]["label" ] + "`" )
205+ csvwriter .writerow (columns )
206+
207+ processed_packages = set ()
208+
209+ for framework in sorted (frameworks ):
210+ row = []
211+ # Add the framework name to the row
212+ if not frameworks [framework ]["url" ]:
213+ row .append (framework )
214+ else :
215+ row .append (
216+ "`" + framework + " <" + frameworks [framework ]["url" ] + ">`_" )
217+
218+ # Add the package name to the row
219+ row .append (frameworks [framework ]["package" ])
220+
221+ prefix = frameworks [framework ]["package" ]
222+
223+ # Collect statistics on the current framework
224+ def collect_framework (): return collect_package_stats (
225+ packages ,
226+ lambda p : (prefix .endswith ("*" ) and p .startswith (prefix [:- 1 ])) or (not prefix .endswith ("*" ) and prefix == p ))
227+
228+ row , f_processed_packages = add_package_stats_to_row (
229+ row , sorted (cwes ), collect_framework )
230+
231+ csvwriter .writerow (row )
232+ processed_packages .update (f_processed_packages )
233+
234+ # Collect statistics on all packages that are not part of a framework
235+ row = ["Others" , None ]
236+
237+ def collect_others (): return collect_package_stats (
238+ packages ,
239+ lambda p : p not in processed_packages )
240+
241+ row , _ = add_package_stats_to_row (
242+ row , sorted (cwes ), collect_others )
243+
244+ csvwriter .writerow (row )
245+
246+ # Collect statistics on all packages
247+ row = ["Total" , None ]
248+
249+ def collect_total (): return collect_package_stats (
250+ packages ,
251+ lambda p : True )
252+
253+ row , _ = add_package_stats_to_row (
254+ row , sorted (cwes ), collect_total )
255+
256+ csvwriter .writerow (row )
257+
258+ # todo: generate rst page referencing the csv files
0 commit comments