|
| 1 | +import subprocess |
| 2 | +import csv |
| 3 | +import sys |
| 4 | +import os |
| 5 | +import shutil |
| 6 | +from datetime import date |
| 7 | +import datetime |
| 8 | +import utils |
| 9 | + |
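"""
 Collects a time series of the sink/source/summary statistics: walks back
 through the git history of the checkout given as the first command line
 argument and records the totals for commits that are at least `day_distance`
 days apart.
"""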
| 13 | + |
# The minimum number of days between two consecutive commits that are included
# in the output (see get_previous_sha).
day_distance = 1

# The directory where codeql is, given as the first command line argument.
# This is the directory in which the different SHAs get checked out.
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("Usage: " + sys.argv[0] + " <path-to-codeql-checkout>")
working_dir = sys.argv[1]

# Language passed to utils.create_empty_database.
lang = "java"
# Directory of the temporary database that get_stats creates and removes.
db = "empty-java"
# File the coverage query results are written to; get_stats reads it as CSV
# and deletes it afterwards.
ql_output = "output-java.csv"
# File the collected time series is written to.
csv_output = "timeseries-java.csv"
| 24 | + |
| 25 | + |
def get_str_output(arr):
    """Run the command `arr` and return what it wrote to standard output.

    The output is decoded as UTF-8, and any newline or single-quote
    characters at the start or end of it are removed.
    """
    raw = subprocess.check_output(arr)
    text = raw.decode("utf-8")
    return text.strip("\n'")
| 29 | + |
| 30 | + |
def get_date(sha):
    """Return the committer date of commit `sha` as a `datetime.date`.

    Runs `git show` in the current working directory.
    """
    # No shell is involved, so the quotes in --pretty='%cd' reach git verbatim
    # and come back in the output; get_str_output strips them again.
    git_cmd = [
        "git", "show", "--no-patch", "--no-notes",
        "--pretty='%cd'", "--date=short", sha,
    ]
    short_date = get_str_output(git_cmd)
    return date.fromisoformat(short_date)
| 35 | + |
| 36 | + |
def get_parent(sha, date):
    """Return a `(sha, date)` tuple describing the first parent of `sha`.

    The `date` argument is accepted but not used by this function.
    """
    first_parent = get_str_output(["git", "rev-parse", f"{sha}^"])
    return (first_parent, get_date(first_parent))
| 42 | + |
| 43 | + |
def get_previous_sha(sha, date):
    """Return `(sha, date)` of the nearest ancestor that is old enough.

    Follows the parent chain starting at `sha` until it reaches a commit
    whose date is at least `day_distance` days before `date`.
    """
    cutoff = date - datetime.timedelta(days=day_distance)

    candidate = get_parent(sha, date)
    while candidate[1] > cutoff:
        # Still too recent: step one more commit back.
        candidate = get_parent(*candidate)

    return candidate
| 50 | + |
| 51 | + |
def get_stats():
    """Run the coverage query on the current checkout and total its results.

    Creates an empty database in `db`, runs Coverage.ql against it with the
    results written to `ql_output`, and sums the count column per row kind.
    The database directory and the query output file are removed before
    returning, also when one of the steps fails.

    Returns:
        A `(sources, sinks, summaries)` tuple of ints.
    """
    # Start from a clean slate in case an earlier run left a database behind.
    if os.path.isdir(db):
        shutil.rmtree(db)

    totals = {"source": 0, "sink": 0, "summary": 0}

    try:
        utils.create_empty_database(lang, ".java", db)
        utils.run_codeql_query(
            "java/ql/src/meta/frameworks/Coverage.ql", db, ql_output)

        with open(ql_output) as csvfile:
            for row in csv.reader(csvfile):
                # row: "android.util",1,"remote","source",16
                kind = row[3]
                if kind in totals:
                    totals[kind] += int(row[4])
    finally:
        # Do not leave temporary artifacts in the checkout, whatever happened.
        if os.path.isdir(db):
            shutil.rmtree(db)
        if os.path.exists(ql_output):
            os.remove(ql_output)

    return (totals["source"], totals["sink"], totals["summary"])
| 78 | + |
| 79 | + |
# Walk back through the history starting at `main`, sampling commits that are
# at least `day_distance` days apart, and write one CSV row per sampled commit.
# The output file is opened before the chdir below, so a relative `csv_output`
# is created in the directory the script was started from.
with open(csv_output, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])

    os.chdir(working_dir)

    utils.subprocess_run(["git", "checkout", "main"])

    try:
        current_sha = get_str_output(["git", "rev-parse", "HEAD"])
        current_date = get_date(current_sha)

        while True:
            print("Getting stats for " + current_sha)
            utils.subprocess_run(["git", "checkout", current_sha])

            try:
                stats = get_stats()

                csvwriter.writerow(
                    [current_sha, current_date, stats[0], stats[1], stats[2]])

                print("Collected stats for " + current_sha +
                      " at " + current_date.isoformat())
            except Exception as err:
                # Catch Exception rather than everything, so that Ctrl-C and
                # SystemExit are not swallowed and reported as a stats error.
                print("Unexpected error:", repr(err))
                print("Error getting stats for " +
                      current_sha + ". Stopping iteration.")
                break

            try:
                current_sha, current_date = get_previous_sha(
                    current_sha, current_date)
            except subprocess.CalledProcessError:
                # git could not resolve an older commit, e.g. because the
                # start of the history was reached.
                print("Could not find a commit before " +
                      current_sha + ". Stopping iteration.")
                break
    finally:
        # Always leave the checkout clean and back on main, also when the
        # loop is aborted by an error or an interrupt.
        if os.path.isdir(db):
            shutil.rmtree(db)
        utils.subprocess_run(["git", "checkout", "main"])
0 commit comments