Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit bcbcd61

Browse files
committed
Kotlin: Improve the dbscheme generator
We now work out the supertype relationships based on the sets of leaf types that are included, rather than simply following the hierarchy of declarations. This means that we know about more supertype relationships that exist, so there is less need to cast types.
1 parent 5aac46f commit bcbcd61

1 file changed

Lines changed: 118 additions & 66 deletions

File tree

java/kotlin-extractor/generate_dbscheme.py

Lines changed: 118 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,119 @@
33
import re
44
import sys
55

6-
def upperFirst(string):
7-
return string[0].upper() + string[1:]
6+
# Parsed contents of the dbscheme, filled in by parse_dbscheme():
#   enums:  type name -> (kind column name, [(number, case type name), ...])
#   unions: type name -> [member type name, ...]
#   tables: relation name -> [(column name, column type, 'ref' or ''), ...]
enums = {}
unions = {}
tables = {}


def parse_dbscheme(filename):
    """Parse the dbscheme file *filename* into the module-level dictionaries.

    The file is read, comments are stripped, and then three kinds of
    declaration are extracted with regular expressions:

    * ``case @type.column of N = @case | ...;`` entries go into ``enums``
    * ``@type = @a | @b | ...`` entries go into ``unions``
    * ``relname(column: type [ref], ...)`` entries go into ``tables``

    Nothing is returned; ``enums``, ``unions`` and ``tables`` are updated
    in place. Type names are stored without their leading ``@`` in
    ``enums`` and ``unions``; column types in ``tables`` keep it.
    """
    with open(filename, 'r') as f:
        dbscheme = f.read()

    # Remove comments. Block comments go first so that a '//' inside one
    # cannot be mistaken for the start of a line comment.
    dbscheme = re.sub(r'/\*.*?\*/', '', dbscheme, flags=re.DOTALL)
    # A line comment runs to the end of the line; it must not be required
    # to end in '/', or ordinary '// ...' comments would be left in place
    # and any declarations inside them would be parsed as if they were live.
    dbscheme = re.sub(r'//[^\r\n]*', '', dbscheme)

    # kind enums
    for name, kind, body in re.findall(r'case\s+@([^.\s]*)\.([^.\s]*)\s+of\b(.*?);',
                                       dbscheme,
                                       flags=re.DOTALL):
        mapping = [(int(num), typ)
                   for num, typ in re.findall(r'(\d+)\s*=\s*@(\S+)', body)]
        enums[name] = (kind, mapping)

    # unions
    for name, rhs in re.findall(r'@(\w+)\s*=\s*(@\w+(?:\s*\|\s*@\w+)*)',
                                dbscheme,
                                flags=re.DOTALL):
        unions[name] = re.findall(r'@(\w+)', rhs)

    # tables
    # Raw strings keep the regex escapes (\w, \S, \() out of Python's own
    # string-escape processing.
    for relname, body in re.findall(r'\n([\w_]+)(\([^)]*\))',
                                    dbscheme,
                                    flags=re.DOTALL):
        # Each column is (name, type, 'ref') or (name, type, '') when the
        # column is not declared as a reference.
        tables[relname] = re.findall(r'(\S+)\s*:\s*([^\s,]+)(?:\s+(ref)|)', body)
40+
41+
parse_dbscheme('../ql/lib/config/semmlecode.dbscheme')
1542

16-
enums = {}
1743
# Maps each alias type name to the name of the type it stands for.
type_aliases = {}

# A "union" with exactly one member is not really a union: it only gives
# that member a second name, so it is recorded as an alias.
for alias, typs in unions.items():
    if len(typs) == 1:
        type_aliases[alias] = typs[0]


def unalias(t):
    """Return the type name that *t* ultimately stands for.

    Alias chains (an alias whose target is itself an alias) are followed
    to the end, regardless of the order in which the aliases were
    declared. A name that is not an alias is returned unchanged.
    """
    # `seen` guarantees termination even if the aliases form a cycle.
    seen = set()
    while t in type_aliases and t not in seen:
        seen.add(t)
        t = type_aliases[t]
    return t


# Point every alias directly at its final target, so that code reading
# type_aliases[...] directly also sees a non-alias type.
for alias in list(type_aliases):
    type_aliases[alias] = unalias(alias)
54+
55+
# type_leaf:  names of types treated as leaves (not broken down further).
# type_union: type name -> set of the member type names it is declared as.
type_leaf = set()
type_union = {}

# Each case of a kind enum is a leaf, and the enum's type is the union of
# all of its cases.
for enum_name, (_kind_column, cases) in enums.items():
    members = {case_type for _number, case_type in cases}
    type_leaf |= members
    type_union[enum_name] = members

# Declared unions, skipping the single-member ones that are only aliases.
# Members are unaliased so that a type and its alias count as one member.
for union_name, member_names in unions.items():
    if union_name in type_aliases:
        continue
    type_union[union_name] = {unalias(member) for member in member_names}

# An '@' type in a column that is not marked 'ref' is a leaf as well,
# unless it is a kind enum (whose cases were already added above).
for table_columns in tables.values():
    for _column_name, db_type, ref in table_columns:
        if db_type[0] != '@' or ref != '':
            continue
        defined_type = db_type[1:]
        if defined_type not in enums:
            type_leaf.add(defined_type)
75+
76+
type_union_of_leaves = {}
2477

25-
def genTable(kt, relname, body, enum = None, kind = None, num = None, typ = None):
78+
def to_leaves(t):
    """Record in ``type_union_of_leaves`` the set of leaf types under union *t*.

    Members of ``type_union[t]`` that are in ``type_leaf`` are taken as
    they are; any other member is expanded recursively and its leaves are
    merged in. Results are cached in ``type_union_of_leaves``, so a type
    that has already been expanded is not processed again. Returns nothing.
    """
    if t in type_union_of_leaves:
        return

    collected = set()
    for member in type_union[t]:
        if member in type_leaf:
            collected.add(member)
            continue
        # Not a leaf: make sure its own leaves are known, then merge them.
        to_leaves(member)
        collected |= type_union_of_leaves[member]
    type_union_of_leaves[t] = collected
89+
90+
for t in type_union:
91+
to_leaves(t)
92+
93+
# supertypes: type name -> set of union names that are supertypes of it.

# A leaf's supertypes are all the unions whose leaf set contains it.
supertypes = {
    leaf: {sup for sup, sup_leaves in type_union_of_leaves.items()
           if leaf in sup_leaves}
    for leaf in type_leaf
}

# A union's supertypes are all the *other* unions whose leaf set includes
# every one of its leaves. These entries are written second, so they
# replace the leaf entry for any name that appears in both collections.
supertypes.update(
    (union, {sup for sup, sup_leaves in type_union_of_leaves.items()
             if sup != union and union_leaves <= sup_leaves})
    for union, union_leaves in type_union_of_leaves.items()
)
106+
107+
def upperFirst(string):
    """Return *string* with its first character converted to upper case.

    The rest of the string is left exactly as it is (unlike
    ``str.capitalize``, which lower-cases the remainder). An empty string
    is returned unchanged rather than raising ``IndexError``.
    """
    # Slicing, unlike indexing, is safe on an empty string.
    return string[:1].upper() + string[1:]
109+
110+
def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = None):
26111
kt.write('fun TrapWriter.write' + upperFirst(relname))
27112
if kind is not None:
28113
kt.write('_' + typ)
29114
kt.write('(')
30-
for colname, db_type in re.findall('(\S+)\s*:\s*([^\s,]+)', body):
115+
for colname, db_type, _ in columns:
31116
if colname != kind:
32117
kt.write(colname + ': ')
33118
if db_type == 'int':
34-
# TODO: Do something better if the column is a 'case'
35119
kt.write('Int')
36120
elif db_type == 'float':
37121
kt.write('Double')
@@ -52,7 +136,7 @@ def genTable(kt, relname, body, enum = None, kind = None, num = None, typ = None
52136
kt.write(') {\n')
53137
kt.write(' this.writeTrap("' + relname + '(')
54138
comma = ''
55-
for colname, db_type in re.findall('(\S+)\s*:\s*([^\s,]+)', body):
139+
for colname, db_type, _ in columns:
56140
kt.write(comma)
57141
if colname == kind:
58142
kt.write(str(num))
@@ -70,59 +154,27 @@ def genTable(kt, relname, body, enum = None, kind = None, num = None, typ = None
70154
kt.write('/* Generated by ' + sys.argv[0] + ': Do not edit manually. */\n')
71155
kt.write('package com.github.codeql\n')
72156

73-
# kind enums
74-
for name, kind, body in re.findall(r'case\s+@([^.\s]*)\.([^.\s]*)\s+of\b(.*?);',
75-
dbscheme,
76-
flags=re.DOTALL):
77-
mapping = []
78-
for num, typ in re.findall(r'(\d+)\s*=\s*@(\S+)', body):
79-
s = type_hierarchy.get(typ, set())
80-
s.add(name)
81-
type_hierarchy[typ] = s
82-
mapping.append((int(num), typ))
83-
enums[name] = (kind, mapping)
84-
85-
# unions
86-
for name, unions in re.findall(r'@(\w+)\s*=\s*(@\w+(?:\s*\|\s*@\w+)*)',
87-
dbscheme,
88-
flags=re.DOTALL):
89-
type_hierarchy[name] = type_hierarchy.get(name, set())
90-
typs = re.findall(r'@(\w+)', unions)
91-
if len(typs) == 1:
92-
type_aliases[name] = typs[0]
93-
else:
94-
for typ in typs:
95-
s = type_hierarchy.get(typ, set())
96-
s.add(name)
97-
type_hierarchy[typ] = s
98-
99-
# tables
100-
for relname, body in re.findall('\n([\w_]+)(\([^)]*\))',
101-
dbscheme,
102-
flags=re.DOTALL):
157+
for relname, columns in tables.items():
103158
enum = None
104-
for db_type in re.findall(':\s*@([^\s,]+)\s*(?:,|$)', body):
105-
type_hierarchy[db_type] = type_hierarchy.get(db_type, set())
106-
if db_type in enums:
107-
enum = db_type
159+
for _, db_type, ref in columns:
160+
if db_type[0] == '@' and ref == '':
161+
db_type_name = db_type[1:]
162+
if db_type_name in enums:
163+
enum = db_type_name
108164
if enum is None:
109-
genTable(kt, relname, body)
165+
genTable(kt, relname, columns)
110166
else:
111167
(kind, mapping) = enums[enum]
112168
for num, typ in mapping:
113-
genTable(kt, relname, body, enum, kind, num, typ)
114-
115-
for typ in sorted(type_hierarchy):
116-
if typ in type_aliases:
117-
kt.write('typealias Db' + upperFirst(typ) + ' = Db' + upperFirst(type_aliases[typ]) + '\n')
118-
else:
119-
kt.write('sealed interface Db' + upperFirst(typ))
120-
# This map of unalias avoids duplicates when both T and an
121-
# alias of T appear in the set. Sorting makes the output
122-
# deterministic.
123-
names = sorted(set(map(unalias, type_hierarchy[typ])))
124-
if names:
125-
kt.write(': ')
126-
kt.write(', '.join(map(lambda name: 'Db' + upperFirst(name), names)))
127-
kt.write('\n')
169+
genTable(kt, relname, columns, enum, kind, num, typ)
128170

171+
for typ in sorted(supertypes):
172+
kt.write('sealed interface Db' + upperFirst(typ))
173+
# Sorting makes the output deterministic.
174+
names = sorted(supertypes[typ])
175+
if names:
176+
kt.write(': ')
177+
kt.write(', '.join(map(lambda name: 'Db' + upperFirst(name), names)))
178+
kt.write('\n')
179+
for alias in sorted(type_aliases):
180+
kt.write('typealias Db' + upperFirst(alias) + ' = Db' + upperFirst(type_aliases[alias]) + '\n')

0 commit comments

Comments
 (0)