Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b9d8fe7

Browse files
smowton authored and igfoo committed
TRAP formatting: adopt Java's standards
* Encode dates with D"" strings
* Truncate exceedingly long string values
* Note that floats don't require any special handling
1 parent 4adf582 commit b9d8fe7

3 files changed

Lines changed: 63 additions & 5 deletions

File tree

java/kotlin-extractor/generate_dbscheme.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = N
124124
elif db_type == 'string':
125125
kt.write('String')
126126
elif db_type == 'date':
127-
kt.write('String')
127+
kt.write('Date')
128128
elif db_type == 'boolean':
129129
kt.write('Boolean')
130130
elif db_type[0] == '@':
@@ -142,11 +142,11 @@ def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = N
142142
kt.write(comma)
143143
if colname == kind:
144144
kt.write(str(num))
145-
elif db_type == 'string' or db_type == 'date':
146-
kt.write('\\"${escapeTrapString(' + colname + ')}\\"')
145+
elif db_type == 'string':
146+
kt.write('\\"${escapeTrapString(truncateString(' + colname + '))}\\"')
147+
elif db_type == 'date':
148+
kt.write('D\\"${' + colname + '}\\"')
147149
else:
148-
# TODO: Any reformatting or escaping necessary?
149-
# e.g. float formats?
150150
kt.write('$' + colname)
151151
comma = ', '
152152
kt.write(')\\n")\n')
@@ -155,6 +155,7 @@ def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = N
155155
with open('src/main/kotlin/KotlinExtractorDbScheme.kt', 'w') as kt:
156156
kt.write('/* Generated by ' + sys.argv[0] + ': Do not edit manually. */\n')
157157
kt.write('package com.github.codeql\n')
158+
# Terminate the import line with a newline, like the header and package lines above,
# so that whatever is written next starts on its own line in the generated file.
kt.write('import java.util.Date\n')
158159

159160
for relname, columns in tables.items():
160161
enum = None
Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
package com.semmle.util.unicode;
2+
3+
public class UTF8Util {
4+
/**
5+
* Get the length (in UTF-16 code units, not code points) of the longest prefix of
6+
* a string that can be UTF-8 encoded in no more than the given number of bytes.
7+
*
8+
* <p>
9+
* Unencodable characters (such as lone surrogate halves or low surrogates
10+
* that do not follow a high surrogate) are treated as being encoded in
11+
* three bytes. This is safe since on encoding they will be replaced by
12+
* a replacement character, which in turn will take at most three bytes to
13+
* encode.
14+
* </p>
15+
*
* <p>
* A well-formed surrogate pair is counted as a single four-byte character, so the
* returned prefix never ends between the two halves of such a pair.
* </p>
*
16+
* @param str string to encode
17+
* @param maxEncodedLength maximum number of bytes for the encoded prefix
18+
* @return length of the prefix, in UTF-16 code units; equal to {@code str.length()}
* when the whole string fits
19+
*/
20+
public static int encodablePrefixLength(String str, int maxEncodedLength) {
21+
// Fast path: each UTF-16 code unit costs at most three bytes (a surrogate pair
// costs four bytes for two units), so a string of at most maxEncodedLength / 3
// code units always fits and does not need to be scanned.
22+
if (str.length() > maxEncodedLength / 3) {
23+
int encodedLength = 0;
24+
for (int i = 0; i < str.length(); ++i) {
25+
// Remember where the current character starts: i may be advanced past a
// low surrogate below, but the prefix must be cut before the whole character.
int oldI = i;
26+
char c = str.charAt(i);
27+
if (c <= 0x7f) {
28+
encodedLength += 1;
29+
} else if (c <= 0x7ff) {
30+
encodedLength += 2;
31+
} else if (Character.isHighSurrogate(c)) {
32+
// a high surrogate followed by a low surrogate forms one pair: four bytes
33+
if (i+1 < str.length() && Character.isLowSurrogate(str.charAt(i+1))) {
34+
encodedLength += 4;
35+
++i;
36+
} else {
37+
// lone high surrogate: count three bytes
38+
encodedLength += 3;
39+
}
40+
} else {
41+
// every other code unit above 0x7ff, including a low surrogate that does
// not follow a high surrogate: three bytes
encodedLength += 3;
42+
}
43+
44+
// The current character pushed the total over the budget, so the prefix
// ends where that character started.
if (encodedLength > maxEncodedLength) {
45+
return oldI;
46+
}
47+
}
48+
}
49+
50+
return str.length();
51+
}
52+
}

java/kotlin-extractor/src/main/kotlin/KotlinExtractorExtension.kt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ import java.io.FileOutputStream
88
import java.nio.file.Files
99
import java.nio.file.Paths
1010
import com.semmle.util.files.FileUtil
11+
import com.semmle.util.unicode.UTF8Util
1112
import kotlin.system.exitProcess
1213

1314
class KotlinExtractorExtension(
@@ -117,6 +118,10 @@ class FileExtractionProblems(val invocationExtractionProblems: ExtractionProblem
117118

118119
/**
 * Returns [str] with every double-quote character doubled, so the result can be
 * placed between double quotes in emitted TRAP text.
 */
fun escapeTrapString(str: String): String {
    return str.replace("\"", "\"\"")
}
119120

121+
/** Byte budget that truncateString passes to the UTF-8 prefix computation: 2^20 = 1,048,576 bytes (1 MiB). */
const val MAX_STRLEN = 1 shl 20
122+
123+
/**
 * Returns the leading part of [str] whose length is given by
 * `UTF8Util.encodablePrefixLength(str, MAX_STRLEN)`, i.e. the longest prefix that
 * helper reports as UTF-8 encodable within `MAX_STRLEN` bytes.
 */
fun truncateString(str: String): String {
    val prefixLength = UTF8Util.encodablePrefixLength(str, MAX_STRLEN)
    return str.substring(0, prefixLength)
}
124+
120125
private fun equivalentTrap(f1: File, f2: File): Boolean {
121126
f1.bufferedReader().use { bw1 ->
122127
f2.bufferedReader().use { bw2 ->

0 commit comments

Comments
 (0)