Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8bb1af8

Browse files
committed
C# extractor: Limit string literals to 1MB. This is made more complicated by the fact that we need to limit the number of bytes to output, rather than the number of characters.
1 parent 97fc4b0 commit 8bb1af8

4 files changed

Lines changed: 15616 additions & 21 deletions

File tree

csharp/extractor/Semmle.Extraction/Tuple.cs

Lines changed: 102 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using System.Linq;
2+
13
namespace Semmle.Extraction
24
{
35
/// <summary>
@@ -15,6 +17,68 @@ public Tuple(string name, params object[] args)
1517
Args = args;
1618
}
1719

20+
// Size limit and text encoding shared by the truncation helpers below:
// maxStringBytes is the byte budget (1 << 20 = 1,048,576) that the NeedsTruncation
// overloads compare against, and encoding (UTF-8) is what they use to measure strings.
const int maxStringBytes = 1<<20; // 1MB
21+
static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
22+
23+
/// <summary>
/// Decides whether <paramref name="s"/> takes more than <c>maxStringBytes</c> bytes
/// when encoded with <c>encoding</c>.
/// </summary>
/// <param name="s">The string to measure.</param>
/// <returns><c>true</c> if the encoded string exceeds the limit; otherwise <c>false</c>.</returns>
private static bool NeedsTruncation(string s)
{
    // Cheap test first: when even the worst-case size for this many characters
    // fits within the limit, there is no need to count the real bytes.
    if (encoding.GetMaxByteCount(s.Length) <= maxStringBytes)
    {
        return false;
    }

    // The string might be too long, so measure its exact encoded size.
    return encoding.GetByteCount(s) > maxStringBytes;
}
30+
31+
/// <summary>
/// Decides whether the strings in <paramref name="array"/>, taken together, take more
/// than <c>maxStringBytes</c> bytes when encoded with <c>encoding</c>.
/// </summary>
/// <param name="array">The strings to measure.</param>
/// <returns><c>true</c> if the combined encoded size exceeds the limit; otherwise <c>false</c>.</returns>
private static bool NeedsTruncation(string[] array)
{
    // Cheap test first: when even the worst-case size for the combined character
    // count fits within the limit, there is no need to count the real bytes.
    int totalChars = array.Sum(element => element.Length);
    if (encoding.GetMaxByteCount(totalChars) <= maxStringBytes)
    {
        return false;
    }

    // The strings might be too long, so measure their exact combined encoded size.
    int totalBytes = array.Sum(element => encoding.GetByteCount(element));
    return totalBytes > maxStringBytes;
}
38+
39+
/// <summary>
/// Appends <paramref name="s"/> to the trap builder after passing it through
/// <see cref="EncodeString"/>.
/// </summary>
/// <param name="tb">The trap builder to append to.</param>
/// <param name="s">The string to write.</param>
private static void WriteString(ITrapBuilder tb, string s)
{
    var escaped = EncodeString(s);
    tb.Append(escaped);
}
40+
41+
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <remarks>
/// The string is only ever cut on a code-point boundary: a well-formed surrogate pair
/// is either kept whole or dropped whole, so truncation never leaves a dangling high
/// surrogate at the end of the result.
/// </remarks>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">
/// The number of bytes available. On return it has been reduced by the encoded size
/// of the returned string.
/// </param>
/// <returns>The truncated string, or <paramref name="s"/> itself if it already fits.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
    int outputLen = encoding.GetByteCount(s);
    if (outputLen > bytesRemaining)
    {
        outputLen = 0;
        int chars = 0;
        while (chars < s.Length)
        {
            // Measure a surrogate pair as one unit. Measuring its halves separately
            // would count each as a lone surrogate and could split the pair.
            int width = char.IsSurrogatePair(s, chars) ? 2 : 1;
            int bytes = encoding.GetByteCount(s, chars, width);
            if (outputLen + bytes > bytesRemaining)
            {
                break;
            }

            outputLen += bytes;
            chars += width;
        }

        s = s.Substring(0, chars);
    }

    bytesRemaining -= outputLen;
    return s;
}
67+
68+
/// <summary>
/// Escapes a string by replacing every double-quote character with two double-quote characters.
/// </summary>
/// <param name="s">The string to escape.</param>
/// <returns>The escaped string.</returns>
private static string EncodeString(string s)
{
    const string quote = "\"";
    const string doubledQuote = "\"\"";
    return s.Replace(quote, doubledQuote);
}
69+
70+
/// <summary>
/// Writes <paramref name="s"/> to the trap file, first cutting it down with
/// <see cref="TruncateString"/> so that it fits within <paramref name="bytesRemaining"/> bytes.
/// The byte budget is applied to the string before <see cref="WriteString"/> escapes its quotes.
/// </summary>
/// <param name="tb">The trap builder receiving the output.</param>
/// <param name="s">The string to write.</param>
/// <param name="bytesRemaining">The byte budget still available; updated by the truncation step.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
    var truncated = TruncateString(s, ref bytesRemaining);
    WriteString(tb, truncated);
}
81+
1882
/// <summary>
1983
/// Constructs a unique string for this tuple.
2084
/// </summary>
@@ -27,48 +91,65 @@ public void EmitToTrapBuilder(ITrapBuilder tb)
2791
foreach (var a in Args)
2892
{
2993
if (column > 0) tb.Append(", ");
30-
if (a is Label)
94+
if (a is Label l)
3195
{
32-
((Label)a).AppendTo(tb);
96+
l.AppendTo(tb);
3397
}
34-
else if (a is IEntity)
98+
else if (a is IEntity e)
3599
{
36-
((IEntity)a).Label.AppendTo(tb);
100+
e.Label.AppendTo(tb);
37101
}
38-
else if (a is string)
102+
else if (a is string s)
39103
{
40104
tb.Append("\"");
41-
tb.Append(((string)a).Replace("\"", "\"\""));
105+
if (NeedsTruncation(s))
106+
{
107+
// Slow path
108+
int remaining = maxStringBytes;
109+
WriteTruncatedString(tb, s, ref remaining);
110+
}
111+
else
112+
{
113+
// Fast path
114+
WriteString(tb, s);
115+
}
42116
tb.Append("\"");
43117
}
44118
else if (a is System.Enum)
45119
{
46120
tb.Append((int)a);
47121
}
48-
else if (a is int)
122+
else if (a is int i)
49123
{
50-
tb.Append((int)a);
124+
tb.Append(i);
51125
}
52-
else if (a == null)
126+
else if(a is string[] array)
53127
{
54-
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
55-
Name, column);
56-
}
57-
else
58-
{
59-
var array = a as string[];
60-
if (array != null)
128+
tb.Append("\"");
129+
if (NeedsTruncation(array))
61130
{
62-
tb.Append("\"");
131+
// Slow path
132+
int remaining = maxStringBytes;
63133
foreach (var element in array)
64-
tb.Append(element.Replace("\"", "\"\""));
65-
tb.Append("\"");
134+
WriteTruncatedString(tb, element, ref remaining);
66135
}
67136
else
68137
{
69-
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
70-
a.GetType(), Name, column);
138+
// Fast path
139+
foreach (var element in array)
140+
WriteString(tb, element);
71141
}
142+
tb.Append("\"");
143+
}
144+
else if (a is null)
145+
{
146+
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
147+
Name, column);
148+
}
149+
else
150+
{
151+
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
152+
a.GetType(), Name, column);
72153
}
73154
++column;
74155
}

0 commit comments

Comments
 (0)