Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 6eefb26

Browse files
author
Stephan Brandauer
committed
Automodel extraction queries in java telemetry query directory
1 parent 246d904 commit 6eefb26

6 files changed

Lines changed: 757 additions & 0 deletions
Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
/**
2+
* For internal use only.
3+
*/
4+
5+
private import java
6+
private import semmle.code.java.dataflow.DataFlow
7+
private import semmle.code.java.dataflow.TaintTracking
8+
private import semmle.code.java.security.PathCreation
9+
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
10+
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
11+
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
12+
private import semmle.code.java.Expr as Expr
13+
private import semmle.code.java.security.QueryInjection
14+
private import semmle.code.java.security.RequestForgery
15+
import AutomodelSharedCharacteristics as SharedCharacteristics
16+
import AutomodelEndpointTypes as AutomodelEndpointTypes
17+
18+
/**
 * The Java implementation of `SharedCharacteristics::CandidateSig`.
 *
 * Endpoints are data flow parameter nodes, and endpoint types are the ones declared in
 * `AutomodelEndpointTypes`.
 */
module CandidatesImpl implements SharedCharacteristics::CandidateSig {
  /** An endpoint: a parameter node of the data flow graph. */
  class Endpoint = DataFlow::ParameterNode;

  /** An endpoint type, as declared in `AutomodelEndpointTypes`. */
  class EndpointType = AutomodelEndpointTypes::EndpointType;

  /** Holds if `t` is the negative (non-sink) endpoint type. */
  predicate isNegative(AutomodelEndpointTypes::EndpointType t) {
    t instanceof AutomodelEndpointTypes::NegativeSinkType
  }

  /** Gets the string representation of the location of `e`. */
  string getLocationString(Endpoint e) { result = e.getLocation().toString() }

  /**
   * Holds if `label` is one of the sink labels handled here, `humanReadableLabel` is its
   * display name, and `type` is the endpoint type that the label maps to.
   */
  predicate isKnownLabel(string label, string humanReadableLabel, EndpointType type) {
    label = "read-file" and
    humanReadableLabel = "read file" and
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
    or
    label = "create-file" and
    humanReadableLabel = "create file" and
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
    or
    label = "sql" and
    humanReadableLabel = "mad modeled sql" and
    type instanceof AutomodelEndpointTypes::SqlSinkType
    or
    label = "open-url" and
    humanReadableLabel = "open url" and
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
    or
    label = "jdbc-url" and
    humanReadableLabel = "jdbc url" and
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
    or
    label = "command-injection" and
    humanReadableLabel = "command injection" and
    type instanceof AutomodelEndpointTypes::CommandInjectionSinkType
  }

  /**
   * Holds if `ExternalFlow::sinkModel` has a row carrying `label` that agrees with the
   * `sinkSpec` coordinates of `e`. The signature of the row may either be the exact
   * parameter string of the enclosing callable or the empty string.
   */
  predicate isSink(Endpoint e, string label) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      string input
    |
      sinkSpec(e, package, type, subtypes, name, signature, ext, input) and
      ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, label, _)
    )
  }

  /**
   * Holds if `ExternalFlow::neutralModel` has a row for the callable enclosing `e`, again
   * accepting either the exact parameter string or the empty string as signature.
   */
  predicate isNeutral(Endpoint e) {
    exists(string package, string type, string name, string signature |
      sinkSpec(e, package, type, _, name, signature, _, _) and
      ExternalFlow::neutralModel(package, type, name, [signature, ""], _)
    )
  }

  /**
   * Holds if the remaining columns describe `e` in the column layout used by the
   * `sinkModel` lookup in `isSink`: the package, type and name of the enclosing callable,
   * its parameter string, and `input` of the form `Argument[i]` for parameter position `i`.
   *
   * `subtypes` is always `false` and `ext` is always the empty string.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string input
  ) {
    package = e.getEnclosingCallable().getDeclaringType().getPackage().toString() and
    type = e.getEnclosingCallable().getDeclaringType().getName() and
    subtypes = false and
    name = e.getEnclosingCallable().getName() and
    signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
    ext = "" and
    exists(int paramIdx | e.isParameterOf(_, paramIdx) | input = "Argument[" + paramIdx + "]")
  }

  /**
   * Holds if `metadata` is the textual metadata record for endpoint `n`.
   *
   * The record is a single-quoted, dictionary-like string with the keys `Package`, `Type`,
   * `Subtypes`, `Name`, `Signature`, `Ext`, `Argument index`, `Provenance`, `Is public` and
   * `Callee JavaDoc`. `Subtypes` is `false` exactly when the callable or its declaring type
   * is final. Single quotes inside the Javadoc text are replaced by double quotes so that
   * they do not terminate the quoted value.
   */
  predicate hasMetadata(Endpoint n, string metadata) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      int input, string provenance, boolean isPublic, boolean isFinal, string calleeJavaDoc
    |
      hasMetadata(n, package, type, name, signature, input, isFinal, isPublic, calleeJavaDoc) and
      (if isFinal = true then subtypes = false else subtypes = true) and
      ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
      provenance = "ai-generated" and
      metadata =
        "{" //
          + "'Package': '" + package //
          + "', 'Type': '" + type //
          + "', 'Subtypes': " + subtypes //
          + ", 'Name': '" + name //
          + "', 'Signature': '" + signature //
          + "', 'Ext': '" + ext //
          + "', 'Argument index': " + input //
          + ", 'Provenance': '" + provenance //
          + "', 'Is public': " + isPublic //
          // `isPublic` is an unquoted boolean, so the next separator must not start with a
          // closing quote (it previously produced `true', 'Callee JavaDoc': ...`).
          + ", 'Callee JavaDoc': '" + calleeJavaDoc.replaceAll("'", "\"") //
          + "'}" // TODO: Why are the curly braces added twice?
    )
  }
}
110+
111+
/** The shared characteristics library, instantiated with the Java candidate implementation `CandidatesImpl`. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<CandidatesImpl>;
112+
113+
/** An endpoint characteristic; an alias for `CharacteristicsImpl::EndpointCharacteristic`. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
114+
115+
/** An endpoint; an alias for `CandidatesImpl::Endpoint`, which is a data flow parameter node. */
class Endpoint = CandidatesImpl::Endpoint;
116+
117+
/*
118+
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
119+
*/
120+
121+
/**
122+
* Holds if `n` has the given metadata.
123+
*
124+
* This is a helper function to extract and export needed information about each endpoint.
125+
*/
126+
predicate hasMetadata(
127+
Endpoint n, string package, string type, string name, string signature, int input,
128+
boolean isFinal, boolean isPublic, string calleeJavaDoc
129+
) {
130+
exists(Callable callee |
131+
n.asParameter() = callee.getParameter(input) and
132+
package = callee.getDeclaringType().getPackage().getName() and
133+
type = callee.getDeclaringType().getErasure().(RefType).nestedName() and
134+
(
135+
if callee.isFinal() or callee.getDeclaringType().isFinal()
136+
then isFinal = true
137+
else isFinal = false
138+
) and
139+
name = callee.getSourceDeclaration().getName() and
140+
signature = ExternalFlow::paramsString(callee) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
141+
(if callee.isPublic() then isPublic = true else isPublic = false) and
142+
if exists(callee.(Documentable).getJavadoc())
143+
then calleeJavaDoc = callee.(Documentable).getJavadoc().toString()
144+
else calleeJavaDoc = ""
145+
)
146+
}
147+
148+
/*
149+
* EndpointCharacteristic classes that are specific to Automodel for Java.
150+
*/
151+
152+
/**
153+
* A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink.
154+
*
155+
* A sink is highly unlikely to be exploitable if its callee's name starts with `is` and the callee has a boolean return
156+
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
157+
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
158+
*
159+
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
160+
*/
161+
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
162+
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
163+
164+
override predicate appliesToEndpoint(Endpoint e) {
165+
not CandidatesImpl::isSink(e, _) and
166+
e.getEnclosingCallable().getName().matches("is%") and
167+
e.getEnclosingCallable().getReturnType() instanceof BooleanType
168+
}
169+
}
170+
171+
/**
172+
* A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a
173+
* sink.
174+
*
175+
* A sink is highly unlikely to be exploitable if its callee's name is `exists` or `notExists` and the callee has a
176+
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
177+
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
178+
*/
179+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
180+
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
181+
182+
override predicate appliesToEndpoint(Endpoint e) {
183+
not CandidatesImpl::isSink(e, _) and
184+
exists(Callable callee |
185+
callee = e.getEnclosingCallable() and
186+
(
187+
callee.getName().toLowerCase() = "exists" or
188+
callee.getName().toLowerCase() = "notexists"
189+
) and
190+
callee.getReturnType() instanceof BooleanType
191+
)
192+
}
193+
}
194+
195+
/**
196+
* A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink.
197+
*/
198+
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
199+
ExceptionCharacteristic() { this = "exception" }
200+
201+
override predicate appliesToEndpoint(Endpoint e) {
202+
e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
203+
}
204+
}
205+
206+
/**
207+
* A negative characteristic that indicates that an endpoint sits in a test file.
208+
*
209+
* WARNING: These endpoints should not be used as negative samples for training, because there can in fact be sinks in
210+
* test files -- we just don't care to model them because they aren't exploitable.
211+
*/
212+
private class TestFileCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
213+
TestFileCharacteristic() { this = "test file" }
214+
215+
override predicate appliesToEndpoint(Endpoint e) {
216+
exists(File f | f = e.getLocation().getFile() and isInTestFile(f))
217+
}
218+
219+
private predicate isInTestFile(File file) {
220+
file.getAbsolutePath().matches("%src/test/%") or
221+
file.getAbsolutePath().matches("%/guava-tests/%") or
222+
file.getAbsolutePath().matches("%/guava-testlib/%")
223+
}
224+
}
225+
226+
/**
227+
* A negative characteristic that filters out calls to undocumented methods. The assumption is that methods that are
228+
* intended / likely to be called from outside the package are documented.
229+
*
230+
* Note that in practice we have seen some interesting sinks in methods that are external-facing but undocumented (and
231+
* appear in empty Javadoc pages), so this filter can be expected to lead to the loss of some interesting sinks.
232+
*/
233+
private class UndocumentedMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
234+
{
235+
UndocumentedMethodCharacteristic() { this = "undocumented method" }
236+
237+
override predicate appliesToEndpoint(Endpoint e) {
238+
not exists(e.getEnclosingCallable().(Documentable).getJavadoc())
239+
}
240+
}
241+
242+
/**
243+
* A negative characteristic that filters out non-public methods. Non-public methods are not interesting to include in
244+
* the standard Java modeling, because they cannot be called from outside the package.
245+
*/
246+
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
247+
{
248+
NonPublicMethodCharacteristic() { this = "non-public method" }
249+
250+
override predicate appliesToEndpoint(Endpoint e) { not e.getEnclosingCallable().isPublic() }
251+
}
252+
253+
/**
254+
* Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
255+
* characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
256+
* an error message indicating why this combination is problematic.
257+
*
258+
* Copied from
259+
* javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
260+
*/
261+
predicate erroneousEndpoints(
262+
Endpoint endpoint, EndpointCharacteristic characteristic,
263+
AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
264+
boolean ignoreKnownModelingErrors
265+
) {
266+
// An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one
267+
// sink/source type (including the negative type).
268+
exists(
269+
EndpointCharacteristic characteristic2, AutomodelEndpointTypes::EndpointType endpointClass2,
270+
float confidence2
271+
|
272+
endpointType != endpointClass2 and
273+
(
274+
endpointType instanceof AutomodelEndpointTypes::SinkType and
275+
endpointClass2 instanceof AutomodelEndpointTypes::SinkType
276+
or
277+
endpointType instanceof AutomodelEndpointTypes::SourceType and
278+
endpointClass2 instanceof AutomodelEndpointTypes::SourceType
279+
) and
280+
characteristic.appliesToEndpoint(endpoint) and
281+
characteristic2.appliesToEndpoint(endpoint) and
282+
characteristic.hasImplications(endpointType, true, confidence) and
283+
characteristic2.hasImplications(endpointClass2, true, confidence2) and
284+
confidence > SharedCharacteristics::mediumConfidence() and
285+
confidence2 > SharedCharacteristics::mediumConfidence() and
286+
(
287+
ignoreKnownModelingErrors = true and
288+
not knownOverlappingCharacteristics(characteristic, characteristic2)
289+
or
290+
ignoreKnownModelingErrors = false
291+
)
292+
) and
293+
errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
294+
or
295+
// An endpoint's characteristics should not include positive indicators with medium/high confidence for some class and
296+
// also include negative indicators with medium/high confidence for this same class.
297+
exists(EndpointCharacteristic characteristic2, float confidence2 |
298+
characteristic.appliesToEndpoint(endpoint) and
299+
characteristic2.appliesToEndpoint(endpoint) and
300+
characteristic.hasImplications(endpointType, true, confidence) and
301+
characteristic2.hasImplications(endpointType, false, confidence2) and
302+
confidence > SharedCharacteristics::mediumConfidence() and
303+
confidence2 > SharedCharacteristics::mediumConfidence()
304+
) and
305+
ignoreKnownModelingErrors = false and
306+
errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
307+
}
308+
309+
/**
310+
* Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
311+
* have some overlap in their results. This indicates a problem with the underlying Java modeling. Specifically,
312+
* `PathCreation` is prone to FPs.
313+
*/
314+
private predicate knownOverlappingCharacteristics(
315+
EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
316+
) {
317+
characteristic1 != characteristic2 and
318+
characteristic1 = ["mad taint step", "create path", "read file", "known non-sink"] and
319+
characteristic2 = ["mad taint step", "create path", "read file", "known non-sink"]
320+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* For internal use only.
3+
*
4+
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
5+
* only predict classes defined within this file. This file is the source of truth for the integer
6+
* representation of each of these classes.
7+
*/
8+
9+
/**
 * A class label that a classifier can predict for an endpoint, represented by its name.
 */
abstract class EndpointType extends string {
  /**
   * Holds when the string matches the name of the sink / source type.
   *
   * The characteristic predicate itself accepts any bound string; the names are supplied
   * by the subclasses of this abstract class.
   */
  bindingset[this]
  EndpointType() { any() }

  /**
   * Gets the sink/source kind name used in models-as-data for this endpoint type, which is
   * the string value of the type itself.
   *
   * See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
   */
  final string getKind() { this = result }
}
24+
25+
/**
 * An endpoint type that describes a sink and can be predicted by a classifier.
 *
 * The characteristic predicate accepts any bound string; the names are supplied by the
 * subclasses of this abstract class.
 */
abstract class SinkType extends EndpointType {
  bindingset[this]
  SinkType() { any() }
}
30+
31+
/**
 * An endpoint type that describes a source and can be predicted by a classifier.
 *
 * The characteristic predicate accepts any bound string; the names are supplied by the
 * subclasses of this abstract class.
 */
abstract class SourceType extends EndpointType {
  bindingset[this]
  SourceType() { any() }
}
36+
37+
/**
 * The negative sink type, named `non-sink`, used for endpoints that are not sinks.
 */
class NegativeSinkType extends SinkType {
  NegativeSinkType() { "non-sink" = this }
}
41+
42+
/**
 * The sink type named `sql`, relevant to the SQL injection query.
 */
class SqlSinkType extends SinkType {
  SqlSinkType() { "sql" = this }
}
46+
47+
/**
 * The sink type named `tainted-path`, relevant to the tainted path injection query.
 */
class TaintedPathSinkType extends SinkType {
  TaintedPathSinkType() { "tainted-path" = this }
}
51+
52+
/**
 * The sink type named `ssrf`, relevant to the SSRF (request forgery) query.
 */
class RequestForgerySinkType extends SinkType {
  RequestForgerySinkType() { "ssrf" = this }
}
56+
57+
/**
 * The sink type named `command-injection`, relevant to the command injection query.
 */
class CommandInjectionSinkType extends SinkType {
  CommandInjectionSinkType() { "command-injection" = this }
}

0 commit comments

Comments
 (0)