|
| 1 | +/** |
| 2 | + * For internal use only. |
| 3 | + */ |
| 4 | + |
| 5 | +private import java |
| 6 | +private import semmle.code.java.dataflow.DataFlow |
| 7 | +private import semmle.code.java.dataflow.TaintTracking |
| 8 | +private import semmle.code.java.security.PathCreation |
| 9 | +private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow |
| 10 | +private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl |
| 11 | +private import semmle.code.java.security.ExternalAPIs as ExternalAPIs |
| 12 | +private import semmle.code.java.Expr as Expr |
| 13 | +private import semmle.code.java.security.QueryInjection |
| 14 | +private import semmle.code.java.security.RequestForgery |
| 15 | +import AutomodelSharedCharacteristics as SharedCharacteristics |
| 16 | +import AutomodelEndpointTypes as AutomodelEndpointTypes |
| 17 | + |
/**
 * The Java implementation of `SharedCharacteristics::CandidateSig`.
 *
 * Endpoints are data-flow parameter nodes. Known sinks and neutrals are looked up in the
 * `ExternalFlow` sink and neutral models, keyed on the callable that encloses the parameter.
 */
module CandidatesImpl implements SharedCharacteristics::CandidateSig {
  class Endpoint = DataFlow::ParameterNode;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  /** Holds if `t` is the negative sink type. */
  predicate isNegative(AutomodelEndpointTypes::EndpointType t) {
    t instanceof AutomodelEndpointTypes::NegativeSinkType
  }

  /** Gets the string representation of the location of `e`. */
  string getLocationString(Endpoint e) { result = e.getLocation().toString() }

  /**
   * Holds if `label` is a known sink label, `humanReadableLabel` is its human-readable form, and
   * `type` is the endpoint type associated with that label.
   */
  predicate isKnownLabel(string label, string humanReadableLabel, EndpointType type) {
    label = "read-file" and
    humanReadableLabel = "read file" and
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
    or
    label = "create-file" and
    humanReadableLabel = "create file" and
    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
    or
    label = "sql" and
    humanReadableLabel = "mad modeled sql" and
    type instanceof AutomodelEndpointTypes::SqlSinkType
    or
    label = "open-url" and
    humanReadableLabel = "open url" and
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
    or
    label = "jdbc-url" and
    humanReadableLabel = "jdbc url" and
    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
    or
    label = "command-injection" and
    humanReadableLabel = "command injection" and
    type instanceof AutomodelEndpointTypes::CommandInjectionSinkType
  }

  /**
   * Holds if a sink model with kind `label` exists for the specification of `e` computed by
   * `sinkSpec`. A model matches on either the exact signature or the empty signature.
   */
  predicate isSink(Endpoint e, string label) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      string input
    |
      sinkSpec(e, package, type, subtypes, name, signature, ext, input) and
      ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, label, _)
    )
  }

  /**
   * Holds if a neutral model exists for the callable enclosing `e`. A model matches on either the
   * exact signature or the empty signature.
   */
  predicate isNeutral(Endpoint e) {
    exists(string package, string type, string name, string signature |
      sinkSpec(e, package, type, _, name, signature, _, _) and
      ExternalFlow::neutralModel(package, type, name, [signature, ""], _)
    )
  }

  /**
   * Holds if `package`, `type`, `subtypes`, `name`, `signature`, `ext` and `input` describe the
   * parameter `e` in the column layout used by the `ExternalFlow` models.
   *
   * `subtypes` is always `false` and `ext` is always the empty string.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string input
  ) {
    package = e.getEnclosingCallable().getDeclaringType().getPackage().toString() and
    // NOTE(review): the top-level `hasMetadata` predicate uses `getErasure().(RefType).nestedName()`
    // for the type column, whereas this uses `getName()`. The two presumably differ for nested
    // types -- confirm which form the models expect.
    type = e.getEnclosingCallable().getDeclaringType().getName() and
    // NOTE(review): because this value is passed on to `sinkModel` in `isSink`, only models whose
    // `subtypes` column is `false` can match -- confirm that this is intended.
    subtypes = false and
    name = e.getEnclosingCallable().getName() and
    signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
    ext = "" and
    exists(int paramIdx | e.isParameterOf(_, paramIdx) | input = "Argument[" + paramIdx + "]")
  }

  /**
   * Holds if `metadata` is the dictionary-style string that describes endpoint `n`.
   *
   * The values come from the top-level `hasMetadata` predicate. `Subtypes` is `false` when the
   * callee or its declaring type is final and `true` otherwise, `Ext` is always empty, and
   * `Provenance` is always `ai-generated`. Single quotes in the callee's Javadoc are replaced by
   * double quotes so that they do not terminate the single-quoted value.
   */
  predicate hasMetadata(Endpoint n, string metadata) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      int input, string provenance, boolean isPublic, boolean isFinal, string calleeJavaDoc
    |
      hasMetadata(n, package, type, name, signature, input, isFinal, isPublic, calleeJavaDoc) and
      (if isFinal = true then subtypes = false else subtypes = true) and
      ext = "" and // see https://github.slack.com/archives/CP9127VUK/p1673979477496069
      provenance = "ai-generated" and
      // String-valued fields are wrapped in single quotes; boolean and integer fields are not.
      // Each separator must therefore open and close quotes to match its neighbouring values.
      metadata =
        "{" //
          + "'Package': '" + package //
          + "', 'Type': '" + type //
          + "', 'Subtypes': " + subtypes //
          + ", 'Name': '" + name //
          + "', 'Signature': '" + signature //
          + "', 'Ext': '" + ext //
          + "', 'Argument index': " + input //
          + ", 'Provenance': '" + provenance //
          + "', 'Is public': " + isPublic //
          // `isPublic` is an unquoted boolean, so the separator must not start with a closing quote.
          + ", 'Callee JavaDoc': '" + calleeJavaDoc.replaceAll("'", "\"") //
          + "'}" // TODO: Why are the curly braces added twice?
    )
  }
}
| 110 | + |
/** The shared characteristics library, instantiated with the Java candidate implementation. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<CandidatesImpl>;

/** An endpoint characteristic, as defined by the instantiated shared library. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;

/** An endpoint, as defined by the Java candidate implementation. */
class Endpoint = CandidatesImpl::Endpoint;
| 116 | + |
| 117 | +/* |
| 118 | + * Predicates that are used to surface prompt examples and candidates for classification with an ML model. |
| 119 | + */ |
| 120 | + |
/**
 * Holds if endpoint `n` is the parameter at index `input` of a callable that is described by the
 * remaining columns.
 *
 * - `package` and `type` identify the callable's declaring type (`type` is the nested name of the
 *   erased declaring type).
 * - `name` is the name of the callable's source declaration and `signature` is its parameter string.
 * - `isFinal` is `true` if the callable or its declaring type is final, and `false` otherwise.
 * - `isPublic` is `true` if the callable is public, and `false` otherwise.
 * - `calleeJavaDoc` is the string form of the callable's Javadoc, or the empty string if it has none.
 *
 * This is a helper used to extract and export the information needed about each endpoint.
 */
predicate hasMetadata(
  Endpoint n, string package, string type, string name, string signature, int input,
  boolean isFinal, boolean isPublic, string calleeJavaDoc
) {
  exists(Callable callee, RefType declaringType |
    callee.getParameter(input) = n.asParameter() and
    declaringType = callee.getDeclaringType() and
    package = declaringType.getPackage().getName() and
    type = declaringType.getErasure().(RefType).nestedName() and
    name = callee.getSourceDeclaration().getName() and
    signature = ExternalFlow::paramsString(callee) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
    // Finality: either the callable itself or its declaring type being final counts.
    (
      (callee.isFinal() or declaringType.isFinal()) and isFinal = true
      or
      not callee.isFinal() and not declaringType.isFinal() and isFinal = false
    ) and
    // Visibility.
    (
      callee.isPublic() and isPublic = true
      or
      not callee.isPublic() and isPublic = false
    ) and
    // Javadoc, defaulting to the empty string so that undocumented callables still get a row.
    (
      exists(callee.(Documentable).getJavadoc()) and
      calleeJavaDoc = callee.(Documentable).getJavadoc().toString()
      or
      not exists(callee.(Documentable).getJavadoc()) and
      calleeJavaDoc = ""
    )
  )
}
| 147 | + |
| 148 | +/* |
| 149 | + * EndpointCharacteristic classes that are specific to Automodel for Java. |
| 150 | + */ |
| 151 | + |
/**
 * A negative characteristic for parameters of is-style boolean methods.
 *
 * It applies to an endpoint whose enclosing callable has a name starting with `is` and a boolean
 * return type (e.g. `isDirectory`), provided that the endpoint is not already a known sink. Such
 * methods normally only perform checks, and are called before the method that does the
 * dangerous/interesting thing, so we want the latter to be modeled as the sink.
 *
 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callee | callee = e.getEnclosingCallable() |
      callee.getName().matches("is%") and
      callee.getReturnType() instanceof BooleanType
    ) and
    // Known sinks are never overridden by this characteristic.
    not CandidatesImpl::isSink(e, _)
  }
}
| 170 | + |
/**
 * A negative characteristic for parameters of existence-checking boolean methods.
 *
 * It applies to an endpoint whose enclosing callable is named `exists` or `notExists` (compared
 * case-insensitively) and has a boolean return type, provided that the endpoint is not already a
 * known sink. Such methods normally only perform checks, and are called before the method that
 * does the dangerous/interesting thing, so we want the latter to be modeled as the sink.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callee | callee = e.getEnclosingCallable() |
      callee.getName().toLowerCase() = ["exists", "notexists"] and
      callee.getReturnType() instanceof BooleanType
    ) and
    // Known sinks are never overridden by this characteristic.
    not CandidatesImpl::isSink(e, _)
  }
}
| 194 | + |
/**
 * A negative characteristic for parameters of callables declared on throwable types.
 *
 * It applies to an endpoint when the declaring type of its enclosing callable is `Throwable` or
 * has `Throwable` among its (transitive) supertypes. Arguments to exceptions are not sinks.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  ExceptionCharacteristic() { this = "exception" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(RefType declaringType |
      declaringType = e.getEnclosingCallable().getDeclaringType() and
      declaringType.getASupertype*() instanceof TypeThrowable
    )
  }
}
| 205 | + |
/**
 * A negative characteristic for endpoints that are located in a test file.
 *
 * WARNING: These endpoints should not be used as negative samples for training, because there can in fact be sinks in
 * test files -- we just don't care to model them because they aren't exploitable.
 */
private class TestFileCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
  TestFileCharacteristic() { this = "test file" }

  override predicate appliesToEndpoint(Endpoint e) {
    this.isInTestFile(e.getLocation().getFile())
  }

  /** Holds if the absolute path of `file` contains one of the recognised test directory fragments. */
  private predicate isInTestFile(File file) {
    file.getAbsolutePath().matches(["%src/test/%", "%/guava-tests/%", "%/guava-testlib/%"])
  }
}
| 225 | + |
/**
 * A negative characteristic for parameters of callables that have no Javadoc. The assumption is
 * that methods that are intended / likely to be called from outside the package are documented.
 *
 * Note that in practice we have seen some interesting sinks in methods that are external-facing but undocumented (and
 * appear in empty Javadoc pages), so this filter can be expected to lead to the loss of some interesting sinks.
 */
private class UndocumentedMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  UndocumentedMethodCharacteristic() { this = "undocumented method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callee | callee = e.getEnclosingCallable() |
      not exists(callee.(Documentable).getJavadoc())
    )
  }
}
| 241 | + |
/**
 * A negative characteristic for parameters of callables that are not public. Non-public methods
 * are not interesting to include in the standard Java modeling, because they cannot be called from
 * outside the package.
 */
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  NonPublicMethodCharacteristic() { this = "non-public method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callee | callee = e.getEnclosingCallable() | not callee.isPublic())
  }
}
| 252 | + |
/**
 * Holds if `endpoint` has a self-contradictory combination of characteristics. This detects errors
 * in our endpoint characteristics.
 *
 * Each result reports one offending `characteristic` that applies to `endpoint`, the `endpointType`
 * it positively implies with the given `confidence`, and an `errorMessage` explaining why the
 * combination is problematic. When `ignoreKnownModelingErrors` is `true`, pairs listed in
 * `knownOverlappingCharacteristics` are not reported, and only the "multiple classes" check
 * produces results.
 *
 * Copied from
 * javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
 */
predicate erroneousEndpoints(
  Endpoint endpoint, EndpointCharacteristic characteristic,
  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
  boolean ignoreKnownModelingErrors
) {
  // An endpoint should not have positive indicators, each with confidence strictly above medium,
  // for two different sink types or for two different source types (the negative type included).
  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes" and
  exists(
    EndpointCharacteristic otherCharacteristic, AutomodelEndpointTypes::EndpointType otherType,
    float otherConfidence
  |
    characteristic.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointType, true, confidence) and
    confidence > SharedCharacteristics::mediumConfidence() and
    otherCharacteristic.appliesToEndpoint(endpoint) and
    otherCharacteristic.hasImplications(otherType, true, otherConfidence) and
    otherConfidence > SharedCharacteristics::mediumConfidence() and
    otherType != endpointType and
    (
      endpointType instanceof AutomodelEndpointTypes::SinkType and
      otherType instanceof AutomodelEndpointTypes::SinkType
      or
      endpointType instanceof AutomodelEndpointTypes::SourceType and
      otherType instanceof AutomodelEndpointTypes::SourceType
    ) and
    (
      ignoreKnownModelingErrors = false
      or
      ignoreKnownModelingErrors = true and
      not knownOverlappingCharacteristics(characteristic, otherCharacteristic)
    )
  )
  or
  // An endpoint should not have both a positive and a negative indicator for the same class when
  // each has confidence strictly above medium.
  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class" and
  ignoreKnownModelingErrors = false and
  exists(EndpointCharacteristic otherCharacteristic, float otherConfidence |
    characteristic.appliesToEndpoint(endpoint) and
    characteristic.hasImplications(endpointType, true, confidence) and
    confidence > SharedCharacteristics::mediumConfidence() and
    otherCharacteristic.appliesToEndpoint(endpoint) and
    otherCharacteristic.hasImplications(endpointType, false, otherConfidence) and
    otherConfidence > SharedCharacteristics::mediumConfidence()
  )
}
| 308 | + |
/**
 * Holds if `characteristic1` and `characteristic2` are two distinct characteristics from the set of
 * currently known positive characteristics that have some overlap in their results. This indicates
 * a problem with the underlying Java modeling. Specifically, `PathCreation` is prone to FPs.
 */
private predicate knownOverlappingCharacteristics(
  EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
) {
  characteristic1 = knownOverlappingCharacteristic() and
  characteristic2 = knownOverlappingCharacteristic() and
  characteristic1 != characteristic2
}

/** Gets a characteristic that is known to overlap with the other characteristics in this set. */
private EndpointCharacteristic knownOverlappingCharacteristic() {
  result = ["mad taint step", "create path", "read file", "known non-sink"]
}
0 commit comments