@@ -7,6 +7,9 @@ private import semmle.javascript.security.dataflow.SqlInjectionCustomizations
77private import semmle.javascript.security.dataflow.DomBasedXssCustomizations
88private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations
99private import semmle.javascript.security.dataflow.TaintedPathCustomizations
10+ private import CoreKnowledge as CoreKnowledge
11+ private import semmle.javascript.heuristics.SyntacticHeuristics
12+ private import semmle.javascript.filters.ClassifyFiles as ClassifyFiles
1013
1114/**
1215 * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
@@ -135,7 +138,8 @@ private class NosqlInjectionSinkCharacteristic extends EndpointCharacteristic {
135138}
136139
137140/*
138- * Characteristics that are indicative of not being a sink of any type.
141+ * Characteristics that are indicative of not being a sink of any type, and have historically been used to select
142+ * negative samples for training.
139143 */
140144
141145/**
@@ -442,3 +446,112 @@ private class BuiltinCallNameCharacteristic extends ArgumentToBuiltinFunctionCha
442446 )
443447 }
444448}
449+
450+ /*
451+ * Characteristics that have historically acted as endpoint filters to exclude endpoints from scoring at inference time.
452+ */
453+
/**
 * A characteristic that has historically served as an endpoint filter, that is, one used to exclude endpoints
 * from scoring at inference time.
 */
abstract class EndpointFilterCharacteristic extends EndpointCharacteristic {
  // The characteristic predicate adds no restriction of its own; concrete subclasses pin `this` to a name.
  bindingset[this]
  EndpointFilterCharacteristic() { any() }
}
459+
/**
 * An `EndpointFilterCharacteristic` marking endpoints that are unlikely to be a sink of any type.
 *
 * Replaces https://github.com/github/codeql/blob/387e57546bf7352f7c1cfe781daa1a3799b7063e/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/StandardEndpointFilters.qll#LL15C24-L15C24
 */
abstract class StandardEndpointFilterCharacteristic extends EndpointFilterCharacteristic {
  // No restriction of its own; concrete subclasses pin `this` to a name.
  bindingset[this]
  StandardEndpointFilterCharacteristic() { any() }

  /**
   * Holds when `endpointClass` is a `NegativeType`, `isPositiveIndicator` is `true`, and `confidence` is
   * `mediumConfidence()`: the characteristic is a medium-confidence positive indicator for the negative class.
   */
  override predicate getImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  ) {
    confidence = mediumConfidence() and
    isPositiveIndicator = true and
    endpointClass instanceof NegativeType
  }
}
476+
/**
 * Filters out arguments of invocations that have at least one argument already covered by `CoreKnowledge`
 * (a known library sink, a known step source, or another modeled argument).
 */
private class IsArgumentToModeledFunctionCharacteristic extends StandardEndpointFilterCharacteristic {
  IsArgumentToModeledFunctionCharacteristic() { this = "argument to modeled function" }

  /**
   * Holds if `n` is an argument of an invocation for which some argument (possibly `n` itself) is modeled.
   */
  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::InvokeNode invocation, DataFlow::Node modeledArg |
      modeledArg = invocation.getAnArgument() and
      n = invocation.getAnArgument()
    |
      CoreKnowledge::isKnownLibrarySink(modeledArg)
      or
      CoreKnowledge::isKnownStepSrc(modeledArg)
      or
      CoreKnowledge::isOtherModeledArgument(modeledArg, _)
    )
  }
}
494+
/**
 * Filters out arguments passed to the `slugify`, `striptags` and `marked` modules, or to functions read off
 * those modules as properties.
 */
private class IsArgumentToSinklessLibraryCharacteristic extends StandardEndpointFilterCharacteristic {
  IsArgumentToSinklessLibraryCharacteristic() { this = "argument to sinkless library" }

  /**
   * Holds if `n` is an argument of an invocation of one of the listed module imports, or of a property read
   * on such an import.
   */
  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::SourceNode safeLib, DataFlow::InvokeNode invocation |
      safeLib = DataFlow::moduleImport(["slugify", "striptags", "marked"]) and
      (
        // The module itself is invoked, e.g. `slugify(x)` ...
        invocation = safeLib.getAnInvocation()
        or
        // ... or a member of the module is invoked, e.g. `marked.parse(x)`.
        invocation = safeLib.getAPropertyRead().getAnInvocation()
      ) and
      n = invocation.getAnArgument()
    )
  }
}
508+
/**
 * Filters out arguments of calls whose callee name suggests a sanitizer: it contains, case-insensitively,
 * `escape`, `valid`, `validate`, `sanitize` or `purify`.
 */
private class IsSanitizerCharacteristic extends StandardEndpointFilterCharacteristic {
  IsSanitizerCharacteristic() { this = "sanitizer" }

  /** Holds if `n` is an argument of a call whose callee name matches the sanitizer-name pattern. */
  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode call, string calleeName |
      calleeName = call.getCalleeName() and
      calleeName.regexpMatch("(?i).*(escape|valid(ate)?|sanitize|purify).*") and
      call.getAnArgument() = n
    )
  }
}
518+
/**
 * Filters out arguments of calls whose callee name looks like a predicate: it starts with `equals`, or with an
 * optional `is`/`has`/`can` prefix followed by `_` or an uppercase letter.
 *
 * NOTE(review): because the prefix alternative may be empty, any callee name that merely starts with `_` or an
 * uppercase letter also matches. Confirm this is intended before relying on it.
 */
private class IsPredicateCharacteristic extends StandardEndpointFilterCharacteristic {
  IsPredicateCharacteristic() { this = "predicate" }

  /** Holds if `n` is an argument of a call whose callee name matches the predicate-name pattern. */
  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode call, string calleeName |
      calleeName = call.getCalleeName() and
      calleeName.regexpMatch("(equals|(|is|has|can)(_|[A-Z])).*") and
      call.getAnArgument() = n
    )
  }
}
528+
/**
 * Filters out arguments of calls whose callee name is, case-insensitively, exactly `md5`, `hash`, or `sha`
 * followed by zero or more digits.
 */
private class IsHashCharacteristic extends StandardEndpointFilterCharacteristic {
  IsHashCharacteristic() { this = "hash" }

  /** Holds if `n` is an argument of a call whose callee name matches the hash-function pattern. */
  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode call, string calleeName |
      calleeName = call.getCalleeName() and
      calleeName.regexpMatch("(?i)^(sha\\d*|md5|hash)$") and
      call.getAnArgument() = n
    )
  }
}
538+
/**
 * Filters out endpoints for which `isReadFrom(n, ".*index.*")` holds.
 *
 * NOTE(review): `isReadFrom` is defined outside this view; presumably it matches reads of a variable or
 * property whose name contains "index", used as a proxy for numeric values. Confirm against its definition.
 */
private class IsNumericCharacteristic extends StandardEndpointFilterCharacteristic {
  IsNumericCharacteristic() { this = "numeric" }

  override predicate getEndpoints(DataFlow::Node n) {
    isReadFrom(n, ".*index.*")
  }
}
544+
/**
 * Filters out endpoints located in files that `ClassifyFiles::classify` places in one of the categories
 * `externs`, `generated`, `library` or `test`. There is one characteristic value per category, named
 * `"in <category> file"`.
 */
private class InIrrelevantFileCharacteristic extends StandardEndpointFilterCharacteristic {
  /** The file category this characteristic value stands for. */
  private string category;

  InIrrelevantFileCharacteristic() {
    category = ["externs", "generated", "library", "test"] and
    this = "in " + category + " file"
  }

  /** Holds if `n` lies in a file classified under this characteristic's `category`. */
  override predicate getEndpoints(DataFlow::Node n) {
    // Restates the constraint from the characteristic predicate, tying `this` to `category`.
    this = "in " + category + " file" and
    // Ignore candidate sinks within externs, generated, library, and test code.
    ClassifyFiles::classify(n.getFile(), category)
  }
}
0 commit comments