diff --git a/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index 8dc0c3d7f6b1..be0cf4434311 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index e9a5ea24deca..813eece65b96 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 5485a2645bcb..1ed9b0320709 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,8 +7,8 @@ */ import internal.CaptureModels -import Heuristic +import SinkModels from DataFlowSinkTargetApi api, string sink -where sink = captureSink(api) +where sink = Heuristic::captureSink(api) select sink order by sink diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql index c2240c646886..4f86fb6b6fb1 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,8 +7,8 @@ */ import internal.CaptureModels -import Heuristic +import SourceModels from DataFlowSourceTargetApi api, string source -where source = captureSource(api) +where source = Heuristic::captureSource(api) select source order by source diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 60341abc0b59..a023afabd31c 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 58acfa011186..93abe205f1a6 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -2,7 +2,7 @@ * Provides predicates related to capturing summary models of the Standard or a 3rd party library. */ -private import cpp +private import cpp as Cpp private import semmle.code.cpp.ir.IR private import semmle.code.cpp.dataflow.ExternalFlow as ExternalFlow private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon @@ -10,113 +10,67 @@ private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplSpecific private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate as DataFlowPrivate private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.code.cpp.ir.dataflow.internal.TaintTrackingImplSpecific -private import semmle.code.cpp.dataflow.new.TaintTracking +private import semmle.code.cpp.dataflow.new.TaintTracking as Tt +private import semmle.code.cpp.dataflow.new.DataFlow as Df private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl -module ModelGeneratorInput implements ModelGeneratorInputSig { +/** + * Holds if `f` is a "private" function. + * + * A "private" function does not contribute any models as it is assumed + * to be an implementation detail of some other "public" function for which + * we will generate a summary. + */ +private predicate isPrivateOrProtected(Cpp::Function f) { + f.getNamespace().getParentNamespace*().isAnonymous() + or + exists(Cpp::MemberFunction mf | mf = f | + mf.isPrivate() + or + mf.isProtected() + ) + or + f.isStatic() +} + +private predicate isUninterestingForModels(Callable api) { + // Note: This also makes all global/static-local variables + // not relevant (which is good!) + not api.(Cpp::Function).hasDefinition() + or + isPrivateOrProtected(api) + or + api instanceof Cpp::Destructor + or + api = any(Cpp::LambdaExpression lambda).getLambdaFunction() + or + api.isFromUninstantiatedTemplate(_) +} + +private predicate relevant(Callable api) { + api.fromSource() and + not isUninterestingForModels(api) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig +{ + private module DataFlow = Df::DataFlow; + class Type = DataFlowPrivate::DataFlowType; // Note: This also includes `this` class Parameter = DataFlow::ParameterNode; - class Callable = Declaration; + class Callable = Cpp::Declaration; class NodeExtended extends DataFlow::Node { Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingDeclaration() } } - Parameter asParameter(NodeExtended n) { result = n } - Callable getEnclosingCallable(NodeExtended n) { result = n.getEnclosingCallable().asSourceCallable() } - Callable getAsExprEnclosingCallable(NodeExtended n) { - result = n.asExpr().getEnclosingDeclaration() - } - - /** Gets `api` if it is relevant. */ - private Callable liftedImpl(Callable api) { result = api and relevant(api) } - - private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSourceModel(Callable api) { - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSinkModel(Callable api) { - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) - } - - /** - * Holds if `f` is a "private" function. - * - * A "private" function does not contribute any models as it is assumed - * to be an implementation detail of some other "public" function for which - * we will generate a summary. - */ - private predicate isPrivateOrProtected(Function f) { - f.getNamespace().getParentNamespace*().isAnonymous() - or - exists(MemberFunction mf | mf = f | - mf.isPrivate() - or - mf.isProtected() - ) - or - f.isStatic() - } - - private predicate isUninterestingForModels(Callable api) { - // Note: This also makes all global/static-local variables - // not relevant (which is good!) - not api.(Function).hasDefinition() - or - isPrivateOrProtected(api) - or - api instanceof Destructor - or - api = any(LambdaExpression lambda).getLambdaFunction() - or - api.isFromUninstantiatedTemplate(_) - } - - private predicate relevant(Callable api) { - api.fromSource() and - not isUninterestingForModels(api) - } - - class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - Callable lift() { result = lift } - - predicate isRelevant() { - relevant(this) and - not hasManualSummaryModel(this) - } - } - - class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } - } - - class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } - } - - class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { not hasManualSourceModel(this) } - } - class InstanceParameterNode extends DataFlow::ParameterNode { InstanceParameterNode() { DataFlowPrivate::nodeHasInstruction(this, @@ -124,7 +78,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig" @@ -166,7 +120,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } +private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + private module DataFlow = Df::DataFlow; - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } + Parameter asParameter(NodeExtended n) { result = n } - predicate containerContent(DataFlow::ContentSet cs) { cs instanceof DataFlow::ElementContent } + Callable getAsExprEnclosingCallable(NodeExtended n) { + result = n.asExpr().getEnclosingDeclaration() + } + + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) + } + + /** Gets `api` if it is relevant. */ + private Callable liftedImpl(Callable api) { result = api and relevant(api) } + + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } + + Callable lift() { result = lift } + + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } + } predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and + Tt::TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and not exists(DataFlow::Content f | DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) ) @@ -341,7 +328,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 7841f8ed1a44..43342aa671ee 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import cpp as Cpp private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = Cpp::Declaration; diff --git a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 0156eaaeb988..8196f6329cc9 100644 --- a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import cpp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import InlineModelsAsDataTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 3ab1dc6c4710..fc05c4fe434f 100644 --- a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import cpp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import InlineModelsAsDataTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index 039c96a9a0bc..4d56c922a397 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index 2afb0ea02845..c74240bedea5 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql index f4c9405c96a7..f0d3294cd8ac 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 70f853b35a95..97688d6eb423 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index a0193397eb2e..61656c200adf 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index beb14cd8e627..60d7b42a46df 100644 --- a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -10,6 +10,7 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index e3de78767eaa..a53958ad0e6d 100644 --- a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -10,6 +10,7 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ce83369df077..b0300e4a87f1 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -15,7 +15,41 @@ private import semmle.code.csharp.frameworks.System private import semmle.code.csharp.Location private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate irrelevantAccessor(CS::Accessor a) { + a.getDeclaration().(CS::Property).isReadWrite() +} + +private predicate isUninterestingForModels(Callable api) { + api.getDeclaringType().getNamespace().getFullName() = "" + or + api instanceof CS::ConversionOperator + or + api instanceof Util::MainMethod + or + api instanceof CS::Destructor + or + api instanceof CS::AnonymousFunctionExpr + or + api.(CS::Constructor).isParameterless() + or + exists(Type decl | decl = api.getDeclaringType() | + decl instanceof SystemObjectClass or + decl instanceof SystemValueTypeClass + ) + or + // Disregard properties that have both a get and a set accessor, + // which implicitly means auto implemented properties. + irrelevantAccessor(api) +} + +private predicate relevant(Callable api) { + [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and + api.fromSource() and + api.isUnboundDeclaration() and + not isUninterestingForModels(api) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig { class Type = CS::Type; class Parameter = CS::Parameter; @@ -24,127 +58,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig`. - */ - private predicate isHigherOrder(Callable api) { - exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | - t instanceof SystemLinqExpressions::DelegateExtType - ) - } - - private predicate irrelevantAccessor(CS::Accessor a) { - a.getDeclaration().(CS::Property).isReadWrite() - } - - private predicate isUninterestingForModels(Callable api) { - api.getDeclaringType().getNamespace().getFullName() = "" - or - api instanceof CS::ConversionOperator - or - api instanceof Util::MainMethod - or - api instanceof CS::Destructor - or - api instanceof CS::AnonymousFunctionExpr - or - api.(CS::Constructor).isParameterless() - or - exists(Type decl | decl = api.getDeclaringType() | - decl instanceof SystemObjectClass or - decl instanceof SystemValueTypeClass - ) - or - // Disregard properties that have both a get and a set accessor, - // which implicitly means auto implemented properties. - irrelevantAccessor(api) - } - - private predicate relevant(Callable api) { - [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and - api.fromSource() and - api.isUnboundDeclaration() and - not isUninterestingForModels(api) - } - - private Callable getARelevantOverrideeOrImplementee(Overridable m) { - m.overridesOrImplements(result) and relevant(result) - } - - /** - * Gets the super implementation of `api` if it is relevant. - * If such a super implementation does not exist, returns `api` if it is relevant. - */ - private Callable liftedImpl(Callable api) { - ( - result = getARelevantOverrideeOrImplementee(api) - or - result = api and relevant(api) - ) and - not exists(getARelevantOverrideeOrImplementee(result)) - } - - private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSourceModel(Callable api) { - api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSinkModel(Callable api) { - api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) - } - - predicate isUninterestingForDataFlowModels(Callable api) { none() } - - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) } - - class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } - } - - class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } - } - - class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { - not hasManualSourceModel(this) and - // Do not generate source models for overridable callables - // as virtual dispatch implies that too many methods - // will be considered sources. - not this.(Overridable).overridesOrImplements(_) - } - } - - class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - Callable lift() { result = lift } - - predicate isRelevant() { - relevant(this) and - not hasManualSummaryModel(this) - } - } - /** * Holds if `t` is a type that is generally used for bulk data in collection types. * Eg. char[] is roughly equivalent to string and thus a highly @@ -205,6 +120,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig { @@ -251,62 +166,91 @@ module ModelGeneratorInput implements ModelGeneratorInputSig - predicate apiSource(DataFlow::Node source) { - isRelevantMemberAccess(source) or source instanceof DataFlow::ParameterNode +module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + Callable getAsExprEnclosingCallable(NodeExtended node) { + result = node.asExpr().getEnclosingCallable() } - private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) { - exists(DataFlowCall call | - dc1 = call.getEnclosingCallable() and - dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0) + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + + /** + * Holds if any of the parameters of `api` are `System.Func<>`. + */ + private predicate isHigherOrder(Callable api) { + exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | + t instanceof SystemLinqExpressions::DelegateExtType ) } - bindingset[dc1, dc2] - private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) = - fastTC(uniquelyCalls/2)(dc1, dc2) + private Callable getARelevantOverrideeOrImplementee(Overridable m) { + m.overridesOrImplements(result) and relevant(result) + } - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { - not exists(DataFlowCallable dc1, DataFlowCallable dc2 | - uniquelyCallsPlus(dc1, dc2) or dc1 = dc2 - | - dc1.getUnderlyingCallable() = api and - dc2.getUnderlyingCallable() = sourceEnclosing - ) + /** + * Gets the super implementation of `api` if it is relevant. + * If such a super implementation does not exist, returns `api` if it is relevant. + */ + private Callable liftedImpl(Callable api) { + ( + result = getARelevantOverrideeOrImplementee(api) + or + result = api and relevant(api) + ) and + not exists(getARelevantOverrideeOrImplementee(result)) } - string getInputArgument(DataFlow::Node source) { - exists(int pos | - pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and - result = "Argument[" + pos + "]" - ) - or - source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and - result = qualifierString() + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } + predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) } + + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + Callable lift() { result = lift } - predicate containerContent(DataFlow::ContentSet c) { c.isElement() } + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } + } predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and @@ -370,34 +314,88 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 97e58d439f0c..52611279b273 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import csharp as CS private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = CS::Callable; diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index f7b0633ddd39..baba462c8a24 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -2,7 +2,8 @@ private import csharp private import semmle.code.csharp.frameworks.system.collections.Generic as GenericCollections private import semmle.code.csharp.dataflow.internal.DataFlowPrivate private import semmle.code.csharp.frameworks.system.linq.Expressions -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput +private import CaptureModels::SummaryModelGeneratorInput as SummaryModelGeneratorInput private import CaptureModelsPrinting /** @@ -177,21 +178,19 @@ private predicate output(Callable callable, TypeParameter tp, string output) { delegateSink(callable, tp, output) } -private module ModelPrintingInput implements ModelPrintingSig { +private module ModelPrintingInput implements ModelPrintingSummarySig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = TypeBasedFlowTargetApi; - string getProvenance() { result = "tb-generated" } } -private module Printing = ModelPrinting; +private module Printing = ModelPrintingSummary; /** * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { +class TypeBasedFlowTargetApi extends SummaryModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 0d9e4cd52d9f..0c8134546d24 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 24cb66e427e7..b5a3b31a0354 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index d5aa685bfe31..e79cab74560c 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql index cc84ede4235e..0cc8dd6d08d3 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql index 4c10362960a2..2a54abf9b72d 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index b1340e2c0d33..1fe70bae0b5b 100644 --- a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index d17c11d4a7b0..6008c3bfb8c4 100644 --- a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 7c316a02b090..7fcba8850d6d 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 4a955d4614b7..c623645820b6 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 34b6521e7b22..3bc49c31df27 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index 8895fdaefbb3..b9dc9ea236a0 100644 --- a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -11,6 +11,7 @@ import java import semmle.code.java.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index 8f6bf1c1f531..5925364800c9 100644 --- a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -11,6 +11,7 @@ import java import semmle.code.java.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 9dd317b30067..09223d23b1c5 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -25,7 +25,20 @@ predicate isPrimitiveTypeUsedForBulkData(J::Type t) { t.hasName(["byte", "char", "Byte", "Character"]) } -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate isInfrequentlyUsed(J::CompilationUnit cu) { + cu.getPackage().getName().matches("javax.swing%") or + cu.getPackage().getName().matches("java.awt%") +} + +private predicate relevant(Callable api) { + api.isPublic() and + api.getDeclaringType().isPublic() and + api.fromSource() and + not isUninterestingForModels(api) and + not isInfrequentlyUsed(api.getCompilationUnit()) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig { class Type = J::Type; class Parameter = J::Parameter; @@ -34,96 +47,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig - string getInputArgument(DataFlow::Node source) { - exists(int pos | - source.(DataFlow::ParameterNode).isParameterOf(_, pos) and - if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() - ) - or - source.asExpr() instanceof J::FieldAccess and - result = qualifierString() +module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + Callable getAsExprEnclosingCallable(NodeExtended node) { + result = node.asExpr().getEnclosingCallable() } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { - not kind = "log-injection" and - not kind.matches("regex-use%") and - not kind = "file-content-store" + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + + private J::Method getARelevantOverride(J::Method m) { + result = m.getAnOverride() and + relevant(result) and + // Other exclusions for overrides. + not m instanceof J::ToStringMethod } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + /** + * Gets the super implementation of `m` if it is relevant. + * If such a super implementations does not exist, returns `m` if it is relevant. + */ + private J::Callable liftedImpl(J::Callable m) { + ( + result = getARelevantOverride(m) + or + result = m and relevant(m) + ) and + not exists(getARelevantOverride(result)) + } - predicate containerContent = DataFlowPrivate::containerContent/1; + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() + } + + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } + + Callable lift() { result = lift } + + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } + } + + predicate isUninterestingForDataFlowModels(Callable api) { + api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) + } predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and @@ -287,34 +250,71 @@ module ModelGeneratorInput implements ModelGeneratorInputSig= 0 then result = "Argument[" + pos + "]" else result = qualifierString() + ) + or + source.asExpr() instanceof J::FieldAccess and + result = qualifierString() + } + + bindingset[kind] + predicate isRelevantSinkKind(string kind) { + not kind = "log-injection" and + not kind.matches("regex-use%") and + not kind = "file-content-store" + } predicate sinkNode = ExternalFlow::sinkNode/2; } -import MakeModelGenerator +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 4b376654afbe..0d26f36c690a 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import java as J private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = J::Callable; diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 36aec8053196..00c8c686c2d1 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -2,7 +2,8 @@ private import java private import semmle.code.java.Collections private import semmle.code.java.dataflow.internal.ContainerFlow private import CaptureModels as CaptureModels -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput +private import CaptureModels::SummaryModelGeneratorInput as SummaryModelGeneratorInput private import CaptureModelsPrinting /** @@ -284,21 +285,19 @@ private predicate output(Callable callable, TypeVariable tv, string output) { functionalSink(callable, tv, output) } -module ModelPrintingInput implements ModelPrintingSig { +module ModelPrintingInput implements ModelPrintingSummarySig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi; - string getProvenance() { result = "tb-generated" } } -private module Printing = ModelPrinting; +private module Printing = ModelPrintingSummary; /** * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { +class TypeBasedFlowTargetApi extends SummaryModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 8dd23714fb79..b3d9101633b8 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 45485a8009a5..8d6021ab42e4 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index 3578153ddb82..ad567051922d 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql index 027670316c33..3d21b2e4f7d6 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql index d8346f0e3dca..bc95ecf3f2de 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index da90465197e5..4b672e74da59 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index 8efc8a485e12..556d1624f39d 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = Heuristic::captureNoFlow(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 36b1b8132977..989637a867e3 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 7086f719b2d8..2ffc4894b18c 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 8947dd015310..d0b3152c9be2 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index eb0cd638b534..b8855b94bf2a 100644 --- a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -10,6 +10,7 @@ private import codeql.rust.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index 1ddec1ff618b..ac25306ceeee 100644 --- a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -10,6 +10,7 @@ private import codeql.rust.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 237da46750bd..99e1c527b546 100644 --- a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -2,7 +2,7 @@ private import codeql.util.Unit private import rust private import rust as R private import codeql.rust.dataflow.DataFlow -private import codeql.rust.dataflow.internal.DataFlowImpl +private import codeql.rust.dataflow.internal.DataFlowImpl as DataFlowImpl private import codeql.rust.dataflow.internal.Node as Node private import codeql.rust.dataflow.internal.Content private import codeql.rust.dataflow.FlowSource as FlowSource @@ -11,7 +11,25 @@ private import codeql.rust.dataflow.internal.TaintTrackingImpl private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl private import codeql.rust.dataflow.internal.FlowSummaryImpl as FlowSummary -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate relevant(Function api) { + // Only include functions that have a resolved path. + api.hasCrateOrigin() and + api.hasExtendedCanonicalPath() and + ( + // This excludes closures (these are not exported API endpoints) and + // functions without a `pub` visiblity. A function can be `pub` without + // ultimately being exported by a crate, so this is an overapproximation. + api.hasVisibility() + or + // If a method implements a public trait it is exposed through the trait. + // We overapproximate this by including all trait method implementations. + exists(Impl impl | impl.hasTrait() and impl.getAssocItemList().getAssocItem(_) = api) + ) +} + +module ModelGeneratorCommonInput implements + ModelGeneratorCommonInputSig +{ // NOTE: We are not using type information for now. class Type = Unit; @@ -23,55 +41,10 @@ module ModelGeneratorInput implements ModelGeneratorInputSig - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } +private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = this and + relevant(this) + } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + Callable lift() { result = lift } - predicate containerContent(DataFlow::ContentSet c) { - c.(SingletonContentSet).getContent() instanceof ElementContent + predicate isRelevant() { relevant(this) } } + Callable getAsExprEnclosingCallable(NodeExtended node) { result = node.asExpr().getScope() } + + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } predicate isField(DataFlow::ContentSet c) { @@ -159,7 +141,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 4c2da918f739..789113f7580d 100644 --- a/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import rust as R private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = R::Callable; diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql index 14edea3af8a1..4b8041fb4447 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql index 66f0780448c7..c535ce787043 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest import codeql.rust.dataflow.internal.ModelsAsData diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql index 002689a20390..e0788253f43e 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll index b9592964f931..dabd687b52b9 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -15,7 +15,7 @@ private import ModelPrinting /** * Provides language-specific model generator parameters. */ -signature module ModelGeneratorInputSig Lang> { +signature module ModelGeneratorCommonInputSig Lang> { /** * A Type. */ @@ -49,52 +49,6 @@ signature module ModelGeneratorInputSig /** Gets the enclosing callable of `node`. */ Callable getEnclosingCallable(NodeExtended node); - /** - * Gets the enclosing callable of `node`, when considered as an expression. - */ - Callable getAsExprEnclosingCallable(NodeExtended node); - - /** Gets the parameter corresponding to this node, if any. */ - Parameter asParameter(NodeExtended n); - - /** - * A class of callables that are potentially relevant for generating summary or - * neutral models. - * - * In the Standard library and 3rd party libraries it is the callables (or callables that have a - * super implementation) that can be called from outside the library itself. - */ - class SummaryTargetApi extends Callable { - /** - * Gets the callable that a model will be lifted to. - * - * The lifted callable is relevant in terms of model - * generation (this is ensured by `liftedImpl`). - */ - Callable lift(); - - /** - * Holds if `this` is relevant in terms of model generation. - */ - predicate isRelevant(); - } - - /** - * A class of callables that are potentially relevant for generating source or - * sink models. - */ - class SourceOrSinkTargetApi extends Callable; - - /** - * A class of callables that are potentially relevant for generating source models. - */ - class SourceTargetApi extends SourceOrSinkTargetApi; - - /** - * A class of callables that are potentially relevant for generating sink models. - */ - class SinkTargetApi extends SourceOrSinkTargetApi; - /** * An instance parameter node. */ @@ -113,22 +67,6 @@ signature module ModelGeneratorInputSig */ Type getUnderlyingContentType(Lang::ContentSet c); - /** - * Gets the MaD string representation of the qualifier. - */ - string qualifierString(); - - /** - * Gets the MaD string representation of the parameter `p`. - */ - string parameterAccess(Parameter p); - - /** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ - string parameterContentAccess(Parameter p); - /** * Gets the MaD string representation of return through parameter at position * `pos` of callable `c`. @@ -154,69 +92,26 @@ signature module ModelGeneratorInputSig predicate isOwnInstanceAccessNode(Lang::ReturnNode node); /** - * Holds if `node` is a sanitizer for sink model construction. - */ - predicate sinkModelSanitizer(Lang::Node node); - - /** - * Holds if `source` is an api entrypoint relevant for creating sink models. - */ - predicate apiSource(Lang::Node source); - - /** - * Gets the MaD input string representation of `source`. - */ - string getInputArgument(Lang::Node source); - - /** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. + * Gets the MaD string representation of the parameter `p`. */ - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api); + string parameterAccess(Parameter p); /** - * Holds if `kind` is a relevant sink kind for creating sink models. + * Gets the MaD string representation of the parameter `p` + * when used in content flow. */ - bindingset[kind] - predicate isRelevantSinkKind(string kind); + string parameterContentAccess(Parameter p); /** - * Holds if `kind` is a relevant source kind for creating source models. + * Gets the MaD string representation of the qualifier. */ - bindingset[kind] - predicate isRelevantSourceKind(string kind); + string qualifierString(); /** * Holds if the the content `c` is a container. */ predicate containerContent(Lang::ContentSet c); - /** - * Holds if there is a taint step from `node1` to `node2` in content flow. - */ - predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo); - - /** - * Holds if the content set `c` is field like. - */ - predicate isField(Lang::ContentSet c); - - /** - * Holds if the content set `c` is callback like. - */ - predicate isCallback(Lang::ContentSet c); - - /** - * Gets the MaD synthetic name string representation for the content set `c`, if any. - */ - string getSyntheticName(Lang::ContentSet c); - - /** - * Gets the MaD string representation of the content set `c`. - */ - string printContent(Lang::ContentSet c); - /** * Gets the parameter position of the return kind, if any. */ @@ -230,22 +125,6 @@ signature module ModelGeneratorInputSig */ default string getReturnValueString(Lang::ReturnKind kind) { result = "ReturnValue" } - /** - * Holds if it is irrelevant to generate models for `api` based on data flow analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ - predicate isUninterestingForDataFlowModels(Callable api); - - /** - * Holds if it is irrelevant to generate models for `api` based on the heuristic - * (non-content) flow analysis. - * - * This serves as an extra filter for the `relevant` - * and `isUninterestingForDataFlowModels` predicates. - */ - predicate isUninterestingForHeuristicDataFlowModels(Callable api); - /** * Gets the string representation for the `i`th column in the MaD row for `api`. */ @@ -255,23 +134,14 @@ signature module ModelGeneratorInputSig * Gets the string representation for the `i`th column in the neutral MaD row for `api`. */ string partialNeutralModelRow(Callable api, int i); - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate sourceNode(Lang::Node node, string kind); - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate sinkNode(Lang::Node node, string kind); } -module MakeModelGenerator< +/** + * Make a factory for constructing different model generators. + */ +module MakeModelGeneratorFactory< LocationSig Location, InputSig Lang, Tt::InputSig TaintLang, - ModelGeneratorInputSig ModelGeneratorInput> + ModelGeneratorCommonInputSig ModelGeneratorInput> { private module DataFlow { import Lang @@ -338,16 +208,6 @@ module MakeModelGenerator< } } - final private class SummaryTargetApiFinal = SummaryTargetApi; - - class DataFlowSummaryTargetApi extends SummaryTargetApiFinal { - DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } - } - - class DataFlowSourceTargetApi = SourceTargetApi; - - class DataFlowSinkTargetApi = SinkTargetApi; - /** * Holds if `c` is a relevant content kind, where the underlying type is relevant. */ @@ -365,716 +225,917 @@ module MakeModelGenerator< containerContent(c) } + private string getOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt::getOutput(node) + } + /** - * Provides classes and predicates related to capturing models - * based on heuristic data flow. + * Provides language-specific summary model generator parameters. */ - module Heuristic { - private module ModelPrintingInput implements Printing::ModelPrintingSig { - class SummaryApi = DataFlowSummaryTargetApi; - - class SourceOrSinkApi = SourceOrSinkTargetApi; + signature module SummaryModelGeneratorInputSig { + /** + * A class of callables that are potentially relevant for generating summary or + * neutral models. + * + * In the Standard library and 3rd party libraries it is the callables (or callables that have a + * super implementation) that can be called from outside the library itself. + */ + class SummaryTargetApi extends Callable { + /** + * Gets the callable that a model will be lifted to. + * + * The lifted callable is relevant in terms of model + * generation (this is ensured by `liftedImpl`). + */ + Callable lift(); - string getProvenance() { result = "df-generated" } + /** + * Holds if `this` is relevant in terms of model generation. + */ + predicate isRelevant(); } - module ModelPrinting = Printing::ModelPrinting; + /** + * Gets the enclosing callable of `node`, when considered as an expression. + */ + Callable getAsExprEnclosingCallable(NodeExtended node); - private string getOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) - } + /** + * Gets the parameter corresponding to this node, if any. + */ + Parameter asParameter(NodeExtended n); /** - * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. + * Holds if there is a taint step from `node1` to `node2` in content flow. */ - private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(DataFlow::ContentSet f | - DataFlow::readStep(node1, f, node2) and - // Partially restrict the content types used for intermediate steps. - (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) - ) - or - exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f)) - } + predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo); /** - * Gets the MaD string representation of the parameter node `p`. + * Holds if the content set `c` is field like. */ - string parameterNodeAsInput(DataFlow::ParameterNode p) { - result = parameterAccess(asParameter(p)) - or - result = qualifierString() and p instanceof InstanceParameterNode - } + predicate isField(Lang::ContentSet c); /** - * Gets the MaD input string representation of `source`. + * Holds if the content set `c` is callback like. */ - private string asInputArgument(NodeExtended source) { result = getInputArgument(source) } + predicate isCallback(Lang::ContentSet c); /** - * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). + * Gets the MaD synthetic name string representation for the content set `c`, if any. */ - private string captureQualifierFlow(DataFlowSummaryTargetApi api) { - exists(ReturnNodeExt ret | - api = returnNodeEnclosingCallable(ret) and - isOwnInstanceAccessNode(ret) - ) and - result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue") - } + string getSyntheticName(Lang::ContentSet c); + + /** + * Gets the MaD string representation of the content set `c`. + */ + string printContent(Lang::ContentSet c); - private int accessPathLimit0() { result = 2 } + /** + * Holds if it is irrelevant to generate models for `api` based on data flow analysis. + * + * This serves as an extra filter for the `relevant` predicate. + */ + default predicate isUninterestingForDataFlowModels(Callable api) { none() } - private newtype TTaintState = - TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or - TTaintStore(int n) { n in [1 .. accessPathLimit0()] } + /** + * Holds if it is irrelevant to generate models for `api` based on the heuristic + * (non-content) flow analysis. + * + * This serves as an extra filter for the `relevant` + * and `isUninterestingForDataFlowModels` predicates. + */ + default predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } + } + + /** + * Make a summary model generator. + */ + module MakeSummaryModelGenerator { + private import SummaryModelGeneratorInput + + final private class SummaryTargetApiFinal = SummaryTargetApi; - abstract private class TaintState extends TTaintState { - abstract string toString(); + class DataFlowSummaryTargetApi extends SummaryTargetApiFinal { + DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } } /** - * A FlowState representing a tainted read. + * Provides classes and predicates related to capturing summary models + * based on heuristic data flow. */ - private class TaintRead extends TaintState, TTaintRead { - private int step; + module Heuristic { + private module ModelPrintingSummaryInput implements Printing::ModelPrintingSummarySig { + class SummaryApi = DataFlowSummaryTargetApi; + + string getProvenance() { result = "df-generated" } + } - TaintRead() { this = TTaintRead(step) } + module ModelPrintingSummary = Printing::ModelPrintingSummary; /** - * Gets the flow state step number. + * Gets the MaD string representation of the parameter node `p`. */ - int getStep() { result = step } + string parameterNodeAsInput(DataFlow::ParameterNode p) { + result = parameterAccess(asParameter(p)) + or + result = qualifierString() and p instanceof InstanceParameterNode + } - override string toString() { result = "TaintRead(" + step + ")" } - } + /** + * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). + */ + private string captureQualifierFlow(DataFlowSummaryTargetApi api) { + exists(ReturnNodeExt ret | + api = returnNodeEnclosingCallable(ret) and + isOwnInstanceAccessNode(ret) + ) and + result = ModelPrintingSummary::asLiftedValueModel(api, qualifierString(), "ReturnValue") + } - /** - * A FlowState representing a tainted write. - */ - private class TaintStore extends TaintState, TTaintStore { - private int step; + private int accessPathLimit0() { result = 2 } - TaintStore() { this = TTaintStore(step) } + private newtype TTaintState = + TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or + TTaintStore(int n) { n in [1 .. accessPathLimit0()] } + + abstract private class TaintState extends TTaintState { + abstract string toString(); + } /** - * Gets the flow state step number. + * A FlowState representing a tainted read. */ - int getStep() { result = step } + private class TaintRead extends TaintState, TTaintRead { + private int step; - override string toString() { result = "TaintStore(" + step + ")" } - } + TaintRead() { this = TTaintRead(step) } - /** - * A data flow configuration for tracking flow through APIs. - * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. - * - * This can be used to generate Flow summaries for APIs from parameter to return. - */ - private module PropagateFlowConfig implements DataFlow::StateConfigSig { - class FlowState = TaintState; - - predicate isSource(DataFlow::Node source, FlowState state) { - source instanceof DataFlow::ParameterNode and - exists(Callable c | - c = getEnclosingCallable(source) and - c instanceof DataFlowSummaryTargetApi and - not isUninterestingForHeuristicDataFlowModels(c) - ) and - state.(TaintRead).getStep() = 0 + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintRead(" + step + ")" } } - predicate isSink(DataFlow::Node sink, FlowState state) { - // Sinks are provided by `isSink/1` - none() + /** + * A FlowState representing a tainted write. + */ + private class TaintStore extends TaintState, TTaintStore { + private int step; + + TaintStore() { this = TTaintStore(step) } + + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintStore(" + step + ")" } } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - not isOwnInstanceAccessNode(sink) and - not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) + /** + * A data flow configuration for tracking flow through APIs. + * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. + * + * This can be used to generate Flow summaries for APIs from parameter to return. + */ + private module PropagateFlowConfig implements DataFlow::StateConfigSig { + class FlowState = TaintState; + + predicate isSource(DataFlow::Node source, FlowState state) { + source instanceof DataFlow::ParameterNode and + exists(Callable c | + c = getEnclosingCallable(source) and + c instanceof DataFlowSummaryTargetApi and + not isUninterestingForHeuristicDataFlowModels(c) + ) and + state.(TaintRead).getStep() = 0 + } + + predicate isSink(DataFlow::Node sink, FlowState state) { + // Sinks are provided by `isSink/1` + none() + } + + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + not isOwnInstanceAccessNode(sink) and + not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) + } + + predicate isAdditionalFlowStep( + DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + ) { + exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c | + node1 = n1.asNode() and + node2 = n2.asNode() and + DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and + isRelevantContent0(c) and + ( + state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 + or + state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() + ) + ) + or + exists(DataFlow::ContentSet c | + DataFlow::readStep(node1, c, node2) and + isRelevantContent0(c) and + state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + ) + } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureEqualSourceSinkCallContext + } } - predicate isAdditionalFlowStep( - DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + module PropagateFlow = TaintTracking::GlobalWithState; + + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + */ + string captureThroughFlow0( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt ) { - exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c | - node1 = n1.asNode() and - node2 = n2.asNode() and - DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and - isRelevantContent0(c) and - ( - state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 - or - state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() - ) + exists(string input, string output | + getEnclosingCallable(p) = api and + getEnclosingCallable(returnNodeExt) = api and + input = parameterNodeAsInput(p) and + output = getOutput(returnNodeExt) and + input != output and + result = ModelPrintingSummary::asLiftedTaintModel(api, input, output) ) - or - exists(DataFlow::ContentSet c | - DataFlow::readStep(node1, c, node2) and - isRelevantContent0(c) and - state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + } + + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + */ + private string captureThroughFlow(DataFlowSummaryTargetApi api) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + PropagateFlow::flow(p, returnNodeExt) and + result = captureThroughFlow0(api, p, returnNodeExt) ) } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to the + * return value or parameter or if `api` is a fluent API. + */ + string captureFlow(DataFlowSummaryTargetApi api) { + result = captureQualifierFlow(api) or + result = captureThroughFlow(api) } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + /** + * Gets the neutral summary model for `api`, if any. + * A neutral summary model is generated, if we are not generating + * a summary model that applies to `api`. + */ + string captureNoFlow(DataFlowSummaryTargetApi api) { + not exists(DataFlowSummaryTargetApi api0 | + exists(captureFlow(api0)) and api0.lift() = api.lift() + ) and + api.isRelevant() and + result = ModelPrintingSummary::asNeutralSummaryModel(api) } } - module PropagateFlow = TaintTracking::GlobalWithState; - /** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + * Provides classes and predicates related to capturing summary models + * based on content data flow. */ - string captureThroughFlow0( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt - ) { - exists(string input, string output | - getEnclosingCallable(p) = api and - getEnclosingCallable(returnNodeExt) = api and - input = parameterNodeAsInput(p) and - output = getOutput(returnNodeExt) and - input != output and - result = ModelPrinting::asLiftedTaintModel(api, input, output) - ) - } + module ContentSensitive { + private import MakeImplContentDataFlow as ContentDataFlow - /** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. - */ - private string captureThroughFlow(DataFlowSummaryTargetApi api) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - PropagateFlow::flow(p, returnNodeExt) and - result = captureThroughFlow0(api, p, returnNodeExt) - ) - } + private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source instanceof DataFlow::ParameterNode and + getEnclosingCallable(source) instanceof DataFlowSummaryTargetApi + } - /** - * Gets the summary model(s) of `api`, if there is flow from parameters to the - * return value or parameter or if `api` is a fluent API. - */ - string captureFlow(DataFlowSummaryTargetApi api) { - result = captureQualifierFlow(api) or - result = captureThroughFlow(api) - } + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + getEnclosingCallable(sink) instanceof DataFlowSummaryTargetApi + } - /** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a summary model that applies to `api`. - */ - string captureNoFlow(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0 | - exists(captureFlow(api0)) and api0.lift() = api.lift() - ) and - api.isRelevant() and - result = ModelPrinting::asNeutralSummaryModel(api) - } + predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; - /** - * A data flow configuration used for finding new sources. - * The sources are the already known existing sources and the sinks are the API return nodes. - * - * This can be used to generate Source summaries for an API, if the API expose an already known source - * via its return (then the API itself becomes a source). - */ - module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - sourceNode(source, kind) - ) - } + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi - } + int accessPathLimit() { result = 2 } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } + predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureEqualSourceSinkCallContext + } } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) - } - } + private module PropagateContentFlow = ContentDataFlow::Global; - private module PropagateFromSource = TaintTracking::Global; + private module ContentModelPrintingInput implements Printing::ModelPrintingSummarySig { + class SummaryApi = DataFlowSummaryTargetApi; - /** - * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. - */ - string captureSource(DataFlowSourceTargetApi api) { - exists(NodeExtended source, ReturnNodeExt sink, string kind | - PropagateFromSource::flow(source, sink) and - sourceNode(source, kind) and - api = getEnclosingCallable(sink) and - not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and - result = ModelPrinting::asSourceModel(api, getOutput(sink), kind) - ) - } - - /** - * A data flow configuration used for finding new sinks. - * The sources are the parameters of the API and the fields of the enclosing type. - * - * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) - * into an existing known sink (then the API itself becomes a sink). - */ - module PropagateToSinkConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - apiSource(source) and - getEnclosingCallable(source) instanceof DataFlowSinkTargetApi + string getProvenance() { result = "dfc-generated" } } - predicate isSink(DataFlow::Node sink) { - exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind)) + private module ContentModelPrinting = + Printing::ModelPrintingSummary; + + private string getContentOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt::getOutput(node) } - predicate isBarrier(DataFlow::Node node) { - exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t)) + /** + * Gets the MaD string representation of the parameter `p` + * when used in content flow. + */ + private string parameterNodeAsContentInput(DataFlow::ParameterNode p) { + result = parameterContentAccess(asParameter(p)) or - sinkModelSanitizer(node) + result = qualifierString() and p instanceof InstanceParameterNode } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureHasSourceCallContext + private string getContent(PropagateContentFlow::AccessPath ap, int i) { + result = "." + printContent(ap.getAtIndex(i)) } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + /** + * Gets the MaD string representation of a store step access path. + */ + private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i) } - } - private module PropagateToSink = TaintTracking::Global; + /** + * Gets the MaD string representation of a read step access path. + */ + private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i desc) + } - /** - * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. - */ - string captureSink(DataFlowSinkTargetApi api) { - exists(NodeExtended src, NodeExtended sink, string kind | - PropagateToSink::flow(src, sink) and - sinkNode(sink, kind) and - api = getEnclosingCallable(src) and - result = ModelPrinting::asSinkModel(api, asInputArgument(src), kind) - ) - } - } + /** + * Holds if the access path `ap` contains a field or synthetic field access. + */ + private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + isField(ap.getAtIndex(_)) + } - /** - * Provides classes and predicates related to capturing summary models - * based on content data flow. - */ - module ContentSensitive { - private import MakeImplContentDataFlow as ContentDataFlow + /** + * Holds if this access path `ap` mentions a callback. + */ + private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) { + isCallback(ap.getAtIndex(_)) + } - private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - source instanceof DataFlow::ParameterNode and - getEnclosingCallable(source) instanceof DataFlowSummaryTargetApi + /** + * Holds if the access path `ap` is not a parameter or returnvalue of a callback + * stored in a field. + * + * That is, we currently don't include summaries that rely on parameters or return values + * of callbacks stored in fields. + */ + private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { + not (mentionsField(ap) and mentionsCallback(ap)) } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - getEnclosingCallable(sink) instanceof DataFlowSummaryTargetApi + private predicate apiFlow( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + getEnclosingCallable(returnNodeExt) = api and + getEnclosingCallable(p) = api } - predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; + /** + * A class of APIs relevant for modeling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API on a parameter, if + * #content flow from parameter <= 3 + * If an API produces more content flow on a parameter, it is likely that + * 1. Types are not sufficiently constrained on the parameter leading to a combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the heuristic based flow + * detection instead, as reads and stores would use a significant + * part of an objects internal state. + */ + private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { + private DataFlow::ParameterNode parameter; + + ContentDataFlowSummaryTargetApi() { + strictcount(string input, string output | + exists( + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores + | + apiFlow(this, parameter, reads, returnNodeExt, stores, _) and + input = parameterNodeAsContentInput(parameter) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) + ) + ) <= 3 + } + + /** + * Gets a parameter node of `this` api, where there are less than 3 possible models, if any. + */ + DataFlow::ParameterNode getARelevantParameterNode() { result = parameter } + } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + pragma[nomagic] + private predicate apiContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + getEnclosingCallable(returnNodeExt) = api and + getEnclosingCallable(p) = api and + p = api.getARelevantParameterNode() } - int accessPathLimit() { result = 2 } + /** + * Holds if any of the content sets in `path` translates into a synthetic field. + */ + private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { + exists(getSyntheticName(path.getAtIndex(_))) + } - predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } + private string getHashAtIndex(PropagateContentFlow::AccessPath ap, int i) { + result = getSyntheticName(ap.getAtIndex(i)) + } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + private string getReversedHash(PropagateContentFlow::AccessPath ap) { + result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i desc) } - } - private module PropagateContentFlow = ContentDataFlow::Global; + private string getHash(PropagateContentFlow::AccessPath ap) { + result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i) + } - private module ContentModelPrintingInput implements Printing::ModelPrintingSig { - class SummaryApi = DataFlowSummaryTargetApi; + /** + * Gets all access paths that contain the synthetic fields + * from `ap` in reverse order (if `ap` contains at least one synthetic field). + * These are the possible candidates for synthetic path continuations. + */ + private PropagateContentFlow::AccessPath getSyntheticPathCandidate( + PropagateContentFlow::AccessPath ap + ) { + getHash(ap) = getReversedHash(result) + } - class SourceOrSinkApi = SourceOrSinkTargetApi; + /** + * A module containing predicates for validating access paths containing content sets + * that translates into synthetic fields, when used for generated summary models. + */ + private module AccessPathSyntheticValidation { + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`). + */ + private predicate step( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + p.(NodeExtended).getType() = t1 and + returnNodeExt.getType() = t2 and + apiContentFlow(_, p, read, returnNodeExt, store, _) + ) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. + * + * Step A -> Synth. + */ + private predicate synthPathEntry( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + not hasSyntheticContent(read) and + hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` has synthetic content + * and `store` does not. + * + * Step Synth -> A. + */ + private predicate synthPathExit( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + not hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists a path of steps from `read` to an exit. + * + * read ->* Synth -> A + */ + private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { + synthPathExit(t, read, _, _) + or + hasSyntheticContent(read) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(t, read, midType, mid) and + reachesSynthExit(midType, getSyntheticPathCandidate(mid)) + ) + } + + /** + * Holds if there exists a path of steps from an entry to `store`. + * + * A -> Synth ->* store + */ + private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { + synthPathEntry(_, _, t, store) + or + hasSyntheticContent(store) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(midType, mid, t, store) and + synthEntryReaches(midType, getSyntheticPathCandidate(mid)) + ) + } + + /** + * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) + * contain content that will be translated into a synthetic field, when being used in + * a MaD summary model, and if there is a range of APIs, such that + * when chaining their flow access paths, there exists access paths `A` and `B` where + * A ->* read -> store ->* B and where `A` and `B` do not contain content that will + * be translated into a synthetic field. + * + * This is needed because we don't want to include summaries that reads from or + * stores into an "internal" synthetic field. + * + * Example: + * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and + * `setX`, which gets and sets a private field `X` on `t`. + * This would lead to the following content flows + * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. + * setX : Argument[0] -> Argument[this].SyntheticField[t.X] + * As the reads and stores are on synthetic fields we should only make summaries + * if both of these methods exist. + */ + pragma[nomagic] + predicate acceptReadStore( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + synthPathEntry(t1, read, t2, store) and + reachesSynthExit(t2, getSyntheticPathCandidate(store)) + or + exists(PropagateContentFlow::AccessPath store0 | + getSyntheticPathCandidate(store0) = read + | + synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + or + synthEntryReaches(t1, store0) and + step(t1, read, t2, store) and + reachesSynthExit(t2, getSyntheticPathCandidate(store)) + ) + } + } - string getProvenance() { result = "dfc-generated" } - } + /** + * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. + * Flow is considered relevant, + * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. + * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if + * the synthetic content is "live" on the relevant declaring type. + */ + private predicate apiRelevantContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath store, boolean preservesValue + ) { + apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and + ( + not hasSyntheticContent(read) and not hasSyntheticContent(store) + or + AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read, + returnNodeExt.getType(), store) + ) + } - private module ContentModelPrinting = Printing::ModelPrinting; + pragma[nomagic] + private predicate captureFlow0( + ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, + boolean lift + ) { + exists( + DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores + | + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and + input != output and + validateAccessPath(reads) and + validateAccessPath(stores) and + ( + if mentionsField(reads) or mentionsField(stores) + then lift = false and api.isRelevant() + else lift = true + ) + ) + } - private string getContentOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) + /** + * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to + * the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false. + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ + string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) { + exists(string input, string output, boolean preservesValue | + captureFlow0(api, input, output, _, lift) and + preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and + result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift) + ) + } } /** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. + * Gets the summary model(s) for `api`, if any. `lift` is true if the model is lifted + * otherwise false. + * The following heuristic is applied: + * 1. If content based flow yields at lease one summary for an API, then we use that. + * 2. If content based flow does not yield any summary for an API, then we try and + * generate flow summaries using the heuristic based summary generator. */ - private string parameterNodeAsContentInput(DataFlow::ParameterNode p) { - result = parameterContentAccess(asParameter(p)) + string captureFlow(DataFlowSummaryTargetApi api, boolean lift) { + result = ContentSensitive::captureFlow(api, lift) or - result = qualifierString() and p instanceof InstanceParameterNode - } - - private string getContent(PropagateContentFlow::AccessPath ap, int i) { - result = "." + printContent(ap.getAtIndex(i)) + not exists(DataFlowSummaryTargetApi api0 | + (api0 = api or api.lift() = api0) and + exists(ContentSensitive::captureFlow(api0, false)) + or + api0.lift() = api.lift() and + exists(ContentSensitive::captureFlow(api0, true)) + ) and + result = Heuristic::captureFlow(api) and + lift = true } /** - * Gets the MaD string representation of a store step access path. + * Gets the neutral summary model for `api`, if any. + * A neutral summary model is generated, if we are not generating + * a mixed summary model that applies to `api`. */ - private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i) + string captureNeutral(DataFlowSummaryTargetApi api) { + not exists(DataFlowSummaryTargetApi api0, boolean lift | + exists(captureFlow(api0, lift)) and + ( + lift = false and + (api0 = api or api0 = api.lift()) + or + lift = true and api0.lift() = api.lift() + ) + ) and + api.isRelevant() and + result = Heuristic::ModelPrintingSummary::asNeutralSummaryModel(api) } + } + /** + * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. + */ + private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + exists(DataFlow::ContentSet f | + DataFlow::readStep(node1, f, node2) and + // Partially restrict the content types used for intermediate steps. + (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) + ) + or + exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f)) + } + + /** + * Provides language-specific source model generator parameters. + */ + signature module SourceModelGeneratorInputSig { /** - * Gets the MaD string representation of a read step access path. + * A class of callables that are potentially relevant for generating source models. */ - private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i desc) - } + class SourceTargetApi extends Callable; /** - * Holds if the access path `ap` contains a field or synthetic field access. + * Holds if `node` is specified as a source with the given kind in a MaD flow + * model. */ - private predicate mentionsField(PropagateContentFlow::AccessPath ap) { - isField(ap.getAtIndex(_)) - } + predicate sourceNode(Lang::Node node, string kind); /** - * Holds if this access path `ap` mentions a callback. + * Holds if it is not relevant to generate a source model for `api`, even + * if flow is detected from a node within `source` to a sink within `api`. */ - private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) { - isCallback(ap.getAtIndex(_)) + bindingset[sourceEnclosing, api] + default predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { + none() } + } + /** + * Provides language-specific sink model generator parameters. + */ + signature module SinkModelGeneratorInputSig { /** - * Holds if the access path `ap` is not a parameter or returnvalue of a callback - * stored in a field. - * - * That is, we currently don't include summaries that rely on parameters or return values - * of callbacks stored in fields. + * A class of callables that are potentially relevant for generating sink models. */ - private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { - not (mentionsField(ap) and mentionsCallback(ap)) - } - - private predicate apiFlow( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue - ) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api - } + class SinkTargetApi extends Callable; /** - * A class of APIs relevant for modeling using content flow. - * The following heuristic is applied: - * Content flow is only relevant for an API on a parameter, if - * #content flow from parameter <= 3 - * If an API produces more content flow on a parameter, it is likely that - * 1. Types are not sufficiently constrained on the parameter leading to a combinatorial - * explosion in dispatch and thus in the generated summaries. - * 2. It is a reasonable approximation to use the heuristic based flow - * detection instead, as reads and stores would use a significant - * part of an objects internal state. + * Holds if `node` is specified as a sink with the given kind in a MaD flow + * model. */ - private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { - private DataFlow::ParameterNode parameter; - - ContentDataFlowSummaryTargetApi() { - strictcount(string input, string output | - exists( - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores - | - apiFlow(this, parameter, reads, returnNodeExt, stores, _) and - input = parameterNodeAsContentInput(parameter) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) - ) - ) <= 3 - } + predicate sinkNode(Lang::Node node, string kind); - /** - * Gets a parameter node of `this` api, where there are less than 3 possible models, if any. - */ - DataFlow::ParameterNode getARelevantParameterNode() { result = parameter } - } + /** + * Gets the MaD input string representation of `source`. + */ + string getInputArgument(Lang::Node source); - pragma[nomagic] - private predicate apiContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue - ) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api and - p = api.getARelevantParameterNode() - } + /** + * Holds if `source` is an api entrypoint relevant for creating sink models. + */ + predicate apiSource(Lang::Node source); /** - * Holds if any of the content sets in `path` translates into a synthetic field. + * Holds if `node` is a sanitizer for sink model construction. */ - private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { - exists(getSyntheticName(path.getAtIndex(_))) - } + default predicate sinkModelSanitizer(Lang::Node node) { none() } - private string getHashAtIndex(PropagateContentFlow::AccessPath ap, int i) { - result = getSyntheticName(ap.getAtIndex(i)) - } + /** + * Holds if `kind` is a relevant sink kind for creating sink models. + */ + bindingset[kind] + default predicate isRelevantSinkKind(string kind) { any() } + } - private string getReversedHash(PropagateContentFlow::AccessPath ap) { - result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i desc) - } + /** + * Make a source model generator. + */ + module MakeSourceModelGenerator { + private import SourceModelGeneratorInput - private string getHash(PropagateContentFlow::AccessPath ap) { - result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i) - } + class DataFlowSourceTargetApi = SourceTargetApi; /** - * Gets all access paths that contain the synthetic fields - * from `ap` in reverse order (if `ap` contains at least one synthetic field). - * These are the possible candidates for synthetic path continuations. + * Provides classes and predicates related to capturing source models + * based on heuristic data flow. */ - private PropagateContentFlow::AccessPath getSyntheticPathCandidate( - PropagateContentFlow::AccessPath ap - ) { - getHash(ap) = getReversedHash(result) - } + module Heuristic { + private module ModelPrintingSourceOrSinkInput implements + Printing::ModelPrintingSourceOrSinkSig + { + class SourceOrSinkApi = DataFlowSourceTargetApi; - /** - * A module containing predicates for validating access paths containing content sets - * that translates into synthetic fields, when used for generated summary models. - */ - private module AccessPathSyntheticValidation { - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`). - */ - private predicate step( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - p.(NodeExtended).getType() = t1 and - returnNodeExt.getType() = t2 and - apiContentFlow(_, p, read, returnNodeExt, store, _) - ) + string getProvenance() { result = "df-generated" } } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. - * - * Step A -> Synth. - */ - private predicate synthPathEntry( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - not hasSyntheticContent(read) and - hasSyntheticContent(store) and - step(t1, read, t2, store) - } + private module ModelPrintingSourceOrSink = + Printing::ModelPrintingSourceOrSink; /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` has synthetic content - * and `store` does not. + * A data flow configuration used for finding new sources. + * The sources are the already known existing sources and the sinks are the API return nodes. * - * Step Synth -> A. + * This can be used to generate Source summaries for an API, if the API expose an already known source + * via its return (then the API itself becomes a source). */ - private predicate synthPathExit( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - not hasSyntheticContent(store) and - step(t1, read, t2, store) + module PropagateFromSourceConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { sourceNode(source, _) } + + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureHasSinkCallContext + } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } } + private module PropagateFromSource = TaintTracking::Global; + /** - * Holds if there exists a path of steps from `read` to an exit. - * - * read ->* Synth -> A + * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. */ - private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { - synthPathExit(t, read, _, _) - or - hasSyntheticContent(read) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(t, read, midType, mid) and - reachesSynthExit(midType, getSyntheticPathCandidate(mid)) + string captureSource(DataFlowSourceTargetApi api) { + exists(NodeExtended source, ReturnNodeExt sink, string kind | + PropagateFromSource::flow(source, sink) and + sourceNode(source, kind) and + api = getEnclosingCallable(sink) and + not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and + result = ModelPrintingSourceOrSink::asSourceModel(api, getOutput(sink), kind) ) } + } + } + + /** + * Make a sink model generator. + */ + module MakeSinkModelGenerator { + private import SinkModelGeneratorInput + + class DataFlowSinkTargetApi = SinkTargetApi; + + /** + * Provides classes and predicates related to capturing sink models + * based on heuristic data flow. + */ + module Heuristic { + private module ModelPrintingSourceOrSinkInput implements + Printing::ModelPrintingSourceOrSinkSig + { + class SourceOrSinkApi = DataFlowSinkTargetApi; + + string getProvenance() { result = "df-generated" } + } + + private module ModelPrintingSourceOrSink = + Printing::ModelPrintingSourceOrSink; /** - * Holds if there exists a path of steps from an entry to `store`. - * - * A -> Synth ->* store + * Gets the MaD input string representation of `source`. */ - private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { - synthPathEntry(_, _, t, store) - or - hasSyntheticContent(store) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(midType, mid, t, store) and - synthEntryReaches(midType, getSyntheticPathCandidate(mid)) - ) - } + private string asInputArgument(NodeExtended source) { result = getInputArgument(source) } /** - * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) - * contain content that will be translated into a synthetic field, when being used in - * a MaD summary model, and if there is a range of APIs, such that - * when chaining their flow access paths, there exists access paths `A` and `B` where - * A ->* read -> store ->* B and where `A` and `B` do not contain content that will - * be translated into a synthetic field. + * A data flow configuration used for finding new sinks. + * The sources are the parameters of the API and the fields of the enclosing type. * - * This is needed because we don't want to include summaries that reads from or - * stores into an "internal" synthetic field. - * - * Example: - * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and - * `setX`, which gets and sets a private field `X` on `t`. - * This would lead to the following content flows - * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. - * setX : Argument[0] -> Argument[this].SyntheticField[t.X] - * As the reads and stores are on synthetic fields we should only make summaries - * if both of these methods exist. + * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) + * into an existing known sink (then the API itself becomes a sink). */ - pragma[nomagic] - predicate acceptReadStore( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - synthPathEntry(t1, read, t2, store) and - reachesSynthExit(t2, getSyntheticPathCandidate(store)) - or - exists(PropagateContentFlow::AccessPath store0 | getSyntheticPathCandidate(store0) = read | - synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + module PropagateToSinkConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + apiSource(source) and + getEnclosingCallable(source) instanceof DataFlowSinkTargetApi + } + + predicate isSink(DataFlow::Node sink) { + exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind)) + } + + predicate isBarrier(DataFlow::Node node) { + exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t)) or - synthEntryReaches(t1, store0) and - step(t1, read, t2, store) and - reachesSynthExit(t2, getSyntheticPathCandidate(store)) - ) + sinkModelSanitizer(node) + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureHasSourceCallContext + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } } - } - /** - * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. - * Flow is considered relevant, - * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. - * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if - * the synthetic content is "live" on the relevant declaring type. - */ - private predicate apiRelevantContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath store, boolean preservesValue - ) { - apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and - ( - not hasSyntheticContent(read) and not hasSyntheticContent(store) - or - AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read, - returnNodeExt.getType(), store) - ) - } + private module PropagateToSink = TaintTracking::Global; - pragma[nomagic] - private predicate captureFlow0( - ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, - boolean lift - ) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - validateAccessPath(reads) and - validateAccessPath(stores) and - ( - if mentionsField(reads) or mentionsField(stores) - then lift = false and api.isRelevant() - else lift = true + /** + * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. + */ + string captureSink(DataFlowSinkTargetApi api) { + exists(NodeExtended src, NodeExtended sink, string kind | + PropagateToSink::flow(src, sink) and + sinkNode(sink, kind) and + api = getEnclosingCallable(src) and + result = ModelPrintingSourceOrSink::asSinkModel(api, asInputArgument(src), kind) ) - ) - } - - /** - * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to - * the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false. - * - * Models are lifted to the best type in case the read and store access paths do not - * contain a field or synthetic field access. - */ - string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) { - exists(string input, string output, boolean preservesValue | - captureFlow0(api, input, output, _, lift) and - preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and - result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift) - ) + } } } - - /** - * Gets the summary model(s) for `api`, if any. `lift` is true if the model is lifted - * otherwise false. - * The following heuristic is applied: - * 1. If content based flow yields at lease one summary for an API, then we use that. - * 2. If content based flow does not yield any summary for an API, then we try and - * generate flow summaries using the heuristic based summary generator. - */ - string captureFlow(DataFlowSummaryTargetApi api, boolean lift) { - result = ContentSensitive::captureFlow(api, lift) - or - not exists(DataFlowSummaryTargetApi api0 | - (api0 = api or api.lift() = api0) and - exists(ContentSensitive::captureFlow(api0, false)) - or - api0.lift() = api.lift() and - exists(ContentSensitive::captureFlow(api0, true)) - ) and - result = Heuristic::captureFlow(api) and - lift = true - } - - /** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a mixed summary model that applies to `api`. - */ - string captureNeutral(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0, boolean lift | - exists(captureFlow(api0, lift)) and - ( - lift = false and - (api0 = api or api0 = api.lift()) - or - lift = true and api0.lift() = api.lift() - ) - ) and - api.isRelevant() and - result = Heuristic::ModelPrinting::asNeutralSummaryModel(api) - } } diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll index 0ab92f7032b4..fc1e0113d1d0 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll @@ -16,7 +16,7 @@ signature module ModelPrintingLangSig { } module ModelPrintingImpl { - signature module ModelPrintingSig { + signature module ModelPrintingSummarySig { /** * The class of APIs relevant for model generation. */ @@ -24,22 +24,32 @@ module ModelPrintingImpl { Lang::Callable lift(); } - class SourceOrSinkApi extends Lang::Callable; - /** * Gets the string representation of the provenance of the models. */ string getProvenance(); } - module ModelPrinting { + signature module ModelPrintingSourceOrSinkSig { /** - * Computes the first columns for MaD rows used for summaries, sources and sinks. + * The class of APIs relevant for model generation. */ - private string asPartialModel(Lang::Callable api) { - result = strictconcat(int i | | Lang::partialModelRow(api, i), ";" order by i) + ";" - } + class SourceOrSinkApi extends Lang::Callable; + + /** + * Gets the string representation of the provenance of the models. + */ + string getProvenance(); + } + + /** + * Computes the first columns for MaD rows used for summaries, sources and sinks. + */ + private string asPartialModel(Lang::Callable api) { + result = strictconcat(int i | | Lang::partialModelRow(api, i), ";" order by i) + ";" + } + module ModelPrintingSummary { /** * Computes the first columns for neutral MaD rows. */ @@ -106,7 +116,9 @@ module ModelPrintingImpl { preservesValue = false and result = asSummaryModel(api, input, output, "taint", lift) } + } + module ModelPrintingSourceOrSink { /** * Gets the sink model for `api` with `input` and `kind`. */