From b5c596b2ce2450e1969ca751fa3ff3ce47155bf9 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Thu, 24 Apr 2025 15:32:12 +0200 Subject: [PATCH 1/7] Shared: Split model printing of summaries and sources/sinks into separate param modules. --- .../CaptureTypeBasedSummaryModels.qll | 6 ++-- .../CaptureTypeBasedSummaryModels.qll | 6 ++-- .../internal/ModelGeneratorImpl.qll | 31 +++++++++++-------- .../modelgenerator/internal/ModelPrinting.qll | 28 ++++++++++++----- 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index f7b0633ddd39..62286c27888c 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -177,15 +177,13 @@ private predicate output(Callable callable, TypeParameter tp, string output) { delegateSink(callable, tp, output) } -private module ModelPrintingInput implements ModelPrintingSig { +private module ModelPrintingInput implements ModelPrintingSummarySig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = TypeBasedFlowTargetApi; - string getProvenance() { result = "tb-generated" } } -private module Printing = ModelPrinting; +private module Printing = ModelPrintingSummary; /** * A class of callables that are relevant generating summaries for based diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 36aec8053196..9145a0779075 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -284,15 +284,13 @@ private predicate output(Callable callable, TypeVariable tv, string output) { functionalSink(callable, tv, output) } -module ModelPrintingInput implements ModelPrintingSig { +module ModelPrintingInput implements ModelPrintingSummarySig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi; - string getProvenance() { result = "tb-generated" } } -private module Printing = ModelPrinting; +private module Printing = ModelPrintingSummary; /** * A class of callables that are relevant generating summaries for based diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll index b9592964f931..59f53c2b1c90 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -370,20 +370,27 @@ module MakeModelGenerator< * based on heuristic data flow. */ module Heuristic { - private module ModelPrintingInput implements Printing::ModelPrintingSig { + private module ModelPrintingSummaryInput implements Printing::ModelPrintingSummarySig { class SummaryApi = DataFlowSummaryTargetApi; + string getProvenance() { result = "df-generated" } + } + + module ModelPrintingSummary = Printing::ModelPrintingSummary; + + private module ModelPrintingSourceOrSinkInput implements Printing::ModelPrintingSourceOrSinkSig { class SourceOrSinkApi = SourceOrSinkTargetApi; string getProvenance() { result = "df-generated" } } - module ModelPrinting = Printing::ModelPrinting; - private string getOutput(ReturnNodeExt node) { result = PrintReturnNodeExt::getOutput(node) } + private module ModelPrintingSourceOrSink = + Printing::ModelPrintingSourceOrSink; + /** * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. */ @@ -419,7 +426,7 @@ module MakeModelGenerator< api = returnNodeEnclosingCallable(ret) and isOwnInstanceAccessNode(ret) ) and - result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue") + result = ModelPrintingSummary::asLiftedValueModel(api, qualifierString(), "ReturnValue") } private int accessPathLimit0() { result = 2 } @@ -539,7 +546,7 @@ module MakeModelGenerator< input = parameterNodeAsInput(p) and output = getOutput(returnNodeExt) and input != output and - result = ModelPrinting::asLiftedTaintModel(api, input, output) + result = ModelPrintingSummary::asLiftedTaintModel(api, input, output) ) } @@ -572,7 +579,7 @@ module MakeModelGenerator< exists(captureFlow(api0)) and api0.lift() = api.lift() ) and api.isRelevant() and - result = ModelPrinting::asNeutralSummaryModel(api) + result = ModelPrintingSummary::asNeutralSummaryModel(api) } /** @@ -617,7 +624,7 @@ module MakeModelGenerator< sourceNode(source, kind) and api = getEnclosingCallable(sink) and not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and - result = ModelPrinting::asSourceModel(api, getOutput(sink), kind) + result = ModelPrintingSourceOrSink::asSourceModel(api, getOutput(sink), kind) ) } @@ -663,7 +670,7 @@ module MakeModelGenerator< PropagateToSink::flow(src, sink) and sinkNode(sink, kind) and api = getEnclosingCallable(src) and - result = ModelPrinting::asSinkModel(api, asInputArgument(src), kind) + result = ModelPrintingSourceOrSink::asSinkModel(api, asInputArgument(src), kind) ) } } @@ -703,15 +710,13 @@ module MakeModelGenerator< private module PropagateContentFlow = ContentDataFlow::Global; - private module ContentModelPrintingInput implements Printing::ModelPrintingSig { + private module ContentModelPrintingInput implements Printing::ModelPrintingSummarySig { class SummaryApi = DataFlowSummaryTargetApi; - class SourceOrSinkApi = SourceOrSinkTargetApi; - string getProvenance() { result = "dfc-generated" } } - private module ContentModelPrinting = Printing::ModelPrinting; + private module ContentModelPrinting = Printing::ModelPrintingSummary; private string getContentOutput(ReturnNodeExt node) { result = PrintReturnNodeExt::getOutput(node) @@ -1075,6 +1080,6 @@ module MakeModelGenerator< ) ) and api.isRelevant() and - result = Heuristic::ModelPrinting::asNeutralSummaryModel(api) + result = Heuristic::ModelPrintingSummary::asNeutralSummaryModel(api) } } diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll index 0ab92f7032b4..fc1e0113d1d0 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll @@ -16,7 +16,7 @@ signature module ModelPrintingLangSig { } module ModelPrintingImpl { - signature module ModelPrintingSig { + signature module ModelPrintingSummarySig { /** * The class of APIs relevant for model generation. */ @@ -24,22 +24,32 @@ module ModelPrintingImpl { Lang::Callable lift(); } - class SourceOrSinkApi extends Lang::Callable; - /** * Gets the string representation of the provenance of the models. */ string getProvenance(); } - module ModelPrinting { + signature module ModelPrintingSourceOrSinkSig { /** - * Computes the first columns for MaD rows used for summaries, sources and sinks. + * The class of APIs relevant for model generation. */ - private string asPartialModel(Lang::Callable api) { - result = strictconcat(int i | | Lang::partialModelRow(api, i), ";" order by i) + ";" - } + class SourceOrSinkApi extends Lang::Callable; + + /** + * Gets the string representation of the provenance of the models. + */ + string getProvenance(); + } + + /** + * Computes the first columns for MaD rows used for summaries, sources and sinks. + */ + private string asPartialModel(Lang::Callable api) { + result = strictconcat(int i | | Lang::partialModelRow(api, i), ";" order by i) + ";" + } + module ModelPrintingSummary { /** * Computes the first columns for neutral MaD rows. */ @@ -106,7 +116,9 @@ module ModelPrintingImpl { preservesValue = false and result = asSummaryModel(api, input, output, "taint", lift) } + } + module ModelPrintingSourceOrSink { /** * Gets the sink model for `api` with `input` and `kind`. */ From bb6530fcf85e47c696c13f02a3b1f00652d3ff04 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Thu, 24 Apr 2025 16:12:01 +0200 Subject: [PATCH 2/7] Shared: Make the summary, source and sink model generation a parameterized module. --- .../internal/ModelGeneratorImpl.qll | 1481 +++++++++-------- 1 file changed, 773 insertions(+), 708 deletions(-) diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll index 59f53c2b1c90..d981ef151fd7 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -15,7 +15,7 @@ private import ModelPrinting /** * Provides language-specific model generator parameters. */ -signature module ModelGeneratorInputSig Lang> { +signature module ModelGeneratorCommonInputSig Lang> { /** * A Type. */ @@ -49,52 +49,6 @@ signature module ModelGeneratorInputSig /** Gets the enclosing callable of `node`. */ Callable getEnclosingCallable(NodeExtended node); - /** - * Gets the enclosing callable of `node`, when considered as an expression. - */ - Callable getAsExprEnclosingCallable(NodeExtended node); - - /** Gets the parameter corresponding to this node, if any. */ - Parameter asParameter(NodeExtended n); - - /** - * A class of callables that are potentially relevant for generating summary or - * neutral models. - * - * In the Standard library and 3rd party libraries it is the callables (or callables that have a - * super implementation) that can be called from outside the library itself. - */ - class SummaryTargetApi extends Callable { - /** - * Gets the callable that a model will be lifted to. - * - * The lifted callable is relevant in terms of model - * generation (this is ensured by `liftedImpl`). - */ - Callable lift(); - - /** - * Holds if `this` is relevant in terms of model generation. - */ - predicate isRelevant(); - } - - /** - * A class of callables that are potentially relevant for generating source or - * sink models. - */ - class SourceOrSinkTargetApi extends Callable; - - /** - * A class of callables that are potentially relevant for generating source models. - */ - class SourceTargetApi extends SourceOrSinkTargetApi; - - /** - * A class of callables that are potentially relevant for generating sink models. - */ - class SinkTargetApi extends SourceOrSinkTargetApi; - /** * An instance parameter node. */ @@ -113,22 +67,6 @@ signature module ModelGeneratorInputSig */ Type getUnderlyingContentType(Lang::ContentSet c); - /** - * Gets the MaD string representation of the qualifier. - */ - string qualifierString(); - - /** - * Gets the MaD string representation of the parameter `p`. - */ - string parameterAccess(Parameter p); - - /** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ - string parameterContentAccess(Parameter p); - /** * Gets the MaD string representation of return through parameter at position * `pos` of callable `c`. @@ -154,69 +92,26 @@ signature module ModelGeneratorInputSig predicate isOwnInstanceAccessNode(Lang::ReturnNode node); /** - * Holds if `node` is a sanitizer for sink model construction. - */ - predicate sinkModelSanitizer(Lang::Node node); - - /** - * Holds if `source` is an api entrypoint relevant for creating sink models. - */ - predicate apiSource(Lang::Node source); - - /** - * Gets the MaD input string representation of `source`. - */ - string getInputArgument(Lang::Node source); - - /** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. + * Gets the MaD string representation of the parameter `p`. */ - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api); + string parameterAccess(Parameter p); /** - * Holds if `kind` is a relevant sink kind for creating sink models. + * Gets the MaD string representation of the parameter `p` + * when used in content flow. */ - bindingset[kind] - predicate isRelevantSinkKind(string kind); + string parameterContentAccess(Parameter p); /** - * Holds if `kind` is a relevant source kind for creating source models. + * Gets the MaD string representation of the qualifier. */ - bindingset[kind] - predicate isRelevantSourceKind(string kind); + string qualifierString(); /** * Holds if the the content `c` is a container. */ predicate containerContent(Lang::ContentSet c); - /** - * Holds if there is a taint step from `node1` to `node2` in content flow. - */ - predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo); - - /** - * Holds if the content set `c` is field like. - */ - predicate isField(Lang::ContentSet c); - - /** - * Holds if the content set `c` is callback like. - */ - predicate isCallback(Lang::ContentSet c); - - /** - * Gets the MaD synthetic name string representation for the content set `c`, if any. - */ - string getSyntheticName(Lang::ContentSet c); - - /** - * Gets the MaD string representation of the content set `c`. - */ - string printContent(Lang::ContentSet c); - /** * Gets the parameter position of the return kind, if any. */ @@ -230,22 +125,6 @@ signature module ModelGeneratorInputSig */ default string getReturnValueString(Lang::ReturnKind kind) { result = "ReturnValue" } - /** - * Holds if it is irrelevant to generate models for `api` based on data flow analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ - predicate isUninterestingForDataFlowModels(Callable api); - - /** - * Holds if it is irrelevant to generate models for `api` based on the heuristic - * (non-content) flow analysis. - * - * This serves as an extra filter for the `relevant` - * and `isUninterestingForDataFlowModels` predicates. - */ - predicate isUninterestingForHeuristicDataFlowModels(Callable api); - /** * Gets the string representation for the `i`th column in the MaD row for `api`. */ @@ -255,23 +134,14 @@ signature module ModelGeneratorInputSig * Gets the string representation for the `i`th column in the neutral MaD row for `api`. */ string partialNeutralModelRow(Callable api, int i); - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate sourceNode(Lang::Node node, string kind); - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate sinkNode(Lang::Node node, string kind); } -module MakeModelGenerator< +/** + * Make a factory for constructing different model generators. + */ +module MakeModelGeneratorFactory< LocationSig Location, InputSig Lang, Tt::InputSig TaintLang, - ModelGeneratorInputSig ModelGeneratorInput> + ModelGeneratorCommonInputSig ModelGeneratorInput> { private module DataFlow { import Lang @@ -338,16 +208,6 @@ module MakeModelGenerator< } } - final private class SummaryTargetApiFinal = SummaryTargetApi; - - class DataFlowSummaryTargetApi extends SummaryTargetApiFinal { - DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } - } - - class DataFlowSourceTargetApi = SourceTargetApi; - - class DataFlowSinkTargetApi = SinkTargetApi; - /** * Holds if `c` is a relevant content kind, where the underlying type is relevant. */ @@ -365,721 +225,926 @@ module MakeModelGenerator< containerContent(c) } + private string getOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt::getOutput(node) + } + /** - * Provides classes and predicates related to capturing models - * based on heuristic data flow. + * Provides language-specific summary model generator parameters. */ - module Heuristic { - private module ModelPrintingSummaryInput implements Printing::ModelPrintingSummarySig { - class SummaryApi = DataFlowSummaryTargetApi; + signature module SummaryModelGeneratorInputSig { + /** + * A class of callables that are potentially relevant for generating summary or + * neutral models. + * + * In the Standard library and 3rd party libraries it is the callables (or callables that have a + * super implementation) that can be called from outside the library itself. + */ + class SummaryTargetApi extends Callable { + /** + * Gets the callable that a model will be lifted to. + * + * The lifted callable is relevant in terms of model + * generation (this is ensured by `liftedImpl`). + */ + Callable lift(); - string getProvenance() { result = "df-generated" } + /** + * Holds if `this` is relevant in terms of model generation. + */ + predicate isRelevant(); } - module ModelPrintingSummary = Printing::ModelPrintingSummary; + /** + * Gets the enclosing callable of `node`, when considered as an expression. + */ + Callable getAsExprEnclosingCallable(NodeExtended node); - private module ModelPrintingSourceOrSinkInput implements Printing::ModelPrintingSourceOrSinkSig { - class SourceOrSinkApi = SourceOrSinkTargetApi; + /** + * Gets the parameter corresponding to this node, if any. + */ + Parameter asParameter(NodeExtended n); - string getProvenance() { result = "df-generated" } - } + /** + * Holds if there is a taint step from `node1` to `node2` in content flow. + */ + predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo); - private string getOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) - } + /** + * Holds if the content set `c` is field like. + */ + predicate isField(Lang::ContentSet c); - private module ModelPrintingSourceOrSink = - Printing::ModelPrintingSourceOrSink; + /** + * Holds if the content set `c` is callback like. + */ + predicate isCallback(Lang::ContentSet c); /** - * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. + * Gets the MaD synthetic name string representation for the content set `c`, if any. */ - private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(DataFlow::ContentSet f | - DataFlow::readStep(node1, f, node2) and - // Partially restrict the content types used for intermediate steps. - (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) - ) - or - exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f)) - } + string getSyntheticName(Lang::ContentSet c); /** - * Gets the MaD string representation of the parameter node `p`. + * Gets the MaD string representation of the content set `c`. */ - string parameterNodeAsInput(DataFlow::ParameterNode p) { - result = parameterAccess(asParameter(p)) - or - result = qualifierString() and p instanceof InstanceParameterNode - } + string printContent(Lang::ContentSet c); /** - * Gets the MaD input string representation of `source`. + * Holds if it is irrelevant to generate models for `api` based on data flow analysis. + * + * This serves as an extra filter for the `relevant` predicate. */ - private string asInputArgument(NodeExtended source) { result = getInputArgument(source) } + predicate isUninterestingForDataFlowModels(Callable api); /** - * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). + * Holds if it is irrelevant to generate models for `api` based on the heuristic + * (non-content) flow analysis. + * + * This serves as an extra filter for the `relevant` + * and `isUninterestingForDataFlowModels` predicates. */ - private string captureQualifierFlow(DataFlowSummaryTargetApi api) { - exists(ReturnNodeExt ret | - api = returnNodeEnclosingCallable(ret) and - isOwnInstanceAccessNode(ret) - ) and - result = ModelPrintingSummary::asLiftedValueModel(api, qualifierString(), "ReturnValue") - } + predicate isUninterestingForHeuristicDataFlowModels(Callable api); + } - private int accessPathLimit0() { result = 2 } + /** + * Make a summary model generator. + */ + module MakeSummaryModelGenerator { + private import SummaryModelGeneratorInput - private newtype TTaintState = - TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or - TTaintStore(int n) { n in [1 .. accessPathLimit0()] } + final private class SummaryTargetApiFinal = SummaryTargetApi; - abstract private class TaintState extends TTaintState { - abstract string toString(); + class DataFlowSummaryTargetApi extends SummaryTargetApiFinal { + DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } } /** - * A FlowState representing a tainted read. + * Provides classes and predicates related to capturing summary models + * based on heuristic data flow. */ - private class TaintRead extends TaintState, TTaintRead { - private int step; + module Heuristic { + private module ModelPrintingSummaryInput implements Printing::ModelPrintingSummarySig { + class SummaryApi = DataFlowSummaryTargetApi; + + string getProvenance() { result = "df-generated" } + } + + module ModelPrintingSummary = Printing::ModelPrintingSummary; - TaintRead() { this = TTaintRead(step) } + /** + * Gets the MaD string representation of the parameter node `p`. + */ + string parameterNodeAsInput(DataFlow::ParameterNode p) { + result = parameterAccess(asParameter(p)) + or + result = qualifierString() and p instanceof InstanceParameterNode + } /** - * Gets the flow state step number. + * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). */ - int getStep() { result = step } + private string captureQualifierFlow(DataFlowSummaryTargetApi api) { + exists(ReturnNodeExt ret | + api = returnNodeEnclosingCallable(ret) and + isOwnInstanceAccessNode(ret) + ) and + result = ModelPrintingSummary::asLiftedValueModel(api, qualifierString(), "ReturnValue") + } - override string toString() { result = "TaintRead(" + step + ")" } - } + private int accessPathLimit0() { result = 2 } - /** - * A FlowState representing a tainted write. - */ - private class TaintStore extends TaintState, TTaintStore { - private int step; + private newtype TTaintState = + TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or + TTaintStore(int n) { n in [1 .. accessPathLimit0()] } - TaintStore() { this = TTaintStore(step) } + abstract private class TaintState extends TTaintState { + abstract string toString(); + } /** - * Gets the flow state step number. + * A FlowState representing a tainted read. */ - int getStep() { result = step } + private class TaintRead extends TaintState, TTaintRead { + private int step; - override string toString() { result = "TaintStore(" + step + ")" } - } + TaintRead() { this = TTaintRead(step) } - /** - * A data flow configuration for tracking flow through APIs. - * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. - * - * This can be used to generate Flow summaries for APIs from parameter to return. - */ - private module PropagateFlowConfig implements DataFlow::StateConfigSig { - class FlowState = TaintState; - - predicate isSource(DataFlow::Node source, FlowState state) { - source instanceof DataFlow::ParameterNode and - exists(Callable c | - c = getEnclosingCallable(source) and - c instanceof DataFlowSummaryTargetApi and - not isUninterestingForHeuristicDataFlowModels(c) - ) and - state.(TaintRead).getStep() = 0 + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintRead(" + step + ")" } } - predicate isSink(DataFlow::Node sink, FlowState state) { - // Sinks are provided by `isSink/1` - none() + /** + * A FlowState representing a tainted write. + */ + private class TaintStore extends TaintState, TTaintStore { + private int step; + + TaintStore() { this = TTaintStore(step) } + + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintStore(" + step + ")" } } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - not isOwnInstanceAccessNode(sink) and - not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) + /** + * A data flow configuration for tracking flow through APIs. + * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. + * + * This can be used to generate Flow summaries for APIs from parameter to return. + */ + private module PropagateFlowConfig implements DataFlow::StateConfigSig { + class FlowState = TaintState; + + predicate isSource(DataFlow::Node source, FlowState state) { + source instanceof DataFlow::ParameterNode and + exists(Callable c | + c = getEnclosingCallable(source) and + c instanceof DataFlowSummaryTargetApi and + not isUninterestingForHeuristicDataFlowModels(c) + ) and + state.(TaintRead).getStep() = 0 + } + + predicate isSink(DataFlow::Node sink, FlowState state) { + // Sinks are provided by `isSink/1` + none() + } + + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + not isOwnInstanceAccessNode(sink) and + not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) + } + + predicate isAdditionalFlowStep( + DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + ) { + exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c | + node1 = n1.asNode() and + node2 = n2.asNode() and + DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and + isRelevantContent0(c) and + ( + state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 + or + state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() + ) + ) + or + exists(DataFlow::ContentSet c | + DataFlow::readStep(node1, c, node2) and + isRelevantContent0(c) and + state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + ) + } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureEqualSourceSinkCallContext + } } - predicate isAdditionalFlowStep( - DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + module PropagateFlow = TaintTracking::GlobalWithState; + + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + */ + string captureThroughFlow0( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt ) { - exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c | - node1 = n1.asNode() and - node2 = n2.asNode() and - DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and - isRelevantContent0(c) and - ( - state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 - or - state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() - ) - ) - or - exists(DataFlow::ContentSet c | - DataFlow::readStep(node1, c, node2) and - isRelevantContent0(c) and - state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + exists(string input, string output | + getEnclosingCallable(p) = api and + getEnclosingCallable(returnNodeExt) = api and + input = parameterNodeAsInput(p) and + output = getOutput(returnNodeExt) and + input != output and + result = ModelPrintingSummary::asLiftedTaintModel(api, input, output) ) } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + */ + private string captureThroughFlow(DataFlowSummaryTargetApi api) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + PropagateFlow::flow(p, returnNodeExt) and + result = captureThroughFlow0(api, p, returnNodeExt) + ) } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to the + * return value or parameter or if `api` is a fluent API. + */ + string captureFlow(DataFlowSummaryTargetApi api) { + result = captureQualifierFlow(api) or + result = captureThroughFlow(api) } - } - module PropagateFlow = TaintTracking::GlobalWithState; - - /** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. - */ - string captureThroughFlow0( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt - ) { - exists(string input, string output | - getEnclosingCallable(p) = api and - getEnclosingCallable(returnNodeExt) = api and - input = parameterNodeAsInput(p) and - output = getOutput(returnNodeExt) and - input != output and - result = ModelPrintingSummary::asLiftedTaintModel(api, input, output) - ) + /** + * Gets the neutral summary model for `api`, if any. + * A neutral summary model is generated, if we are not generating + * a summary model that applies to `api`. + */ + string captureNoFlow(DataFlowSummaryTargetApi api) { + not exists(DataFlowSummaryTargetApi api0 | + exists(captureFlow(api0)) and api0.lift() = api.lift() + ) and + api.isRelevant() and + result = ModelPrintingSummary::asNeutralSummaryModel(api) + } } /** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + * Provides classes and predicates related to capturing summary models + * based on content data flow. */ - private string captureThroughFlow(DataFlowSummaryTargetApi api) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - PropagateFlow::flow(p, returnNodeExt) and - result = captureThroughFlow0(api, p, returnNodeExt) - ) - } + module ContentSensitive { + private import MakeImplContentDataFlow as ContentDataFlow - /** - * Gets the summary model(s) of `api`, if there is flow from parameters to the - * return value or parameter or if `api` is a fluent API. - */ - string captureFlow(DataFlowSummaryTargetApi api) { - result = captureQualifierFlow(api) or - result = captureThroughFlow(api) - } + private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source instanceof DataFlow::ParameterNode and + getEnclosingCallable(source) instanceof DataFlowSummaryTargetApi + } - /** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a summary model that applies to `api`. - */ - string captureNoFlow(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0 | - exists(captureFlow(api0)) and api0.lift() = api.lift() - ) and - api.isRelevant() and - result = ModelPrintingSummary::asNeutralSummaryModel(api) - } + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + getEnclosingCallable(sink) instanceof DataFlowSummaryTargetApi + } - /** - * A data flow configuration used for finding new sources. - * The sources are the already known existing sources and the sinks are the API return nodes. - * - * This can be used to generate Source summaries for an API, if the API expose an already known source - * via its return (then the API itself becomes a source). - */ - module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - sourceNode(source, kind) - ) - } + predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi - } + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } + int accessPathLimit() { result = 2 } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) - } + predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureEqualSourceSinkCallContext + } } - } - private module PropagateFromSource = TaintTracking::Global; + private module PropagateContentFlow = ContentDataFlow::Global; - /** - * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. - */ - string captureSource(DataFlowSourceTargetApi api) { - exists(NodeExtended source, ReturnNodeExt sink, string kind | - PropagateFromSource::flow(source, sink) and - sourceNode(source, kind) and - api = getEnclosingCallable(sink) and - not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and - result = ModelPrintingSourceOrSink::asSourceModel(api, getOutput(sink), kind) - ) - } + private module ContentModelPrintingInput implements Printing::ModelPrintingSummarySig { + class SummaryApi = DataFlowSummaryTargetApi; - /** - * A data flow configuration used for finding new sinks. - * The sources are the parameters of the API and the fields of the enclosing type. - * - * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) - * into an existing known sink (then the API itself becomes a sink). - */ - module PropagateToSinkConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - apiSource(source) and - getEnclosingCallable(source) instanceof DataFlowSinkTargetApi + string getProvenance() { result = "dfc-generated" } } - predicate isSink(DataFlow::Node sink) { - exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind)) + private module ContentModelPrinting = + Printing::ModelPrintingSummary; + + private string getContentOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt::getOutput(node) } - predicate isBarrier(DataFlow::Node node) { - exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t)) + /** + * Gets the MaD string representation of the parameter `p` + * when used in content flow. + */ + private string parameterNodeAsContentInput(DataFlow::ParameterNode p) { + result = parameterContentAccess(asParameter(p)) or - sinkModelSanitizer(node) + result = qualifierString() and p instanceof InstanceParameterNode } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureHasSourceCallContext + private string getContent(PropagateContentFlow::AccessPath ap, int i) { + result = "." + printContent(ap.getAtIndex(i)) } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + /** + * Gets the MaD string representation of a store step access path. + */ + private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i) } - } - private module PropagateToSink = TaintTracking::Global; + /** + * Gets the MaD string representation of a read step access path. + */ + private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i desc) + } - /** - * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. - */ - string captureSink(DataFlowSinkTargetApi api) { - exists(NodeExtended src, NodeExtended sink, string kind | - PropagateToSink::flow(src, sink) and - sinkNode(sink, kind) and - api = getEnclosingCallable(src) and - result = ModelPrintingSourceOrSink::asSinkModel(api, asInputArgument(src), kind) - ) - } - } + /** + * Holds if the access path `ap` contains a field or synthetic field access. + */ + private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + isField(ap.getAtIndex(_)) + } - /** - * Provides classes and predicates related to capturing summary models - * based on content data flow. - */ - module ContentSensitive { - private import MakeImplContentDataFlow as ContentDataFlow + /** + * Holds if this access path `ap` mentions a callback. + */ + private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) { + isCallback(ap.getAtIndex(_)) + } - private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - source instanceof DataFlow::ParameterNode and - getEnclosingCallable(source) instanceof DataFlowSummaryTargetApi + /** + * Holds if the access path `ap` is not a parameter or returnvalue of a callback + * stored in a field. + * + * That is, we currently don't include summaries that rely on parameters or return values + * of callbacks stored in fields. + */ + private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { + not (mentionsField(ap) and mentionsCallback(ap)) } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - getEnclosingCallable(sink) instanceof DataFlowSummaryTargetApi + private predicate apiFlow( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + getEnclosingCallable(returnNodeExt) = api and + getEnclosingCallable(p) = api } - predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; + /** + * A class of APIs relevant for modeling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API on a parameter, if + * #content flow from parameter <= 3 + * If an API produces more content flow on a parameter, it is likely that + * 1. Types are not sufficiently constrained on the parameter leading to a combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the heuristic based flow + * detection instead, as reads and stores would use a significant + * part of an objects internal state. + */ + private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { + private DataFlow::ParameterNode parameter; + + ContentDataFlowSummaryTargetApi() { + strictcount(string input, string output | + exists( + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores + | + apiFlow(this, parameter, reads, returnNodeExt, stores, _) and + input = parameterNodeAsContentInput(parameter) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) + ) + ) <= 3 + } + + /** + * Gets a parameter node of `this` api, where there are less than 3 possible models, if any. + */ + DataFlow::ParameterNode getARelevantParameterNode() { result = parameter } + } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + pragma[nomagic] + private predicate apiContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + getEnclosingCallable(returnNodeExt) = api and + getEnclosingCallable(p) = api and + p = api.getARelevantParameterNode() } - int accessPathLimit() { result = 2 } + /** + * Holds if any of the content sets in `path` translates into a synthetic field. + */ + private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { + exists(getSyntheticName(path.getAtIndex(_))) + } - predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } + private string getHashAtIndex(PropagateContentFlow::AccessPath ap, int i) { + result = getSyntheticName(ap.getAtIndex(i)) + } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + private string getReversedHash(PropagateContentFlow::AccessPath ap) { + result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i desc) } - } - private module PropagateContentFlow = ContentDataFlow::Global; + private string getHash(PropagateContentFlow::AccessPath ap) { + result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i) + } - private module ContentModelPrintingInput implements Printing::ModelPrintingSummarySig { - class SummaryApi = DataFlowSummaryTargetApi; + /** + * Gets all access paths that contain the synthetic fields + * from `ap` in reverse order (if `ap` contains at least one synthetic field). + * These are the possible candidates for synthetic path continuations. + */ + private PropagateContentFlow::AccessPath getSyntheticPathCandidate( + PropagateContentFlow::AccessPath ap + ) { + getHash(ap) = getReversedHash(result) + } - string getProvenance() { result = "dfc-generated" } - } + /** + * A module containing predicates for validating access paths containing content sets + * that translates into synthetic fields, when used for generated summary models. + */ + private module AccessPathSyntheticValidation { + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`). + */ + private predicate step( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + p.(NodeExtended).getType() = t1 and + returnNodeExt.getType() = t2 and + apiContentFlow(_, p, read, returnNodeExt, store, _) + ) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. + * + * Step A -> Synth. + */ + private predicate synthPathEntry( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + not hasSyntheticContent(read) and + hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` has synthetic content + * and `store` does not. + * + * Step Synth -> A. + */ + private predicate synthPathExit( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + not hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists a path of steps from `read` to an exit. + * + * read ->* Synth -> A + */ + private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { + synthPathExit(t, read, _, _) + or + hasSyntheticContent(read) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(t, read, midType, mid) and + reachesSynthExit(midType, getSyntheticPathCandidate(mid)) + ) + } + + /** + * Holds if there exists a path of steps from an entry to `store`. + * + * A -> Synth ->* store + */ + private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { + synthPathEntry(_, _, t, store) + or + hasSyntheticContent(store) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(midType, mid, t, store) and + synthEntryReaches(midType, getSyntheticPathCandidate(mid)) + ) + } + + /** + * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) + * contain content that will be translated into a synthetic field, when being used in + * a MaD summary model, and if there is a range of APIs, such that + * when chaining their flow access paths, there exists access paths `A` and `B` where + * A ->* read -> store ->* B and where `A` and `B` do not contain content that will + * be translated into a synthetic field. + * + * This is needed because we don't want to include summaries that reads from or + * stores into an "internal" synthetic field. + * + * Example: + * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and + * `setX`, which gets and sets a private field `X` on `t`. + * This would lead to the following content flows + * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. + * setX : Argument[0] -> Argument[this].SyntheticField[t.X] + * As the reads and stores are on synthetic fields we should only make summaries + * if both of these methods exist. + */ + pragma[nomagic] + predicate acceptReadStore( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + synthPathEntry(t1, read, t2, store) and + reachesSynthExit(t2, getSyntheticPathCandidate(store)) + or + exists(PropagateContentFlow::AccessPath store0 | + getSyntheticPathCandidate(store0) = read + | + synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + or + synthEntryReaches(t1, store0) and + step(t1, read, t2, store) and + reachesSynthExit(t2, getSyntheticPathCandidate(store)) + ) + } + } + + /** + * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. + * Flow is considered relevant, + * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. + * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if + * the synthetic content is "live" on the relevant declaring type. + */ + private predicate apiRelevantContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath store, boolean preservesValue + ) { + apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and + ( + not hasSyntheticContent(read) and not hasSyntheticContent(store) + or + AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read, + returnNodeExt.getType(), store) + ) + } - private module ContentModelPrinting = Printing::ModelPrintingSummary; + pragma[nomagic] + private predicate captureFlow0( + ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, + boolean lift + ) { + exists( + DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores + | + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and + input != output and + validateAccessPath(reads) and + validateAccessPath(stores) and + ( + if mentionsField(reads) or mentionsField(stores) + then lift = false and api.isRelevant() + else lift = true + ) + ) + } - private string getContentOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) + /** + * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to + * the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false. + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ + string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) { + exists(string input, string output, boolean preservesValue | + captureFlow0(api, input, output, _, lift) and + preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and + result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift) + ) + } } /** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. + * Gets the summary model(s) for `api`, if any. `lift` is true if the model is lifted + * otherwise false. + * The following heuristic is applied: + * 1. If content based flow yields at lease one summary for an API, then we use that. + * 2. If content based flow does not yield any summary for an API, then we try and + * generate flow summaries using the heuristic based summary generator. */ - private string parameterNodeAsContentInput(DataFlow::ParameterNode p) { - result = parameterContentAccess(asParameter(p)) + string captureFlow(DataFlowSummaryTargetApi api, boolean lift) { + result = ContentSensitive::captureFlow(api, lift) or - result = qualifierString() and p instanceof InstanceParameterNode - } - - private string getContent(PropagateContentFlow::AccessPath ap, int i) { - result = "." + printContent(ap.getAtIndex(i)) + not exists(DataFlowSummaryTargetApi api0 | + (api0 = api or api.lift() = api0) and + exists(ContentSensitive::captureFlow(api0, false)) + or + api0.lift() = api.lift() and + exists(ContentSensitive::captureFlow(api0, true)) + ) and + result = Heuristic::captureFlow(api) and + lift = true } /** - * Gets the MaD string representation of a store step access path. + * Gets the neutral summary model for `api`, if any. + * A neutral summary model is generated, if we are not generating + * a mixed summary model that applies to `api`. */ - private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i) + string captureNeutral(DataFlowSummaryTargetApi api) { + not exists(DataFlowSummaryTargetApi api0, boolean lift | + exists(captureFlow(api0, lift)) and + ( + lift = false and + (api0 = api or api0 = api.lift()) + or + lift = true and api0.lift() = api.lift() + ) + ) and + api.isRelevant() and + result = Heuristic::ModelPrintingSummary::asNeutralSummaryModel(api) } + } + + /** + * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. + */ + private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + exists(DataFlow::ContentSet f | + DataFlow::readStep(node1, f, node2) and + // Partially restrict the content types used for intermediate steps. + (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) + ) + or + exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f)) + } + /** + * Provides language-specific source model generator parameters. + */ + signature module SourceModelGeneratorInputSig { /** - * Gets the MaD string representation of a read step access path. + * A class of callables that are potentially relevant for generating source models. */ - private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i desc) - } + class SourceTargetApi extends Callable; /** - * Holds if the access path `ap` contains a field or synthetic field access. + * Holds if it is not relevant to generate a source model for `api`, even + * if flow is detected from a node within `source` to a sink within `api`. */ - private predicate mentionsField(PropagateContentFlow::AccessPath ap) { - isField(ap.getAtIndex(_)) - } + bindingset[sourceEnclosing, api] + predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api); /** - * Holds if this access path `ap` mentions a callback. + * Holds if `kind` is a relevant source kind for creating source models. */ - private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) { - isCallback(ap.getAtIndex(_)) - } + bindingset[kind] + predicate isRelevantSourceKind(string kind); /** - * Holds if the access path `ap` is not a parameter or returnvalue of a callback - * stored in a field. - * - * That is, we currently don't include summaries that rely on parameters or return values - * of callbacks stored in fields. + * Holds if `node` is specified as a source with the given kind in a MaD flow + * model. */ - private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { - not (mentionsField(ap) and mentionsCallback(ap)) - } + predicate sourceNode(Lang::Node node, string kind); + } - private predicate apiFlow( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue - ) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api - } + /** + * Provides language-specific sink model generator parameters. + */ + signature module SinkModelGeneratorInputSig { + /** + * A class of callables that are potentially relevant for generating sink models. + */ + class SinkTargetApi extends Callable; /** - * A class of APIs relevant for modeling using content flow. - * The following heuristic is applied: - * Content flow is only relevant for an API on a parameter, if - * #content flow from parameter <= 3 - * If an API produces more content flow on a parameter, it is likely that - * 1. Types are not sufficiently constrained on the parameter leading to a combinatorial - * explosion in dispatch and thus in the generated summaries. - * 2. It is a reasonable approximation to use the heuristic based flow - * detection instead, as reads and stores would use a significant - * part of an objects internal state. + * Gets the MaD input string representation of `source`. */ - private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { - private DataFlow::ParameterNode parameter; - - ContentDataFlowSummaryTargetApi() { - strictcount(string input, string output | - exists( - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores - | - apiFlow(this, parameter, reads, returnNodeExt, stores, _) and - input = parameterNodeAsContentInput(parameter) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) - ) - ) <= 3 - } + string getInputArgument(Lang::Node source); - /** - * Gets a parameter node of `this` api, where there are less than 3 possible models, if any. - */ - DataFlow::ParameterNode getARelevantParameterNode() { result = parameter } - } + /** + * Holds if `node` is a sanitizer for sink model construction. + */ + predicate sinkModelSanitizer(Lang::Node node); - pragma[nomagic] - private predicate apiContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue - ) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - getEnclosingCallable(returnNodeExt) = api and - getEnclosingCallable(p) = api and - p = api.getARelevantParameterNode() - } + /** + * Holds if `source` is an api entrypoint relevant for creating sink models. + */ + predicate apiSource(Lang::Node source); /** - * Holds if any of the content sets in `path` translates into a synthetic field. + * Holds if `kind` is a relevant sink kind for creating sink models. */ - private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { - exists(getSyntheticName(path.getAtIndex(_))) - } + bindingset[kind] + predicate isRelevantSinkKind(string kind); - private string getHashAtIndex(PropagateContentFlow::AccessPath ap, int i) { - result = getSyntheticName(ap.getAtIndex(i)) - } + /** + * Holds if `node` is specified as a sink with the given kind in a MaD flow + * model. + */ + predicate sinkNode(Lang::Node node, string kind); + } - private string getReversedHash(PropagateContentFlow::AccessPath ap) { - result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i desc) - } + /** + * Make a source model generator. + */ + module MakeSourceModelGenerator { + private import SourceModelGeneratorInput - private string getHash(PropagateContentFlow::AccessPath ap) { - result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i) - } + class DataFlowSourceTargetApi = SourceTargetApi; /** - * Gets all access paths that contain the synthetic fields - * from `ap` in reverse order (if `ap` contains at least one synthetic field). - * These are the possible candidates for synthetic path continuations. + * Provides classes and predicates related to capturing source models + * based on heuristic data flow. */ - private PropagateContentFlow::AccessPath getSyntheticPathCandidate( - PropagateContentFlow::AccessPath ap - ) { - getHash(ap) = getReversedHash(result) - } + module Heuristic { + private module ModelPrintingSourceOrSinkInput implements + Printing::ModelPrintingSourceOrSinkSig + { + class SourceOrSinkApi = DataFlowSourceTargetApi; - /** - * A module containing predicates for validating access paths containing content sets - * that translates into synthetic fields, when used for generated summary models. - */ - private module AccessPathSyntheticValidation { - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`). - */ - private predicate step( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - p.(NodeExtended).getType() = t1 and - returnNodeExt.getType() = t2 and - apiContentFlow(_, p, read, returnNodeExt, store, _) - ) + string getProvenance() { result = "df-generated" } } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. - * - * Step A -> Synth. - */ - private predicate synthPathEntry( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - not hasSyntheticContent(read) and - hasSyntheticContent(store) and - step(t1, read, t2, store) - } + private module ModelPrintingSourceOrSink = + Printing::ModelPrintingSourceOrSink; /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` has synthetic content - * and `store` does not. + * A data flow configuration used for finding new sources. + * The sources are the already known existing sources and the sinks are the API return nodes. * - * Step Synth -> A. + * This can be used to generate Source summaries for an API, if the API expose an already known source + * via its return (then the API itself becomes a source). */ - private predicate synthPathExit( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - not hasSyntheticContent(store) and - step(t1, read, t2, store) + module PropagateFromSourceConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + exists(string kind | + isRelevantSourceKind(kind) and + sourceNode(source, kind) + ) + } + + predicate isSink(DataFlow::Node sink) { + sink instanceof ReturnNodeExt and + getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureHasSinkCallContext + } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } } + private module PropagateFromSource = TaintTracking::Global; + /** - * Holds if there exists a path of steps from `read` to an exit. - * - * read ->* Synth -> A + * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. */ - private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { - synthPathExit(t, read, _, _) - or - hasSyntheticContent(read) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(t, read, midType, mid) and - reachesSynthExit(midType, getSyntheticPathCandidate(mid)) + string captureSource(DataFlowSourceTargetApi api) { + exists(NodeExtended source, ReturnNodeExt sink, string kind | + PropagateFromSource::flow(source, sink) and + sourceNode(source, kind) and + api = getEnclosingCallable(sink) and + not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and + result = ModelPrintingSourceOrSink::asSourceModel(api, getOutput(sink), kind) ) } + } + } + + /** + * Make a sink model generator. + */ + module MakeSinkModelGenerator { + private import SinkModelGeneratorInput + + class DataFlowSinkTargetApi = SinkTargetApi; + + /** + * Provides classes and predicates related to capturing sink models + * based on heuristic data flow. + */ + module Heuristic { + private module ModelPrintingSourceOrSinkInput implements + Printing::ModelPrintingSourceOrSinkSig + { + class SourceOrSinkApi = DataFlowSinkTargetApi; + + string getProvenance() { result = "df-generated" } + } + + private module ModelPrintingSourceOrSink = + Printing::ModelPrintingSourceOrSink; /** - * Holds if there exists a path of steps from an entry to `store`. - * - * A -> Synth ->* store + * Gets the MaD input string representation of `source`. */ - private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { - synthPathEntry(_, _, t, store) - or - hasSyntheticContent(store) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(midType, mid, t, store) and - synthEntryReaches(midType, getSyntheticPathCandidate(mid)) - ) - } + private string asInputArgument(NodeExtended source) { result = getInputArgument(source) } /** - * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) - * contain content that will be translated into a synthetic field, when being used in - * a MaD summary model, and if there is a range of APIs, such that - * when chaining their flow access paths, there exists access paths `A` and `B` where - * A ->* read -> store ->* B and where `A` and `B` do not contain content that will - * be translated into a synthetic field. + * A data flow configuration used for finding new sinks. + * The sources are the parameters of the API and the fields of the enclosing type. * - * This is needed because we don't want to include summaries that reads from or - * stores into an "internal" synthetic field. - * - * Example: - * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and - * `setX`, which gets and sets a private field `X` on `t`. - * This would lead to the following content flows - * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. - * setX : Argument[0] -> Argument[this].SyntheticField[t.X] - * As the reads and stores are on synthetic fields we should only make summaries - * if both of these methods exist. + * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) + * into an existing known sink (then the API itself becomes a sink). */ - pragma[nomagic] - predicate acceptReadStore( - Type t1, PropagateContentFlow::AccessPath read, Type t2, - PropagateContentFlow::AccessPath store - ) { - synthPathEntry(t1, read, t2, store) and - reachesSynthExit(t2, getSyntheticPathCandidate(store)) - or - exists(PropagateContentFlow::AccessPath store0 | getSyntheticPathCandidate(store0) = read | - synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + module PropagateToSinkConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + apiSource(source) and + getEnclosingCallable(source) instanceof DataFlowSinkTargetApi + } + + predicate isSink(DataFlow::Node sink) { + exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind)) + } + + predicate isBarrier(DataFlow::Node node) { + exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t)) or - synthEntryReaches(t1, store0) and - step(t1, read, t2, store) and - reachesSynthExit(t2, getSyntheticPathCandidate(store)) - ) + sinkModelSanitizer(node) + } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureHasSourceCallContext + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } } - } - /** - * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. - * Flow is considered relevant, - * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. - * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if - * the synthetic content is "live" on the relevant declaring type. - */ - private predicate apiRelevantContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath store, boolean preservesValue - ) { - apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and - ( - not hasSyntheticContent(read) and not hasSyntheticContent(store) - or - AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read, - returnNodeExt.getType(), store) - ) - } + private module PropagateToSink = TaintTracking::Global; - pragma[nomagic] - private predicate captureFlow0( - ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, - boolean lift - ) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - validateAccessPath(reads) and - validateAccessPath(stores) and - ( - if mentionsField(reads) or mentionsField(stores) - then lift = false and api.isRelevant() - else lift = true + /** + * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. + */ + string captureSink(DataFlowSinkTargetApi api) { + exists(NodeExtended src, NodeExtended sink, string kind | + PropagateToSink::flow(src, sink) and + sinkNode(sink, kind) and + api = getEnclosingCallable(src) and + result = ModelPrintingSourceOrSink::asSinkModel(api, asInputArgument(src), kind) ) - ) - } - - /** - * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to - * the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false. - * - * Models are lifted to the best type in case the read and store access paths do not - * contain a field or synthetic field access. - */ - string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) { - exists(string input, string output, boolean preservesValue | - captureFlow0(api, input, output, _, lift) and - preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and - result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift) - ) + } } } - - /** - * Gets the summary model(s) for `api`, if any. `lift` is true if the model is lifted - * otherwise false. - * The following heuristic is applied: - * 1. If content based flow yields at lease one summary for an API, then we use that. - * 2. If content based flow does not yield any summary for an API, then we try and - * generate flow summaries using the heuristic based summary generator. - */ - string captureFlow(DataFlowSummaryTargetApi api, boolean lift) { - result = ContentSensitive::captureFlow(api, lift) - or - not exists(DataFlowSummaryTargetApi api0 | - (api0 = api or api.lift() = api0) and - exists(ContentSensitive::captureFlow(api0, false)) - or - api0.lift() = api.lift() and - exists(ContentSensitive::captureFlow(api0, true)) - ) and - result = Heuristic::captureFlow(api) and - lift = true - } - - /** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a mixed summary model that applies to `api`. - */ - string captureNeutral(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0, boolean lift | - exists(captureFlow(api0, lift)) and - ( - lift = false and - (api0 = api or api0 = api.lift()) - or - lift = true and api0.lift() = api.lift() - ) - ) and - api.isRelevant() and - result = Heuristic::ModelPrintingSummary::asNeutralSummaryModel(api) - } } From 0016fbfa213c4bc04d9239cab76c6233e7ac5982 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 25 Apr 2025 09:34:33 +0200 Subject: [PATCH 3/7] C#: Re-factor implementation to use the new model generator interface. --- .../CaptureContentSummaryModels.ql | 1 + .../modelgenerator/CaptureNeutralModels.ql | 1 + .../utils/modelgenerator/CaptureSinkModels.ql | 1 + .../modelgenerator/CaptureSourceModels.ql | 1 + .../modelgenerator/CaptureSummaryModels.ql | 1 + .../debug/CaptureSummaryModelsPartialPath.ql | 1 + .../debug/CaptureSummaryModelsPath.ql | 1 + .../modelgenerator/internal/CaptureModels.qll | 376 +++++++++--------- .../internal/CaptureModelsPrinting.qll | 2 +- .../CaptureTypeBasedSummaryModels.qll | 5 +- .../dataflow/CaptureContentSummaryModels.ql | 1 + .../dataflow/CaptureHeuristicSummaryModels.ql | 1 + .../dataflow/CaptureNeutralModels.ql | 1 + .../dataflow/CaptureSinkModels.ql | 1 + .../dataflow/CaptureSourceModels.ql | 1 + 15 files changed, 208 insertions(+), 187 deletions(-) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index 039c96a9a0bc..4d56c922a397 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index 2afb0ea02845..c74240bedea5 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql index f4c9405c96a7..f0d3294cd8ac 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 70f853b35a95..97688d6eb423 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index a0193397eb2e..61656c200adf 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index beb14cd8e627..60d7b42a46df 100644 --- a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -10,6 +10,7 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index e3de78767eaa..a53958ad0e6d 100644 --- a/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/csharp/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -10,6 +10,7 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ce83369df077..7bf8a248a6f2 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -15,7 +15,41 @@ private import semmle.code.csharp.frameworks.System private import semmle.code.csharp.Location private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate irrelevantAccessor(CS::Accessor a) { + a.getDeclaration().(CS::Property).isReadWrite() +} + +private predicate isUninterestingForModels(Callable api) { + api.getDeclaringType().getNamespace().getFullName() = "" + or + api instanceof CS::ConversionOperator + or + api instanceof Util::MainMethod + or + api instanceof CS::Destructor + or + api instanceof CS::AnonymousFunctionExpr + or + api.(CS::Constructor).isParameterless() + or + exists(Type decl | decl = api.getDeclaringType() | + decl instanceof SystemObjectClass or + decl instanceof SystemValueTypeClass + ) + or + // Disregard properties that have both a get and a set accessor, + // which implicitly means auto implemented properties. + irrelevantAccessor(api) +} + +private predicate relevant(Callable api) { + [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and + api.fromSource() and + api.isUnboundDeclaration() and + not isUninterestingForModels(api) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig { class Type = CS::Type; class Parameter = CS::Parameter; @@ -24,127 +58,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig`. - */ - private predicate isHigherOrder(Callable api) { - exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | - t instanceof SystemLinqExpressions::DelegateExtType - ) - } - - private predicate irrelevantAccessor(CS::Accessor a) { - a.getDeclaration().(CS::Property).isReadWrite() - } - - private predicate isUninterestingForModels(Callable api) { - api.getDeclaringType().getNamespace().getFullName() = "" - or - api instanceof CS::ConversionOperator - or - api instanceof Util::MainMethod - or - api instanceof CS::Destructor - or - api instanceof CS::AnonymousFunctionExpr - or - api.(CS::Constructor).isParameterless() - or - exists(Type decl | decl = api.getDeclaringType() | - decl instanceof SystemObjectClass or - decl instanceof SystemValueTypeClass - ) - or - // Disregard properties that have both a get and a set accessor, - // which implicitly means auto implemented properties. - irrelevantAccessor(api) - } - - private predicate relevant(Callable api) { - [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and - api.fromSource() and - api.isUnboundDeclaration() and - not isUninterestingForModels(api) - } - - private Callable getARelevantOverrideeOrImplementee(Overridable m) { - m.overridesOrImplements(result) and relevant(result) - } - - /** - * Gets the super implementation of `api` if it is relevant. - * If such a super implementation does not exist, returns `api` if it is relevant. - */ - private Callable liftedImpl(Callable api) { - ( - result = getARelevantOverrideeOrImplementee(api) - or - result = api and relevant(api) - ) and - not exists(getARelevantOverrideeOrImplementee(result)) - } - - private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSourceModel(Callable api) { - api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSinkModel(Callable api) { - api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) - } - - predicate isUninterestingForDataFlowModels(Callable api) { none() } - - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) } - - class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } - } - - class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } - } - - class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { - not hasManualSourceModel(this) and - // Do not generate source models for overridable callables - // as virtual dispatch implies that too many methods - // will be considered sources. - not this.(Overridable).overridesOrImplements(_) - } - } - - class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - Callable lift() { result = lift } - - predicate isRelevant() { - relevant(this) and - not hasManualSummaryModel(this) - } - } - /** * Holds if `t` is a type that is generally used for bulk data in collection types. * Eg. char[] is roughly equivalent to string and thus a highly @@ -205,6 +120,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig { @@ -251,62 +166,93 @@ module ModelGeneratorInput implements ModelGeneratorInputSig + +module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + Callable getAsExprEnclosingCallable(NodeExtended node) { + result = node.asExpr().getEnclosingCallable() } - private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) { - exists(DataFlowCall call | - dc1 = call.getEnclosingCallable() and - dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0) + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + + /** + * Holds if any of the parameters of `api` are `System.Func<>`. + */ + private predicate isHigherOrder(Callable api) { + exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | + t instanceof SystemLinqExpressions::DelegateExtType ) } - bindingset[dc1, dc2] - private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) = - fastTC(uniquelyCalls/2)(dc1, dc2) + private Callable getARelevantOverrideeOrImplementee(Overridable m) { + m.overridesOrImplements(result) and relevant(result) + } - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { - not exists(DataFlowCallable dc1, DataFlowCallable dc2 | - uniquelyCallsPlus(dc1, dc2) or dc1 = dc2 - | - dc1.getUnderlyingCallable() = api and - dc2.getUnderlyingCallable() = sourceEnclosing - ) + /** + * Gets the super implementation of `api` if it is relevant. + * If such a super implementation does not exist, returns `api` if it is relevant. + */ + private Callable liftedImpl(Callable api) { + ( + result = getARelevantOverrideeOrImplementee(api) + or + result = api and relevant(api) + ) and + not exists(getARelevantOverrideeOrImplementee(result)) } - string getInputArgument(DataFlow::Node source) { - exists(int pos | - pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and - result = "Argument[" + pos + "]" - ) - or - source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and - result = qualifierString() + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } + predicate isUninterestingForDataFlowModels(Callable api) { none() } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) } - predicate containerContent(DataFlow::ContentSet c) { c.isElement() } + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } + + Callable lift() { result = lift } + + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } + } predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and @@ -370,34 +316,96 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 97e58d439f0c..52611279b273 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import csharp as CS private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = CS::Callable; diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 62286c27888c..baba462c8a24 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -2,7 +2,8 @@ private import csharp private import semmle.code.csharp.frameworks.system.collections.Generic as GenericCollections private import semmle.code.csharp.dataflow.internal.DataFlowPrivate private import semmle.code.csharp.frameworks.system.linq.Expressions -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput +private import CaptureModels::SummaryModelGeneratorInput as SummaryModelGeneratorInput private import CaptureModelsPrinting /** @@ -189,7 +190,7 @@ private module Printing = ModelPrintingSummary; * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { +class TypeBasedFlowTargetApi extends SummaryModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 0d9e4cd52d9f..0c8134546d24 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 24cb66e427e7..b5a3b31a0354 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index d5aa685bfe31..e79cab74560c 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql index cc84ede4235e..0cc8dd6d08d3 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql index 4c10362960a2..2a54abf9b72d 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import csharp import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { From 2535055de054b8fa0d92510dfa56c813efe4ae30 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 25 Apr 2025 11:36:03 +0200 Subject: [PATCH 4/7] Java: Re-factor implementation to use the new model generator interface. --- .../CaptureContentSummaryModels.ql | 1 + .../modelgenerator/CaptureNeutralModels.ql | 1 + .../utils/modelgenerator/CaptureSinkModels.ql | 1 + .../modelgenerator/CaptureSourceModels.ql | 1 + .../modelgenerator/CaptureSummaryModels.ql | 1 + .../debug/CaptureSummaryModelsPartialPath.ql | 1 + .../debug/CaptureSummaryModelsPath.ql | 1 + .../modelgenerator/internal/CaptureModels.qll | 291 +++++++++--------- .../internal/CaptureModelsPrinting.qll | 2 +- .../CaptureTypeBasedSummaryModels.qll | 5 +- .../dataflow/CaptureContentSummaryModels.ql | 1 + .../dataflow/CaptureHeuristicSummaryModels.ql | 1 + .../dataflow/CaptureNeutralModels.ql | 1 + .../dataflow/CaptureSinkModels.ql | 1 + .../dataflow/CaptureSourceModels.ql | 1 + 15 files changed, 165 insertions(+), 145 deletions(-) diff --git a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index b1340e2c0d33..1fe70bae0b5b 100644 --- a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index d17c11d4a7b0..6008c3bfb8c4 100644 --- a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 7c316a02b090..7fcba8850d6d 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 4a955d4614b7..c623645820b6 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 34b6521e7b22..3bc49c31df27 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index 8895fdaefbb3..b9dc9ea236a0 100644 --- a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -11,6 +11,7 @@ import java import semmle.code.java.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index 8f6bf1c1f531..5925364800c9 100644 --- a/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/java/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -11,6 +11,7 @@ import java import semmle.code.java.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 9dd317b30067..cff4e97d8d33 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -25,7 +25,20 @@ predicate isPrimitiveTypeUsedForBulkData(J::Type t) { t.hasName(["byte", "char", "Byte", "Character"]) } -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate isInfrequentlyUsed(J::CompilationUnit cu) { + cu.getPackage().getName().matches("javax.swing%") or + cu.getPackage().getName().matches("java.awt%") +} + +private predicate relevant(Callable api) { + api.isPublic() and + api.getDeclaringType().isPublic() and + api.fromSource() and + not isUninterestingForModels(api) and + not isInfrequentlyUsed(api.getCompilationUnit()) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig { class Type = J::Type; class Parameter = J::Parameter; @@ -34,96 +47,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig + +module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + Callable getAsExprEnclosingCallable(NodeExtended node) { + result = node.asExpr().getEnclosingCallable() + } + + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + + private J::Method getARelevantOverride(J::Method m) { + result = m.getAnOverride() and + relevant(result) and + // Other exclusions for overrides. + not m instanceof J::ToStringMethod + } + + /** + * Gets the super implementation of `m` if it is relevant. + * If such a super implementations does not exist, returns `m` if it is relevant. + */ + private J::Callable liftedImpl(J::Callable m) { ( - source.asExpr().(J::FieldAccess).isOwnFieldAccess() or - source instanceof DataFlow::ParameterNode + result = getARelevantOverride(m) + or + result = m and relevant(m) ) and - exists(J::RefType t | - t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and - not t instanceof J::TypeObject and - t.isPublic() - ) + not exists(getARelevantOverride(result)) } - predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - - string getInputArgument(DataFlow::Node source) { - exists(int pos | - source.(DataFlow::ParameterNode).isParameterOf(_, pos) and - if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() - ) - or - source.asExpr() instanceof J::FieldAccess and - result = qualifierString() + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { - not kind = "log-injection" and - not kind.matches("regex-use%") and - not kind = "file-content-store" + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } + + Callable lift() { result = lift } + + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + predicate isUninterestingForDataFlowModels(Callable api) { + api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) + } - predicate containerContent = DataFlowPrivate::containerContent/1; + predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and @@ -287,34 +252,76 @@ module ModelGeneratorInput implements ModelGeneratorInputSig= 0 then result = "Argument[" + pos + "]" else result = qualifierString() + ) + or + source.asExpr() instanceof J::FieldAccess and + result = qualifierString() + } + + bindingset[kind] + predicate isRelevantSinkKind(string kind) { + not kind = "log-injection" and + not kind.matches("regex-use%") and + not kind = "file-content-store" + } predicate sinkNode = ExternalFlow::sinkNode/2; } -import MakeModelGenerator +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 4b376654afbe..0d26f36c690a 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import java as J private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = J::Callable; diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 9145a0779075..00c8c686c2d1 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -2,7 +2,8 @@ private import java private import semmle.code.java.Collections private import semmle.code.java.dataflow.internal.ContainerFlow private import CaptureModels as CaptureModels -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput +private import CaptureModels::SummaryModelGeneratorInput as SummaryModelGeneratorInput private import CaptureModelsPrinting /** @@ -296,7 +297,7 @@ private module Printing = ModelPrintingSummary; * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { +class TypeBasedFlowTargetApi extends SummaryModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 8dd23714fb79..b3d9101633b8 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 45485a8009a5..8d6021ab42e4 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index 3578153ddb82..ad567051922d 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql index 027670316c33..3d21b2e4f7d6 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql index d8346f0e3dca..bc95ecf3f2de 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import java import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { From c16d913f8a586daa995449a00651427d475466c0 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 25 Apr 2025 12:49:05 +0200 Subject: [PATCH 5/7] C++: Re-factor implementation to use the new model generator interface. --- .../CaptureContentSummaryModels.ql | 1 + .../modelgenerator/CaptureNeutralModels.ql | 1 + .../utils/modelgenerator/CaptureSinkModels.ql | 4 +- .../modelgenerator/CaptureSourceModels.ql | 4 +- .../modelgenerator/CaptureSummaryModels.ql | 1 + .../modelgenerator/internal/CaptureModels.qll | 311 +++++++++--------- .../internal/CaptureModelsPrinting.qll | 2 +- .../dataflow/CaptureContentSummaryModels.ql | 1 + .../dataflow/CaptureHeuristicSummaryModels.ql | 1 + 9 files changed, 173 insertions(+), 153 deletions(-) diff --git a/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index 8dc0c3d7f6b1..be0cf4434311 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index e9a5ea24deca..813eece65b96 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = captureNeutral(api) diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 5485a2645bcb..1ed9b0320709 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,8 +7,8 @@ */ import internal.CaptureModels -import Heuristic +import SinkModels from DataFlowSinkTargetApi api, string sink -where sink = captureSink(api) +where sink = Heuristic::captureSink(api) select sink order by sink diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql index c2240c646886..4f86fb6b6fb1 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,8 +7,8 @@ */ import internal.CaptureModels -import Heuristic +import SourceModels from DataFlowSourceTargetApi api, string source -where source = captureSource(api) +where source = Heuristic::captureSource(api) select source order by source diff --git a/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 60341abc0b59..a023afabd31c 100644 --- a/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/cpp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 58acfa011186..5b3beac34539 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -2,7 +2,7 @@ * Provides predicates related to capturing summary models of the Standard or a 3rd party library. */ -private import cpp +private import cpp as Cpp private import semmle.code.cpp.ir.IR private import semmle.code.cpp.dataflow.ExternalFlow as ExternalFlow private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon @@ -10,113 +10,67 @@ private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplSpecific private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate as DataFlowPrivate private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.code.cpp.ir.dataflow.internal.TaintTrackingImplSpecific -private import semmle.code.cpp.dataflow.new.TaintTracking +private import semmle.code.cpp.dataflow.new.TaintTracking as Tt +private import semmle.code.cpp.dataflow.new.DataFlow as Df private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl -module ModelGeneratorInput implements ModelGeneratorInputSig { +/** + * Holds if `f` is a "private" function. + * + * A "private" function does not contribute any models as it is assumed + * to be an implementation detail of some other "public" function for which + * we will generate a summary. + */ +private predicate isPrivateOrProtected(Cpp::Function f) { + f.getNamespace().getParentNamespace*().isAnonymous() + or + exists(Cpp::MemberFunction mf | mf = f | + mf.isPrivate() + or + mf.isProtected() + ) + or + f.isStatic() +} + +private predicate isUninterestingForModels(Callable api) { + // Note: This also makes all global/static-local variables + // not relevant (which is good!) + not api.(Cpp::Function).hasDefinition() + or + isPrivateOrProtected(api) + or + api instanceof Cpp::Destructor + or + api = any(Cpp::LambdaExpression lambda).getLambdaFunction() + or + api.isFromUninstantiatedTemplate(_) +} + +private predicate relevant(Callable api) { + api.fromSource() and + not isUninterestingForModels(api) +} + +module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig +{ + private module DataFlow = Df::DataFlow; + class Type = DataFlowPrivate::DataFlowType; // Note: This also includes `this` class Parameter = DataFlow::ParameterNode; - class Callable = Declaration; + class Callable = Cpp::Declaration; class NodeExtended extends DataFlow::Node { Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingDeclaration() } } - Parameter asParameter(NodeExtended n) { result = n } - Callable getEnclosingCallable(NodeExtended n) { result = n.getEnclosingCallable().asSourceCallable() } - Callable getAsExprEnclosingCallable(NodeExtended n) { - result = n.asExpr().getEnclosingDeclaration() - } - - /** Gets `api` if it is relevant. */ - private Callable liftedImpl(Callable api) { result = api and relevant(api) } - - private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSourceModel(Callable api) { - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) - } - - private predicate hasManualSinkModel(Callable api) { - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) - } - - /** - * Holds if `f` is a "private" function. - * - * A "private" function does not contribute any models as it is assumed - * to be an implementation detail of some other "public" function for which - * we will generate a summary. - */ - private predicate isPrivateOrProtected(Function f) { - f.getNamespace().getParentNamespace*().isAnonymous() - or - exists(MemberFunction mf | mf = f | - mf.isPrivate() - or - mf.isProtected() - ) - or - f.isStatic() - } - - private predicate isUninterestingForModels(Callable api) { - // Note: This also makes all global/static-local variables - // not relevant (which is good!) - not api.(Function).hasDefinition() - or - isPrivateOrProtected(api) - or - api instanceof Destructor - or - api = any(LambdaExpression lambda).getLambdaFunction() - or - api.isFromUninstantiatedTemplate(_) - } - - private predicate relevant(Callable api) { - api.fromSource() and - not isUninterestingForModels(api) - } - - class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - Callable lift() { result = lift } - - predicate isRelevant() { - relevant(this) and - not hasManualSummaryModel(this) - } - } - - class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } - } - - class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } - } - - class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { not hasManualSourceModel(this) } - } - class InstanceParameterNode extends DataFlow::ParameterNode { InstanceParameterNode() { DataFlowPrivate::nodeHasInstruction(this, @@ -124,7 +78,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig" @@ -166,7 +120,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig + +private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + private module DataFlow = Df::DataFlow; + + Parameter asParameter(NodeExtended n) { result = n } + + Callable getAsExprEnclosingCallable(NodeExtended n) { + result = n.asExpr().getEnclosingDeclaration() + } + + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) + } + + /** Gets `api` if it is relevant. */ + private Callable liftedImpl(Callable api) { result = api and relevant(api) } + + class SummaryTargetApi extends Callable { + private Callable lift; + + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } + + Callable lift() { result = lift } + + predicate isRelevant() { + relevant(this) and + not hasManualSummaryModel(this) + } + } predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and + Tt::TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and not exists(DataFlow::Content f | DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) ) @@ -341,7 +328,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 7841f8ed1a44..43342aa671ee 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import cpp as Cpp private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = Cpp::Declaration; diff --git a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 0156eaaeb988..8196f6329cc9 100644 --- a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -1,5 +1,6 @@ import cpp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import InlineModelsAsDataTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql index 3ab1dc6c4710..fc05c4fe434f 100644 --- a/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql +++ b/cpp/ql/test/library-tests/dataflow/modelgenerator/dataflow/CaptureHeuristicSummaryModels.ql @@ -1,5 +1,6 @@ import cpp import utils.modelgenerator.internal.CaptureModels +import SummaryModels import InlineModelsAsDataTest module InlineMadTestConfig implements InlineMadTestConfigSig { From a6b5645b139350a5527959492eeb7aa784b9cfc7 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 25 Apr 2025 13:09:13 +0200 Subject: [PATCH 6/7] Rust: Re-factor implementation to use the new model generator interface. --- .../CaptureContentSummaryModels.ql | 1 + .../modelgenerator/CaptureNeutralModels.ql | 1 + .../utils/modelgenerator/CaptureSinkModels.ql | 1 + .../modelgenerator/CaptureSourceModels.ql | 1 + .../modelgenerator/CaptureSummaryModels.ql | 1 + .../debug/CaptureSummaryModelsPartialPath.ql | 1 + .../debug/CaptureSummaryModelsPath.ql | 1 + .../modelgenerator/internal/CaptureModels.qll | 169 ++++++++++-------- .../internal/CaptureModelsPrinting.qll | 2 +- .../modelgenerator/CaptureSinkModels.ql | 1 + .../modelgenerator/CaptureSourceModels.ql | 1 + .../modelgenerator/CaptureSummaryModels.ql | 1 + 12 files changed, 103 insertions(+), 78 deletions(-) diff --git a/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index da90465197e5..4b672e74da59 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = ContentSensitive::captureFlow(api, _) diff --git a/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index 8efc8a485e12..556d1624f39d 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string noflow where noflow = Heuristic::captureNoFlow(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql index 36b1b8132977..989637a867e3 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSinkModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SinkModels from DataFlowSinkTargetApi api, string sink where sink = Heuristic::captureSink(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql index 7086f719b2d8..2ffc4894b18c 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSourceModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SourceModels from DataFlowSourceTargetApi api, string source where source = Heuristic::captureSource(api) diff --git a/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 8947dd015310..d0b3152c9be2 100644 --- a/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/rust/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,6 +7,7 @@ */ import internal.CaptureModels +import SummaryModels from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api, _) diff --git a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql index eb0cd638b534..b8855b94bf2a 100644 --- a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql +++ b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPartialPath.ql @@ -10,6 +10,7 @@ private import codeql.rust.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import PartialFlow::PartialPathGraph int explorationLimit() { result = 3 } diff --git a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql index 1ddec1ff618b..ac25306ceeee 100644 --- a/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql +++ b/rust/ql/src/utils/modelgenerator/debug/CaptureSummaryModelsPath.ql @@ -10,6 +10,7 @@ private import codeql.rust.dataflow.DataFlow import utils.modelgenerator.internal.CaptureModels +import SummaryModels import Heuristic import PropagateFlow::PathGraph diff --git a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 237da46750bd..6aeae06bbdb9 100644 --- a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -2,7 +2,7 @@ private import codeql.util.Unit private import rust private import rust as R private import codeql.rust.dataflow.DataFlow -private import codeql.rust.dataflow.internal.DataFlowImpl +private import codeql.rust.dataflow.internal.DataFlowImpl as DataFlowImpl private import codeql.rust.dataflow.internal.Node as Node private import codeql.rust.dataflow.internal.Content private import codeql.rust.dataflow.FlowSource as FlowSource @@ -11,7 +11,25 @@ private import codeql.rust.dataflow.internal.TaintTrackingImpl private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl private import codeql.rust.dataflow.internal.FlowSummaryImpl as FlowSummary -module ModelGeneratorInput implements ModelGeneratorInputSig { +private predicate relevant(Function api) { + // Only include functions that have a resolved path. + api.hasCrateOrigin() and + api.hasExtendedCanonicalPath() and + ( + // This excludes closures (these are not exported API endpoints) and + // functions without a `pub` visiblity. A function can be `pub` without + // ultimately being exported by a crate, so this is an overapproximation. + api.hasVisibility() + or + // If a method implements a public trait it is exposed through the trait. + // We overapproximate this by including all trait method implementations. + exists(Impl impl | impl.hasTrait() and impl.getAssocItemList().getAssocItem(_) = api) + ) +} + +module ModelGeneratorCommonInput implements + ModelGeneratorCommonInputSig +{ // NOTE: We are not using type information for now. class Type = Unit; @@ -23,55 +41,10 @@ module ModelGeneratorInput implements ModelGeneratorInputSig - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } +private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { + class SummaryTargetApi extends Callable { + private Callable lift; - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } + SummaryTargetApi() { + lift = this and + relevant(this) + } - predicate containerContent(DataFlow::ContentSet c) { - c.(SingletonContentSet).getContent() instanceof ElementContent + Callable lift() { result = lift } + + predicate isRelevant() { relevant(this) } } + Callable getAsExprEnclosingCallable(NodeExtended node) { result = node.asExpr().getScope() } + + Parameter asParameter(NodeExtended node) { result = node.asParameter() } + + predicate isUninterestingForDataFlowModels(Callable api) { none() } + + predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } + predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } predicate isField(DataFlow::ContentSet c) { @@ -159,7 +145,7 @@ module ModelGeneratorInput implements ModelGeneratorInputSig +import MakeSummaryModelGenerator as SummaryModels +import MakeSourceModelGenerator as SourceModels +import MakeSinkModelGenerator as SinkModels diff --git a/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 4c2da918f739..789113f7580d 100644 --- a/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/rust/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,6 +1,6 @@ private import rust as R private import codeql.mad.modelgenerator.internal.ModelPrinting -private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput +private import CaptureModels::ModelGeneratorCommonInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = R::Callable; diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql index 14edea3af8a1..4b8041fb4447 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSinkModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SinkModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql index 66f0780448c7..c535ce787043 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSourceModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SourceModels import utils.test.InlineMadTest import codeql.rust.dataflow.internal.ModelsAsData diff --git a/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql b/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql index 002689a20390..e0788253f43e 100644 --- a/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql +++ b/rust/ql/test/utils-tests/modelgenerator/CaptureSummaryModels.ql @@ -1,5 +1,6 @@ import rust import utils.modelgenerator.internal.CaptureModels +import SummaryModels import utils.test.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { From c9d01bc6079e4e698e90c82ba1099d11a8df5881 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 25 Apr 2025 14:06:04 +0200 Subject: [PATCH 7/7] Shared: Sprinkle some predicate defaults and clean up. --- .../modelgenerator/internal/CaptureModels.qll | 16 ------ .../modelgenerator/internal/CaptureModels.qll | 10 ---- .../modelgenerator/internal/CaptureModels.qll | 7 --- .../modelgenerator/internal/CaptureModels.qll | 15 ------ .../internal/ModelGeneratorImpl.qll | 53 ++++++++----------- 5 files changed, 22 insertions(+), 79 deletions(-) diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 5b3beac34539..93abe205f1a6 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -360,12 +360,6 @@ private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputS result = "Element[" + ec.getIndirectionIndex() + "]" ) } - - predicate isUninterestingForDataFlowModels(Callable api) { none() } - - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { - isUninterestingForDataFlowModels(api) - } } private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig { @@ -377,11 +371,6 @@ private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig SourceTargetApi() { relevant(this) and not hasManualSourceModel(this) } } - predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } - predicate sourceNode = ExternalFlow::sourceNode/2; } @@ -396,8 +385,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { SinkTargetApi() { relevant(this) and not hasManualSinkModel(this) } } - predicate sinkModelSanitizer(DataFlow::Node node) { none() } - predicate apiSource(DataFlow::Node source) { DataFlowPrivate::nodeHasOperand(source, any(DataFlow::FieldAddress fa), 1) or @@ -416,9 +403,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { result = qualifierString() } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } - predicate sinkNode = ExternalFlow::sinkNode/2; } diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 7bf8a248a6f2..b0300e4a87f1 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -234,8 +234,6 @@ module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) } - predicate isUninterestingForDataFlowModels(Callable api) { none() } - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { isHigherOrder(api) } class SummaryTargetApi extends Callable { @@ -356,9 +354,6 @@ private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig ) } - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } - predicate sourceNode = ExternalFlow::sourceNode/2; } @@ -372,8 +367,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { SinkTargetApi() { relevant(this) and not hasManualSinkModel(this) } } - predicate sinkModelSanitizer(DataFlow::Node node) { none() } - private predicate isRelevantMemberAccess(DataFlow::Node node) { exists(CS::MemberAccess access | access = node.asExpr() | access.hasThisQualifier() and @@ -400,9 +393,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { result = qualifierString() } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } - predicate sinkNode = ExternalFlow::sinkNode/2; } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index cff4e97d8d33..09223d23b1c5 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -211,8 +211,6 @@ module SummaryModelGeneratorInput implements SummaryModelGeneratorInputSig { api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) } - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } - predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and not exists(DataFlow::Content f | @@ -264,11 +262,6 @@ private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig SourceTargetApi() { relevant(this) and not hasManualSourceModel(this) } } - predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } - predicate sourceNode = ExternalFlow::sourceNode/2; } diff --git a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 6aeae06bbdb9..99e1c527b546 100644 --- a/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/rust/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -123,10 +123,6 @@ private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputS Parameter asParameter(NodeExtended node) { result = node.asParameter() } - predicate isUninterestingForDataFlowModels(Callable api) { none() } - - predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } - predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } predicate isField(DataFlow::ContentSet c) { @@ -169,12 +165,6 @@ private module SourceModelGeneratorInput implements SourceModelGeneratorInputSig SourceTargetApi() { relevant(this) } } - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { none() } - - bindingset[kind] - predicate isRelevantSourceKind(string kind) { any() } - predicate sourceNode(DataFlow::Node node, string kind) { FlowSource::sourceNode(node, kind) } } @@ -183,8 +173,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { SinkTargetApi() { relevant(this) } } - predicate sinkModelSanitizer(DataFlow::Node node) { none() } - /** * Holds if `source` is an API entrypoint, i.e., a source of input where data * can flow in to a library. This is used for creating sink models, as we @@ -197,9 +185,6 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig { result = "Argument[" + source.(Node::SourceParameterNode).getPosition().toString() + "]" } - bindingset[kind] - predicate isRelevantSinkKind(string kind) { any() } - predicate sinkNode(DataFlow::Node node, string kind) { FlowSink::sinkNode(node, kind) } } diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll index d981ef151fd7..dabd687b52b9 100644 --- a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -295,7 +295,7 @@ module MakeModelGeneratorFactory< * * This serves as an extra filter for the `relevant` predicate. */ - predicate isUninterestingForDataFlowModels(Callable api); + default predicate isUninterestingForDataFlowModels(Callable api) { none() } /** * Holds if it is irrelevant to generate models for `api` based on the heuristic @@ -304,7 +304,7 @@ module MakeModelGeneratorFactory< * This serves as an extra filter for the `relevant` * and `isUninterestingForDataFlowModels` predicates. */ - predicate isUninterestingForHeuristicDataFlowModels(Callable api); + default predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() } } /** @@ -940,24 +940,20 @@ module MakeModelGeneratorFactory< */ class SourceTargetApi extends Callable; - /** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. - */ - bindingset[sourceEnclosing, api] - predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api); - - /** - * Holds if `kind` is a relevant source kind for creating source models. - */ - bindingset[kind] - predicate isRelevantSourceKind(string kind); - /** * Holds if `node` is specified as a source with the given kind in a MaD flow * model. */ predicate sourceNode(Lang::Node node, string kind); + + /** + * Holds if it is not relevant to generate a source model for `api`, even + * if flow is detected from a node within `source` to a sink within `api`. + */ + bindingset[sourceEnclosing, api] + default predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { + none() + } } /** @@ -970,14 +966,15 @@ module MakeModelGeneratorFactory< class SinkTargetApi extends Callable; /** - * Gets the MaD input string representation of `source`. + * Holds if `node` is specified as a sink with the given kind in a MaD flow + * model. */ - string getInputArgument(Lang::Node source); + predicate sinkNode(Lang::Node node, string kind); /** - * Holds if `node` is a sanitizer for sink model construction. + * Gets the MaD input string representation of `source`. */ - predicate sinkModelSanitizer(Lang::Node node); + string getInputArgument(Lang::Node source); /** * Holds if `source` is an api entrypoint relevant for creating sink models. @@ -985,16 +982,15 @@ module MakeModelGeneratorFactory< predicate apiSource(Lang::Node source); /** - * Holds if `kind` is a relevant sink kind for creating sink models. + * Holds if `node` is a sanitizer for sink model construction. */ - bindingset[kind] - predicate isRelevantSinkKind(string kind); + default predicate sinkModelSanitizer(Lang::Node node) { none() } /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. + * Holds if `kind` is a relevant sink kind for creating sink models. */ - predicate sinkNode(Lang::Node node, string kind); + bindingset[kind] + default predicate isRelevantSinkKind(string kind) { any() } } /** @@ -1029,12 +1025,7 @@ module MakeModelGeneratorFactory< * via its return (then the API itself becomes a source). */ module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - sourceNode(source, kind) - ) - } + predicate isSource(DataFlow::Node source) { sourceNode(source, _) } predicate isSink(DataFlow::Node sink) { sink instanceof ReturnNodeExt and