@@ -613,6 +613,11 @@ class VariableNode extends Node, TVariableNode {
613613 */
614614InstructionNode instructionNode ( Instruction instr ) { result .getInstruction ( ) = instr }
615615
616+ /**
617+ * Gets the node corresponding to `operand`.
618+ */
619+ OperandNode operandNode ( Operand operand ) { result .getOperand ( ) = operand }
620+
616621/**
617622 * DEPRECATED: use `definitionByReferenceNodeFromArgument` instead.
618623 *
@@ -693,14 +698,32 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
693698 ReadNodeFlow:: flowThrough ( nodeFrom , nodeTo )
694699 or
695700 ReadNodeFlow:: flowOutOf ( nodeFrom , nodeTo )
701+ or
702+ // Adjacent-def-use and adjacent-use-use flow
703+ adjacentDefUseFlow ( nodeFrom , nodeTo )
696704}
697705
698- pragma [ noinline]
699- private predicate getFieldSizeOfClass ( Class c , Type type , int size ) {
700- exists ( Field f |
701- f .getDeclaringType ( ) = c and
702- f .getUnderlyingType ( ) = type and
703- type .getSize ( ) = size
706+ private predicate adjacentDefUseFlow ( Node nodeFrom , Node nodeTo ) {
707+ // Flow that isn't already covered by field flow out of store/read nodes.
708+ not nodeFrom .asInstruction ( ) = any ( StoreNode pun ) .getStoreInstruction ( ) and
709+ not nodeFrom .asInstruction ( ) = any ( ReadNode pun ) .getALoadInstruction ( ) and
710+ (
711+ //Def-use flow
712+ Ssa:: ssaFlow ( nodeFrom , nodeTo )
713+ or
714+ exists ( Instruction loadAddress | loadAddress = Ssa:: getSourceAddressFromNode ( nodeFrom ) |
715+ // Use-use flow through reads
716+ exists ( Node address |
717+ Ssa:: addressFlowTC ( address .asInstruction ( ) , loadAddress ) and
718+ Ssa:: ssaFlow ( address , nodeTo )
719+ )
720+ or
721+ // Use-use flow through stores.
722+ exists ( Node store |
723+ Ssa:: explicitWrite ( _, store .asInstruction ( ) , loadAddress ) and
724+ Ssa:: ssaFlow ( store , nodeTo )
725+ )
726+ )
704727 )
705728}
706729
@@ -789,40 +812,39 @@ private module StoreNodeFlow {
789812
790813private predicate simpleOperandLocalFlowStep ( Instruction iFrom , Operand opTo ) {
791814 // Propagate flow from an instruction to its exact uses.
815+ // We do this for all instruction/operand pairs, except when the operand is the
816+ // side effect operand of a ReturnIndirectionInstruction, or the load operand of a LoadInstruction.
817+ // This is because we get these flows through the shared SSA library already, and including this
818+ // flow here will create multiple dataflow paths which creates a blowup in stage 3 of dataflow.
819+ (
820+ not any ( ReturnIndirectionInstruction ret ) .getSideEffectOperand ( ) = opTo and
821+ not any ( LoadInstruction load ) .getSourceValueOperand ( ) = opTo and
822+ not any ( ReturnValueInstruction ret ) .getReturnValueOperand ( ) = opTo
823+ ) and
792824 opTo .getDef ( ) = iFrom
793- or
794- opTo = any ( ReadSideEffectInstruction read ) .getSideEffectOperand ( ) and
795- not iFrom .isResultConflated ( ) and
796- iFrom = opTo .getAnyDef ( )
797- or
798- // Loading a single `int` from an `int *` parameter is not an exact load since
799- // the parameter may point to an entire array rather than a single `int`. The
800- // following rule ensures that any flow going into the
801- // `InitializeIndirectionInstruction`, even if it's for a different array
802- // element, will propagate to a load of the first element.
803- //
804- // Since we're linking `InitializeIndirectionInstruction` and
805- // `LoadInstruction` together directly, this rule will break if there's any
806- // reassignment of the parameter indirection, including a conditional one that
807- // leads to a phi node.
808- exists ( InitializeIndirectionInstruction init |
809- iFrom = init and
810- opTo .( LoadOperand ) .getAnyDef ( ) = init and
811- // Check that the types match. Otherwise we can get flow from an object to
812- // its fields, which leads to field conflation when there's flow from other
813- // fields to the object elsewhere.
814- init .getParameter ( ) .getType ( ) .getUnspecifiedType ( ) .( DerivedType ) .getBaseType ( ) =
815- opTo .getType ( ) .getUnspecifiedType ( )
816- )
817- or
818- // Flow from stores to structs with a single field to a load of that field.
819- exists ( LoadInstruction load |
820- load .getSourceValueOperand ( ) = opTo and
821- opTo .getAnyDef ( ) = iFrom and
822- isSingleFieldClass ( pragma [ only_bind_out ] ( pragma [ only_bind_out ] ( iFrom ) .getResultType ( ) ) , opTo )
825+ }
826+
827+ pragma [ noinline]
828+ private predicate getAddressType ( LoadInstruction load , Type t ) {
829+ exists ( Instruction address |
830+ address = load .getSourceAddress ( ) and
831+ t = address .getResultType ( )
823832 )
824833}
825834
835+ /**
836+ * Like the AST dataflow library, we want to conflate the address and value of a reference. This class
837+ * represents the `LoadInstruction` that is generated from a reference dereference.
838+ */
839+ private class ReferenceDereferenceInstruction extends LoadInstruction {
840+ ReferenceDereferenceInstruction ( ) {
841+ exists ( ReferenceType ref |
842+ getAddressType ( this , ref ) and
843+ this .getResultType ( ) = ref .getBaseType ( )
844+ )
845+ }
846+ }
847+
826848private predicate simpleInstructionLocalFlowStep ( Operand opFrom , Instruction iTo ) {
827849 iTo .( CopyInstruction ) .getSourceValueOperand ( ) = opFrom
828850 or
@@ -835,40 +857,8 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
835857 or
836858 iTo .( InheritanceConversionInstruction ) .getUnaryOperand ( ) = opFrom
837859 or
838- // A chi instruction represents a point where a new value (the _partial_
839- // operand) may overwrite an old value (the _total_ operand), but the alias
840- // analysis couldn't determine that it surely will overwrite every bit of it or
841- // that it surely will overwrite no bit of it.
842- //
843- // By allowing flow through the total operand, we ensure that flow is not lost
844- // due to shortcomings of the alias analysis. We may get false flow in cases
845- // where the data is indeed overwritten.
846- //
847- // Flow through the partial operand belongs in the taint-tracking libraries
848- // for now.
849- iTo .getAnOperand ( ) .( ChiTotalOperand ) = opFrom
850- or
851- // Add flow from write side-effects to non-conflated chi instructions through their
852- // partial operands. From there, a `readStep` will find subsequent reads of that field.
853- // Consider the following example:
854- // ```
855- // void setX(Point* p, int new_x) {
856- // p->x = new_x;
857- // }
858- // ...
859- // setX(&p, taint());
860- // ```
861- // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
862- // `setX`, which will be melded into `p` through a chi instruction.
863- exists ( ChiInstruction chi | chi = iTo |
864- opFrom .getAnyDef ( ) instanceof WriteSideEffectInstruction and
865- chi .getPartialOperand ( ) = opFrom and
866- not chi .isResultConflated ( ) and
867- // In a call such as `set_value(&x->val);` we don't want the memory representing `x` to receive
868- // dataflow by a simple step. Instead, this is handled by field flow. If we add a simple step here
869- // we can get field-to-object flow.
870- not chi .isPartialUpdate ( )
871- )
860+ // Conflate references and values like in AST dataflow.
861+ iTo .( ReferenceDereferenceInstruction ) .getSourceAddressOperand ( ) = opFrom
872862 or
873863 // Flow through modeled functions
874864 modelFlow ( opFrom , iTo )
0 commit comments