@@ -197,28 +197,37 @@ private class CollectionContent extends Content, TCollectionContent {
197197}
198198
199199private class ArrayContent extends Content , TArrayContent {
200- override string toString ( ) { result = "array" }
200+ ArrayContent ( ) { this = TArrayContent ( ) }
201+
202+ override string toString ( ) { result = "array content" }
201203}
202204
203- private predicate storeStepNoChi ( Node node1 , Content f , PostUpdateNode node2 ) {
205+ private predicate fieldStoreStepNoChi ( Node node1 , FieldContent f , PostUpdateNode node2 ) {
204206 exists ( StoreInstruction store , Class c |
205207 store = node2 .asInstruction ( ) and
206208 store .getSourceValue ( ) = node1 .asInstruction ( ) and
207209 getWrittenField ( store , f .( FieldContent ) .getAField ( ) , c ) and
208- f .( FieldContent ) . hasOffset ( c , _, _)
210+ f .hasOffset ( c , _, _)
209211 )
210212}
211213
214+ private FieldAddressInstruction getFieldInstruction ( Instruction instr ) {
215+ result = instr or
216+ result = instr .( CopyValueInstruction ) .getUnary ( )
217+ }
218+
212219pragma [ noinline]
213- private predicate getWrittenField ( StoreInstruction store , Field f , Class c ) {
220+ private predicate getWrittenField ( Instruction instr , Field f , Class c ) {
214221 exists ( FieldAddressInstruction fa |
215- fa = store .getDestinationAddress ( ) and
222+ fa =
223+ getFieldInstruction ( [ instr .( StoreInstruction ) .getDestinationAddress ( ) ,
224+ instr .( WriteSideEffectInstruction ) .getDestinationAddress ( ) ] ) and
216225 f = fa .getField ( ) and
217226 c = f .getDeclaringType ( )
218227 )
219228}
220229
221- private predicate storeStepChi ( Node node1 , Content f , PostUpdateNode node2 ) {
230+ private predicate fieldStoreStepChi ( Node node1 , FieldContent f , PostUpdateNode node2 ) {
222231 exists ( StoreInstruction store , ChiInstruction chi |
223232 node1 .asInstruction ( ) = store and
224233 node2 .asInstruction ( ) = chi and
@@ -227,23 +236,59 @@ private predicate storeStepChi(Node node1, Content f, PostUpdateNode node2) {
227236 c = chi .getResultType ( ) and
228237 exists ( int startBit , int endBit |
229238 chi .getUpdatedInterval ( startBit , endBit ) and
230- f .( FieldContent ) . hasOffset ( c , startBit , endBit )
239+ f .hasOffset ( c , startBit , endBit )
231240 )
232241 or
233- getWrittenField ( store , f .( FieldContent ) . getAField ( ) , c ) and
234- f .( FieldContent ) . hasOffset ( c , _, _)
242+ getWrittenField ( store , f .getAField ( ) , c ) and
243+ f .hasOffset ( c , _, _)
235244 )
236245 )
237246}
238247
248+ private predicate arrayStoreStepChi ( Node node1 , ArrayContent a , PostUpdateNode node2 ) {
249+ a = TArrayContent ( ) and
250+ exists ( StoreInstruction store |
251+ node1 .asInstruction ( ) = store and
252+ (
253+ // `x[i] = taint()`
254+ // This matches the characteristic predicate in `ArrayStoreNode`.
255+ store .getDestinationAddress ( ) instanceof PointerAddInstruction
256+ or
257+ // `*p = taint()`
258+ // This matches the characteristic predicate in `PointerStoreNode`.
259+ store .getDestinationAddress ( ) .( CopyValueInstruction ) .getUnary ( ) instanceof LoadInstruction
260+ ) and
261+ // This `ChiInstruction` will always have a non-conflated result because both `ArrayStoreNode`
262+ // and `PointerStoreNode` require it in their characteristic predicates.
263+ node2 .asInstruction ( ) .( ChiInstruction ) .getPartial ( ) = store
264+ )
265+ }
266+
239267/**
240268 * Holds if data can flow from `node1` to `node2` via an assignment to `f`.
241269 * Thus, `node2` references an object with a field `f` that contains the
242270 * value of `node1`.
243271 */
244272predicate storeStep ( Node node1 , Content f , PostUpdateNode node2 ) {
245- storeStepNoChi ( node1 , f , node2 ) or
246- storeStepChi ( node1 , f , node2 )
273+ fieldStoreStepNoChi ( node1 , f , node2 ) or
274+ fieldStoreStepChi ( node1 , f , node2 ) or
275+ arrayStoreStepChi ( node1 , f , node2 ) or
276+ fieldStoreStepAfterArraySuppression ( node1 , f , node2 )
277+ }
278+
279+ // This predicate pushes the correct `FieldContent` onto the access path when the
280+ // `suppressArrayRead` predicate has popped off an `ArrayContent`.
281+ private predicate fieldStoreStepAfterArraySuppression (
282+ Node node1 , FieldContent f , PostUpdateNode node2
283+ ) {
284+ exists ( BufferMayWriteSideEffectInstruction write , ChiInstruction chi , Class c |
285+ not chi .isResultConflated ( ) and
286+ node1 .asInstruction ( ) = chi and
287+ node2 .asInstruction ( ) = chi and
288+ chi .getPartial ( ) = write and
289+ getWrittenField ( write , f .getAField ( ) , c ) and
290+ f .hasOffset ( c , _, _)
291+ )
247292}
248293
249294bindingset [ result , i]
@@ -263,23 +308,120 @@ private predicate getLoadedField(LoadInstruction load, Field f, Class c) {
263308 * Thus, `node1` references an object with a field `f` whose value ends up in
264309 * `node2`.
265310 */
266- predicate readStep ( Node node1 , Content f , Node node2 ) {
311+ private predicate fieldReadStep ( Node node1 , FieldContent f , Node node2 ) {
267312 exists ( LoadInstruction load |
268313 node2 .asInstruction ( ) = load and
269314 node1 .asInstruction ( ) = load .getSourceValueOperand ( ) .getAnyDef ( ) and
270315 exists ( Class c |
271316 c = load .getSourceValueOperand ( ) .getAnyDef ( ) .getResultType ( ) and
272317 exists ( int startBit , int endBit |
273318 load .getSourceValueOperand ( ) .getUsedInterval ( unbindInt ( startBit ) , unbindInt ( endBit ) ) and
274- f .( FieldContent ) . hasOffset ( c , startBit , endBit )
319+ f .hasOffset ( c , startBit , endBit )
275320 )
276321 or
277- getLoadedField ( load , f .( FieldContent ) .getAField ( ) , c ) and
278- f .( FieldContent ) .hasOffset ( c , _, _)
322+ getLoadedField ( load , f .getAField ( ) , c ) and
323+ f .hasOffset ( c , _, _)
324+ )
325+ )
326+ }
327+
328+ /**
329+ * When a store step happens in a function that looks like an array write such as:
330+ * ```cpp
331+ * void f(int* pa) {
332+ * *pa = source();
333+ * }
334+ * ```
335+ * it can be a write to an array, but it can also happen that `f` is called as `f(&a.x)`. If that is
336+ * the case, the `ArrayContent` that was written by the call to `f` should be popped off the access
337+ * path, and a `FieldContent` containing `x` should be pushed instead.
338+ * So this case pops `ArrayContent` off the access path, and the `fieldStoreStepAfterArraySuppression`
339+ * predicate in `storeStep` ensures that we push the right `FieldContent` onto the access path.
340+ */
341+ predicate suppressArrayRead ( Node node1 , ArrayContent a , Node node2 ) {
342+ a = TArrayContent ( ) and
343+ exists ( BufferMayWriteSideEffectInstruction write , ChiInstruction chi |
344+ node1 .asInstruction ( ) = write and
345+ node2 .asInstruction ( ) = chi and
346+ chi .getPartial ( ) = write and
347+ getWrittenField ( write , _, _)
348+ )
349+ }
350+
351+ private class ArrayToPointerConvertInstruction extends ConvertInstruction {
352+ ArrayToPointerConvertInstruction ( ) {
353+ this .getUnary ( ) .getResultType ( ) instanceof ArrayType and
354+ this .getResultType ( ) instanceof PointerType
355+ }
356+ }
357+
358+ private Instruction skipOneCopyValueInstruction ( Instruction instr ) {
359+ not instr instanceof CopyValueInstruction and result = instr
360+ or
361+ result = instr .( CopyValueInstruction ) .getUnary ( )
362+ }
363+
364+ private Instruction skipCopyValueInstructions ( Instruction instr ) {
365+ result = skipOneCopyValueInstruction * ( instr ) and not result instanceof CopyValueInstruction
366+ }
367+
368+ private predicate arrayReadStep ( Node node1 , ArrayContent a , Node node2 ) {
369+ a = TArrayContent ( ) and
370+ // Explicit dereferences such as `*p` or `p[i]` where `p` is a pointer or array.
371+ exists ( LoadInstruction load , Instruction address |
372+ load .getSourceValueOperand ( ) .isDefinitionInexact ( ) and
373+ node1 .asInstruction ( ) = load .getSourceValueOperand ( ) .getAnyDef ( ) and
374+ load = node2 .asInstruction ( ) and
375+ address = skipCopyValueInstructions ( load .getSourceAddress ( ) ) and
376+ (
377+ address instanceof LoadInstruction or
378+ address instanceof ArrayToPointerConvertInstruction or
379+ address instanceof PointerOffsetInstruction
279380 )
280381 )
281382}
282383
384+ /**
385+ * In cases such as:
386+ * ```cpp
387+ * void f(int* pa) {
388+ * *pa = source();
389+ * }
390+ * ...
391+ * int x;
392+ * f(&x);
393+ * use(x);
394+ * ```
395+ * the load on `x` in `use(x)` will exactly overlap with its definition (in this case the definition
396+ * is a `BufferMayWriteSideEffect`). This predicate pops the `ArrayContent` (pushed by the store in `f`)
397+ * from the access path.
398+ */
399+ private predicate exactReadStep ( Node node1 , ArrayContent a , Node node2 ) {
400+ a = TArrayContent ( ) and
401+ exists ( BufferMayWriteSideEffectInstruction write , ChiInstruction chi |
402+ not chi .isResultConflated ( ) and
403+ chi .getPartial ( ) = write and
404+ node1 .asInstruction ( ) = write and
405+ node2 .asInstruction ( ) = chi and
406+ // To distinguish this case from the `arrayReadStep` case we require that the entire variable was
407+ // overwritten by the `BufferMayWriteSideEffectInstruction` (i.e., there is a load that reads the
408+ // entire variable).
409+ exists ( LoadInstruction load | load .getSourceValue ( ) = chi )
410+ )
411+ }
412+
413+ /**
414+ * Holds if data can flow from `node1` to `node2` via a read of `f`.
415+ * Thus, `node1` references an object with a field `f` whose value ends up in
416+ * `node2`.
417+ */
418+ predicate readStep ( Node node1 , Content f , Node node2 ) {
419+ fieldReadStep ( node1 , f , node2 ) or
420+ arrayReadStep ( node1 , f , node2 ) or
421+ exactReadStep ( node1 , f , node2 ) or
422+ suppressArrayRead ( node1 , f , node2 )
423+ }
424+
283425/**
284426 * Holds if values stored inside content `c` are cleared at node `n`.
285427 */
0 commit comments