11/**
22 * @name Non-constant format string
3- * @description Passing a non-constant 'format' string to a printf-like function can lead
3+ * @description Passing a 'format' string that is not a string literal to a printf-like function can lead
44 * to a mismatch between the number of arguments defined by the 'format' and the number
55 * of arguments actually passed to the function. If the format string ultimately stems
66 * from an untrusted source, this can be used for exploits.
7+ * This query finds all sources leading to a format string that cannot be verified to be literal.
8+ * Even if the format string type is `const char*`, it is still considered non-constant if the
9+ * value is not a string literal. For example, if a function that is never observed to be called
10+ * takes in a `const char*` parameter and uses it as a format string, there is no way to verify that the originating
11+ * value was a string literal. This is especially problematic with conversion of string objects to C strings
12+ * via `c_str()`, which returns a `const char*` regardless of whether the original string was a string literal or not.
13+ * The query does not consider uninitialized variables as non-constant sources. Uninitialized
14+ * variables are a separate vulnerability concern and should be addressed by a separate query.
715 * @kind problem
816 * @problem.severity recommendation
917 * @security-severity 9.3
@@ -32,40 +40,37 @@ class UncalledFunction extends Function {
3240 }
3341}
3442
35- /*
36- * const char* means (const char)*, so the pointer is not const, the pointed to value is.
37- * Grabs the base type of the underlying type of `t` if `t` is a pointer and checks `isConst()` else
38- * checks on the underlying type of `t` alone.
39- */
40- predicate hasConstSpecifier ( Type t ) {
41- if t .getUnderlyingType ( ) instanceof PointerType
42- then t .getUnderlyingType ( ) .( PointerType ) .getBaseType ( ) .isConst ( )
43- else t .getUnderlyingType ( ) .isConst ( )
44- }
45-
4643/**
47- * Holds if `node` is a non-constant source of data flow.
44+ * Holds if `node` is a non-constant source of data flow for non-const format string detection.
4845 * This is defined as either:
4946 * 1) a `FlowSource`
5047 * 2) a parameter of an 'uncalled' function
5148 * 3) an argument to a function with no definition that is not known to define the output through its input
5249 * 4) an out arg of a function with no definition that is not known to define the output through its input
5350 *
54- * With exception to `FlowSource` all non-const values have a type that is not const
55- * (declared without a `const` specifier)
56- * ASSUMPTION: any const values are assumed to be static if their assignment is not seen
57- * i.e., assuming users did not get non-const data and cast into a const
58- *
5951 * The latter two cases address identifying standard string manipulation libraries as input sources
60- * e.g., strcpy, but it will identify unknown function calls as possible non-constant source
61- * since it cannot be determined if the out argument or return is constant.
52+ * e.g., strcpy. More simply, functions without definitions that are known to manipulate the
53+ * input to produce an output are not sources. Instead the ultimate source of input to these functions
54+ * should be considered as the source.
55+ *
56+ * False Negative Implication: This approach has false negatives (fails to identify non-const sources)
57+ * when the source is a field of a struct or object and the initialization is not observed statically.
58+ * There are 3 general cases where this can occur:
59+ * 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string.
60+ * 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically.
61+ * e.g., an object constructor isn't known statically, or a function sets fields
62+ * of a struct, but the function is not known statically.
63+ * 3) A function meeting case (3) or (4) above returns (through an out argument or return value)
64+ * a struct or object where a field containing a format string has been initialized.
65+ *
66+ * Note, uninitialized variables used as format strings are never detected by design.
67+ * Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query.
6268 */
6369predicate isNonConst ( DataFlow:: Node node ) {
6470 node instanceof FlowSource
6571 or
6672 // Parameters of uncalled functions (regardless of whether they are declared const)
6773 exists ( UncalledFunction f , Parameter p |
68- //not hasConstSpecifier(p.getType()) and
6974 f .getAParameter ( ) = p and
7075 p = node .asParameter ( )
7176 )
@@ -77,23 +82,20 @@ predicate isNonConst(DataFlow::Node node) {
7782 // are considered as possible non-const sources
7883 // The const-ness of the function's output is not checked when deciding whether it is a non-const source
7984 exists ( Call c |
80- exists ( Expr arg | c .getAnArgument ( ) = arg |
81- arg = node .asDefiningArgument ( )
82- // and
83- // not hasConstSpecifier(arg.getType())
84- )
85+ exists ( Expr arg | c .getAnArgument ( ) = arg | arg = node .asDefiningArgument ( ) )
8586 or
86- c = node .asIndirectExpr ( )
87- // and not hasConstSpecifier(c.getType())
87+ c = node .asIndirectExpr ( )
8888 ) and
8989 not exists ( FunctionInput input , FunctionOutput output , CallInstruction call |
9090 // NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
9191 // variant function's output are now possible non-const sources
9292 (
93- pragma [ only_bind_out ] ( call .getStaticCallTarget ( ) ) .( DataFlowFunction ) .hasDataFlow ( input , output ) or
93+ pragma [ only_bind_out ] ( call .getStaticCallTarget ( ) )
94+ .( DataFlowFunction )
95+ .hasDataFlow ( input , output ) or
9496 pragma [ only_bind_out ] ( call .getStaticCallTarget ( ) ) .( TaintFunction ) .hasTaintFlow ( input , output )
9597 ) and
96- node = callOutput ( call , output )
98+ node = callOutput ( call , output )
9799 ) and
98100 not exists ( Call c |
99101 c .getTarget ( ) .hasDefinition ( ) and
0 commit comments