Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4622b69

Browse files
committed
Java: Update flow summary impl and external flow to support provenance and include testing in Csv model validation.
1 parent 649757c commit 4622b69

2 files changed

Lines changed: 57 additions & 38 deletions

File tree

java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@
6262
* sources "remote" indicates a default remote flow source, and for summaries
6363
* "taint" indicates a default additional taint step and "value" indicates a
6464
* globally applicable value-preserving step.
65+
* 9. The `provenance` column is a tag to indicate the origin of the summary.
66+
* There are two supported values: "generated" and "manual". "generated" means that
67+
* the model has been emitted by the model generator tool and "manual" means
68+
* that the model has been written by hand.
6569
*/
6670

6771
import java
@@ -415,17 +419,10 @@ private predicate summaryModel(string row) {
415419
any(SummaryModelCsv s).row(row)
416420
}
417421

418-
bindingset[input]
419-
private predicate getKind(string input, string kind, boolean generated) {
420-
input.splitAt(":", 0) = "generated" and kind = input.splitAt(":", 1) and generated = true
421-
or
422-
not input.matches("%:%") and kind = input and generated = false
423-
}
424-
425422
/** Holds if a source model exists for the given parameters. */
426423
predicate sourceModel(
427424
string namespace, string type, boolean subtypes, string name, string signature, string ext,
428-
string output, string kind, boolean generated
425+
string output, string kind, string provenance
429426
) {
430427
exists(string row |
431428
sourceModel(row) and
@@ -437,14 +434,15 @@ predicate sourceModel(
437434
row.splitAt(";", 4) = signature and
438435
row.splitAt(";", 5) = ext and
439436
row.splitAt(";", 6) = output and
440-
exists(string k | row.splitAt(";", 7) = k and getKind(k, kind, generated))
437+
row.splitAt(";", 7) = kind and
438+
row.splitAt(";", 8) = provenance
441439
)
442440
}
443441

444442
/** Holds if a sink model exists for the given parameters. */
445443
predicate sinkModel(
446444
string namespace, string type, boolean subtypes, string name, string signature, string ext,
447-
string input, string kind, boolean generated
445+
string input, string kind, string provenance
448446
) {
449447
exists(string row |
450448
sinkModel(row) and
@@ -456,22 +454,23 @@ predicate sinkModel(
456454
row.splitAt(";", 4) = signature and
457455
row.splitAt(";", 5) = ext and
458456
row.splitAt(";", 6) = input and
459-
exists(string k | row.splitAt(";", 7) = k and getKind(k, kind, generated))
457+
row.splitAt(";", 7) = kind and
458+
row.splitAt(";", 8) = provenance
460459
)
461460
}
462461

463462
/** Holds if a summary model exists for the given parameters. */
464463
predicate summaryModel(
465464
string namespace, string type, boolean subtypes, string name, string signature, string ext,
466-
string input, string output, string kind, boolean generated
465+
string input, string output, string kind, string provenance
467466
) {
468-
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, generated, _)
467+
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance, _)
469468
}
470469

471470
/** Holds if a summary model `row` exists for the given parameters. */
472471
predicate summaryModel(
473472
string namespace, string type, boolean subtypes, string name, string signature, string ext,
474-
string input, string output, string kind, boolean generated, string row
473+
string input, string output, string kind, string provenance, string row
475474
) {
476475
summaryModel(row) and
477476
row.splitAt(";", 0) = namespace and
@@ -483,7 +482,8 @@ predicate summaryModel(
483482
row.splitAt(";", 5) = ext and
484483
row.splitAt(";", 6) = input and
485484
row.splitAt(";", 7) = output and
486-
exists(string k | row.splitAt(";", 8) = k and getKind(k, kind, generated))
485+
row.splitAt(";", 8) = kind and
486+
row.splitAt(";", 9) = provenance
487487
}
488488

489489
private predicate relevantPackage(string package) {
@@ -517,25 +517,25 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int
517517
part = "source" and
518518
n =
519519
strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
520-
string ext, string output, boolean generated |
520+
string ext, string output, string provenance |
521521
canonicalPkgLink(package, subpkg) and
522-
sourceModel(subpkg, type, subtypes, name, signature, ext, output, kind, generated)
522+
sourceModel(subpkg, type, subtypes, name, signature, ext, output, kind, provenance)
523523
)
524524
or
525525
part = "sink" and
526526
n =
527527
strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
528-
string ext, string input, boolean generated |
528+
string ext, string input, string provenance |
529529
canonicalPkgLink(package, subpkg) and
530-
sinkModel(subpkg, type, subtypes, name, signature, ext, input, kind, generated)
530+
sinkModel(subpkg, type, subtypes, name, signature, ext, input, kind, provenance)
531531
)
532532
or
533533
part = "summary" and
534534
n =
535535
strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
536-
string ext, string input, string output, boolean generated |
536+
string ext, string input, string output, string provenance |
537537
canonicalPkgLink(package, subpkg) and
538-
summaryModel(subpkg, type, subtypes, name, signature, ext, input, output, kind, generated)
538+
summaryModel(subpkg, type, subtypes, name, signature, ext, input, output, kind, provenance)
539539
)
540540
)
541541
}
@@ -544,12 +544,16 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int
544544
module CsvValidation {
545545
/** Holds if some row in a CSV-based flow model appears to contain typos. */
546546
query predicate invalidModelRow(string msg) {
547-
exists(string pred, string namespace, string type, string name, string signature, string ext |
548-
sourceModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "source"
547+
exists(
548+
string pred, string namespace, string type, string name, string signature, string ext,
549+
string provenance
550+
|
551+
sourceModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "source"
549552
or
550-
sinkModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "sink"
553+
sinkModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "sink"
551554
or
552-
summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary"
555+
summaryModel(namespace, type, _, name, signature, ext, _, _, _, provenance) and
556+
pred = "summary"
553557
|
554558
not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
555559
msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
@@ -565,6 +569,9 @@ module CsvValidation {
565569
or
566570
not ext.regexpMatch("|Annotated") and
567571
msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
572+
or
573+
not provenance = ["manual", "generated"] and
574+
msg = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
568575
)
569576
or
570577
exists(string pred, string input, string part |
@@ -596,18 +603,18 @@ module CsvValidation {
596603
)
597604
or
598605
exists(string pred, string row, int expect |
599-
sourceModel(row) and expect = 8 and pred = "source"
606+
sourceModel(row) and expect = 9 and pred = "source"
600607
or
601-
sinkModel(row) and expect = 8 and pred = "sink"
608+
sinkModel(row) and expect = 9 and pred = "sink"
602609
or
603-
summaryModel(row) and expect = 9 and pred = "summary"
610+
summaryModel(row) and expect = 10 and pred = "summary"
604611
|
605612
exists(int cols |
606613
cols = 1 + max(int n | exists(row.splitAt(";", n))) and
607614
cols != expect and
608615
msg =
609616
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
610-
"."
617+
" in " + row + "."
611618
)
612619
or
613620
exists(string b |
@@ -617,9 +624,8 @@ module CsvValidation {
617624
)
618625
)
619626
or
620-
exists(string row, string k, string kind | summaryModel(row) |
621-
k = row.splitAt(";", 8) and
622-
getKind(k, kind, _) and
627+
exists(string row, string kind | summaryModel(row) |
628+
kind = row.splitAt(";", 8) and
623629
not kind = ["taint", "value"] and
624630
msg = "Invalid kind \"" + kind + "\" in summary model."
625631
)

java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,25 @@ DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) {
5555
exists(rk)
5656
}
5757

58+
bindingset[provenance]
59+
private boolean isGenerated(string provenance) {
60+
provenance = "generated" and result = true
61+
or
62+
provenance != "generated" and result = false
63+
}
64+
5865
/**
5966
* Holds if an external flow summary exists for `c` with input specification
6067
* `input`, output specification `output`, kind `kind`, and a flag `generated`
6168
* stating whether the summary is autogenerated.
6269
*/
6370
predicate summaryElement(Callable c, string input, string output, string kind, boolean generated) {
6471
exists(
65-
string namespace, string type, boolean subtypes, string name, string signature, string ext
72+
string namespace, string type, boolean subtypes, string name, string signature, string ext,
73+
string provenance
6674
|
67-
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, generated) and
75+
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and
76+
generated = isGenerated(provenance) and
6877
c = interpretElement(namespace, type, subtypes, name, signature, ext)
6978
)
7079
}
@@ -122,9 +131,11 @@ class SourceOrSinkElement = Top;
122131
*/
123132
predicate sourceElement(SourceOrSinkElement e, string output, string kind, boolean generated) {
124133
exists(
125-
string namespace, string type, boolean subtypes, string name, string signature, string ext
134+
string namespace, string type, boolean subtypes, string name, string signature, string ext,
135+
string provenance
126136
|
127-
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, generated) and
137+
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and
138+
generated = isGenerated(provenance) and
128139
e = interpretElement(namespace, type, subtypes, name, signature, ext)
129140
)
130141
}
@@ -136,9 +147,11 @@ predicate sourceElement(SourceOrSinkElement e, string output, string kind, boole
136147
*/
137148
predicate sinkElement(SourceOrSinkElement e, string input, string kind, boolean generated) {
138149
exists(
139-
string namespace, string type, boolean subtypes, string name, string signature, string ext
150+
string namespace, string type, boolean subtypes, string name, string signature, string ext,
151+
string provenance
140152
|
141-
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, generated) and
153+
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance) and
154+
generated = isGenerated(provenance) and
142155
e = interpretElement(namespace, type, subtypes, name, signature, ext)
143156
)
144157
}

0 commit comments

Comments
 (0)