Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4a2ab49

Browse files
am0o0tausbn
authored and committed
Better structure for pandas DataFrame: it is now much more readable, and we can also find many more DataFrame objects
1 parent 8b93e81 commit 4a2ab49

1 file changed

Lines changed: 81 additions & 76 deletions

File tree

python/ql/lib/semmle/python/frameworks/Pandas.qll

Lines changed: 81 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -35,96 +35,99 @@ private module Pandas {
3535
override string getFormat() { result = "pickle" }
3636
}
3737

38+
/**
39+
* Provides security related models for `pandas.DataFrame`.
40+
* See https://pandas.pydata.org/docs/reference/frame.html
41+
*/
3842
module DataFrame {
3943
/**
4044
* A `pandas.DataFrame` Object.
45+
*
46+
* Extend this class to model new APIs.
4147
* See https://pandas.pydata.org/docs/reference/frame.html
4248
*/
43-
abstract class Range extends API::Node {
49+
abstract class DataFrame extends API::Node {
4450
override string toString() { result = this.(API::Node).toString() }
4551
}
46-
}
4752

48-
/**
49-
* The `pandas.DataFrame` Objects including secondary `pandas.DataFrame` Objects.
50-
* Use this class where you want to find all `pandas.DataFrame` Objects.
51-
* See https://pandas.pydata.org/pandas-docs/stable/reference/frame.html
52-
*/
53-
class DataFrame extends API::Node {
54-
DataFrame() {
55-
this = any(DataFrame::Range df)
56-
or
57-
exists(API::Node dataFrame | dataFrame = any(DataFrame::Range df) |
53+
/**
54+
* A `pandas.DataFrame` instantiation.
55+
* See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
56+
*/
57+
class DataFrameConstructor extends DataFrame {
58+
DataFrameConstructor() {
59+
this = API::moduleImport("pandas").getMember("DataFrame").getReturn()
60+
}
61+
}
62+
63+
/**
64+
* The `pandas.read_*` functions that return a `pandas.DataFrame`.
65+
* See https://pandas.pydata.org/docs/reference/io.html
66+
*/
67+
class InputRead extends DataFrame {
68+
InputRead() {
5869
this =
59-
dataFrame
70+
API::moduleImport("pandas")
6071
.getMember([
61-
"copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
62-
"set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
63-
"mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
64-
"rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg", "combine",
65-
"apply", "aggregate", "transform", "all", "any", "clip", "corr", "cov", "cummax",
66-
"cummin", "cumprod", "describe", "mode", "pct_change", "quantile", "rank",
67-
"round", "sem", "add_prefix", "add_suffix", "at_time", "between_time", "drop",
68-
"drop_duplicates", "filter", "first", "head", "idxmin", "last", "reindex",
69-
"reindex_like", "reset_index", "sample", "set_axis", "tail", "take", "truncate",
70-
"bfill", "dropna", "ffill", "fillna", "interpolate", "isna", "isnull", "notna",
71-
"notnull", "pad", "replace", "droplevel", "pivot", "pivot_table",
72-
"reorder_levels", "sort_values", "sort_index", "nlargest", "nsmallest",
73-
"swaplevel", "stack", "unstack", "isnull", "notna", "notnull", "replace",
74-
"droplevel", "pivot", "pivot_table", "reorder_levels", "sort_values",
75-
"sort_index", "nlargest", "nsmallest", "swaplevel", "stack", "unstack", "melt",
76-
"explode", "squeeze", "T", "transpose", "compare", "join", "from_spmatrix",
77-
"shift", "asof", "merge", "from_dict", "tz_convert", "to_period", "asfreq",
78-
"to_dense", "tz_localize", "box", "__dataframe__"
72+
"read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard",
73+
"read_excel", "read_xml", "read_parquet", "read_orc", "read_spss",
74+
"read_sql_table", "read_sql_query", "read_sql", "read_gbq", "read_stata"
7975
])
8076
.getReturn()
81-
)
77+
or
78+
this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
79+
or
80+
exists(API::Node readSas, API::CallNode readSasCall |
81+
readSas = API::moduleImport("pandas").getMember("read_sas") and
82+
this = readSas.getReturn() and
83+
readSasCall = readSas.getACall()
84+
|
85+
// Returns a DataFrame if iterator=False and chunksize=None; these are also the defaults, so a call with default values returns a DataFrame.
86+
(
87+
not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral)
88+
instanceof True
89+
or
90+
not exists(readSasCall.getParameter(5, "iterator").asSink())
91+
) and
92+
not exists(
93+
readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
94+
)
95+
)
96+
}
8297
}
8398

84-
override string toString() { result = this.(API::Node).toString() }
85-
}
86-
87-
/**
88-
* A `pandas.DataFrame` instantiation.
89-
* See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
90-
*/
91-
class DataFrameConstructor extends DataFrame::Range {
92-
DataFrameConstructor() { this = API::moduleImport("pandas").getMember("DataFrame").getReturn() }
93-
}
94-
95-
/**
96-
* The `pandas.read_*` functions that return a `pandas.DataFrame`.
97-
* See https://pandas.pydata.org/docs/reference/io.html
98-
*/
99-
class InputRead extends DataFrame::Range {
100-
InputRead() {
101-
this =
102-
API::moduleImport("pandas")
103-
.getMember([
104-
"read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard", "read_excel",
105-
"read_xml", "read_parquet", "read_orc", "read_spss", "read_sql_table",
106-
"read_sql_query", "read_sql", "read_gbq", "read_stata"
107-
])
108-
.getReturn()
109-
or
110-
this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
111-
or
112-
exists(API::Node readSas, API::CallNode readSasCall |
113-
readSas = API::moduleImport("pandas").getMember("read_sas") and
114-
this = readSas.getReturn() and
115-
readSasCall = readSas.getACall()
116-
|
117-
// Returns DataFrame if iterator=False and chunksize=None, With default values it returns DataFrame.
118-
(
119-
not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral) instanceof
120-
True
121-
or
122-
not exists(readSasCall.getParameter(5, "iterator").asSink())
123-
) and
124-
not exists(
125-
readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
99+
/**
100+
* The `pandas.DataFrame.*` methods that return a `pandas.DataFrame` object.
101+
* See https://pandas.pydata.org/docs/reference/frame.html
102+
*/
103+
class DataFrameMethods extends DataFrame {
104+
DataFrameMethods() {
105+
exists(API::Node dataFrame | dataFrame = any(DataFrame df) |
106+
this =
107+
dataFrame
108+
.getMember([
109+
"copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
110+
"set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
111+
"mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
112+
"rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg",
113+
"combine", "apply", "aggregate", "transform", "all", "any", "clip", "corr",
114+
"cov", "cummax", "cummin", "cumprod", "describe", "mode", "pct_change",
115+
"quantile", "rank", "round", "sem", "add_prefix", "add_suffix", "at_time",
116+
"between_time", "drop", "drop_duplicates", "filter", "first", "head", "idxmin",
117+
"last", "reindex", "reindex_like", "reset_index", "sample", "set_axis", "tail",
118+
"take", "truncate", "bfill", "dropna", "ffill", "fillna", "interpolate", "isna",
119+
"isnull", "notna", "notnull", "pad", "replace", "droplevel", "pivot",
120+
"pivot_table", "reorder_levels", "sort_values", "sort_index", "nlargest",
121+
"nsmallest", "swaplevel", "stack", "unstack", "isnull", "notna", "notnull",
122+
"replace", "droplevel", "pivot", "pivot_table", "reorder_levels", "sort_values",
123+
"sort_index", "nlargest", "nsmallest", "swaplevel", "stack", "unstack", "melt",
124+
"explode", "squeeze", "T", "transpose", "compare", "join", "from_spmatrix",
125+
"shift", "asof", "merge", "from_dict", "tz_convert", "to_period", "asfreq",
126+
"to_dense", "tz_localize", "box", "__dataframe__"
127+
])
128+
.getReturn()
126129
)
127-
)
130+
}
128131
}
129132
}
130133

@@ -134,7 +137,9 @@ private module Pandas {
134137
* https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.eval.html
135138
*/
136139
class DataFlowQueryCall extends CodeExecution::Range, API::CallNode {
137-
DataFlowQueryCall() { this = any(DataFrame df).getMember(["query", "eval"]).getACall() }
140+
DataFlowQueryCall() {
141+
this = any(DataFrame::DataFrame df).getMember(["query", "eval"]).getACall()
142+
}
138143

139144
override DataFlow::Node getCode() { result = this.getParameter(0, "expr").asSink() }
140145
}

0 commit comments

Comments
 (0)