@@ -15,12 +15,14 @@ private import semmle.python.Concepts
1515private import semmle.python.frameworks.PEP249:: PEP249 as PEP249
1616
1717/**
18+ * INTERNAL: Do not use.
19+ *
1820 * Provides models for the `SQLAlchemy` PyPI package.
1921 * See
2022 * - https://pypi.org/project/SQLAlchemy/
2123 * - https://docs.sqlalchemy.org/en/14/index.html
2224 */
23- private module SqlAlchemy {
25+ module SqlAlchemy {
2426 /**
2527 * Provides models for the `sqlalchemy.engine.Engine` and `sqlalchemy.future.Engine` classes.
2628 *
@@ -279,80 +281,62 @@ private module SqlAlchemy {
279281 }
280282
281283 /**
282- * Additional taint-steps for `sqlalchemy.text()`
284+ * Provides models for the `sqlalchemy.sql.expression.TextClause` class,
285+ * which represents a textual SQL string directly.
283286 *
284- * See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
285- * See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
286- */
287- class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking:: AdditionalTaintStep {
288- override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
289- exists ( DataFlow:: CallCfgNode call |
290- (
291- call = API:: moduleImport ( "sqlalchemy" ) .getMember ( "text" ) .getACall ( )
292- or
293- call = API:: moduleImport ( "sqlalchemy" ) .getMember ( "sql" ) .getMember ( "text" ) .getACall ( )
294- or
295- call =
296- API:: moduleImport ( "sqlalchemy" )
297- .getMember ( "sql" )
298- .getMember ( "expression" )
299- .getMember ( "text" )
300- .getACall ( )
301- or
302- call =
303- API:: moduleImport ( "sqlalchemy" )
304- .getMember ( "sql" )
305- .getMember ( "expression" )
306- .getMember ( "TextClause" )
307- .getACall ( )
308- ) and
309- nodeFrom in [ call .getArg ( 0 ) , call .getArgByName ( "text" ) ] and
310- nodeTo = call
311- )
312- }
313- }
314- }
315-
316- private module OldModeling {
317- /**
318- * Returns an instantization of a SqlAlchemy Session object.
319- * See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and
320- * https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker
321- */
322- private API:: Node getSqlAlchemySessionInstance ( ) {
323- result = API:: moduleImport ( "sqlalchemy.orm" ) .getMember ( "Session" ) .getReturn ( ) or
324- result = API:: moduleImport ( "sqlalchemy.orm" ) .getMember ( "sessionmaker" ) .getReturn ( ) .getReturn ( )
325- }
326-
327- /**
328- * Returns an instantization of a SqlAlchemy Query object.
329- * See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
287+ * ```py
288+ * session.query(For14).filter_by(description=sqlalchemy.text(f"'{user_input}'")).all()
289+ * ```
290+ *
291+ * Initially I wanted to add lots of additional taint steps, such that the normal
292+ * SQL injection query would be able to find cases like the one above, where an ORM
293+ * query includes a TextClause that includes user-input directly... But that presented 2
294+ * problems:
295+ *
296+ * - Which part of the query construction above should be marked as SQL to fit our
297+ * `SqlExecution` concept? Nothing really fits this well, since all the SQL
298+ * execution happens under the hood.
299+ * - This would require a LOT of modeling for these additional taint steps, since
300+ * there are many many constructs we would need to have models for. (see the 2
301+ * examples below)
302+ *
303+ * So instead we flag user-input to a TextClause with its own query
304+ * (`py/sqlalchemy-textclause-injection`). And so we don't highlight any parts of an
305+ * ORM constructed query such as these as containing SQL, and don't need the additional
306+ * taint steps either.
307+ *
308+ * See
309+ * - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
310+ * - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
330311 */
331- private API:: Node getSqlAlchemyQueryInstance ( ) {
332- result = getSqlAlchemySessionInstance ( ) .getMember ( "query" ) .getReturn ( )
333- }
312+ module TextClause {
313+ /**
314+ * A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
315+ * textual SQL string directly.
316+ */
317+ class TextClauseConstruction extends DataFlow:: CallCfgNode {
318+ TextClauseConstruction ( ) {
319+ this = API:: moduleImport ( "sqlalchemy" ) .getMember ( "text" ) .getACall ( )
320+ or
321+ this = API:: moduleImport ( "sqlalchemy" ) .getMember ( "sql" ) .getMember ( "text" ) .getACall ( )
322+ or
323+ this =
324+ API:: moduleImport ( "sqlalchemy" )
325+ .getMember ( "sql" )
326+ .getMember ( "expression" )
327+ .getMember ( "text" )
328+ .getACall ( )
329+ or
330+ this =
331+ API:: moduleImport ( "sqlalchemy" )
332+ .getMember ( "sql" )
333+ .getMember ( "expression" )
334+ .getMember ( "TextClause" )
335+ .getACall ( )
336+ }
334337
335- /**
336- * A call on a Query object
337- * See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
338- */
339- private class SqlAlchemyQueryCall extends DataFlow:: CallCfgNode , SqlExecution:: Range {
340- SqlAlchemyQueryCall ( ) {
341- this =
342- getSqlAlchemyQueryInstance ( )
343- .getMember ( any ( SqlAlchemyVulnerableMethodNames methodName ) )
344- .getACall ( )
338+ /** Gets the argument that specifies the SQL text. */
339+ DataFlow:: Node getTextArg ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "text" ) ] }
345340 }
346-
347- override DataFlow:: Node getSql ( ) { result = this .getArg ( 0 ) }
348- }
349-
350- /**
351- * This class represents a list of methods vulnerable to sql injection.
352- *
353- * See https://github.com/jty-team/codeql/pull/2#issue-611592361
354- */
355- private class SqlAlchemyVulnerableMethodNames extends string {
356- SqlAlchemyVulnerableMethodNames ( ) { this in [ "filter" , "filter_by" , "group_by" , "order_by" ] }
357341 }
358342}
0 commit comments