From c968a95fef4d63f3fa976dc9d8f1c9d25ae8d63e Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Fri, 6 Jun 2025 18:06:13 +0100 Subject: [PATCH 1/2] Ruby: generate overlay discard predicates --- .../codeql/ruby/ast/internal/TreeSitter.qll | 52 +++++ .../src/generator/mod.rs | 29 ++- .../tree-sitter-extractor/src/generator/ql.rs | 25 +++ .../src/generator/ql_gen.rs | 183 ++++++++++++++++++ 4 files changed, 287 insertions(+), 2 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll b/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll index e339b07d35b9..3532a5d2a21f 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll @@ -5,6 +5,10 @@ import codeql.Locations as L +/** Holds if the database is an overlay. */ +overlay[local] +private predicate isOverlay() { databaseMetadata("isOverlay", "true") } + module Ruby { /** The base class for all AST nodes */ class AstNode extends @ruby_ast_node { @@ -48,6 +52,30 @@ module Ruby { final override string getAPrimaryQlClass() { result = "ReservedWord" } } + /** Gets the file containing the given `node`. */ + overlay[local] + private @file getNodeFile(@ruby_ast_node node) { + exists(@location_default loc | ruby_ast_node_location(node, loc) | + locations_default(loc, result, _, _, _, _) + ) + } + + /** Holds if `file` was extracted as part of the overlay database. */ + overlay[local] + private predicate discardFile(@file file) { isOverlay() and file = getNodeFile(_) } + + /** Holds if `node` is in the `file` and is part of the overlay base database. */ + overlay[local] + private predicate discardableAstNode(@file file, @ruby_ast_node node) { + not isOverlay() and file = getNodeFile(node) + } + + /** Holds if `node` should be discarded, because it is part of the overlay base and is in a file that was also extracted as part of the overlay database. */ + overlay[discard_entity] + private predicate discardAstNode(@ruby_ast_node node) { + exists(@file file | discardableAstNode(file, node) and discardFile(file)) + } + class UnderscoreArg extends @ruby_underscore_arg, AstNode { } class UnderscoreCallOperator extends @ruby_underscore_call_operator, AstNode { } @@ -1970,6 +1998,30 @@ module Erb { final override string getAPrimaryQlClass() { result = "ReservedWord" } } + /** Gets the file containing the given `node`. */ + overlay[local] + private @file getNodeFile(@erb_ast_node node) { + exists(@location_default loc | erb_ast_node_location(node, loc) | + locations_default(loc, result, _, _, _, _) + ) + } + + /** Holds if `file` was extracted as part of the overlay database. */ + overlay[local] + private predicate discardFile(@file file) { isOverlay() and file = getNodeFile(_) } + + /** Holds if `node` is in the `file` and is part of the overlay base database. */ + overlay[local] + private predicate discardableAstNode(@file file, @erb_ast_node node) { + not isOverlay() and file = getNodeFile(node) + } + + /** Holds if `node` should be discarded, because it is part of the overlay base and is in a file that was also extracted as part of the overlay database. */ + overlay[discard_entity] + private predicate discardAstNode(@erb_ast_node node) { + exists(@file file | discardableAstNode(file, node) and discardFile(file)) + } + /** A class representing `code` tokens. */ class Code extends @erb_token_code, Token { /** Gets the name of the primary QL class for this element. */ diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index cb35f7ab7682..fc0abc7f2737 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -17,7 +17,7 @@ pub fn generate( languages: Vec, dbscheme_path: PathBuf, ql_library_path: PathBuf, - add_metadata_relation: bool, + overlay_support: bool, ) -> std::io::Result<()> { let dbscheme_file = File::create(dbscheme_path).map_err(|e| { tracing::error!("Failed to create dbscheme file: {}", e); @@ -35,7 +35,7 @@ pub fn generate( // Eventually all languages will have the metadata relation (for overlay support), at which // point this could be moved to prefix.dbscheme. - if add_metadata_relation { + if overlay_support { writeln!(dbscheme_writer, "/*- Database metadata -*/",)?; dbscheme::write( &mut dbscheme_writer, @@ -60,6 +60,15 @@ pub fn generate( })], )?; + if overlay_support { + ql::write( + &mut ql_writer, + &[ql::TopLevel::Predicate( + ql_gen::create_is_overlay_predicate(), + )], + )?; + } + for language in languages { let prefix = node_types::to_snake_case(&language.name); let ast_node_name = format!("{}_ast_node", &prefix); @@ -103,6 +112,22 @@ pub fn generate( ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)), ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)), ]; + + if overlay_support { + body.push(ql::TopLevel::Predicate( + ql_gen::create_get_node_file_predicate(&ast_node_name, &node_location_table_name), + )); + body.push(ql::TopLevel::Predicate( + ql_gen::create_discard_file_predicate(), + )); + body.push(ql::TopLevel::Predicate( + ql_gen::create_discardable_ast_node_predicate(&ast_node_name), + )); + body.push(ql::TopLevel::Predicate( + ql_gen::create_discard_ast_node_predicate(&ast_node_name), + )); + } + body.append(&mut ql_gen::convert_nodes(&nodes)); ql::write( &mut ql_writer, diff --git a/shared/tree-sitter-extractor/src/generator/ql.rs b/shared/tree-sitter-extractor/src/generator/ql.rs index 8e899462ac39..e4c87b61bdb8 100644 --- a/shared/tree-sitter-extractor/src/generator/ql.rs +++ b/shared/tree-sitter-extractor/src/generator/ql.rs @@ -6,6 +6,7 @@ pub enum TopLevel<'a> { Class(Class<'a>), Import(Import<'a>), Module(Module<'a>), + Predicate(Predicate<'a>), } impl fmt::Display for TopLevel<'_> { @@ -14,6 +15,7 @@ impl fmt::Display for TopLevel<'_> { TopLevel::Import(imp) => write!(f, "{}", imp), TopLevel::Class(cls) => write!(f, "{}", cls), TopLevel::Module(m) => write!(f, "{}", m), + TopLevel::Predicate(pred) => write!(f, "{}", pred), } } } @@ -68,10 +70,12 @@ impl fmt::Display for Class<'_> { qldoc: None, name: self.name, overridden: false, + is_private: false, is_final: false, return_type: None, formal_parameters: vec![], body: charpred.clone(), + overlay: None, } )?; } @@ -150,6 +154,7 @@ pub enum Expression<'a> { expr: Box>, second_expr: Option>>, }, + Negation(Box>), } impl fmt::Display for Expression<'_> { @@ -231,19 +236,28 @@ impl fmt::Display for Expression<'_> { } write!(f, ")") } + Expression::Negation(e) => write!(f, "not ({})", e), } } } +#[derive(Clone, Eq, PartialEq, Hash)] +pub enum OverlayAnnotation { + Local, + DiscardEntity, +} + #[derive(Clone, Eq, PartialEq, Hash)] pub struct Predicate<'a> { pub qldoc: Option, pub name: &'a str, pub overridden: bool, + pub is_private: bool, pub is_final: bool, pub return_type: Option>, pub formal_parameters: Vec>, pub body: Expression<'a>, + pub overlay: Option, } impl fmt::Display for Predicate<'_> { @@ -251,6 +265,17 @@ impl fmt::Display for Predicate<'_> { if let Some(qldoc) = &self.qldoc { write!(f, "/** {} */", qldoc)?; } + if let Some(overlay_annotation) = &self.overlay { + write!(f, "overlay[")?; + match overlay_annotation { + OverlayAnnotation::Local => write!(f, "local")?, + OverlayAnnotation::DiscardEntity => write!(f, "discard_entity")?, + } + write!(f, "] ")?; + } + if self.is_private { + write!(f, "private ")?; + } if self.is_final { write!(f, "final ")?; } diff --git a/shared/tree-sitter-extractor/src/generator/ql_gen.rs b/shared/tree-sitter-extractor/src/generator/ql_gen.rs index 919ff43af428..32da103e5fbd 100644 --- a/shared/tree-sitter-extractor/src/generator/ql_gen.rs +++ b/shared/tree-sitter-extractor/src/generator/ql_gen.rs @@ -16,6 +16,7 @@ pub fn create_ast_node_class<'a>( )), name: "toString", overridden: false, + is_private: false, is_final: false, return_type: Some(ql::Type::String), formal_parameters: vec![], @@ -27,11 +28,13 @@ pub fn create_ast_node_class<'a>( vec![], )), ), + overlay: None, }; let get_location = ql::Predicate { name: "getLocation", qldoc: Some(String::from("Gets the location of this element.")), overridden: false, + is_private: false, is_final: true, return_type: Some(ql::Type::Normal("L::Location")), formal_parameters: vec![], @@ -39,6 +42,7 @@ pub fn create_ast_node_class<'a>( node_location_table, vec![ql::Expression::Var("this"), ql::Expression::Var("result")], ), + overlay: None, }; let get_a_field_or_child = create_none_predicate( Some(String::from("Gets a field or child node of this node.")), @@ -50,6 +54,7 @@ pub fn create_ast_node_class<'a>( qldoc: Some(String::from("Gets the parent of this element.")), name: "getParent", overridden: false, + is_private: false, is_final: true, return_type: Some(ql::Type::Normal("AstNode")), formal_parameters: vec![], @@ -61,6 +66,7 @@ pub fn create_ast_node_class<'a>( ql::Expression::Var("_"), ], ), + overlay: None, }; let get_parent_index = ql::Predicate { qldoc: Some(String::from( @@ -68,6 +74,7 @@ pub fn create_ast_node_class<'a>( )), name: "getParentIndex", overridden: false, + is_private: false, is_final: true, return_type: Some(ql::Type::Int), formal_parameters: vec![], @@ -79,6 +86,7 @@ pub fn create_ast_node_class<'a>( ql::Expression::Var("result"), ], ), + overlay: None, }; let get_a_primary_ql_class = ql::Predicate { qldoc: Some(String::from( @@ -86,6 +94,7 @@ pub fn create_ast_node_class<'a>( )), name: "getAPrimaryQlClass", overridden: false, + is_private: false, is_final: false, return_type: Some(ql::Type::String), formal_parameters: vec![], @@ -93,6 +102,7 @@ pub fn create_ast_node_class<'a>( Box::new(ql::Expression::Var("result")), Box::new(ql::Expression::String("???")), ), + overlay: None, }; let get_primary_ql_classes = ql::Predicate { qldoc: Some( @@ -102,6 +112,7 @@ pub fn create_ast_node_class<'a>( ), name: "getPrimaryQlClasses", overridden: false, + is_private: false, is_final: false, return_type: Some(ql::Type::String), formal_parameters: vec![], @@ -119,6 +130,7 @@ pub fn create_ast_node_class<'a>( second_expr: Some(Box::new(ql::Expression::String(","))), }), ), + overlay: None, }; ql::Class { qldoc: Some(String::from("The base class for all AST nodes")), @@ -144,10 +156,12 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl qldoc: Some(String::from("Gets the value of this token.")), name: "getValue", overridden: false, + is_private: false, is_final: true, return_type: Some(ql::Type::String), formal_parameters: vec![], body: create_get_field_expr_for_column_storage("result", tokeninfo, 1, tokeninfo_arity), + overlay: None, }; let to_string = ql::Predicate { qldoc: Some(String::from( @@ -155,6 +169,7 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl )), name: "toString", overridden: true, + is_private: false, is_final: true, return_type: Some(ql::Type::String), formal_parameters: vec![], @@ -166,6 +181,7 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl vec![], )), ), + overlay: None, }; ql::Class { qldoc: Some(String::from("A token.")), @@ -210,10 +226,12 @@ fn create_none_predicate<'a>( qldoc, name, overridden, + is_private: false, is_final: false, return_type, formal_parameters: Vec::new(), body: ql::Expression::Pred("none", vec![]), + overlay: None, } } @@ -226,6 +244,7 @@ fn create_get_a_primary_ql_class(class_name: &str, is_final: bool) -> ql::Predic )), name: "getAPrimaryQlClass", overridden: true, + is_private: false, is_final, return_type: Some(ql::Type::String), formal_parameters: vec![], @@ -233,6 +252,166 @@ fn create_get_a_primary_ql_class(class_name: &str, is_final: bool) -> ql::Predic Box::new(ql::Expression::Var("result")), Box::new(ql::Expression::String(class_name)), ), + overlay: None, + } +} + +pub fn create_is_overlay_predicate() -> ql::Predicate<'static> { + ql::Predicate { + name: "isOverlay", + qldoc: Some(String::from("Holds if the database is an overlay.")), + overridden: false, + is_private: true, + is_final: false, + return_type: None, + overlay: Some(ql::OverlayAnnotation::Local), + formal_parameters: vec![], + body: ql::Expression::Pred( + "databaseMetadata", + vec![ + ql::Expression::String("isOverlay"), + ql::Expression::String("true"), + ], + ), + } +} + +pub fn create_get_node_file_predicate<'a>( + ast_node_name: &'a str, + node_location_table_name: &'a str, +) -> ql::Predicate<'a> { + ql::Predicate { + name: "getNodeFile", + qldoc: Some(String::from("Gets the file containing the given `node`.")), + overridden: false, + is_private: true, + is_final: false, + overlay: Some(ql::OverlayAnnotation::Local), + return_type: Some(ql::Type::At("file")), + formal_parameters: vec![ql::FormalParameter { + name: "node", + param_type: ql::Type::At(ast_node_name), + }], + body: ql::Expression::Aggregate { + name: "exists", + vars: vec![ql::FormalParameter { + name: "loc", + param_type: ql::Type::At("location_default"), + }], + range: Some(Box::new(ql::Expression::Pred( + node_location_table_name, + vec![ql::Expression::Var("node"), ql::Expression::Var("loc")], + ))), + expr: Box::new(ql::Expression::Pred( + "locations_default", + vec![ + ql::Expression::Var("loc"), + ql::Expression::Var("result"), + ql::Expression::Var("_"), + ql::Expression::Var("_"), + ql::Expression::Var("_"), + ql::Expression::Var("_"), + ], + )), + second_expr: None, + }, + } +} + +pub fn create_discard_file_predicate<'a>() -> ql::Predicate<'a> { + ql::Predicate { + name: "discardFile", + qldoc: Some(String::from( + "Holds if `file` was extracted as part of the overlay database.", + )), + overridden: false, + is_private: true, + is_final: false, + overlay: Some(ql::OverlayAnnotation::Local), + return_type: None, + formal_parameters: vec![ql::FormalParameter { + name: "file", + param_type: ql::Type::At("file"), + }], + body: ql::Expression::And(vec![ + ql::Expression::Pred("isOverlay", vec![]), + ql::Expression::Equals( + Box::new(ql::Expression::Var("file")), + Box::new(ql::Expression::Pred( + "getNodeFile", + vec![ql::Expression::Var("_")], + )), + ), + ]), + } +} + +pub fn create_discardable_ast_node_predicate<'a>(ast_node_name: &'a str) -> ql::Predicate<'a> { + ql::Predicate { + name: "discardableAstNode", + qldoc: Some(String::from( + "Holds if `node` is in the `file` and is part of the overlay base database.", + )), + overridden: false, + is_private: true, + is_final: false, + overlay: Some(ql::OverlayAnnotation::Local), + return_type: None, + formal_parameters: vec![ + ql::FormalParameter { + name: "file", + param_type: ql::Type::At("file"), + }, + ql::FormalParameter { + name: "node", + param_type: ql::Type::At(ast_node_name), + }, + ], + body: ql::Expression::And(vec![ + ql::Expression::Negation(Box::new(ql::Expression::Pred("isOverlay", vec![]))), + ql::Expression::Equals( + Box::new(ql::Expression::Var("file")), + Box::new(ql::Expression::Pred( + "getNodeFile", + vec![ql::Expression::Var("node")], + )), + ), + ]), + } +} + +pub fn create_discard_ast_node_predicate<'a>(ast_node_name: &'a str) -> ql::Predicate<'a> { + ql::Predicate { + name: "discardAstNode", + qldoc: Some(String::from( + "Holds if `node` should be discarded, because it is part of the overlay base \ + and is in a file that was also extracted as part of the overlay database.", + )), + overridden: false, + is_private: true, + is_final: false, + overlay: Some(ql::OverlayAnnotation::DiscardEntity), + return_type: None, + formal_parameters: vec![ql::FormalParameter { + name: "node", + param_type: ql::Type::At(ast_node_name), + }], + body: ql::Expression::Aggregate { + name: "exists", + vars: vec![ql::FormalParameter { + name: "file", + param_type: ql::Type::At("file"), + }], + range: None, + expr: Box::new(ql::Expression::And(vec![ + ql::Expression::Pred( + "discardableAstNode", + vec![ql::Expression::Var("file"), ql::Expression::Var("node")], + ), + ql::Expression::Pred("discardFile", vec![ql::Expression::Var("file")]), + ])), + second_expr: None, + }, } } @@ -435,10 +614,12 @@ fn create_field_getters<'a>( qldoc: Some(qldoc), name: &field.getter_name, overridden: false, + is_private: false, is_final: true, return_type, formal_parameters, body, + overlay: None, }, optional_expr, ) @@ -548,10 +729,12 @@ pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec { qldoc: Some(String::from("Gets a field or child node of this node.")), name: "getAFieldOrChild", overridden: true, + is_private: false, is_final: true, return_type: Some(ql::Type::Normal("AstNode")), formal_parameters: vec![], body: ql::Expression::Or(get_child_exprs), + overlay: None, }); classes.push(ql::TopLevel::Class(main_class)); From b96dc5ab92475e7e1ad50cd9650e997a523549ba Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 11 Jun 2025 14:09:29 +0100 Subject: [PATCH 2/2] Ruby: enable overlay compilation --- ruby/ql/lib/qlpack.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index ab4215ced208..556b07a49f8b 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -17,3 +17,4 @@ dataExtensions: - codeql/ruby/frameworks/**/model.yml - codeql/ruby/frameworks/**/*.model.yml warnOnImplicitThis: true +compileForOverlayEval: true