Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d986bea

Browse files
authored
Merge pull request #238 from github/aibaars/extract-erb
Extract ERB tags
2 parents e757d2e + 00a0b93 commit d986bea

53 files changed

Lines changed: 41549 additions & 10666 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ jobs:
4747
run: cargo build --release
4848
- name: Generate dbscheme
4949
if: ${{ matrix.os == 'ubuntu-latest' }}
50-
run: target/release/ruby-generator
50+
run: target/release/ruby-generator --dbscheme ql/src/ruby.dbscheme --library ql/src/codeql_ruby/ast/internal/TreeSitter.qll
5151
- uses: actions/upload-artifact@v2
5252
if: ${{ matrix.os == 'ubuntu-latest' }}
5353
with:

.github/workflows/dataset_measure.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ jobs:
7373
path: stats
7474
- run: |
7575
python -m pip install --user lxml
76-
find stats -name 'stats.xml' | sort | xargs python scripts/merge_stats.py --output ql/src/ruby.dbscheme.stats --normalise tokeninfo
76+
find stats -name 'stats.xml' | sort | xargs python scripts/merge_stats.py --output ql/src/ruby.dbscheme.stats --normalise ruby_tokeninfo
7777
- uses: actions/upload-artifact@v2
7878
with:
7979
name: ruby.dbscheme.stats

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The generated `ql/src/ruby.dbscheme` and `ql/src/codeql_ruby/ast/internal/TreeSi
1616

1717
```bash
1818
# Run the generator
19-
cargo run --release -p ruby-generator
19+
cargo run --release -p ruby-generator -- --dbscheme ql/src/ruby.dbscheme --library ql/src/codeql_ruby/ast/internal/TreeSitter.qll
2020
# Then auto-format the QL library
2121
codeql query format -i ql/src/codeql_ruby/ast/internal/TreeSitter.qll
2222
```

create-extractor-pack.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cargo build --release
22

3-
cargo run --release -p ruby-generator
3+
cargo run --release -p ruby-generator -- --dbscheme ql/src/ruby.dbscheme --library ql/src/codeql_ruby/ast/internal/TreeSitter.qll
44
codeql query format -i ql\src\codeql_ruby\ast\internal\TreeSitter.qll
55

66
rm -Recurse -Force extractor-pack

create-extractor-pack.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ fi
1212

1313
cargo build --release
1414

15-
cargo run --release -p ruby-generator
15+
cargo run --release -p ruby-generator -- --dbscheme ql/src/ruby.dbscheme --library ql/src/codeql_ruby/ast/internal/TreeSitter.qll
1616
codeql query format -i ql/src/codeql_ruby/ast/internal/TreeSitter.qll
1717

1818
rm -rf extractor-pack

extractor/src/extractor.rs

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@ use std::borrow::Cow;
33
use std::collections::BTreeMap as Map;
44
use std::collections::BTreeSet as Set;
55
use std::fmt;
6+
use std::io::Write;
67
use std::path::Path;
8+
79
use tracing::{error, info, span, Level};
810
use tree_sitter::{Language, Node, Parser, Range, Tree};
911

10-
struct TrapWriter {
12+
pub struct TrapWriter {
1113
/// The accumulated trap entries
1214
trap_output: Vec<TrapEntry>,
1315
/// A counter for generating fresh labels
@@ -16,7 +18,7 @@ struct TrapWriter {
1618
global_keys: std::collections::HashMap<String, Label>,
1719
}
1820

19-
fn new_trap_writer() -> TrapWriter {
21+
pub fn new_trap_writer() -> TrapWriter {
2022
TrapWriter {
2123
counter: 0,
2224
trap_output: Vec::new(),
@@ -147,16 +149,22 @@ impl TrapWriter {
147149
fn comment(&mut self, text: String) {
148150
self.trap_output.push(TrapEntry::Comment(text));
149151
}
152+
153+
pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> {
154+
write!(writer, "{}", Program(self.trap_output))
155+
}
150156
}
151157

152158
/// Extracts the source file at `path`, which is assumed to be canonicalized.
153159
pub fn extract(
154160
language: Language,
161+
language_prefix: &str,
155162
schema: &NodeTypeMap,
163+
trap_writer: &mut TrapWriter,
156164
path: &Path,
157165
source: &Vec<u8>,
158166
ranges: &[Range],
159-
) -> std::io::Result<Program> {
167+
) -> std::io::Result<()> {
160168
let span = span!(
161169
Level::TRACE,
162170
"extract",
@@ -171,8 +179,7 @@ pub fn extract(
171179
parser.set_language(language).unwrap();
172180
parser.set_included_ranges(&ranges).unwrap();
173181
let tree = parser.parse(&source, None).expect("Failed to parse file");
174-
let mut trap_writer = new_trap_writer();
175-
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
182+
&trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
176183
let file_label = &trap_writer.populate_file(path);
177184
let mut visitor = Visitor {
178185
source: &source,
@@ -183,12 +190,13 @@ pub fn extract(
183190
token_counter: 0,
184191
toplevel_child_counter: 0,
185192
stack: Vec::new(),
193+
language_prefix,
186194
schema,
187195
};
188196
traverse(&tree, &mut visitor);
189197

190198
parser.reset();
191-
Ok(Program(visitor.trap_writer.trap_output))
199+
Ok(())
192200
}
193201

194202
/// Escapes a string for use in a TRAP key, by replacing special characters with
@@ -288,11 +296,13 @@ struct Visitor<'a> {
288296
/// The source code as a UTF-8 byte array
289297
source: &'a Vec<u8>,
290298
/// A TrapWriter to accumulate trap entries
291-
trap_writer: TrapWriter,
299+
trap_writer: &'a mut TrapWriter,
292300
/// A counter for tokens
293301
token_counter: usize,
294302
/// A counter for top-level child nodes
295303
toplevel_child_counter: usize,
304+
/// Language prefix
305+
language_prefix: &'a str,
296306
/// A lookup table from type name to node types
297307
schema: &'a NodeTypeMap,
298308
/// A stack for gathering information from child nodes. Whenever a node is
@@ -400,15 +410,15 @@ impl Visitor<'_> {
400410
match &table.kind {
401411
EntryKind::Token { kind_id, .. } => {
402412
self.trap_writer.add_tuple(
403-
"ast_node_parent",
413+
&format!("{}_ast_node_parent", self.language_prefix),
404414
vec![
405415
Arg::Label(id),
406416
Arg::Label(parent_id),
407417
Arg::Int(parent_index),
408418
],
409419
);
410420
self.trap_writer.add_tuple(
411-
"tokeninfo",
421+
&format!("{}_tokeninfo", self.language_prefix),
412422
vec![
413423
Arg::Label(id),
414424
Arg::Int(*kind_id),
@@ -426,7 +436,7 @@ impl Visitor<'_> {
426436
} => {
427437
if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) {
428438
self.trap_writer.add_tuple(
429-
"ast_node_parent",
439+
&format!("{}_ast_node_parent", self.language_prefix),
430440
vec![
431441
Arg::Label(id),
432442
Arg::Label(parent_id),

extractor/src/main.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use clap;
66
use flate2::write::GzEncoder;
77
use rayon::prelude::*;
88
use std::fs;
9-
use std::io::{BufRead, BufWriter, Write};
9+
use std::io::{BufRead, BufWriter};
1010
use std::path::{Path, PathBuf};
1111
use tree_sitter::{Language, Parser, Range};
1212

@@ -124,7 +124,9 @@ fn main() -> std::io::Result<()> {
124124

125125
let language = tree_sitter_ruby::language();
126126
let erb = tree_sitter_embedded_template::language();
127-
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
127+
let schema = node_types::read_node_types_str("ruby", tree_sitter_ruby::NODE_TYPES)?;
128+
let erb_schema =
129+
node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?;
128130
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
129131
let lines = lines?;
130132
lines.par_iter().try_for_each(|line| {
@@ -133,8 +135,19 @@ fn main() -> std::io::Result<()> {
133135
let src_archive_file = path_for(&src_archive_dir, &path, "");
134136
let mut source = std::fs::read(&path)?;
135137
let code_ranges;
138+
let mut trap_writer = extractor::new_trap_writer();
136139
if path.extension().map_or(false, |x| x == "erb") {
137140
tracing::info!("scanning: {}", path.display());
141+
extractor::extract(
142+
erb,
143+
"erb",
144+
&erb_schema,
145+
&mut trap_writer,
146+
&path,
147+
&source,
148+
&[],
149+
)?;
150+
138151
let (ranges, line_breaks) = scan_erb(erb, &source);
139152
for i in line_breaks {
140153
if i < source.len() {
@@ -145,17 +158,25 @@ fn main() -> std::io::Result<()> {
145158
} else {
146159
code_ranges = vec![];
147160
}
148-
let trap = extractor::extract(language, &schema, &path, &source, &code_ranges)?;
161+
extractor::extract(
162+
language,
163+
"ruby",
164+
&schema,
165+
&mut trap_writer,
166+
&path,
167+
&source,
168+
&code_ranges,
169+
)?;
149170
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
150171
std::fs::copy(&path, &src_archive_file)?;
151172
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
152173
let trap_file = std::fs::File::create(&trap_file)?;
153174
let mut trap_file = BufWriter::new(trap_file);
154175
match trap_compression {
155-
TrapCompression::None => write!(trap_file, "{}", trap),
176+
TrapCompression::None => trap_writer.output(&mut trap_file),
156177
TrapCompression::Gzip => {
157178
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
158-
write!(compressed_writer, "{}", trap)
179+
trap_writer.output(&mut compressed_writer)
159180
}
160181
}
161182
})

generator/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ edition = "2018"
77
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
88

99
[dependencies]
10+
clap = "2.33"
1011
node-types = { path = "../node-types" }
1112
tracing = "0.1"
1213
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
14+
tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template", rev = "d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" }
1315
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "32cd5a04adb4accb0c121f037ab59df3c3488228" }

generator/src/dbscheme.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,7 @@ impl<'a> fmt::Display for Union<'a> {
117117
}
118118

119119
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
120-
pub fn write<'a>(
121-
language_name: &str,
122-
file: &mut dyn std::io::Write,
123-
entries: &'a [Entry],
124-
) -> std::io::Result<()> {
125-
write!(file, "// CodeQL database schema for {}\n", language_name)?;
126-
write!(
127-
file,
128-
"// Automatically generated from the tree-sitter grammar; do not edit\n\n"
129-
)?;
130-
120+
pub fn write<'a>(file: &mut dyn std::io::Write, entries: &'a [Entry]) -> std::io::Result<()> {
131121
for entry in entries {
132122
match entry {
133123
Entry::Case(case) => write!(file, "{}\n\n", case)?,

0 commit comments

Comments
 (0)