1- #[ macro_use]
2- extern crate lazy_static;
3-
4- use clap:: arg;
1+ use clap:: Args ;
2+ use lazy_static:: lazy_static;
53use rayon:: prelude:: * ;
64use std:: borrow:: Cow ;
75use std:: fs;
@@ -11,23 +9,22 @@ use tree_sitter::{Language, Parser, Range};
119
1210use codeql_extractor:: { diagnostics, extractor, file_paths, node_types, trap} ;
1311
14- lazy_static ! {
15- static ref CP_NUMBER : regex:: Regex = regex:: Regex :: new( "cp([0-9]+)" ) . unwrap( ) ;
16- }
12+ #[ derive( Args ) ]
13+ pub struct Options {
14+ /// Sets a custom source archive folder
15+ #[ arg( long) ]
16+ source_archive_dir : String ,
1717
18- /// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists.
19- fn encoding_from_name ( encoding_name : & str ) -> Option < & ( dyn encoding:: Encoding + Send + Sync ) > {
20- match encoding:: label:: encoding_from_whatwg_label ( encoding_name) {
21- s @ Some ( _) => s,
22- None => CP_NUMBER . captures ( encoding_name) . and_then ( |cap| {
23- encoding:: label:: encoding_from_windows_code_page (
24- str:: parse ( cap. get ( 1 ) . unwrap ( ) . as_str ( ) ) . unwrap ( ) ,
25- )
26- } ) ,
27- }
18+ /// Sets a custom trap folder
19+ #[ arg( long) ]
20+ output_dir : String ,
21+
22+ /// A text file containing the paths of the files to extract
23+ #[ arg( long) ]
24+ file_list : String ,
2825}
2926
30- fn main ( ) -> std:: io:: Result < ( ) > {
27+ pub fn run ( options : Options ) -> std:: io:: Result < ( ) > {
3128 tracing_subscriber:: fmt ( )
3229 . with_target ( false )
3330 . without_time ( )
@@ -82,29 +79,11 @@ fn main() -> std::io::Result<()> {
8279 . build_global ( )
8380 . unwrap ( ) ;
8481
85- let matches = clap:: Command :: new ( "Ruby extractor" )
86- . version ( "1.0" )
87- . author ( "GitHub" )
88- . about ( "CodeQL Ruby extractor" )
89- . arg ( arg ! ( --"source-archive-dir" <DIR > "Sets a custom source archive folder" ) )
90- . arg ( arg ! ( --"output-dir" <DIR > "Sets a custom trap folder" ) )
91- . arg ( arg ! ( --"file-list" <FILE_LIST > "A text file containing the paths of the files to extract" ) )
92- . get_matches ( ) ;
82+ let src_archive_dir = file_paths:: path_from_string ( & options. source_archive_dir ) ;
9383
94- let src_archive_dir = matches
95- . get_one :: < String > ( "source-archive-dir" )
96- . expect ( "missing --source-archive-dir" ) ;
97- let src_archive_dir = file_paths:: path_from_string ( src_archive_dir) ;
84+ let trap_dir = file_paths:: path_from_string ( & options. output_dir ) ;
9885
99- let trap_dir = matches
100- . get_one :: < String > ( "output-dir" )
101- . expect ( "missing --output-dir" ) ;
102- let trap_dir = file_paths:: path_from_string ( & trap_dir) ;
103-
104- let file_list = matches
105- . get_one :: < String > ( "file-list" )
106- . expect ( "missing --file-list" ) ;
107- let file_list = fs:: File :: open ( file_paths:: path_from_string ( & file_list) ) ?;
86+ let file_list = fs:: File :: open ( file_paths:: path_from_string ( & options. file_list ) ) ?;
10887
10988 let language = tree_sitter_ruby:: language ( ) ;
11089 let erb = tree_sitter_embedded_template:: language ( ) ;
@@ -242,6 +221,22 @@ fn main() -> std::io::Result<()> {
242221 write_trap ( & trap_dir, path, & trap_writer, trap_compression)
243222}
244223
224+ lazy_static ! {
225+ static ref CP_NUMBER : regex:: Regex = regex:: Regex :: new( "cp([0-9]+)" ) . unwrap( ) ;
226+ }
227+
228+ /// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists: the WHATWG label lookup is tried first, then a Windows code page taken from the first `cp<digits>` match (`CP_NUMBER`) in the name.
229+ fn encoding_from_name ( encoding_name : & str ) -> Option < & ( dyn encoding:: Encoding + Send + Sync ) > {
230+ match encoding:: label:: encoding_from_whatwg_label ( encoding_name) {
231+ s @ Some ( _) => s,
232+ None => CP_NUMBER . captures ( encoding_name) . and_then ( |cap| {
233+ encoding:: label:: encoding_from_windows_code_page (
234+ str:: parse ( cap. get ( 1 ) . unwrap ( ) . as_str ( ) ) . unwrap ( ) , // group 1 always participates in a match; NOTE(review): the outer `unwrap` panics if the digits overflow the parsed integer type
235+ )
236+ } ) ,
237+ }
238+ }
239+
245240fn write_trap (
246241 trap_dir : & Path ,
247242 path : PathBuf ,
@@ -373,67 +368,3 @@ fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
373368 }
374369 None
375370}
376-
377- #[ test]
378- fn test_scan_coding_comment ( ) {
379- let text = "# encoding: utf-8" ;
380- let result = scan_coding_comment ( text. as_bytes ( ) ) ;
381- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
382-
383- let text = "#coding:utf-8" ;
384- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
385- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
386-
387- let text = "# foo\n # encoding: utf-8" ;
388- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
389- assert_eq ! ( result, None ) ;
390-
391- let text = "# encoding: latin1 encoding: utf-8" ;
392- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
393- assert_eq ! ( result, Some ( "latin1" . into( ) ) ) ;
394-
395- let text = "# encoding: nonsense" ;
396- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
397- assert_eq ! ( result, Some ( "nonsense" . into( ) ) ) ;
398-
399- let text = "# coding = utf-8" ;
400- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
401- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
402-
403- let text = "# CODING = utf-8" ;
404- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
405- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
406-
407- let text = "# CoDiNg = utf-8" ;
408- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
409- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
410-
411- let text = "# blah blahblahcoding = utf-8" ;
412- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
413- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
414-
415- // unicode BOM is ignored
416- let text = "\u{FEFF} # encoding: utf-8" ;
417- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
418- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
419-
420- let text = "\u{FEFF} # encoding: utf-8" ;
421- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
422- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
423-
424- let text = "#! /usr/bin/env ruby\n # encoding: utf-8" ;
425- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
426- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
427-
428- let text = "\u{FEFF} #! /usr/bin/env ruby\n # encoding: utf-8" ;
429- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
430- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
431-
432- // A #! must be the first thing on a line, otherwise it's a normal comment
433- let text = " #! /usr/bin/env ruby encoding = utf-8" ;
434- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
435- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
436- let text = " #! /usr/bin/env ruby \n # encoding = utf-8" ;
437- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
438- assert_eq ! ( result, None ) ;
439- }
0 commit comments