1616import java .nio .file .attribute .BasicFileAttributes ;
1717import java .util .ArrayList ;
1818import java .util .Arrays ;
19+ import java .util .LinkedHashMap ;
1920import java .util .LinkedHashSet ;
2021import java .util .List ;
22+ import java .util .Map ;
2123import java .util .Set ;
2224import java .util .concurrent .ExecutorService ;
2325import java .util .concurrent .Executors ;
5860 * </ul>
5961 *
6062 * <p>
63+ * Additionally, the following environment variables may be set to customise extraction
64+ * (explained in more detail below):
65+ * </p>
66+ *
67+ * <ul>
68+ * <li><code>LGTM_INDEX_INCLUDE</code>: a newline-separated list of paths to include</li>
69+ * <li><code>LGTM_INDEX_EXCLUDE</code>: a newline-separated list of paths to exclude</li>
70+ * <li><code>LGTM_REPOSITORY_FOLDERS_CSV</code>: the path of a CSV file containing file classifications</li>
71+ * <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
72+ * patterns that can be used to refine the list of files to include and exclude</li>
73+ * <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
74+ * <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
75+ * specifying which {@link FileType} to use for the given extension</li>
76+ * <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
77+ * <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
78+ * <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
79+ * </ul>
80+ *
81+ * <p>
6182 * It extracts the following:
6283 * </p>
6384 *
143164 * </p>
144165 *
145166 * <p>
167+ * The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
168+ * list of file type specifications of the form <code>.extension:filetype</code>, causing all
169+ * files whose name ends in <code>.extension</code> to also be included by default.
170+ * </p>
171+ *
172+ * <p>
146173 * The default exclusion patterns cause the following files to be excluded:
147174 * </p>
148175 * <ul>
157184 * </p>
158185 *
159186 * <p>
187+ * The file type used to extract a given file can be customised via the
188+ * <code>LGTM_INDEX_FILETYPES</code> environment variable explained above.
189+ * </p>
190+ *
191+ * <p>
160192 * Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
161193 * externs is not customisable.
162194 * </p>
176208public class AutoBuild {
177209 private final ExtractorOutputConfig outputConfig ;
178210 private final ITrapCache trapCache ;
211+ private final Map <String , FileType > fileTypes = new LinkedHashMap <>();
179212 private final Set <Path > includes = new LinkedHashSet <>();
180213 private final Set <Path > excludes = new LinkedHashSet <>();
181214 private ProjectLayout filters ;
@@ -191,6 +224,7 @@ public AutoBuild() {
191224 this .trapCache = mkTrapCache ();
192225 this .typeScriptMode = getEnumFromEnvVar ("LGTM_INDEX_TYPESCRIPT" , TypeScriptMode .class , TypeScriptMode .BASIC );
193226 this .defaultEncoding = getEnvVar ("LGTM_INDEX_DEFAULT_ENCODING" );
227+ setupFileTypes ();
194228 setupMatchers ();
195229 }
196230
@@ -260,6 +294,25 @@ private ITrapCache mkTrapCache() {
260294 return trapCache ;
261295 }
262296
297+ private void setupFileTypes () {
298+ for (String spec : Main .NEWLINE .split (getEnvVar ("LGTM_INDEX_FILETYPES" , "" ))) {
299+ spec = spec .trim ();
300+ if (spec .isEmpty ())
301+ continue ;
302+ String [] fields = spec .split (":" );
303+ if (fields .length != 2 )
304+ continue ;
305+ String extension = fields [0 ].trim ();
306+ String fileType = fields [1 ].trim ();
307+ try {
308+ fileTypes .put (extension , FileType .valueOf (StringUtil .uc (fileType )));
309+ } catch (IllegalArgumentException e ) {
310+ Exceptions .ignore (e , "We construct a better error message." );
311+ throw new UserError ("Invalid file type '" + fileType + "'." );
312+ }
313+ }
314+ }
315+
263316 /**
264317 * Set up include and exclude matchers based on environment variables.
265318 */
@@ -333,6 +386,10 @@ private void setupFilters() {
333386 patterns .add ("**/.eslintrc*" );
334387 patterns .add ("**/package.json" );
335388
389+ // include any explicitly specified extensions
390+ for (String extension : fileTypes .keySet ())
391+ patterns .add ("**/*" + extension );
392+
336393 // exclude files whose name strongly suggests they are minified
337394 patterns .add ("-**/*.min.js" );
338395 patterns .add ("-**/*-min.js" );
@@ -466,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
466523 * Extract all supported candidate files that pass the filters.
467524 */
468525 private void extractSource () throws IOException {
469- ExtractorConfig config = new ExtractorConfig (true );
470- config = config .withSourceType (getSourceType ());
471- config = config .withTypeScriptMode (typeScriptMode );
472- if (defaultEncoding != null )
473- config = config .withDefaultEncoding (defaultEncoding );
474- FileExtractor extractor = new FileExtractor (config , outputConfig , trapCache );
526+ // default extractor
527+ FileExtractor defaultExtractor = new FileExtractor (mkExtractorConfig (), outputConfig , trapCache );
528+
529+ // custom extractor for explicitly specified file types
530+ Map <String , FileExtractor > customExtractors = new LinkedHashMap <>();
531+ for (Map .Entry <String , FileType > spec : fileTypes .entrySet ()) {
532+ String extension = spec .getKey ();
533+ String fileType = spec .getValue ().name ();
534+ ExtractorConfig extractorConfig = mkExtractorConfig ().withFileType (fileType );
535+ customExtractors .put (extension , new FileExtractor (extractorConfig , outputConfig , trapCache ));
536+ }
475537
476538 Set <Path > filesToExtract = new LinkedHashSet <>();
477539 List <Path > tsconfigFiles = new ArrayList <>();
478- findFilesToExtract (extractor , filesToExtract , tsconfigFiles );
540+ findFilesToExtract (defaultExtractor , filesToExtract , tsconfigFiles );
479541
480542 // extract TypeScript projects and files
481- Set <Path > extractedFiles = extractTypeScript (extractor , filesToExtract , tsconfigFiles );
543+ Set <Path > extractedFiles = extractTypeScript (defaultExtractor , filesToExtract , tsconfigFiles );
482544
483545 // extract remaining files
484546 for (Path f : filesToExtract ) {
485547 if (extractedFiles .add (f )) {
548+ FileExtractor extractor = defaultExtractor ;
549+ if (!fileTypes .isEmpty ()) {
550+ String extension = FileUtil .extension (f );
551+ if (customExtractors .containsKey (extension ))
552+ extractor = customExtractors .get (extension );
553+ }
486554 extract (extractor , f , null );
487555 }
488556 }
489557 }
490558
559+ private ExtractorConfig mkExtractorConfig () {
560+ ExtractorConfig config = new ExtractorConfig (true );
561+ config = config .withSourceType (getSourceType ());
562+ config = config .withTypeScriptMode (typeScriptMode );
563+ if (defaultEncoding != null )
564+ config = config .withDefaultEncoding (defaultEncoding );
565+ return config ;
566+ }
567+
491568 private Set <Path > extractTypeScript (FileExtractor extractor , Set <Path > files , List <Path > tsconfig ) {
492569 Set <Path > extractedFiles = new LinkedHashSet <>();
493570
@@ -574,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
574651 return FileVisitResult .SKIP_SUBTREE ;
575652
576653 // extract files that are supported and pass the include/exclude patterns
577- if (extractor .supports (file .toFile ()) && isFileIncluded (file )) {
654+ boolean supported = extractor .supports (file .toFile ());
655+ if (!supported && !fileTypes .isEmpty ()) {
656+ supported = fileTypes .containsKey (FileUtil .extension (file ));
657+ }
658+ if (supported && isFileIncluded (file )) {
578659 filesToExtract .add (normalizePath (file ));
579660 }
580661
0 commit comments