Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9d77619

Browse files
author
Max Schaefer
committed
JavaScript: Make file types customisable in AutoBuild.
Every once in a while we encounter projects using some custom file extension for files that we could in principle extract, but since the extractor doesn't know about the extension the files are skipped. To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort. This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only. Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where `filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`. For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript. This can also be used to override default file types: for example, by specifying `.js:typescript` all JavaScript files will be extracted as TypeScript.
1 parent 2ed3790 commit 9d77619

2 files changed

Lines changed: 128 additions & 15 deletions

File tree

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import java.nio.file.attribute.BasicFileAttributes;
1717
import java.util.ArrayList;
1818
import java.util.Arrays;
19+
import java.util.LinkedHashMap;
1920
import java.util.LinkedHashSet;
2021
import java.util.List;
22+
import java.util.Map;
2123
import java.util.Set;
2224
import java.util.concurrent.ExecutorService;
2325
import java.util.concurrent.Executors;
@@ -69,6 +71,8 @@
6971
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
7072
* patterns that can be used to refine the list of files to include and exclude</li>
7173
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
74+
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
75+
* specifying which {@link FileType} to use for the given extension</li>
7276
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
7377
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
7478
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
@@ -160,6 +164,12 @@
160164
* </p>
161165
*
162166
* <p>
167+
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
168+
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
169+
* files whose name ends in <code>.extension</code> to also be included by default.
170+
* </p>
171+
*
172+
* <p>
163173
* The default exclusion patterns cause the following files to be excluded:
164174
* </p>
165175
* <ul>
@@ -174,6 +184,11 @@
174184
* </p>
175185
*
176186
* <p>
187+
* The file type with which a file is extracted can be customised via the <code>LGTM_INDEX_FILETYPES</code>
188+
* environment variable explained above.
189+
* </p>
190+
*
191+
* <p>
177192
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
178193
* externs is not customisable.
179194
* </p>
@@ -193,6 +208,7 @@
193208
public class AutoBuild {
194209
private final ExtractorOutputConfig outputConfig;
195210
private final ITrapCache trapCache;
211+
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
196212
private final Set<Path> includes = new LinkedHashSet<>();
197213
private final Set<Path> excludes = new LinkedHashSet<>();
198214
private ProjectLayout filters;
@@ -208,6 +224,7 @@ public AutoBuild() {
208224
this.trapCache = mkTrapCache();
209225
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
210226
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
227+
setupFileTypes();
211228
setupMatchers();
212229
}
213230

@@ -277,6 +294,25 @@ private ITrapCache mkTrapCache() {
277294
return trapCache;
278295
}
279296

297+
private void setupFileTypes() {
298+
for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
299+
spec = spec.trim();
300+
if (spec.isEmpty())
301+
continue;
302+
String[] fields = spec.split(":");
303+
if (fields.length != 2)
304+
continue;
305+
String extension = fields[0].trim();
306+
String fileType = fields[1].trim();
307+
try {
308+
fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
309+
} catch (IllegalArgumentException e) {
310+
Exceptions.ignore(e, "We construct a better error message.");
311+
throw new UserError("Invalid file type '" + fileType + "'.");
312+
}
313+
}
314+
}
315+
280316
/**
281317
* Set up include and exclude matchers based on environment variables.
282318
*/
@@ -350,6 +386,10 @@ private void setupFilters() {
350386
patterns.add("**/.eslintrc*");
351387
patterns.add("**/package.json");
352388

389+
// include any explicitly specified extensions
390+
for (String extension : fileTypes.keySet())
391+
patterns.add("**/*" + extension);
392+
353393
// exclude files whose name strongly suggests they are minified
354394
patterns.add("-**/*.min.js");
355395
patterns.add("-**/*-min.js");
@@ -483,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
483523
* Extract all supported candidate files that pass the filters.
484524
*/
485525
private void extractSource() throws IOException {
486-
ExtractorConfig config = new ExtractorConfig(true);
487-
config = config.withSourceType(getSourceType());
488-
config = config.withTypeScriptMode(typeScriptMode);
489-
if (defaultEncoding != null)
490-
config = config.withDefaultEncoding(defaultEncoding);
491-
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
526+
// default extractor
527+
FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
528+
529+
// custom extractor for explicitly specified file types
530+
Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
531+
for (Map.Entry<String, FileType> spec : fileTypes.entrySet()) {
532+
String extension = spec.getKey();
533+
String fileType = spec.getValue().name();
534+
ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
535+
customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
536+
}
492537

493538
Set<Path> filesToExtract = new LinkedHashSet<>();
494539
List<Path> tsconfigFiles = new ArrayList<>();
495-
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
540+
findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
496541

497542
// extract TypeScript projects and files
498-
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
543+
Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
499544

500545
// extract remaining files
501546
for (Path f : filesToExtract) {
502547
if (extractedFiles.add(f)) {
548+
FileExtractor extractor = defaultExtractor;
549+
if (!fileTypes.isEmpty()) {
550+
String extension = FileUtil.extension(f);
551+
if (customExtractors.containsKey(extension))
552+
extractor = customExtractors.get(extension);
553+
}
503554
extract(extractor, f, null);
504555
}
505556
}
506557
}
507558

559+
private ExtractorConfig mkExtractorConfig() {
560+
ExtractorConfig config = new ExtractorConfig(true);
561+
config = config.withSourceType(getSourceType());
562+
config = config.withTypeScriptMode(typeScriptMode);
563+
if (defaultEncoding != null)
564+
config = config.withDefaultEncoding(defaultEncoding);
565+
return config;
566+
}
567+
508568
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
509569
Set<Path> extractedFiles = new LinkedHashSet<>();
510570

@@ -591,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
591651
return FileVisitResult.SKIP_SUBTREE;
592652

593653
// extract files that are supported and pass the include/exclude patterns
594-
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
654+
boolean supported = extractor.supports(file.toFile());
655+
if (!supported && !fileTypes.isEmpty()) {
656+
supported = fileTypes.containsKey(FileUtil.extension(file));
657+
}
658+
if (supported && isFileIncluded(file)) {
595659
filesToExtract.add(normalizePath(file));
596660
}
597661

javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.semmle.js.extractor.AutoBuild;
2424
import com.semmle.js.extractor.ExtractorState;
2525
import com.semmle.js.extractor.FileExtractor;
26+
import com.semmle.js.extractor.FileExtractor.FileType;
2627
import com.semmle.util.data.StringUtil;
2728
import com.semmle.util.exception.UserError;
2829
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public void teardown() throws IOException {
7475
/**
7576
* Add a file under {@code root} that we either do or don't expect to be extracted,
7677
* depending on the value of {@code extracted}. If the file is expected to be
77-
* extracted, its path is added to {@link #expected}.
78+
* extracted, its path is added to {@link #expected}. If non-null, parameter
79+
* {@code fileType} indicates the file type with which we expect the file to be extracted.
80+
*/
81+
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
82+
Path f = addFile(root, components);
83+
if (extracted) {
84+
expected.add(f + (fileType == null ? "" : ":" + fileType.toString()));
85+
}
86+
return f;
87+
}
88+
89+
/**
90+
* Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
7891
*/
7992
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
93+
return addFile(extracted, null, root, components);
94+
}
95+
96+
/**
97+
* Create a file at the specified path under {@code root} and return it.
98+
*/
99+
private Path addFile(Path root, String... components) throws IOException {
80100
Path p = Paths.get(root.toString(), components);
81101
Files.createDirectories(p.getParent());
82-
Path f = Files.createFile(p);
83-
if (extracted)
84-
expected.add(f.toString());
85-
return f;
102+
return Files.createFile(p);
86103
}
87104

88105
/**
@@ -96,7 +113,10 @@ private void runTest() throws IOException {
96113
new AutoBuild() {
97114
@Override
98115
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
99-
actual.add(file.toString());
116+
String extracted = file.toString();
117+
if (extractor.getConfig().hasFileType())
118+
extracted += ":" + extractor.getFileType(file.toFile());
119+
actual.add(extracted);
100120
}
101121

102122
@Override
@@ -453,4 +473,33 @@ public void minifiedFilesCanBeReIncluded() throws IOException {
453473
addFile(true, LGTM_SRC, "compute_min.js");
454474
runTest();
455475
}
476+
477+
@Test
478+
public void customExtensions() throws IOException {
479+
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
480+
addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
481+
addFile(false, LGTM_SRC, "tstjsm");
482+
addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
483+
addFile(true, LGTM_SRC, "tst.html");
484+
addFile(true, LGTM_SRC, "tst.js");
485+
runTest();
486+
}
487+
488+
@Test
489+
public void overrideExtension() throws IOException {
490+
envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
491+
addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
492+
runTest();
493+
}
494+
495+
@Test
496+
public void invalidFileType() throws IOException {
497+
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
498+
try {
499+
runTest();
500+
Assert.fail("expected UserError");
501+
} catch (UserError ue) {
502+
Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
503+
}
504+
}
456505
}

0 commit comments

Comments
 (0)