Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 5478e0d

Browse files
authored
Merge pull request #998 from xiemaisi/js/autobuild-file-types
JavaScript: Make file types customisable in AutoBuild.
2 parents 2ecabad + 9d77619 commit 5478e0d

2 files changed

Lines changed: 145 additions & 15 deletions

File tree

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 90 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import java.nio.file.attribute.BasicFileAttributes;
1717
import java.util.ArrayList;
1818
import java.util.Arrays;
19+
import java.util.LinkedHashMap;
1920
import java.util.LinkedHashSet;
2021
import java.util.List;
22+
import java.util.Map;
2123
import java.util.Set;
2224
import java.util.concurrent.ExecutorService;
2325
import java.util.concurrent.Executors;
@@ -58,6 +60,25 @@
5860
* </ul>
5961
*
6062
* <p>
63+
* Additionally, the following environment variables may be set to customise extraction
64+
* (explained in more detail below):
65+
* </p>
66+
*
67+
* <ul>
68+
* <li><code>LGTM_INDEX_INCLUDE</code>: a newline-separated list of paths to include</li>
69+
* <li><code>LGTM_INDEX_EXCLUDE</code>: a newline-separated list of paths to exclude</li>
70+
* <li><code>LGTM_REPOSITORY_FOLDERS_CSV</code>: the path of a CSV file containing file classifications</li>
71+
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
72+
* patterns that can be used to refine the list of files to include and exclude</li>
73+
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
74+
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
75+
* specifying which {@link FileType} to use for the given extension</li>
76+
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
77+
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
78+
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
79+
* </ul>
80+
*
81+
* <p>
6182
* It extracts the following:
6283
* </p>
6384
*
@@ -143,6 +164,12 @@
143164
* </p>
144165
*
145166
* <p>
167+
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
168+
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
169+
* files whose name ends in <code>.extension</code> to also be included by default.
170+
* </p>
171+
*
172+
* <p>
146173
* The default exclusion patterns cause the following files to be excluded:
147174
* </p>
148175
* <ul>
@@ -157,6 +184,11 @@
157184
* </p>
158185
*
159186
* <p>
187+
* The file type with which a file is extracted can be customised via the <code>LGTM_INDEX_FILETYPES</code>
188+
* environment variable explained above.
189+
* </p>
190+
*
191+
* <p>
160192
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
161193
* externs is not customisable.
162194
* </p>
@@ -176,6 +208,7 @@
176208
public class AutoBuild {
177209
private final ExtractorOutputConfig outputConfig;
178210
private final ITrapCache trapCache;
211+
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
179212
private final Set<Path> includes = new LinkedHashSet<>();
180213
private final Set<Path> excludes = new LinkedHashSet<>();
181214
private ProjectLayout filters;
@@ -191,6 +224,7 @@ public AutoBuild() {
191224
this.trapCache = mkTrapCache();
192225
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
193226
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
227+
setupFileTypes();
194228
setupMatchers();
195229
}
196230

@@ -260,6 +294,25 @@ private ITrapCache mkTrapCache() {
260294
return trapCache;
261295
}
262296

297+
/**
 * Populate {@link #fileTypes} from the <code>LGTM_INDEX_FILETYPES</code> environment variable.
 *
 * <p>
 * Each non-empty entry is expected to have the form <code>.extension:filetype</code>. Entries
 * that do not split into exactly two fields on <code>:</code> are skipped without any error.
 * </p>
 *
 * @throws UserError if the file type part of an entry is rejected by {@link FileType#valueOf}
 */
private void setupFileTypes() {
298+
// presumably Main.NEWLINE splits on line breaks and "" is the default for an unset variable -- TODO confirm
for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
299+
spec = spec.trim();
300+
if (spec.isEmpty())
301+
continue;
302+
String[] fields = spec.split(":");
303+
// NOTE(review): entries without exactly one ':' separator are dropped silently
if (fields.length != 2)
304+
continue;
305+
String extension = fields[0].trim();
306+
String fileType = fields[1].trim();
307+
try {
308+
// StringUtil.uc presumably upper-cases the name so that e.g. "js" matches FileType.JS -- verify
fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
309+
} catch (IllegalArgumentException e) {
310+
Exceptions.ignore(e, "We construct a better error message.");
311+
// report the file type exactly as the user wrote it (trimmed, original case)
throw new UserError("Invalid file type '" + fileType + "'.");
312+
}
313+
}
314+
}
315+
263316
/**
264317
* Set up include and exclude matchers based on environment variables.
265318
*/
@@ -333,6 +386,10 @@ private void setupFilters() {
333386
patterns.add("**/.eslintrc*");
334387
patterns.add("**/package.json");
335388

389+
// include any explicitly specified extensions
390+
for (String extension : fileTypes.keySet())
391+
patterns.add("**/*" + extension);
392+
336393
// exclude files whose name strongly suggests they are minified
337394
patterns.add("-**/*.min.js");
338395
patterns.add("-**/*-min.js");
@@ -466,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
466523
* Extract all supported candidate files that pass the filters.
467524
*/
468525
private void extractSource() throws IOException {
469-
ExtractorConfig config = new ExtractorConfig(true);
470-
config = config.withSourceType(getSourceType());
471-
config = config.withTypeScriptMode(typeScriptMode);
472-
if (defaultEncoding != null)
473-
config = config.withDefaultEncoding(defaultEncoding);
474-
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
526+
// default extractor
527+
FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
528+
529+
// custom extractor for explicitly specified file types
530+
Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
531+
for (Map.Entry<String, FileType> spec : fileTypes.entrySet()) {
532+
String extension = spec.getKey();
533+
String fileType = spec.getValue().name();
534+
ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
535+
customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
536+
}
475537

476538
Set<Path> filesToExtract = new LinkedHashSet<>();
477539
List<Path> tsconfigFiles = new ArrayList<>();
478-
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
540+
findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
479541

480542
// extract TypeScript projects and files
481-
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
543+
Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
482544

483545
// extract remaining files
484546
for (Path f : filesToExtract) {
485547
if (extractedFiles.add(f)) {
548+
FileExtractor extractor = defaultExtractor;
549+
if (!fileTypes.isEmpty()) {
550+
String extension = FileUtil.extension(f);
551+
if (customExtractors.containsKey(extension))
552+
extractor = customExtractors.get(extension);
553+
}
486554
extract(extractor, f, null);
487555
}
488556
}
489557
}
490558

559+
/**
 * Build the extractor configuration shared by the default extractor and the custom
 * per-extension extractors: the source type from {@code getSourceType()}, the configured
 * {@code typeScriptMode} and, if one is set, the {@code defaultEncoding}.
 *
 * @return a configuration object created by this call
 */
private ExtractorConfig mkExtractorConfig() {
560+
// NOTE(review): the meaning of the 'true' constructor argument is not visible here -- confirm
ExtractorConfig config = new ExtractorConfig(true);
561+
config = config.withSourceType(getSourceType());
562+
config = config.withTypeScriptMode(typeScriptMode);
563+
// only apply an encoding if one was configured
if (defaultEncoding != null)
564+
config = config.withDefaultEncoding(defaultEncoding);
565+
return config;
566+
}
567+
491568
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
492569
Set<Path> extractedFiles = new LinkedHashSet<>();
493570

@@ -574,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
574651
return FileVisitResult.SKIP_SUBTREE;
575652

576653
// extract files that are supported and pass the include/exclude patterns
577-
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
654+
boolean supported = extractor.supports(file.toFile());
655+
if (!supported && !fileTypes.isEmpty()) {
656+
supported = fileTypes.containsKey(FileUtil.extension(file));
657+
}
658+
if (supported && isFileIncluded(file)) {
578659
filesToExtract.add(normalizePath(file));
579660
}
580661

javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.semmle.js.extractor.AutoBuild;
2424
import com.semmle.js.extractor.ExtractorState;
2525
import com.semmle.js.extractor.FileExtractor;
26+
import com.semmle.js.extractor.FileExtractor.FileType;
2627
import com.semmle.util.data.StringUtil;
2728
import com.semmle.util.exception.UserError;
2829
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public void teardown() throws IOException {
7475
/**
7576
* Add a file under {@code root} that we either do or don't expect to be extracted,
7677
* depending on the value of {@code extracted}. If the file is expected to be
77-
* extracted, its path is added to {@link #expected}.
78+
* extracted, its path is added to {@link #expected}. If non-null, parameter
79+
* {@code fileType} indicates the file type with which we expect the file to be extracted.
80+
*/
81+
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
82+
Path f = addFile(root, components);
83+
if (extracted) {
84+
// expectation is recorded as "path" or, when a file type is given, "path:TYPE"
expected.add(f + (fileType == null ? "" : ":" + fileType.toString()));
85+
}
86+
return f;
87+
}
88+
89+
/**
90+
* Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
7891
*/
7992
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
93+
// a null file type means the expectation is recorded without a ":TYPE" suffix
return addFile(extracted, null, root, components);
94+
}
95+
96+
/**
97+
* Create a file at the specified path under {@code root} and return it.
98+
*/
99+
private Path addFile(Path root, String... components) throws IOException {
80100
Path p = Paths.get(root.toString(), components);
81101
Files.createDirectories(p.getParent());
82-
Path f = Files.createFile(p);
83-
if (extracted)
84-
expected.add(f.toString());
85-
return f;
102+
return Files.createFile(p);
86103
}
87104

88105
/**
@@ -96,7 +113,10 @@ private void runTest() throws IOException {
96113
new AutoBuild() {
97114
@Override
98115
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
99-
actual.add(file.toString());
116+
String extracted = file.toString();
117+
if (extractor.getConfig().hasFileType())
118+
extracted += ":" + extractor.getFileType(file.toFile());
119+
actual.add(extracted);
100120
}
101121

102122
@Override
@@ -453,4 +473,33 @@ public void minifiedFilesCanBeReIncluded() throws IOException {
453473
addFile(true, LGTM_SRC, "compute_min.js");
454474
runTest();
455475
}
476+
477+
/**
 * Extensions listed in <code>LGTM_INDEX_FILETYPES</code> are expected to be extracted with the
 * given file type, while <code>.html</code> and <code>.js</code> files are still expected to
 * be extracted without an explicit file type.
 */
@Test
478+
public void customExtensions() throws IOException {
479+
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
480+
addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
481+
// name ends in "jsm" but has no ".jsm" extension, so it is not expected to be extracted
addFile(false, LGTM_SRC, "tstjsm");
482+
addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
483+
addFile(true, LGTM_SRC, "tst.html");
484+
addFile(true, LGTM_SRC, "tst.js");
485+
runTest();
486+
}
487+
488+
/**
 * Mapping the <code>.js</code> extension to <code>typescript</code> is expected to cause
 * <code>.js</code> files to be extracted with file type {@link FileType#TYPESCRIPT}.
 */
@Test
489+
public void overrideExtension() throws IOException {
490+
envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
491+
addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
492+
runTest();
493+
}
494+
495+
/**
 * A file type name in <code>LGTM_INDEX_FILETYPES</code> that is not accepted (here
 * <code>javascript</code>) is expected to produce a {@link UserError} whose message quotes
 * the offending name.
 */
@Test
497+
public void invalidFileType() throws IOException {
498+
envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
499+
try {
500+
runTest();
501+
// reaching this line means no UserError was thrown
Assert.fail("expected UserError");
502+
} catch (UserError ue) {
503+
Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
504+
}
505+
}
456505
}

0 commit comments

Comments
 (0)