Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 57a588c

Browse files
committed
JS: Share extraction results via symlinks
1 parent c84e43d commit 57a588c

4 files changed

Lines changed: 55 additions & 18 deletions

File tree

javascript/extractor/src/com/semmle/js/dependencies/AsyncFetcher.java

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
package com.semmle.js.dependencies;
22

33
import java.io.IOException;
4+
import java.nio.file.Files;
45
import java.nio.file.Path;
56
import java.util.LinkedHashMap;
7+
import java.util.List;
68
import java.util.Map;
79
import java.util.concurrent.CompletableFuture;
810
import java.util.concurrent.CompletionException;
@@ -68,17 +70,51 @@ public CompletableFuture<Packument> getPackument(String packageName) {
6870
});
6971
}
7072

73+
/** Result of a tarball extraction */
74+
class ExtractionResult {
75+
/** The directory into which the tarball was extracted. */
76+
Path destDir;
77+
78+
/** Files created by the extraction, relative to <code>destDir</code>. */
79+
List<Path> relativePaths;
80+
81+
ExtractionResult(Path destDir, List<Path> relativePaths) {
82+
this.destDir = destDir;
83+
this.relativePaths = relativePaths;
84+
}
85+
}
86+
87+
private CachedOperation<String, ExtractionResult> tarballExtractions = new CachedOperation<>();
88+
7189
/**
7290
* Extracts the relevant contents of the given tarball URL in the given folder;
7391
* the returned future completes when done.
7492
*/
7593
public CompletableFuture<Void> installFromTarballUrl(String tarballUrl, Path destDir) {
76-
return CompletableFuture.runAsync(() -> {
94+
return tarballExtractions.get(tarballUrl, () -> {
7795
try {
78-
fetcher.extractFromTarballUrl(tarballUrl, destDir);
96+
List<Path> relativePaths = fetcher.extractFromTarballUrl(tarballUrl, destDir);
97+
return new ExtractionResult(destDir, relativePaths);
7998
} catch (IOException e) {
8099
throw makeError("Could not install package from " + tarballUrl, e);
81100
}
82-
}, executor);
101+
}).thenAccept(extractionResult -> {
102+
if (!extractionResult.destDir.equals(destDir)) {
103+
// We've been asked to extract the same tarball into multiple directories (due to multiple package.json files).
104+
// Symlink files from the original directory instead of extracting again.
105+
// In principle we could symlink the whole directory, but directory symlinks are hard to create in a portable way.
106+
System.out.println("Creating symlink farm from " + destDir + " to " + extractionResult.destDir);
107+
for (Path relativePath : extractionResult.relativePaths) {
108+
Path originalFile = extractionResult.destDir.resolve(relativePath);
109+
Path newFile = destDir.resolve(relativePath);
110+
try {
111+
fetcher.mkdirp(newFile.getParent());
112+
Files.createSymbolicLink(newFile, originalFile);
113+
} catch (IOException e) {
114+
throw makeError("Failed to create symlink " + newFile + " -> " + originalFile, e);
115+
}
116+
}
117+
}
118+
});
83119
}
84120
}

javascript/extractor/src/com/semmle/js/dependencies/DependencyResolver.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
import java.util.Map;
1313
import java.util.Set;
1414
import java.util.concurrent.CompletableFuture;
15-
import java.util.concurrent.CompletionException;
1615
import java.util.concurrent.ExecutorService;
1716
import java.util.concurrent.Executors;
1817

@@ -45,15 +44,11 @@ String getTargetPackageName() {
4544
}
4645
}
4746

48-
public DependencyResolver(ExecutorService threadPool, Set<String> packagesInRepo) {
49-
this.fetcher = new AsyncFetcher(threadPool, this::reportError);
47+
public DependencyResolver(AsyncFetcher fetcher, Set<String> packagesInRepo) {
48+
this.fetcher = fetcher;
5049
this.packagesInRepo = packagesInRepo;
5150
}
5251

53-
private void reportError(CompletionException ex) {
54-
System.err.println(ex);
55-
}
56-
5752
private void addConstraint(Constraint constraint) {
5853
synchronized(constraints) {
5954
constraints.add(constraint);
@@ -207,7 +202,7 @@ public CompletableFuture<Void> installDependencies(PackageJson rootPackage, Path
207202
public static void main(String[] args) throws IOException {
208203
ExecutorService executors = Executors.newFixedThreadPool(50);
209204
try {
210-
DependencyResolver resolver = new DependencyResolver(executors, Collections.emptySet());
205+
DependencyResolver resolver = new DependencyResolver(new AsyncFetcher(executors, err -> { System.err.println(err); }), Collections.emptySet());
211206
for (String packageJsonPath : args) {
212207
Path path = Paths.get(packageJsonPath).toAbsolutePath();
213208
PackageJson packageJson = new Gson().fromJson(Files.newBufferedReader(path), PackageJson.class);

javascript/extractor/src/com/semmle/js/dependencies/Fetcher.java

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
import java.nio.file.Files;
1515
import java.nio.file.Path;
1616
import java.nio.file.Paths;
17+
import java.util.ArrayList;
18+
import java.util.List;
1719
import java.util.regex.Pattern;
1820

1921
import com.google.gson.Gson;
@@ -35,7 +37,7 @@ public class Fetcher {
3537
private Object mkdirpLock = new Object();
3638

3739
/** Creates the given directory and its parent directories. Only one thread is allowed to create directories at once. */
38-
private void mkdirp(Path dir) throws IOException {
40+
public void mkdirp(Path dir) throws IOException {
3941
synchronized (mkdirpLock) {
4042
Files.createDirectories(dir);
4143
}
@@ -88,14 +90,17 @@ public Packument getPackument(String packageName) throws IOException {
8890

8991
/**
9092
* Extracts the package at the given tarball URL into the given directory.
91-
*
93+
* <p>
9294
* Only `package.json` and `.d.ts` files are extracted.
95+
*
96+
* @return paths of the files created by this call, relative to <code>destDir</code>
9397
*/
94-
public void extractFromTarballUrl(String tarballUrl, Path destDir) throws IOException {
98+
public List<Path> extractFromTarballUrl(String tarballUrl, Path destDir) throws IOException {
9599
if (!tarballUrl.startsWith("https://registry.npmjs.org/") || !tarballUrl.endsWith(".tgz")) { // Paranoid check
96100
throw new IOException("Tarball URL has unexpected format: " + tarballUrl);
97101
}
98102
System.out.println("Unpacking " + tarballUrl + " to " + destDir);
103+
List<Path> relativePaths = new ArrayList<>();
99104
try (InputStream rawStream = new URL(tarballUrl).openStream()) {
100105
// Despite having the .tgz extension, the file is not always gzipped, sometimes it's just a raw tar archive,
101106
// regardless of what Accept-Encoding header we send.
@@ -129,12 +134,14 @@ public void extractFromTarballUrl(String tarballUrl, Path destDir) throws IOExce
129134
if (!filename.endsWith(".d.ts") && !filename.equals("package.json")) {
130135
continue; // Only extract .d.ts files and package.json
131136
}
137+
relativePaths.add(entryPath);
132138
Path outputFile = destDir.resolve(entryPath);
133139
mkdirp(outputFile.getParent());
134140
try (OutputStream output = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
135141
IOUtils.copy(stream, output);
136142
}
137143
}
138144
}
145+
return relativePaths;
139146
}
140147
}

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,8 @@
3232
import java.util.stream.Stream;
3333

3434
import com.google.gson.Gson;
35-
import com.google.gson.JsonElement;
36-
import com.google.gson.JsonObject;
3735
import com.google.gson.JsonParseException;
38-
import com.google.gson.JsonPrimitive;
36+
import com.semmle.js.dependencies.AsyncFetcher;
3937
import com.semmle.js.dependencies.DependencyResolver;
4038
import com.semmle.js.dependencies.packument.PackageJson;
4139
import com.semmle.js.extractor.ExtractorConfig.SourceType;
@@ -791,12 +789,13 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
791789
// Use more threads for dependency installation than for extraction, as this is mainly I/O bound and we want
792790
// many concurrent HTTP requests.
793791
ExecutorService installationThreadPool = Executors.newFixedThreadPool(50);
792+
AsyncFetcher fetcher = new AsyncFetcher(installationThreadPool, err -> { System.err.println(err); });
794793
try {
795794
List<CompletableFuture<Void>> futures = new ArrayList<>();
796795
packageJsonFiles.forEach((file, packageJson) -> {
797796
Path virtualFile = virtualSourceRoot.toVirtualFile(file);
798797
Path nodeModulesDir = virtualFile.getParent().resolve("node_modules");
799-
futures.add(new DependencyResolver(installationThreadPool, packagesInRepo.keySet()).installDependencies(packageJson, nodeModulesDir));
798+
futures.add(new DependencyResolver(fetcher, packagesInRepo.keySet()).installDependencies(packageJson, nodeModulesDir));
800799
});
801800
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
802801
} finally {

0 commit comments

Comments
 (0)