Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 4ae6263

Browse files
committed
feat: extract html scripts at vite scanner plugin
1 parent 81e52d2 commit 4ae6263

4 files changed

Lines changed: 207 additions & 9 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/rolldown_plugin_vite_scanner/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ repository.workspace = true
1111
[dependencies]
1212
rolldown = { path = "../rolldown" }
1313
rolldown_error = { path = "../rolldown_error" }
14+
rolldown_fs = { path = "../rolldown_fs" }
1415
async-trait = { workspace = true }
1516
regex = { workspace = true }
1617
once_cell = { workspace = true }
18+
rustc-hash = { workspace = true }

crates/rolldown_plugin_vite_scanner/src/lib.rs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
use once_cell::sync::Lazy;
22
use regex::Regex;
33
use rolldown::{
4-
HookLoadArgs, HookLoadReturn, HookResolveIdArgs, HookResolveIdOutput, HookResolveIdReturn,
5-
Plugin, PluginContext,
4+
HookLoadArgs, HookLoadOutput, HookLoadReturn, HookResolveIdArgs, HookResolveIdOutput,
5+
HookResolveIdReturn, Plugin, PluginContext,
66
};
7-
use std::borrow::Cow;
8-
7+
use rolldown_fs::FileSystem;
8+
use std::{borrow::Cow, fmt::Debug, path::PathBuf};
9+
use util::extract_html_module_scripts;
10+
mod util;
911
static HTTP_URL_REGEX: Lazy<Regex> =
1012
Lazy::new(|| Regex::new(r"^(https?:)?\/\/").expect("Init HTTP_URL_REGEX failed"));
1113
static DATA_URL_REGEX: Lazy<Regex> =
@@ -29,15 +31,19 @@ static HTML_TYPE_REGEX: Lazy<Regex> = Lazy::new(|| {
2931
Regex::new(r"\.(html|vue|svelte|astro|imba)$").expect("Init HTML_TYPE_REGEX failed")
3032
});
3133

32-
#[derive(Debug)]
33-
pub struct ViteScannerPlugin {
34+
pub struct ViteScannerPlugin<T: FileSystem + Default> {
3435
pub entries: Vec<String>,
36+
pub fs: T,
3537
}
3638

37-
impl ViteScannerPlugin {}
39+
impl<T: FileSystem + 'static + Default> Debug for ViteScannerPlugin<T> {
40+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41+
f.debug_struct("ViteScannerPlugin").field("entries", &self.entries).finish()
42+
}
43+
}
3844

3945
#[async_trait::async_trait]
40-
impl Plugin for ViteScannerPlugin {
46+
impl<T: FileSystem + 'static + Default> Plugin for ViteScannerPlugin<T> {
4147
fn name(&self) -> Cow<'static, str> {
4248
"rolldown_plugin_vite_scanner".into()
4349
}
@@ -95,7 +101,11 @@ impl Plugin for ViteScannerPlugin {
95101

96102
// extract scripts inside HTML-like files and treat it as a js module
97103
if HTML_TYPE_REGEX.is_match(id) {
98-
// TODO
104+
let path = PathBuf::from(id);
105+
let content = self.fs.read_to_string(&path)?;
106+
// TODO store scripts
107+
let (content, _) = extract_html_module_scripts(content, path);
108+
return Ok(Some(HookLoadOutput { code: content }));
99109
}
100110

101111
Ok(None)
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
use std::{borrow::Cow, path::PathBuf};
2+
3+
use once_cell::sync::Lazy;
4+
use regex::{NoExpand, Regex};
5+
use rustc_hash::FxHashMap;
6+
7+
/// Matches an HTML comment (`<!-- ... -->`), non-greedily.
///
/// The `s` flag lets `.` match line breaks so that comments spanning several
/// lines are matched as a whole.
static COMMENT_REGEX: Lazy<Regex> =
  Lazy::new(|| Regex::new(r#"(?s)<!--.*?-->"#).expect("Init COMMENT_REGEX failed"));
9+
static SCRIPT_REGEX: Lazy<Regex> = Lazy::new(|| {
10+
Regex::new(r#"(<script(?:\s+[a-z_:][-\w:]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^"'<>=\s]+))?)*\s*>)(.*?)<\/script>"#).expect("Init SCRIPT_REGEX failed")
11+
});
12+
static SRC_REGEX: Lazy<Regex> = Lazy::new(|| {
13+
Regex::new(r#"(\bsrc\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s'">]+))"#).expect("Init SRC_REGEX failed")
14+
});
15+
static TYPE_REGEX: Lazy<Regex> = Lazy::new(|| {
16+
Regex::new(r#"\btype\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s'">]+))"#).expect("Init TYPE_REGEX failed")
17+
});
18+
static LANG_REGEX: Lazy<Regex> = Lazy::new(|| {
19+
Regex::new(r#"\blang\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s'">]+))"#).expect("Init LANG_REGEX failed")
20+
});
21+
static CONTEXT_REGEX: Lazy<Regex> = Lazy::new(|| {
22+
Regex::new(r#"\bcontext\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s'">]+))"#)
23+
.expect("Init CONTEXT_REGEX failed")
24+
});
25+
static MULTILINE_COMMENT_REGEX: Lazy<Regex> = Lazy::new(|| {
26+
Regex::new(r#"\/\*[^*]*\*+(?:[^/*][^*]*\*+)*\/"#).expect("Init MULTILINE_COMMENT_REGEX failed")
27+
});
28+
/// Matches a JS line comment: `//` up to (but not including) the end of the line.
static SINGLE_COMMENT_REGEX: Lazy<Regex> =
  Lazy::new(|| Regex::new(r#"\/\/.*"#).expect("Init SINGLE_COMMENT_REGEX failed"));
30+
// A simple regex to detect import sources. This is only used on
31+
// <script lang="ts"> blocks in vue (setup only) or svelte files, since
32+
// seemingly unused imports are dropped by bundler when transpiling TS which
33+
// prevents it from crawling further.
34+
static IMPORTS_FROM_BLOCK_REGEX: Lazy<Regex> = Lazy::new(|| {
35+
Regex::new(r#"import([\w*{}\n\r\t, ]+from)?\s*([\w"']*)"#)
36+
.expect("Init IMPORTS_FROM_BLOCK_REGEX failed")
37+
});
38+
/// Prefix prepended to a script's key to form the virtual module specifier
/// that the generated proxy module imports or re-exports.
static VIRTUAL_MODULE_PREFIX: &str = "virtual-module:";
39+
40+
pub fn extract_html_module_scripts(
41+
content: String,
42+
path: PathBuf,
43+
) -> (String, FxHashMap<String, String>) {
44+
let mut scripts = FxHashMap::default();
45+
let mut result = String::new();
46+
let extension = path.extension().unwrap_or_default();
47+
let is_html = extension == "html";
48+
let is_astro = extension == "astro";
49+
let is_svelte = extension == "svelte";
50+
let raw = COMMENT_REGEX.replace(&content, NoExpand("<!---->"));
51+
52+
for (index, c) in SCRIPT_REGEX.captures_iter(&raw).enumerate() {
53+
let (_, [open_tag, content]) = c.extract();
54+
55+
let script_type = TYPE_REGEX.captures(open_tag).map(|caps| {
56+
let (_, [a, b, c]) = caps.extract();
57+
format!("{a}{b}{c}")
58+
});
59+
60+
let script_lang = LANG_REGEX.captures(open_tag).map(|caps| {
61+
let (_, [a, b, c]) = caps.extract();
62+
format!("{a}{b}{c}")
63+
});
64+
65+
// skip non type module script
66+
if is_html && !matches!(script_type, Some(ref v) if v == "module") {
67+
continue;
68+
}
69+
70+
// skip type="application/ld+json" and other non-JS types
71+
if matches!(script_type, Some(ref v) if !(v.contains("javascript") || v.contains("ecmascript") || v == "module"))
72+
{
73+
continue;
74+
}
75+
76+
let script_src = SRC_REGEX.captures(open_tag).map(|caps| {
77+
let (_, [a, b, c]) = caps.extract();
78+
format!("{a}{b}{c}")
79+
});
80+
81+
if let Some(script_src) = script_src {
82+
result.push_str(&format!("import '{script_src}';\n"));
83+
}
84+
// The reason why virtual modules are needed:
85+
// 1. There can be module scripts (`<script context="module">` in Svelte and `<script>` in Vue)
86+
// or local scripts (`<script>` in Svelte and `<script setup>` in Vue)
87+
// 2. There can be multiple module scripts in html
88+
// We need to handle these separately in case variable names are reused between them
89+
90+
// append imports in TS to prevent bundler from removing them
91+
// since they may be used in the template
92+
let mut contents = content.trim().to_string();
93+
if !contents.is_empty() {
94+
if matches!(script_lang, Some(ref v) if v == "ts" || v == "tsx") || is_astro {
95+
contents.push_str(&extract_import_paths(content));
96+
}
97+
98+
let loader: Cow<'_, str> = if let Some(script_lang) = script_lang {
99+
script_lang.into()
100+
} else if is_astro {
101+
"ts".into()
102+
} else {
103+
"".into()
104+
};
105+
// Here append loader to query, it can be used to transform the script content at vite.
106+
let key = format!("{}?id={index}&loader={loader}", path.to_string_lossy());
107+
// Glob Import need transform, so legacy the logic to vite.
108+
scripts.insert(key.clone(), contents);
109+
110+
let virtual_module_path = format!("'{VIRTUAL_MODULE_PREFIX}{key}'");
111+
let context = CONTEXT_REGEX.captures(open_tag).map(|caps| {
112+
let (_, [a, b, c]) = caps.extract();
113+
format!("{a}{b}{c}")
114+
});
115+
116+
// Especially for Svelte files, exports in <script context="module"> means module exports,
117+
// exports in <script> means component props. To avoid having two same export name from the
118+
// star exports, we need to ignore exports in <script>
119+
if is_svelte && matches!(context, Some(v) if v != "module") {
120+
result.push_str(&format!("import {virtual_module_path}\n"));
121+
} else {
122+
result.push_str(&format!("export * from {virtual_module_path}\n"));
123+
}
124+
}
125+
}
126+
127+
// This will trigger incorrectly if `export default` is contained
128+
// anywhere in a string. Svelte and Astro files can't have
129+
// `export default` as code so we know if it's encountered it's a
130+
// false positive (e.g. contained in a string)
131+
if extension != "vue" || !result.contains("export default") {
132+
result.push_str("\nexport default {}");
133+
}
134+
135+
(result, scripts)
136+
}
137+
138+
/**
139+
* when using TS + (Vue + `<script setup>`) or Svelte, imports may seem
140+
* unused to bundler and dropped in the build output, which prevents
141+
* bundler from crawling further.
142+
* the solution is to add `import 'x'` for every source to force
143+
* bundler to keep crawling due to potential side effects.
144+
*/
145+
fn extract_import_paths(code: &str) -> String {
146+
let mut result = String::new();
147+
148+
let value = MULTILINE_COMMENT_REGEX.replace_all(code, NoExpand("/* */"));
149+
let raw = SINGLE_COMMENT_REGEX.replace_all(&value, NoExpand(""));
150+
151+
for c in IMPORTS_FROM_BLOCK_REGEX.captures_iter(&raw) {
152+
if let Some(src) = c.get(2) {
153+
result.push_str(&format!("\nimport {};", src.as_str()));
154+
}
155+
}
156+
157+
result
158+
}
159+
160+
#[test]
fn test_extract_import_paths() {
  // A commented-out import is ignored; bare imports and `from` imports are both kept.
  let code = "import 'a';\n // import 'b';\nimport {c} from 'c';\nconsole.log(1);";
  let expected = String::from("\nimport 'a';\nimport 'c';");

  assert_eq!(extract_import_paths(code), expected);
}
167+
168+
#[test]
fn test_extract_html_module_scripts() {
  // skip non type module script
  let source = String::from("<script></script>");
  let expected = (String::from("\nexport default {}"), FxHashMap::<String, String>::default());

  assert_eq!(extract_html_module_scripts(source, PathBuf::from("a.html")), expected);

  // TODO add test
  // skip type="application/ld+json" and other non-JS types:
  // `<script type="application/ld+json"></script>` in `a.vue` is expected to yield
  // ("\nexport default {}", empty map) as well.
}

0 commit comments

Comments
 (0)