Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions crates/biome_tailwind_parser/src/lexer/base_name_store.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
use std::sync::LazyLock;

use biome_tailwind_syntax::metadata::BASENAMES_WITH_DASHES;

/// A global store of dashed basenames for efficient reuse across lexing.
///
/// Ideally, this will be built by parsing a project's tailwind config, since it can add custom utilities and variants.
// `pub(crate)`, not `pub`: `BaseNameStore` is itself `pub(crate)`, so exposing
// this static as `pub` trips the `private_interfaces` lint and publishes a name
// that downstream crates could not actually use anyway.
pub(crate) static BASENAME_STORE: LazyLock<BaseNameStore> =
    LazyLock::new(|| BaseNameStore::new(BASENAMES_WITH_DASHES));

/// A compact trie storing all dashed basenames to efficiently match the longest valid basename
/// at the beginning of a byte slice.
///
/// Build it once from `BASENAMES_WITH_DASHES` and reuse it across lexing.
pub(crate) struct BaseNameStore {
    // Flat arena of trie nodes; index 0 is always the root.
    nodes: Vec<Node>,
}

#[derive(Default)]
struct Node {
    // True when the path from the root to this node spells a complete basename.
    terminal: bool,
    // Children are stored as (byte, child_index); sorted by byte after construction.
    children: Vec<(u8, usize)>,
}

impl BaseNameStore {
    /// Creates a store from a list of ASCII basenames.
    pub(crate) fn new(list: &[&str]) -> Self {
        let mut store = Self {
            nodes: vec![Node::default()], // root
        };

        for name in list {
            store.insert(name.as_bytes());
        }

        // Optional: sort children to enable binary search (currently linear search is fine).
        for node in &mut store.nodes {
            node.children.sort_unstable_by_key(|(b, _)| *b);
            // Deduplicate in case of construction anomalies (shouldn't happen, but cheap insurance)
            node.children.dedup_by_key(|(b, _)| *b);
        }

        store
    }

    /// Inserts one basename into the trie, allocating nodes for missing edges
    /// and marking the final node as terminal.
    fn insert(&mut self, word: &[u8]) {
        let mut node_idx = 0usize;

        for &b in word {
            node_idx = match self.find_child(node_idx, b) {
                Some(next) => next,
                None => {
                    let next = self.new_node();
                    self.nodes[node_idx].children.push((b, next));
                    next
                }
            };
        }

        self.nodes[node_idx].terminal = true;
    }

    /// Allocates a fresh empty node and returns its index in the arena.
    #[inline]
    fn new_node(&mut self) -> usize {
        let next_index = self.nodes.len();
        self.nodes.push(Node::default());
        next_index
    }

    /// Returns the index of the child of `node` reached via `byte`, if any.
    #[inline]
    fn find_child(&self, node: usize, byte: u8) -> Option<usize> {
        // Linear search is fine because children fan-out is small in practice.
        self.nodes[node]
            .children
            .iter()
            .find_map(|(b, idx)| (*b == byte).then_some(*idx))
    }

    /// Creates a matcher for the provided text slice, starting at offset 0.
    #[inline]
    pub(crate) fn matcher<'s, 't>(&'s self, text: &'t [u8]) -> BaseNameMatcher<'s, 't> {
        BaseNameMatcher { store: self, text }
    }
}

/// A streaming helper that scans the provided text slice from the beginning
/// and returns the longest dashed basename prefix end if any.
///
/// It stops scanning when:
/// - It encounters a byte that isn't a path in the trie,
/// - Or it hits a delimiter (`whitespace`, `!`, `:`).
///
/// The `usize` returned by [`BaseNameMatcher::base_end`] is the number of
/// bytes consumed for the matched basename; the boundary after the match
/// (`-`, `:`, whitespace, or end-of-input) is verified internally.
pub(crate) struct BaseNameMatcher<'s, 't> {
    store: &'s BaseNameStore,
    text: &'t [u8],
}

impl BaseNameMatcher<'_, '_> {
    /// Returns the byte length of the basename at the start of the text.
    ///
    /// Prefers the longest dashed basename from the trie that is followed by a
    /// valid boundary (`-`, `:`, whitespace, or end of input). When no such
    /// match exists, falls back to a naive scan that ends the basename at the
    /// first `-`, whitespace, `!` or `:`.
    pub(crate) fn base_end(&self) -> usize {
        let mut node_idx = 0usize;
        // Longest terminal reached so far that is also followed by a valid boundary.
        let mut best_end: Option<usize> = None;

        // Scan for a dashed basename using the trie until a delimiter or mismatch.
        let mut i = 0usize;
        while i < self.text.len() {
            let b = self.text[i];
            if is_delimiter(b) {
                break;
            }

            match self.store.find_child(node_idx, b) {
                Some(next) => {
                    node_idx = next;
                    i += 1;
                    // Record a candidate only when the byte after it is a valid
                    // boundary (or end of input). Checking per-terminal — rather
                    // than once after the scan — lets a shorter valid basename win
                    // when the longest trie match is immediately followed by a
                    // non-boundary byte (e.g. for a store containing both `ab-cd`
                    // and `ab-cd-ef`, input `ab-cd-efx` matches `ab-cd`).
                    if self.store.nodes[node_idx].terminal
                        && self.text.get(i).is_none_or(|b| is_boundary_byte(*b))
                    {
                        best_end = Some(i);
                    }
                }
                None => break,
            }
        }

        // Any recorded candidate already has a verified boundary.
        if let Some(end) = best_end {
            return end;
        }

        // Fallback: naive basename ends at the first '-', whitespace, '!' or ':'.
        let mut j = 0usize;
        while j < self.text.len() {
            let b = self.text[j];
            if b == b'-' || is_delimiter(b) {
                break;
            }
            j += 1;
        }
        j
    }
}

#[inline]
const fn is_delimiter(b: u8) -> bool {
    // Delimiters that cannot be part of a basename (excluding '-', which may be
    // inside dashed basenames):
    // - whitespace
    // - '!' important modifier
    // - ':' variant separator
    matches!(b, b' ' | b'\n' | b'\r' | b'\t' | b'!' | b':')
}

#[inline]
const fn is_boundary_byte(b: u8) -> bool {
    // Valid boundary after a dashed basename:
    // - '-' indicates a value follows
    // - ':' indicates a variant boundary
    // - whitespace
    // (end-of-input is handled at the call site via `Option::is_none_or`)
    matches!(b, b'-' | b':' | b' ' | b'\n' | b'\r' | b'\t')
}

#[cfg(test)]
mod tests {
    use super::*;

    // The longest dashed basename present in the store wins when it is
    // followed by a valid boundary byte ('-' here).
    #[test]
    fn matches_longest_prefix() {
        let store = BaseNameStore::new(&["border-t", "border", "mask-radial-at"]);
        assert_eq!(
            store.matcher(b"border-t-red-300").base_end(),
            "border-t".len()
        );
        assert_eq!(
            store.matcher(b"mask-radial-at-top").base_end(),
            "mask-radial-at".len()
        );
        // "bg" is not in the store: the naive fallback ends the basename at
        // the first '-'.
        assert_eq!(store.matcher(b"bg-red-500").base_end(), "bg".len());
    }

    // ':' is a delimiter/boundary: the basename ends right before it.
    #[test]
    fn respects_delimiters() {
        let store = BaseNameStore::new(&["nth-last", "rounded-bl"]);
        assert_eq!(store.matcher(b"nth-last:odd").base_end(), "nth-last".len());
        assert_eq!(
            store.matcher(b"rounded-bl-lg").base_end(),
            "rounded-bl".len()
        );
    }

    #[test]
    fn stops_on_non_edge() {
        let store = BaseNameStore::new(&["border-t"]);
        // No edge for 'f' after "border-t", so "border-tf" is not a trie match;
        // the naive fallback stops at the first '-'.
        assert_eq!(store.matcher(b"border-tfoo").base_end(), "border".len());
        // If the very first byte has no trie edge, the naive fallback is used
        // and the basename ends at the first '-' (the result is a length, not
        // an Option).
        assert_eq!(store.matcher(b"bg-red-500").base_end(), "bg".len());
    }
}
35 changes: 6 additions & 29 deletions crates/biome_tailwind_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
mod base_name_store;
mod tests;

use crate::lexer::base_name_store::BASENAME_STORE;
use crate::token_source::TailwindLexContext;
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags};
use biome_rowan::{SyntaxKind, TextLen};
use biome_tailwind_syntax::T;
use biome_tailwind_syntax::TailwindSyntaxKind::*;
use biome_tailwind_syntax::metadata::BASENAMES_WITH_DASHES;
use biome_tailwind_syntax::{TailwindSyntaxKind, TextSize};

pub(crate) struct TailwindLexer<'src> {
Expand Down Expand Up @@ -132,34 +133,10 @@ impl<'src> TailwindLexer<'src> {
fn consume_base(&mut self) -> TailwindSyntaxKind {
self.assert_current_char_boundary();

// Find the longest matching base name
let source_from_position = &self.source[self.position..];
let base_name = BASENAMES_WITH_DASHES
.iter()
.rfind(|&name| source_from_position.starts_with(name));

if let Some(base_name) = base_name {
// we need to make sure that either the base is followed by a `-` to signify a value is coming, or a whitespace/end of input to signify the end of the base
match self.byte_at(base_name.len()) {
Some(b'-') | None => {
self.advance(base_name.len());
return TW_BASE;
}
Some(b) if b.is_ascii_whitespace() => {
self.advance(base_name.len());
return TW_BASE;
}
_ => {}
}
}

while let Some(byte) = self.current_byte() {
let char = self.current_char_unchecked();
if char.is_whitespace() || byte == b'-' || byte == b'!' || byte == b':' {
break;
}
self.advance(char.len_utf8());
}
let bytes = self.source.as_bytes();
let slice = &bytes[self.position..];
let end = BASENAME_STORE.matcher(slice).base_end();
self.advance(end);

TW_BASE
}
Expand Down