114 changes: 78 additions & 36 deletions crates/biome_yaml_parser/src/lexer/mod.rs
@@ -25,7 +25,7 @@ pub(crate) struct YamlLexer<'src> {
/// Cache of tokens to be emitted to the parser
tokens: LinkedList<LexToken>,

/// Cached of tokens that should only be after the current scope has been properly closed.
/// Cache of tokens that should only be emitted after the current scope has been properly closed.
cached_scope_closing_tokens: Option<LinkedList<LexToken>>,
}
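
As an illustration of why this cache exists, here is a minimal YAML sketch; the token names come from the tests below, and the exact splice point is an assumption based on `close_breached_scopes`:

```yaml
# After consuming the newline following `c`, the lexer notices the dedent at
# `d`, so the inner mapping's MAPPING_END is cached and emitted right after
# `c`, ahead of the newline trivia.
a:
  b: c
d: e
```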

@@ -53,30 +53,27 @@ impl<'src> YamlLexer<'src> {
/// ```
fn consume_tokens(&mut self) {
let Some(current) = self.current_byte() else {
while let Some(scope) = self.scopes.pop() {
self.tokens.push_back(LexToken::pseudo(
scope.close_token_kind(),
self.current_coordinate,
));
}
let mut tokens = self.close_all_scopes();
self.tokens.append(&mut tokens);
self.tokens
.push_back(LexToken::pseudo(EOF, self.current_coordinate));
return;
};

let start = self.text_position();

let mut tokens = match (current, self.peek_byte()) {
(c, _) if is_space(c) => self.consume_whitespace_token().into(),
(b'#', _) => self.consume_comment().into(),
(c, _) if is_break(c) => self.evaluate_block_scope(),
(current, peek) if maybe_at_mapping_start(current, peek) => {
let mut tokens = match current {
c if is_break(c) => self.evaluate_block_scope(),
c if is_space(c) => self.consume_whitespace_token().into(),
b'#' => self.consume_comment().into(),
b'.' if self.is_at_doc_end() => self.consume_doc_end(),
current if maybe_at_mapping_start(current, self.peek_byte()) => {
self.consume_potential_mapping_start(current)
}
// ':', '?', '-' can be a valid plain token start
(b'?' | b':', _) => self.consume_mapping_key(current),
(b'-', _) => self.consume_sequence_entry(),
(b'|' | b'>', _) => self.consume_block_scalar(current),
b'?' | b':' => self.consume_mapping_key(current),
b'-' => self.consume_sequence_entry(),
b'|' | b'>' => self.consume_block_scalar(current),
_ => self.consume_unexpected_token().into(),
};
self.tokens.append(&mut tokens);
@@ -216,25 +213,9 @@ impl<'src> YamlLexer<'src> {
}
}

// The spec only allows trailing trivia following a block header
let mut trivia = self.consume_trivia(true);
let mut trivia = self.consume_trailing_trivia();
tokens.append(&mut trivia);

if self.current_byte().is_none_or(is_break) {
return tokens;
}

// Consume the rest of the invalid characters so that the block content can cleanly start
// at a newline.
let start = self.current_coordinate;
while let Some(c) = self.current_byte() {
if is_break(c) {
break;
}
self.advance_char_unchecked();
}

tokens.push_back(LexToken::new(ERROR_TOKEN, start, self.current_coordinate));
tokens
}
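
For context, a sketch of the inputs this path distinguishes; YAML 1.2 permits only a comment after a block-scalar header, so anything else is lexed as an error (a minimal sketch, not an exhaustive grammar):

```yaml
ok: |   # a trailing comment after the header is valid
  literal content
# below, "junk" after the header is invalid and becomes a single ERROR_TOKEN
bad: | junk
  literal content
```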

@@ -296,13 +277,16 @@ impl<'src> YamlLexer<'src> {
debug_assert!(self.current_byte().is_some_and(is_break));
let start = self.current_coordinate;
let mut trivia = self.consume_trivia(false);
let mut scope_end_tokens = self.close_scope(start);
let mut scope_end_tokens = self.close_breached_scopes(start);
scope_end_tokens.append(&mut trivia);
scope_end_tokens
}

/// Close all breached scopes and emit their closing tokens right after the last non-trivia token
fn close_scope(&mut self, scope_end_coordinate: TextCoordinate) -> LinkedList<LexToken> {
fn close_breached_scopes(
&mut self,
scope_end_coordinate: TextCoordinate,
) -> LinkedList<LexToken> {
let mut scope_end_tokens = LinkedList::new();
while let Some(scope) = self.scopes.pop() {
if scope.contains(
@@ -321,6 +305,17 @@
scope_end_tokens
}

fn close_all_scopes(&mut self) -> LinkedList<LexToken> {
let mut tokens = LinkedList::new();
while let Some(scope) = self.scopes.pop() {
// Collect each scope's closing token locally so that callers can decide
// where to splice the closings into the token stream.
tokens.push_back(LexToken::pseudo(
scope.close_token_kind(),
self.current_coordinate,
));
}
tokens
}
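
A sketch of how the two closing paths divide the work (scope kinds assumed from the tests below):

```yaml
outer:
  inner: 1   # the dedent at `next` breaches this scope: close_breached_scopes
next: 2      # at end of input, close_all_scopes closes everything still open
```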

/// Consume a YAML flow value that can be used inside an implicit mapping key
/// https://yaml.org/spec/1.2.2/#rule-ns-s-block-map-implicit-key
fn consume_potential_mapping_key(&mut self, current: u8) -> LinkedList<LexToken> {
@@ -533,6 +528,30 @@ impl<'src> YamlLexer<'src> {
LexToken::new(SINGLE_QUOTED_LITERAL, start, token_end)
}

fn is_at_doc_end(&self) -> bool {
let is_dot = |c: u8| c == b'.';
// A DOC_END token can be evaluated as a plain token if it's not placed at the start
// of a line
self.current_coordinate.column == 0
&& self.current_byte().is_some_and(is_dot)
&& self.peek_byte().is_some_and(is_dot)
&& self.byte_at(2).is_some_and(is_dot)
}

fn consume_doc_end(&mut self) -> LinkedList<LexToken> {
self.assert_byte(b'.');
debug_assert_eq!(self.byte_at(1), Some(b'.'));
debug_assert_eq!(self.byte_at(2), Some(b'.'));
let start = self.current_coordinate;
let mut tokens = self.close_all_scopes();
self.advance(3);
tokens.push_back(LexToken::new(DOC_END, start, self.current_coordinate));
let mut trivia = self.consume_trailing_trivia();
tokens.append(&mut trivia);

tokens
}
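
The column check is what separates the two interpretations; a minimal sketch based on the tests below:

```yaml
a: b
...    # at column 0: lexed as DOC_END, after all open scopes are closed
# an indented "   ..." would instead be lexed as a PLAIN_LITERAL flow value
```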

/// Bumps the current byte and creates a lexed token of the passed in kind.
#[inline]
fn consume_byte_as_token(&mut self, tok: YamlSyntaxKind) -> LexToken {
@@ -548,7 +567,7 @@
let start = self.current_coordinate;
let mut trivia = self.consume_trivia(false);
if self.breach_parent_scope() {
let mut scope_end_tokens = self.close_scope(start);
let mut scope_end_tokens = self.close_breached_scopes(start);
scope_end_tokens.append(&mut trivia);
self.cached_scope_closing_tokens = Some(scope_end_tokens);
None
@@ -590,7 +609,7 @@
}
}
if self.breach_parent_scope() {
let mut scope_end_tokens = self.close_scope(start);
let mut scope_end_tokens = self.close_breached_scopes(start);
scope_end_tokens.append(&mut trivia);
self.cached_scope_closing_tokens = Some(scope_end_tokens);
false
@@ -633,6 +652,29 @@
LexToken::new(ERROR_TOKEN, start, self.current_coordinate)
}

/// Some constructs, like a block header or the document end marker (`...`), don't allow
/// any trailing tokens except for trivia.
/// This function consumes the trailing trivia and bundles any unexpected trailing
/// characters into a single `ERROR_TOKEN`.
fn consume_trailing_trivia(&mut self) -> LinkedList<LexToken> {
self.assert_current_char_boundary();

let mut tokens = self.consume_trivia(true);

if self.current_byte().is_none_or(is_break) {
return tokens;
}

let start = self.current_coordinate;
while let Some(c) = self.current_byte() {
if is_break(c) {
break;
}
self.advance_char_unchecked();
}
tokens.push_back(LexToken::new(ERROR_TOKEN, start, self.current_coordinate));
tokens
}
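
A sketch of the recovery behavior (token kinds from the tests below): once the trivia is consumed, everything up to the newline is bundled into one ERROR_TOKEN so the next line can start cleanly:

```yaml
... 10 20   # DOC_END, WHITESPACE, then a single ERROR_TOKEN covering "10 20"
```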

fn consume_unexpected_character(&mut self) {
self.assert_current_char_boundary();

49 changes: 49 additions & 0 deletions crates/biome_yaml_parser/src/lexer/tests/document.rs
@@ -0,0 +1,49 @@
use crate::assert_lex;

#[test]
fn lex_doc_end() {
assert_lex!(
"...",
DOC_END:3,
);
}

#[test]
fn lex_doc_end_followed_by_trivia() {
assert_lex!(
"... # trivia",
DOC_END:3,
WHITESPACE:1,
COMMENT:8,
);
}

#[test]
fn lex_doc_end_followed_unexpected_token() {
assert_lex!(
"... 10",
DOC_END:3,
WHITESPACE:1,
ERROR_TOKEN:2,
);
}

#[test]
fn lex_doc_end_close_previous_document() {
assert_lex!(
r#"a: b
...
"#,
MAPPING_START:0,
PLAIN_LITERAL:1,
COLON:1,
WHITESPACE:1,
FLOW_START:0,
PLAIN_LITERAL:1,
FLOW_END:0,
NEWLINE:1,
MAPPING_END:0,
DOC_END:3,
NEWLINE:1,
);
}
11 changes: 11 additions & 0 deletions crates/biome_yaml_parser/src/lexer/tests/flow.rs
@@ -249,6 +249,17 @@ plain token
);
}

#[test]
fn lex_document_end_like_plain_token() {
assert_lex!(
" ...",
WHITESPACE:1,
FLOW_START:0,
PLAIN_LITERAL:3,
FLOW_END:0,
);
}

#[test]
fn lex_mapping_with_multiline_plain() {
assert_lex!(
1 change: 1 addition & 0 deletions crates/biome_yaml_parser/src/lexer/tests/mod.rs
@@ -1,6 +1,7 @@
#![cfg(test)]

mod block;
mod document;
mod flow;

use super::TextSize;
4 changes: 2 additions & 2 deletions crates/biome_yaml_parser/src/parser/document.rs
@@ -13,7 +13,7 @@ use biome_yaml_syntax::{
use super::{
YamlParser,
block::{is_at_any_block_node, parse_any_block_node},
parse_error::{expected_directive, malformed_document},
parse_error::{expected_directive, unexpected_token},
};

#[derive(Default)]
@@ -41,7 +41,7 @@ impl ParseNodeList for DocumentList {
parsed_element.or_recover_with_token_set(
p,
&ParseRecoveryTokenSet::new(YamlSyntaxKind::YAML_BOGUS, token_set![EOF]),
malformed_document,
unexpected_token,
)
}
}
23 changes: 16 additions & 7 deletions crates/biome_yaml_parser/src/parser/parse_error.rs
@@ -1,6 +1,11 @@
use crate::parser::YamlParser;
use biome_parser::diagnostic::{ParseDiagnostic, expected_node};
use biome_rowan::TextRange;
use biome_diagnostics::location::AsSpan;
use biome_parser::{
Parser,
diagnostic::{ParseDiagnostic, expected_node},
prelude::TokenSource,
};
use biome_rowan::{TextLen, TextRange};

pub(crate) fn expected_block_mapping_entry(p: &YamlParser, range: TextRange) -> ParseDiagnostic {
expected_node("block mapping entry", range, p)
@@ -10,11 +15,6 @@ pub(crate) fn expected_block_sequence_entry(p: &YamlParser, range: TextRange) ->
expected_node("block sequence entry", range, p)
}

// This shouldn't happen that often
pub(crate) fn malformed_document(_p: &YamlParser, range: TextRange) -> ParseDiagnostic {
ParseDiagnostic::new("Malformed document", range)
}

pub(crate) fn expected_directive(p: &YamlParser, range: TextRange) -> ParseDiagnostic {
expected_node("directive", range, p)
}
@@ -38,3 +38,12 @@ pub(crate) fn expected_flow_sequence_closing_bracket(range: TextRange) -> ParseD
pub(crate) fn expected_header(p: &YamlParser, range: TextRange) -> ParseDiagnostic {
expected_node("block header", range, p)
}

pub(crate) fn unexpected_token(p: &YamlParser, range: TextRange) -> ParseDiagnostic {
let msg = if p.source().text().text_len() <= range.start() {
"Unexpected end of file."
} else {
"Unexpected token."
};
ParseDiagnostic::new(msg, range.as_span())
}
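
A quick sketch of the inputs that exercise each branch, both taken from the updated snapshots below (the pairing of input to branch is an inference from those snapshots):

```yaml
# Recovery here covers the in-source token `"abc"`, so the parser reports
# "Unexpected token."
... "abc"
# In mapping_key_contains_multiple_values.yaml, recovery begins at EOF, and
# the message becomes "Unexpected end of file."
```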
@@ -134,7 +134,7 @@ mapping_key_contains_multiple_values.yaml:2:9 parse ━━━━━━━━━

mapping_key_contains_multiple_values.yaml:3:1 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

× Malformed document
× Unexpected end of file.

1 │ a:
2 │ "aaa" 'a' acb: 40
@@ -0,0 +1 @@
... "abc"
@@ -0,0 +1,61 @@
---
source: crates/biome_yaml_parser/tests/spec_test.rs
expression: snapshot
---
## Input
```yaml
... "abc"

```

## AST

```
YamlRoot {
    documents: YamlDocumentList [
        YamlDocument {
            bom_token: missing (optional),
            directives: YamlDirectiveList [],
            dashdashdash_token: missing (optional),
            node: missing (optional),
            dotdotdot_token: DOC_END@0..4 "..." [] [Whitespace(" ")],
        },
        YamlBogus {
            items: [
                DOUBLE_QUOTED_LITERAL@4..9 "\"abc\"" [] [],
            ],
        },
    ],
    eof_token: EOF@9..10 "" [Newline("\n")] [],
}
```

## CST

```
0: YAML_ROOT@0..10
  0: YAML_DOCUMENT_LIST@0..9
    0: YAML_DOCUMENT@0..4
      0: (empty)
      1: YAML_DIRECTIVE_LIST@0..0
      2: (empty)
      3: (empty)
      4: DOC_END@0..4 "..." [] [Whitespace(" ")]
    1: YAML_BOGUS@4..9
      0: DOUBLE_QUOTED_LITERAL@4..9 "\"abc\"" [] []
  1: EOF@9..10 "" [Newline("\n")] []

```

## Diagnostics

```
doc_end_with_trailing_tokens.yaml:1:5 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

× Unexpected token.

> 1 │ ... "abc"
│ ^^^^^
2 │

```