diff --git a/juniper/CHANGELOG.md b/juniper/CHANGELOG.md index 4863ea4bf..bf3a67070 100644 --- a/juniper/CHANGELOG.md +++ b/juniper/CHANGELOG.md @@ -15,19 +15,24 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - [September 2025] GraphQL spec: ([#1347]) - Made `includeDeprecated` argument of `__Type.fields`, `__Type.enumValues`, `__Type.inputFields`, `__Field.args` and `__Directive.args` fields non-`Null`. ([#1348], [graphql/graphql-spec#1142]) - Made `@deprecated(reason:)` argument non-`Null`. ([#1348], [graphql/graphql-spec#1040]) +- Changed `ScalarToken::String` to contain raw quoted and escaped `StringLiteral` (was unquoted but escaped string before). ([#1349]) +- Added `LexerError::UnterminatedBlockString` variant. ([#1349]) ### Added - [September 2025] GraphQL spec: ([#1347]) - `__Type.isOneOf` field. ([#1348], [graphql/graphql-spec#825]) - `SCHEMA`, `OBJECT`, `ARGUMENT_DEFINITION`, `INTERFACE`, `UNION`, `ENUM`, `INPUT_OBJECT` and `INPUT_FIELD_DEFINITION` values to `__DirectiveLocation` enum. ([#1348]) - - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) + - Arguments and input object fields deprecation: ([#1348], [#864], [graphql/graphql-spec#525], [graphql/graphql-spec#805]) - Placing `#[graphql(deprecated)]` and `#[deprecated]` attributes on struct fields in `#[derive(GraphQLInputObject)]` macro. - Placing `#[graphql(deprecated)]` attribute on method arguments in `#[graphql_object]` and `#[graphql_interface]` macros. - Placing `@deprecated` directive on arguments and input object fields. - `includeDeprecated` argument to `__Type.inputFields`, `__Field.args` and `__Directive.args` fields. - `__InputValue.isDeprecated` and `__InputValue.deprecationReason` fields. - `schema::meta::Argument::deprecation_status` field. + - Support for variable-length escaped Unicode characters (e.g. `\u{110000}`) in strings. 
([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) + - Support full Unicode range. ([#1349], [graphql/graphql-spec#849], [graphql/graphql-spec#687]) +- Support for [block strings][0180-1]. ([#1349]) ### Changed @@ -38,15 +43,20 @@ All user visible changes to `juniper` crate will be documented in this file. Thi - Incorrect `__Type.specifiedByUrl` field to `__Type.specifiedByURL`. ([#1348]) - Missing `@specifiedBy(url:)` directive in [SDL] generated by `RootNode::as_sdl()` and `RootNode::as_document()` methods. ([#1348]) +- Incorrect double escaping in `ScalarToken::String` `Display`ing. ([#1349]) [#864]: /../../issues/864 [#1347]: /../../issues/1347 [#1348]: /../../pull/1348 +[#1349]: /../../pull/1349 [graphql/graphql-spec#525]: https://github.com/graphql/graphql-spec/pull/525 +[graphql/graphql-spec#687]: https://github.com/graphql/graphql-spec/issues/687 [graphql/graphql-spec#805]: https://github.com/graphql/graphql-spec/pull/805 [graphql/graphql-spec#825]: https://github.com/graphql/graphql-spec/pull/825 +[graphql/graphql-spec#849]: https://github.com/graphql/graphql-spec/pull/849 [graphql/graphql-spec#1040]: https://github.com/graphql/graphql-spec/pull/1040 [graphql/graphql-spec#1142]: https://github.com/graphql/graphql-spec/pull/1142 +[0180-1]: https://spec.graphql.org/September2025/#sec-String-Value.Block-Strings diff --git a/juniper/src/ast.rs b/juniper/src/ast.rs index 0c2208cb2..4f22bcc65 100644 --- a/juniper/src/ast.rs +++ b/juniper/src/ast.rs @@ -384,6 +384,7 @@ pub enum OperationType { #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Debug, PartialEq)] pub struct Operation<'a, S> { + //pub description: Option>, pub operation_type: OperationType, pub name: Option>, pub variable_definitions: Option>>, diff --git a/juniper/src/lib.rs b/juniper/src/lib.rs index e79c5290a..f0f563313 100644 --- a/juniper/src/lib.rs +++ b/juniper/src/lib.rs @@ -85,7 +85,7 @@ pub use crate::{ }, introspection::IntrospectionFormat, 
macros::helper::subscription::{ExtractTypeFromStream, IntoFieldResult}, - parser::{ParseError, ScalarToken, Span, Spanning}, + parser::{ParseError, ScalarToken, Span, Spanning, StringLiteral}, schema::{ meta, model::{RootNode, SchemaType}, diff --git a/juniper/src/parser/lexer.rs b/juniper/src/parser/lexer.rs index 56b1a61de..e4b57f108 100644 --- a/juniper/src/parser/lexer.rs +++ b/juniper/src/parser/lexer.rs @@ -1,4 +1,4 @@ -use std::{char, iter::Peekable, str::CharIndices}; +use std::{char, ops::Deref, str::CharIndices}; use derive_more::with_trait::{Display, Error}; @@ -7,25 +7,51 @@ use crate::parser::{SourcePosition, Spanning}; #[doc(hidden)] #[derive(Debug)] pub struct Lexer<'a> { - iterator: Peekable>, + iterator: itertools::PeekNth>, source: &'a str, length: usize, position: SourcePosition, has_reached_eof: bool, } -/// A single scalar value literal +/// Representation of a raw unparsed scalar value literal. /// /// This is only used for tagging how the lexer has interpreted a value literal #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] pub enum ScalarToken<'a> { - #[display("\"{}\"", _0.replace('\\', "\\\\").replace('"', "\\\""))] - String(&'a str), + String(StringLiteral<'a>), Float(&'a str), Int(&'a str), } +/// Representation of a raw unparsed [String Value] literal (with quotes included). +/// +/// [String Value]: https://spec.graphql.org/October2021#sec-String-Value +#[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] +pub enum StringLiteral<'a> { + /// [Quoted][0] literal (denoted by single quotes `"`). + /// + /// [0]: https://spec.graphql.org/October2021#StringCharacter + Quoted(&'a str), + + /// [Block][0] literal (denoted by triple quotes `"""`). 
+ /// + /// [0]: https://spec.graphql.org/October2021#BlockStringCharacter + Block(&'a str), +} + +impl Deref for StringLiteral<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + match self { + Self::Quoted(s) => s, + Self::Block(s) => s, + } + } +} + /// A single token in the input source #[expect(missing_docs, reason = "self-explanatory")] #[derive(Clone, Copy, Debug, Display, Eq, PartialEq)] @@ -87,12 +113,9 @@ pub enum LexerError { #[display("Unterminated string literal")] UnterminatedString, - /// An unknown character in a string literal was found - /// - /// This occurs when an invalid source character is found in a string - /// literal, such as ASCII control characters. - #[display("Unknown character \"{_0}\" in string literal")] - UnknownCharacterInString(#[error(not(source))] char), + /// An unterminated block string literal was found. + #[display("Unterminated block string literal")] + UnterminatedBlockString, /// An unknown escape sequence in a string literal was found /// @@ -119,7 +142,7 @@ impl<'a> Lexer<'a> { #[doc(hidden)] pub fn new(source: &'a str) -> Lexer<'a> { Lexer { - iterator: source.char_indices().peekable(), + iterator: itertools::peek_nth(source.char_indices()), source, length: source.len(), position: SourcePosition::new_origin(), @@ -162,25 +185,51 @@ impl<'a> Lexer<'a> { Spanning::single_width(&start_pos, t) } + /// Advances this [`Lexer`] over any [ignored] character until a non-[ignored] is met. 
+ /// + /// [ignored]: https://spec.graphql.org/September2025#Ignored fn scan_over_whitespace(&mut self) { while let Some((_, ch)) = self.peek_char() { - if ch == '\t' || ch == ' ' || ch == '\n' || ch == '\r' || ch == ',' { - self.next_char(); - } else if ch == '#' { - self.next_char(); - - while let Some((_, ch)) = self.peek_char() { - if is_source_char(ch) && (ch == '\n' || ch == '\r') { - self.next_char(); - break; - } else if is_source_char(ch) { - self.next_char(); - } else { - break; + // Ignored :: + // UnicodeBOM + // WhiteSpace + // LineTerminator + // Comment + // Comma + match ch { + // UnicodeBOM :: + // Byte Order Mark (U+FEFF) + // Whitespace :: + // Horizontal Tab (U+0009) + // Space (U+0020) + // LineTerminator :: + // New Line (U+000A) + // Carriage Return (U+000D) [lookahead != New Line (U+000A)] + // Carriage Return (U+000D) New Line (U+000A) + // Comma :: + // , + '\u{FEFF}' | '\t' | ' ' | '\n' | '\r' | ',' => _ = self.next_char(), + // Comment :: + // #CommentChar[list][opt] [lookahead != CommentChar] + // CommentChar :: + // SourceCharacter but not LineTerminator + '#' => { + _ = self.next_char(); + while let Some((_, ch)) = self.peek_char() { + _ = self.next_char(); + match ch { + '\r' if matches!(self.peek_char(), Some((_, '\n'))) => { + _ = self.next_char(); + break; + } + '\n' | '\r' => break, + // Continue scanning `Comment`. + _ => {} + } } } - } else { - break; + // Any other character is not `Ignored`. + _ => break, } } } @@ -232,7 +281,16 @@ impl<'a> Lexer<'a> { )) } + /// Scans a [string] by this [`Lexer`], but not a [block string]. 
+ /// + /// [string]: https://spec.graphql.org/September2025#StringValue + /// [block string]: https://spec.graphql.org/September2025#BlockString fn scan_string(&mut self) -> LexerResult<'a> { + // StringValue :: + // "" [lookahead != "] + // "StringCharacter[list]" + // BlockString + let start_pos = self.position; let (start_idx, start_ch) = self .next_char() @@ -247,10 +305,19 @@ impl<'a> Lexer<'a> { let mut escaped = false; let mut old_pos = self.position; while let Some((idx, ch)) = self.next_char() { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter match ch { - 'b' | 'f' | 'n' | 'r' | 't' | '\\' | '/' | '"' if escaped => { + // EscapedCharacter :: one of + // " \ / b f n r t + '"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' if escaped => { escaped = false; } + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit 'u' if escaped => { self.scan_escaped_unicode(&old_pos)?; escaped = false; @@ -266,7 +333,9 @@ impl<'a> Lexer<'a> { return Ok(Spanning::start_end( &start_pos, &self.position, - Token::Scalar(ScalarToken::String(&self.source[start_idx + 1..idx])), + Token::Scalar(ScalarToken::String(StringLiteral::Quoted( + &self.source[start_idx..=idx], + ))), )); } '\n' | '\r' => { @@ -275,12 +344,8 @@ impl<'a> Lexer<'a> { LexerError::UnterminatedString, )); } - c if !is_source_char(c) => { - return Err(Spanning::zero_width( - &old_pos, - LexerError::UnknownCharacterInString(ch), - )); - } + // Any other valid Unicode scalar value is a `SourceCharacter`: + // https://spec.graphql.org/September2025#SourceCharacter _ => {} } old_pos = self.position; @@ -292,27 +357,105 @@ impl<'a> Lexer<'a> { )) } + /// Scans a [block string] by this [`Lexer`]. 
+ /// + /// [block string]: https://spec.graphql.org/September2025#BlockString + fn scan_block_string(&mut self) -> LexerResult<'a> { + // BlockString :: + // """BlockStringCharacter[list][opt]""" + + let start_pos = self.position; + let (start_idx, mut start_ch) = self + .next_char() + .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile))?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnterminatedString, + )); + } + for _ in 0..2 { + (_, start_ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnexpectedEndOfFile) + })?; + if start_ch != '"' { + return Err(Spanning::zero_width( + &self.position, + LexerError::UnexpectedCharacter(start_ch), + )); + } + } + let (mut quotes, mut escaped) = (0, false); + while let Some((idx, ch)) = self.next_char() { + // BlockStringCharacter :: + // SourceCharacter but not """ or \""" + // \""" + match ch { + '\\' => (quotes, escaped) = (0, true), + '"' if escaped => (quotes, escaped) = (0, false), + '"' if quotes < 2 => quotes += 1, + '"' if quotes == 2 => { + return Ok(Spanning::start_end( + &start_pos, + &self.position, + Token::Scalar(ScalarToken::String(StringLiteral::Block( + &self.source[start_idx..=idx], + ))), + )); + } + _ => (quotes, escaped) = (0, false), + } + } + + Err(Spanning::zero_width( + &self.position, + LexerError::UnterminatedBlockString, + )) + } + + /// Scans an [escaped unicode] character by this [`Lexer`]. 
+ /// + /// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode fn scan_escaped_unicode( &mut self, start_pos: &SourcePosition, ) -> Result<(), Spanning> { - let (start_idx, _) = self + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let (start_idx, mut curr_ch) = self .peek_char() .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; let mut end_idx = start_idx; let mut len = 0; - for _ in 0..4 { - let (idx, ch) = self.next_char().ok_or_else(|| { - Spanning::zero_width(&self.position, LexerError::UnterminatedString) - })?; - - if !ch.is_alphanumeric() { - break; + let is_variable_width = curr_ch == '{'; + if is_variable_width { + _ = self.next_char(); + loop { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + end_idx = idx; + len += 1; + if !curr_ch.is_alphanumeric() { + break; + } + } + } else { + for _ in 0..4 { + let (idx, ch) = self.next_char().ok_or_else(|| { + Spanning::zero_width(&self.position, LexerError::UnterminatedString) + })?; + curr_ch = ch; + if !curr_ch.is_alphanumeric() { + break; + } + end_idx = idx; + len += 1; } - - end_idx = idx; - len += 1; } // Make sure we are on a valid char boundary. 
@@ -321,17 +464,30 @@ impl<'a> Lexer<'a> { .get(start_idx..=end_idx) .ok_or_else(|| Spanning::zero_width(&self.position, LexerError::UnterminatedString))?; - if len != 4 { - return Err(Spanning::zero_width( - start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), - )); + let code_point = if is_variable_width { + if curr_ch != '}' { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!( + r"\u{}", + &escape[..escape.len() - 1], + )), + )); + } + u32::from_str_radix(&escape[1..escape.len() - 1], 16) + } else { + if len != 4 { + return Err(Spanning::zero_width( + start_pos, + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), + )); + } + u32::from_str_radix(escape, 16) } - - let code_point = u32::from_str_radix(escape, 16).map_err(|_| { + .map_err(|_| { Spanning::zero_width( start_pos, - LexerError::UnknownEscapeSequence(format!("\\u{escape}")), + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), ) })?; @@ -339,10 +495,10 @@ impl<'a> Lexer<'a> { .ok_or_else(|| { Spanning::zero_width( start_pos, - LexerError::UnknownEscapeSequence("\\u".to_owned() + escape), + LexerError::UnknownEscapeSequence(format!(r"\u{escape}")), ) }) - .map(|_| ()) + .map(drop) } fn scan_number(&mut self) -> LexerResult<'a> { @@ -480,7 +636,15 @@ impl<'a> Iterator for Lexer<'a> { Some('@') => Ok(self.emit_single_char(Token::At)), Some('|') => Ok(self.emit_single_char(Token::Pipe)), Some('.') => self.scan_ellipsis(), - Some('"') => self.scan_string(), + Some('"') => { + if self.iterator.peek_nth(1).map(|&(_, ch)| ch) == Some('"') + && self.iterator.peek_nth(2).map(|&(_, ch)| ch) == Some('"') + { + self.scan_block_string() + } else { + self.scan_string() + } + } Some(ch) => { if is_number_start(ch) { self.scan_number() @@ -501,10 +665,6 @@ impl<'a> Iterator for Lexer<'a> { } } -fn is_source_char(c: char) -> bool { - c == '\t' || c == '\n' || c == '\r' || c >= ' ' -} - fn is_name_start(c: char) -> bool { c == '_' || 
c.is_ascii_alphabetic() } @@ -516,3 +676,936 @@ fn is_name_cont(c: char) -> bool { fn is_number_start(c: char) -> bool { c == '-' || c.is_ascii_digit() } + +#[cfg(test)] +mod test { + use crate::parser::{ + Lexer, LexerError, ScalarToken, SourcePosition, Spanning, + StringLiteral::{Block, Quoted}, + Token, + }; + + #[track_caller] + fn tokenize_to_vec(s: &str) -> Vec>> { + let mut tokens = Vec::new(); + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + Some(Ok(t)) => { + let at_eof = t.item == Token::EndOfFile; + tokens.push(t); + if at_eof { + break; + } + } + Some(Err(e)) => panic!("error in input stream: {e} for {s:#?}"), + None => panic!("EOF before `Token::EndOfFile` in {s:#?}"), + } + } + + tokens + } + + #[track_caller] + fn tokenize_single(s: &str) -> Spanning> { + let mut tokens = tokenize_to_vec(s); + + assert_eq!(tokens.len(), 2); + assert_eq!(tokens[1].item, Token::EndOfFile); + + tokens.remove(0) + } + + #[track_caller] + fn tokenize_error(s: &str) -> Spanning { + let mut lexer = Lexer::new(s); + + loop { + match lexer.next() { + Some(Ok(t)) => { + if t.item == Token::EndOfFile { + panic!("lexer did not return error for {s:#?}"); + } + } + Some(Err(e)) => { + return e; + } + None => panic!("lexer did not return error for {s:#?}"), + } + } + } + + #[test] + fn empty_source() { + assert_eq!( + tokenize_to_vec(""), + vec![Spanning::zero_width( + &SourcePosition::new_origin(), + Token::EndOfFile, + )] + ); + } + + #[test] + fn disallow_control_codes() { + assert_eq!( + Lexer::new("\u{0007}").next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new_origin(), + LexerError::UnknownCharacter('\u{0007}'), + ))) + ); + } + + #[test] + fn skip_whitespace() { + assert_eq!( + tokenize_to_vec( + r#" + + foo + + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(14, 2, 12), + &SourcePosition::new(17, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), + ] + ); + } + + #[test] 
+ fn skip_comments() { + assert_eq!( + tokenize_to_vec( + r#" + #comment + foo#comment + "# + ), + vec![ + Spanning::start_end( + &SourcePosition::new(34, 2, 12), + &SourcePosition::new(37, 2, 15), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), + ] + ); + } + + #[test] + fn skip_commas() { + assert_eq!( + tokenize_to_vec(r#",,,foo,,,"#), + vec![ + Spanning::start_end( + &SourcePosition::new(3, 0, 3), + &SourcePosition::new(6, 0, 6), + Token::Name("foo"), + ), + Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), + ] + ); + } + + #[test] + fn error_positions() { + assert_eq!( + Lexer::new( + r#" + + ? + + "#, + ) + .next(), + Some(Err(Spanning::zero_width( + &SourcePosition::new(14, 2, 12), + LexerError::UnknownCharacter('?'), + ))), + ); + } + + #[test] + fn strings() { + assert_eq!( + tokenize_single(r#""simple""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(8, 0, 8), + Token::Scalar(ScalarToken::String(Quoted(r#""simple""#))), + ), + ); + + assert_eq!( + tokenize_single(r#"" white space ""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#"" white space ""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""quote \"""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(10, 0, 10), + Token::Scalar(ScalarToken::String(Quoted(r#""quote \"""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""escaped \n\r\b\t\f""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(20, 0, 20), + Token::Scalar(ScalarToken::String(Quoted(r#""escaped \n\r\b\t\f""#))), + ), + ); + + assert_eq!( + tokenize_single(r#""slashes \\ \/""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(15, 0, 15), + Token::Scalar(ScalarToken::String(Quoted(r#""slashes \\ \/""#))), + ), + ); + + assert_eq!( + 
tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(34, 0, 34), + Token::Scalar(ScalarToken::String(Quoted( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + ))), + ), + ); + + assert_eq!( + tokenize_single(r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(58, 0, 58), + Token::Scalar(ScalarToken::String(Quoted( + r#""variable-width unicode \u{1234}\u{5678}\u{90AB}\u{1F4A9}""#, + ))), + ), + ); + + assert_eq!( + tokenize_single("\"contains unescaped \u{0007} control char\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(35, 0, 35), + Token::Scalar(ScalarToken::String(Quoted( + "\"contains unescaped \u{0007} control char\"", + ))), + ), + ); + + assert_eq!( + tokenize_single("\"null-byte is not \u{0000} end of file\""), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Quoted( + "\"null-byte is not \u{0000} end of file\"", + ))), + ), + ); + } + + #[test] + fn string_errors() { + assert_eq!( + tokenize_error(r#"""#), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(13, 0, 13), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\nline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error("\"multi\rline\""), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""bad \z esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\z".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \x 
esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\x".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u0XX1 esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u0XX1".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXX esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uFXXX esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uFXXX".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \uXXXF esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\uXXXF".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{110000} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{110000}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{DEAD} esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{DEAD}".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""bad \u{DEA esc""#), + Spanning::zero_width( + &SourcePosition::new(6, 0, 6), + LexerError::UnknownEscapeSequence(r"\u{DEA".into()), + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated in string \""#), + Spanning::zero_width( + &SourcePosition::new(26, 0, 26), + LexerError::UnterminatedString, + ), + ); + + assert_eq!( + tokenize_error(r#""unterminated \"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedString, + ), + ); + + // Found by fuzzing. 
+ assert_eq!( + tokenize_error(r#""\uɠ^A"#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + LexerError::UnterminatedString, + ), + ); + } + + #[test] + fn block_strings() { + assert_eq!( + tokenize_single(r#""""""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(6, 0, 6), + Token::Scalar(ScalarToken::String(Block(r#""""""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""simple""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(12, 0, 12), + Token::Scalar(ScalarToken::String(Block(r#""""simple""""#))), + ), + ); + assert_eq!( + tokenize_single(r#"""" white space """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(19, 0, 19), + Token::Scalar(ScalarToken::String(Block(r#"""" white space """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains " quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""contains " quote""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(32, 0, 32), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \"" double quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \"" double quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""contains \\""" triple quote""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(33, 0, 33), + Token::Scalar(ScalarToken::String(Block( + r#""""contains \\""" triple quote""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""\"""quote" """"#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + 
&SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""\"""quote" """"#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\nline""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(17, 0, 17), + Token::Scalar(ScalarToken::String(Block(r#""""multi\nline""""#))), + ), + ); + assert_eq!( + tokenize_single(r#""""multi\rline\r\nnormalized""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(31, 0, 31), + Token::Scalar(ScalarToken::String(Block( + r#""""multi\rline\r\nnormalized""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(38, 0, 38), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""# + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""unescaped unicode outside BMP \u{1f600}""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(45, 0, 45), + Token::Scalar(ScalarToken::String(Block( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + ))), + ), + ); + assert_eq!( + tokenize_single(r#""""slashes \\\\ \\/""""#), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(22, 0, 22), + Token::Scalar(ScalarToken::String(Block(r#""""slashes \\\\ \\/""""#))), + ), + ); + assert_eq!( + tokenize_single( + r#"""" + + spans + multiple + lines + + """"#, + ), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(76, 6, 11), + Token::Scalar(ScalarToken::String(Block( + r#"""" + + spans + multiple + lines + + """"#, + ))), + ), + ); + } + + #[test] + fn block_string_errors() { + assert_eq!( + tokenize_error(r#""""""#), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#"""""""#), + Spanning::zero_width( + &SourcePosition::new(5, 0, 5), + 
LexerError::UnterminatedBlockString, + ), + ); + assert_eq!( + tokenize_error(r#""""no end quote"#), + Spanning::zero_width( + &SourcePosition::new(15, 0, 15), + LexerError::UnterminatedBlockString, + ), + ); + } + + #[test] + fn numbers() { + fn assert_float_token_eq( + source: &str, + start: SourcePosition, + end: SourcePosition, + expected: &str, + ) { + let parsed = tokenize_single(source); + assert_eq!(parsed.span.start, start); + assert_eq!(parsed.span.end, end); + + match parsed.item { + Token::Scalar(ScalarToken::Float(actual)) => assert_eq!(actual, expected), + _ => assert!(false), + } + } + + assert_eq!( + tokenize_single("4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("4")) + ) + ); + + assert_float_token_eq( + "4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "4.123", + ); + + assert_float_token_eq( + "4.0", + SourcePosition::new(0, 0, 0), + SourcePosition::new(3, 0, 3), + "4.0", + ); + + assert_eq!( + tokenize_single("-4"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(2, 0, 2), + Token::Scalar(ScalarToken::Int("-4")), + ) + ); + + assert_eq!( + tokenize_single("9"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("9")), + ) + ); + + assert_eq!( + tokenize_single("0"), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(1, 0, 1), + Token::Scalar(ScalarToken::Int("0")), + ) + ); + + assert_float_token_eq( + "-4.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "-4.123", + ); + + assert_float_token_eq( + "0.123", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "0.123", + ); + + assert_float_token_eq( + "123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(5, 0, 5), + "123e4", + ); + + assert_float_token_eq( + "123E4", + SourcePosition::new(0, 0, 0), + 
SourcePosition::new(5, 0, 5), + "123E4", + ); + + assert_float_token_eq( + "123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e-4", + ); + + assert_float_token_eq( + "123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(6, 0, 6), + "123e+4", + ); + + assert_float_token_eq( + "-1.123e4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123e4", + ); + + assert_float_token_eq( + "-1.123E4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(8, 0, 8), + "-1.123E4", + ); + + assert_float_token_eq( + "-1.123e-4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e-4", + ); + + assert_float_token_eq( + "-1.123e+4", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e+4", + ); + + assert_float_token_eq( + "-1.123e45", + SourcePosition::new(0, 0, 0), + SourcePosition::new(9, 0, 9), + "-1.123e45", + ); + } + + #[test] + fn numbers_errors() { + assert_eq!( + tokenize_error("00"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('0'), + ) + ); + + assert_eq!( + tokenize_error("+1"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnknownCharacter('+'), + ) + ); + + assert_eq!( + tokenize_error("1."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error(".123"), + Spanning::zero_width( + &SourcePosition::new(0, 0, 0), + LexerError::UnexpectedCharacter('.'), + ) + ); + + assert_eq!( + tokenize_error("1.A"), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("-A"), + Spanning::zero_width( + &SourcePosition::new(1, 0, 1), + LexerError::UnexpectedCharacter('A'), + ) + ); + + assert_eq!( + tokenize_error("1.0e"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + 
tokenize_error("1.0eA"), + Spanning::zero_width( + &SourcePosition::new(4, 0, 4), + LexerError::UnexpectedCharacter('A'), + ) + ); + } + + #[test] + fn punctuation() { + assert_eq!( + tokenize_single("!"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark), + ); + + assert_eq!( + tokenize_single("$"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar), + ); + + assert_eq!( + tokenize_single("("), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen), + ); + + assert_eq!( + tokenize_single(")"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose), + ); + + assert_eq!( + tokenize_single("..."), + Spanning::start_end( + &SourcePosition::new(0, 0, 0), + &SourcePosition::new(3, 0, 3), + Token::Ellipsis, + ) + ); + + assert_eq!( + tokenize_single(":"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon), + ); + + assert_eq!( + tokenize_single("="), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals), + ); + + assert_eq!( + tokenize_single("@"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At), + ); + + assert_eq!( + tokenize_single("["), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen), + ); + + assert_eq!( + tokenize_single("]"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose), + ); + + assert_eq!( + tokenize_single("{"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen), + ); + + assert_eq!( + tokenize_single("}"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose), + ); + + assert_eq!( + tokenize_single("|"), + Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe), + ); + } + + #[test] + fn punctuation_error() { + assert_eq!( + tokenize_error(".."), + Spanning::zero_width( + &SourcePosition::new(2, 0, 2), + LexerError::UnexpectedEndOfFile, + ) + ); + + assert_eq!( + tokenize_error("?"), + 
/// Checks that `Display`ing a token prints its source representation: names,
/// numbers and punctuators as-is, and string literals exactly as they are
/// stored (raw, still quoted and escaped).
#[test]
fn display() {
    // A well-formed (terminated by three quotes) multi-line block literal, shared by the input
    // and the expectation so both are guaranteed to be byte-identical.
    // NOTE(review): the exact line breaks and indentation inside this literal are arbitrary;
    //               the assertion only relies on them being echoed verbatim.
    let multiline = r#""""block
        multiline
        string""""#;

    for (input, expected) in [
        (Token::Name("identifier"), "identifier"),
        (Token::Scalar(ScalarToken::Int("123")), "123"),
        (Token::Scalar(ScalarToken::Float("4.5")), "4.5"),
        (
            Token::Scalar(ScalarToken::String(Quoted(r#""some string""#))),
            r#""some string""#,
        ),
        (
            Token::Scalar(ScalarToken::String(Quoted(
                r#""string with \\ escape and \" quote""#,
            ))),
            r#""string with \\ escape and \" quote""#,
        ),
        (
            Token::Scalar(ScalarToken::String(Block(
                r#""""string with \\ escape and \" quote""""#,
            ))),
            r#""""string with \\ escape and \" quote""""#,
        ),
        (
            Token::Scalar(ScalarToken::String(Block(
                r#""""block string with \\ escape and \" quote""""#,
            ))),
            r#""""block string with \\ escape and \" quote""""#,
        ),
        (Token::Scalar(ScalarToken::String(Block(multiline))), multiline),
        (Token::ExclamationMark, "!"),
        (Token::Dollar, "$"),
        (Token::ParenOpen, "("),
        (Token::ParenClose, ")"),
        (Token::BracketOpen, "["),
        (Token::BracketClose, "]"),
        (Token::CurlyOpen, "{"),
        (Token::CurlyClose, "}"),
        (Token::Ellipsis, "..."),
        (Token::Colon, ":"),
        (Token::Equals, "="),
        (Token::At, "@"),
        (Token::Pipe, "|"),
    ] {
        assert_eq!(input.to_string(), expected);
    }
}
lexer::{Lexer, LexerError, ScalarToken, Token}, + lexer::{Lexer, LexerError, ScalarToken, StringLiteral, Token}, parser::{OptionParseResult, ParseError, ParseResult, Parser, UnlocatedParseResult}, utils::{SourcePosition, Span, Spanning}, }; diff --git a/juniper/src/parser/parser.rs b/juniper/src/parser/parser.rs index 2459df205..c88bc6733 100644 --- a/juniper/src/parser/parser.rs +++ b/juniper/src/parser/parser.rs @@ -1,9 +1,9 @@ -use std::fmt; +use std::{borrow::Cow, fmt, iter}; use compact_str::{CompactString, format_compact}; use derive_more::with_trait::{Display, Error}; -use crate::parser::{Lexer, LexerError, Spanning, Token}; +use crate::parser::{Lexer, LexerError, ScalarToken, Spanning, StringLiteral, Token}; /// Error while parsing a GraphQL query #[derive(Clone, Debug, Display, Eq, Error, PartialEq)] @@ -199,3 +199,194 @@ impl<'a> Parser<'a> { } } } + +impl<'a> StringLiteral<'a> { + /// Parses this [`StringLiteral`] returning an unescaped and unquoted string value. + /// + /// # Errors + /// + /// If this [`StringLiteral`] is invalid. 
+ pub fn parse(self) -> Result, ParseError> { + match self { + Self::Quoted(lit) => { + if !lit.starts_with('"') { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with('"') { + return Err(ParseError::LexerError(LexerError::UnterminatedString)); + } + + let unquoted = &lit[1..lit.len() - 1]; + if !unquoted.contains('\\') { + return Ok(unquoted.into()); + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut char_iter = unquoted.chars(); + while let Some(ch) = char_iter.next() { + match ch { + // StringCharacter :: + // SourceCharacter but not " or \ or LineTerminator + // \uEscapedUnicode + // \EscapedCharacter + '\\' => match char_iter.next() { + // EscapedCharacter :: one of + // " \ / b f n r t + Some('"') => unescaped.push('"'), + Some('\\') => unescaped.push('\\'), + Some('/') => unescaped.push('/'), + Some('b') => unescaped.push('\u{0008}'), + Some('f') => unescaped.push('\u{000C}'), + Some('n') => unescaped.push('\n'), + Some('r') => unescaped.push('\r'), + Some('t') => unescaped.push('\t'), + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + Some('u') => { + unescaped.push(parse_unicode_codepoint(&mut char_iter)?); + } + Some(s) => { + return Err(ParseError::LexerError( + LexerError::UnknownEscapeSequence(format!(r"\{s}")), + )); + } + None => { + return Err(ParseError::LexerError(LexerError::UnterminatedString)); + } + }, + ch => { + unescaped.push(ch); + } + } + } + Ok(unescaped.into()) + } + Self::Block(lit) => { + if !lit.starts_with(r#"""""#) { + return Err(ParseError::unexpected_token(Token::Scalar( + ScalarToken::String(self), + ))); + } + if !lit.ends_with(r#"""""#) { + return Err(ParseError::LexerError(LexerError::UnterminatedBlockString)); + } + + let unquoted = &lit[3..lit.len() - 3]; + + let (mut indent, mut total_lines) = (usize::MAX, 0); + let (mut first_text_line, mut last_text_line) = (None, 0); + for (n, line) in 
unquoted.lines().enumerate() { + total_lines += 1; + + let trimmed = line.trim_start(); + if trimmed.is_empty() { + continue; + } + + _ = first_text_line.get_or_insert(n); + last_text_line = n; + + if n != 0 { + indent = indent.min(line.len() - trimmed.len()); + } + } + + let Some(first_text_line) = first_text_line else { + return Ok("".into()); // no text, only whitespaces + }; + if (indent == 0 || total_lines == 1) && !unquoted.contains(r#"\""""#) { + return Ok(unquoted.into()); // nothing to dedent or unescape + } + + let mut unescaped = String::with_capacity(unquoted.len()); + let mut lines = unquoted + .lines() + .enumerate() + .skip(first_text_line) + .take(last_text_line - first_text_line + 1) + .map(|(n, line)| { + if n != 0 && line.len() >= indent { + &line[indent..] + } else { + line + } + }) + .map(|x| x.replace(r#"\""""#, r#"""""#)); + if let Some(line) = lines.next() { + unescaped.push_str(&line); + for line in lines { + unescaped.push('\n'); + unescaped.push_str(&line); + } + } + Ok(unescaped.into()) + } + } + } +} + +/// Parses an [escaped unicode] character. +/// +/// [escaped unicode]: https://spec.graphql.org/September2025#EscapedUnicode +// TODO: Add tests +// TODO: Check surrogate pairs? 
+fn parse_unicode_codepoint(char_iter: &mut impl Iterator) -> Result { + // EscapedUnicode :: + // {HexDigit[list]} + // HexDigit HexDigit HexDigit HexDigit + + let Some(mut curr_ch) = char_iter.next() else { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + r"\u".into(), + ))); + }; + let mut escaped_code_point = String::with_capacity(6); // `\u{10FFFF}` is max code point + + let is_variable_width = curr_ch == '{'; + if is_variable_width { + loop { + curr_ch = char_iter.next().ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{{{escaped_code_point}" + ))) + })?; + if curr_ch == '}' { + break; + } else if !curr_ch.is_alphanumeric() { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + format!(r"\u{{{escaped_code_point}"), + ))); + } + escaped_code_point.push(curr_ch); + } + } else { + let mut char_iter = iter::once(curr_ch).chain(char_iter); + for _ in 0..4 { + curr_ch = char_iter.next().ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( + r"\u{escaped_code_point}" + ))) + })?; + if !curr_ch.is_alphanumeric() { + return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( + format!(r"\u{escaped_code_point}"), + ))); + } + escaped_code_point.push(curr_ch); + } + } + + u32::from_str_radix(&escaped_code_point, 16) + .ok() + .and_then(char::from_u32) + .ok_or_else(|| { + ParseError::LexerError(LexerError::UnknownEscapeSequence(if is_variable_width { + format!(r"\u{{{escaped_code_point}}}") + } else { + format!(r"\u{escaped_code_point}") + })) + }) +} diff --git a/juniper/src/parser/tests/lexer.rs b/juniper/src/parser/tests/lexer.rs deleted file mode 100644 index 0eb62ae73..000000000 --- a/juniper/src/parser/tests/lexer.rs +++ /dev/null @@ -1,691 +0,0 @@ -use crate::parser::{Lexer, LexerError, ScalarToken, SourcePosition, Spanning, Token}; - -fn tokenize_to_vec(s: &str) -> Vec>> { - let mut tokens = Vec::new(); - let mut lexer = Lexer::new(s); - 
- loop { - match lexer.next() { - Some(Ok(t)) => { - let at_eof = t.item == Token::EndOfFile; - tokens.push(t); - if at_eof { - break; - } - } - Some(Err(e)) => panic!("Error in input stream: {e:#?} for {s:#?}"), - None => panic!("EOF before EndOfFile token in {s:#?}"), - } - } - - tokens -} - -fn tokenize_single(s: &str) -> Spanning> { - let mut tokens = tokenize_to_vec(s); - - assert_eq!(tokens.len(), 2); - assert_eq!(tokens[1].item, Token::EndOfFile); - - tokens.remove(0) -} - -fn tokenize_error(s: &str) -> Spanning { - let mut lexer = Lexer::new(s); - - loop { - match lexer.next() { - Some(Ok(t)) => { - if t.item == Token::EndOfFile { - panic!("Tokenizer did not return error for {s:#?}"); - } - } - Some(Err(e)) => { - return e; - } - None => panic!("Tokenizer did not return error for {s:#?}"), - } - } -} - -#[test] -fn empty_source() { - assert_eq!( - tokenize_to_vec(""), - vec![Spanning::zero_width( - &SourcePosition::new_origin(), - Token::EndOfFile, - )] - ); -} - -#[test] -fn disallow_control_codes() { - assert_eq!( - Lexer::new("\u{0007}").next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new_origin(), - LexerError::UnknownCharacter('\u{0007}') - ))) - ); -} - -#[test] -fn skip_whitespace() { - assert_eq!( - tokenize_to_vec( - r#" - - foo - - "# - ), - vec![ - Spanning::start_end( - &SourcePosition::new(14, 2, 12), - &SourcePosition::new(17, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(31, 4, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_comments() { - assert_eq!( - tokenize_to_vec( - r#" - #comment - foo#comment - "# - ), - vec![ - Spanning::start_end( - &SourcePosition::new(34, 2, 12), - &SourcePosition::new(37, 2, 15), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(58, 3, 12), Token::EndOfFile), - ] - ); -} - -#[test] -fn skip_commas() { - assert_eq!( - tokenize_to_vec(r#",,,foo,,,"#), - vec![ - Spanning::start_end( - &SourcePosition::new(3, 0, 3), - &SourcePosition::new(6, 0, 
6), - Token::Name("foo"), - ), - Spanning::zero_width(&SourcePosition::new(9, 0, 9), Token::EndOfFile), - ] - ); -} - -#[test] -fn error_positions() { - assert_eq!( - Lexer::new( - r#" - - ? - - "# - ) - .next(), - Some(Err(Spanning::zero_width( - &SourcePosition::new(14, 2, 12), - LexerError::UnknownCharacter('?') - ))) - ); -} - -#[test] -fn strings() { - assert_eq!( - tokenize_single(r#""simple""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(8, 0, 8), - Token::Scalar(ScalarToken::String("simple")) - ) - ); - - assert_eq!( - tokenize_single(r#"" white space ""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(" white space ")) - ) - ); - - assert_eq!( - tokenize_single(r#""quote \"""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(10, 0, 10), - Token::Scalar(ScalarToken::String(r#"quote \""#)) - ) - ); - - assert_eq!( - tokenize_single(r#""escaped \n\r\b\t\f""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(20, 0, 20), - Token::Scalar(ScalarToken::String(r"escaped \n\r\b\t\f")) - ) - ); - - assert_eq!( - tokenize_single(r#""slashes \\ \/""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(15, 0, 15), - Token::Scalar(ScalarToken::String(r"slashes \\ \/")) - ) - ); - - assert_eq!( - tokenize_single(r#""unicode \u1234\u5678\u90AB\uCDEF""#), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(34, 0, 34), - Token::Scalar(ScalarToken::String(r"unicode \u1234\u5678\u90AB\uCDEF")), - ) - ); -} - -#[test] -fn string_errors() { - assert_eq!( - tokenize_error("\""), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error("\"no end quote"), - Spanning::zero_width( - &SourcePosition::new(13, 0, 13), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - 
tokenize_error("\"contains unescaped \u{0007} control char\""), - Spanning::zero_width( - &SourcePosition::new(20, 0, 20), - LexerError::UnknownCharacterInString('\u{0007}'), - ) - ); - - assert_eq!( - tokenize_error("\"null-byte is not \u{0000} end of file\""), - Spanning::zero_width( - &SourcePosition::new(18, 0, 18), - LexerError::UnknownCharacterInString('\u{0000}'), - ) - ); - - assert_eq!( - tokenize_error("\"multi\nline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error("\"multi\rline\""), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnterminatedString, - ) - ); - - assert_eq!( - tokenize_error(r#""bad \z esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\z".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \x esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\x".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \u1 esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u1".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \u0XX1 esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\u0XX1".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXX".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uFXXX esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uFXXX".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""bad \uXXXF esc""#), - Spanning::zero_width( - &SourcePosition::new(6, 0, 6), - LexerError::UnknownEscapeSequence("\\uXXXF".into()), - ) - ); - - assert_eq!( - tokenize_error(r#""unterminated in string \""#), - 
Spanning::zero_width( - &SourcePosition::new(26, 0, 26), - LexerError::UnterminatedString - ) - ); - - assert_eq!( - tokenize_error(r#""unterminated \"#), - Spanning::zero_width( - &SourcePosition::new(15, 0, 15), - LexerError::UnterminatedString - ) - ); - - // Found by fuzzing. - assert_eq!( - tokenize_error(r#""\uɠ^A"#), - Spanning::zero_width( - &SourcePosition::new(5, 0, 5), - LexerError::UnterminatedString - ) - ); -} - -#[test] -fn numbers() { - fn assert_float_token_eq( - source: &str, - start: SourcePosition, - end: SourcePosition, - expected: &str, - ) { - let parsed = tokenize_single(source); - assert_eq!(parsed.span.start, start); - assert_eq!(parsed.span.end, end); - - match parsed.item { - Token::Scalar(ScalarToken::Float(actual)) => { - assert!( - expected == actual, - "[expected] {expected} != {actual} [actual]", - ); - } - _ => assert!(false), - } - } - - assert_eq!( - tokenize_single("4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("4")) - ) - ); - - assert_float_token_eq( - "4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "4.123", - ); - - assert_float_token_eq( - "4.0", - SourcePosition::new(0, 0, 0), - SourcePosition::new(3, 0, 3), - "4.0", - ); - - assert_eq!( - tokenize_single("-4"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(2, 0, 2), - Token::Scalar(ScalarToken::Int("-4")) - ) - ); - - assert_eq!( - tokenize_single("9"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("9")) - ) - ); - - assert_eq!( - tokenize_single("0"), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(1, 0, 1), - Token::Scalar(ScalarToken::Int("0")) - ) - ); - - assert_float_token_eq( - "-4.123", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "-4.123", - ); - - assert_float_token_eq( - "0.123", - 
SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "0.123", - ); - - assert_float_token_eq( - "123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123e4", - ); - - assert_float_token_eq( - "123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(5, 0, 5), - "123E4", - ); - - assert_float_token_eq( - "123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e-4", - ); - - assert_float_token_eq( - "123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(6, 0, 6), - "123e+4", - ); - - assert_float_token_eq( - "-1.123e4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123e4", - ); - - assert_float_token_eq( - "-1.123E4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(8, 0, 8), - "-1.123E4", - ); - - assert_float_token_eq( - "-1.123e-4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e-4", - ); - - assert_float_token_eq( - "-1.123e+4", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e+4", - ); - - assert_float_token_eq( - "-1.123e45", - SourcePosition::new(0, 0, 0), - SourcePosition::new(9, 0, 9), - "-1.123e45", - ); -} - -#[test] -fn numbers_errors() { - assert_eq!( - tokenize_error("00"), - Spanning::zero_width( - &SourcePosition::new(1, 0, 1), - LexerError::UnexpectedCharacter('0') - ) - ); - - assert_eq!( - tokenize_error("+1"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('+') - ) - ); - - assert_eq!( - tokenize_error("1."), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error(".123"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnexpectedCharacter('.') - ) - ); - - assert_eq!( - tokenize_error("1.A"), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedCharacter('A') - ) - ); - - assert_eq!( - tokenize_error("-A"), - Spanning::zero_width( 
- &SourcePosition::new(1, 0, 1), - LexerError::UnexpectedCharacter('A') - ) - ); - - assert_eq!( - tokenize_error("1.0e"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("1.0eA"), - Spanning::zero_width( - &SourcePosition::new(4, 0, 4), - LexerError::UnexpectedCharacter('A') - ) - ); -} - -#[test] -fn punctuation() { - assert_eq!( - tokenize_single("!"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ExclamationMark) - ); - - assert_eq!( - tokenize_single("$"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Dollar) - ); - - assert_eq!( - tokenize_single("("), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenOpen) - ); - - assert_eq!( - tokenize_single(")"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::ParenClose) - ); - - assert_eq!( - tokenize_single("..."), - Spanning::start_end( - &SourcePosition::new(0, 0, 0), - &SourcePosition::new(3, 0, 3), - Token::Ellipsis - ) - ); - - assert_eq!( - tokenize_single(":"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Colon) - ); - - assert_eq!( - tokenize_single("="), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Equals) - ); - - assert_eq!( - tokenize_single("@"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::At) - ); - - assert_eq!( - tokenize_single("["), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketOpen) - ); - - assert_eq!( - tokenize_single("]"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::BracketClose) - ); - - assert_eq!( - tokenize_single("{"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyOpen) - ); - - assert_eq!( - tokenize_single("}"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::CurlyClose) - ); - - assert_eq!( - tokenize_single("|"), - Spanning::single_width(&SourcePosition::new(0, 0, 0), Token::Pipe) - ); -} - -#[test] -fn 
punctuation_error() { - assert_eq!( - tokenize_error(".."), - Spanning::zero_width( - &SourcePosition::new(2, 0, 2), - LexerError::UnexpectedEndOfFile - ) - ); - - assert_eq!( - tokenize_error("?"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('?') - ) - ); - - assert_eq!( - tokenize_error("\u{203b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{203b}') - ) - ); - - assert_eq!( - tokenize_error("\u{200b}"), - Spanning::zero_width( - &SourcePosition::new(0, 0, 0), - LexerError::UnknownCharacter('\u{200b}') - ) - ); -} - -#[test] -fn display() { - for (input, expected) in [ - (Token::Name("identifier"), "identifier"), - (Token::Scalar(ScalarToken::Int("123")), "123"), - (Token::Scalar(ScalarToken::Float("4.5")), "4.5"), - ( - Token::Scalar(ScalarToken::String("some string")), - "\"some string\"", - ), - ( - Token::Scalar(ScalarToken::String("string with \\ escape and \" quote")), - "\"string with \\\\ escape and \\\" quote\"", - ), - (Token::ExclamationMark, "!"), - (Token::Dollar, "$"), - (Token::ParenOpen, "("), - (Token::ParenClose, ")"), - (Token::BracketOpen, "["), - (Token::BracketClose, "]"), - (Token::CurlyOpen, "{"), - (Token::CurlyClose, "}"), - (Token::Ellipsis, "..."), - (Token::Colon, ":"), - (Token::Equals, "="), - (Token::At, "@"), - (Token::Pipe, "|"), - ] { - assert_eq!(input.to_string(), expected); - } -} diff --git a/juniper/src/parser/tests/mod.rs b/juniper/src/parser/tests/mod.rs index 18df2c92d..ab77d55c7 100644 --- a/juniper/src/parser/tests/mod.rs +++ b/juniper/src/parser/tests/mod.rs @@ -1,3 +1,2 @@ mod document; -mod lexer; mod value; diff --git a/juniper/src/tests/fixtures/starwars/schema.rs b/juniper/src/tests/fixtures/starwars/schema.rs index 10d1f87a6..43ff46d9e 100644 --- a/juniper/src/tests/fixtures/starwars/schema.rs +++ b/juniper/src/tests/fixtures/starwars/schema.rs @@ -1,4 +1,5 @@ -#![expect(missing_docs, reason = "GraphQL schema testing")] 
+#![cfg_attr(test, expect(dead_code, reason = "GraphQL schema testing"))] +#![cfg_attr(not(test), expect(missing_docs, reason = "GraphQL schema testing"))] use std::{collections::HashMap, pin::Pin}; diff --git a/juniper/src/types/scalars.rs b/juniper/src/types/scalars.rs index 8467e8c0b..1934d41ca 100644 --- a/juniper/src/types/scalars.rs +++ b/juniper/src/types/scalars.rs @@ -1,4 +1,4 @@ -use std::{char, marker::PhantomData, rc::Rc, thread::JoinHandle}; +use std::{marker::PhantomData, rc::Rc, thread::JoinHandle}; use derive_more::with_trait::{Deref, Display, From, Into}; use serde::{Deserialize, Serialize}; @@ -9,7 +9,7 @@ use crate::{ executor::{ExecutionResult, Executor, Registry}, graphql_scalar, macros::reflect, - parser::{LexerError, ParseError, ScalarToken, Token}, + parser::{ParseError, ScalarToken, Token}, schema::meta::MetaType, types::{ async_await::GraphQLValueAsync, @@ -81,109 +81,16 @@ mod impl_string_scalar { } pub(super) fn parse_token(value: ScalarToken<'_>) -> ParseScalarResult { - if let ScalarToken::String(value) = value { - let mut ret = String::with_capacity(value.len()); - let mut char_iter = value.chars(); - while let Some(ch) = char_iter.next() { - match ch { - '\\' => match char_iter.next() { - Some('"') => { - ret.push('"'); - } - Some('/') => { - ret.push('/'); - } - Some('n') => { - ret.push('\n'); - } - Some('r') => { - ret.push('\r'); - } - Some('t') => { - ret.push('\t'); - } - Some('\\') => { - ret.push('\\'); - } - Some('f') => { - ret.push('\u{000c}'); - } - Some('b') => { - ret.push('\u{0008}'); - } - Some('u') => { - ret.push(parse_unicode_codepoint(&mut char_iter)?); - } - Some(s) => { - return Err(ParseError::LexerError(LexerError::UnknownEscapeSequence( - format!("\\{s}"), - ))); - } - None => return Err(ParseError::LexerError(LexerError::UnterminatedString)), - }, - ch => { - ret.push(ch); - } - } - } - Ok(ret.into()) + if let ScalarToken::String(lit) = value { + let parsed = lit.parse()?; + // TODO: Allow cheaper from 
`Cow<'_, str>` conversions for `ScalarValue`. + Ok(parsed.into_owned().into()) } else { Err(ParseError::unexpected_token(Token::Scalar(value))) } } } -fn parse_unicode_codepoint(char_iter: &mut I) -> Result -where - I: Iterator, -{ - let escaped_code_point = char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(String::from("\\u"))) - }) - .and_then(|c1| { - char_iter - .next() - .map(|c2| format!("{c1}{c2}")) - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{c1}"))) - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - }) - .and_then(|mut s| { - char_iter - .next() - .ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!("\\u{s}"))) - }) - .map(|c2| { - s.push(c2); - s - }) - })?; - let code_point = u32::from_str_radix(&escaped_code_point, 16).map_err(|_| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - })?; - char::from_u32(code_point).ok_or_else(|| { - ParseError::LexerError(LexerError::UnknownEscapeSequence(format!( - "\\u{escaped_code_point}", - ))) - }) -} - #[graphql_scalar] #[graphql( name = "String", @@ -537,7 +444,7 @@ impl Default for EmptySubscription { #[cfg(test)] mod tests { use crate::{ - parser::ScalarToken, + parser::{ScalarToken, StringLiteral}, value::{DefaultScalarValue, ParseScalarValue, ScalarValue as _}, }; @@ -571,24 +478,124 @@ mod tests { #[test] fn parse_strings() { - fn parse_string(s: &str, expected: &str) { - let s = - >::from_str(ScalarToken::String(s)); - assert!(s.is_ok(), "A parsing error occurred: {s:?}"); - let s: Option = s.unwrap().try_to().ok(); - assert!(s.is_some(), "No string returned"); + for (input, expected) in [ + (r#""simple""#, "simple"), + (r#"" white space ""#, " white space "), + (r#""quote \"""#, r#"quote 
""#), + (r#""escaped \n\r\b\t\f""#, "escaped \n\r\u{0008}\t\u{000c}"), + (r#""slashes \\ \/""#, r"slashes \ /"), + ( + r#""unicode \u1234\u5678\u90AB\uCDEF""#, + "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Quoted(input)), + ); + assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); + + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); assert_eq!(s.unwrap(), expected); } + } + + #[test] + fn parse_block_strings() { + for (input, expected) in [ + (r#""""""""#, ""), + (r#""""simple""""#, "simple"), + (r#"""" white space """"#, " white space "), + (r#""""contains " quote""""#, r#"contains " quote"#), + ( + r#""""contains \""" triple quote""""#, + r#"contains """ triple quote"#, + ), + ( + r#""""contains \"" double quote""""#, + r#"contains \"" double quote"#, + ), + ( + r#""""contains \\""" triple quote""""#, + r#"contains \""" triple quote"#, + ), + (r#""""\"""quote" """"#, r#""""quote" "#), + (r#""""multi\nline""""#, r"multi\nline"), + ( + r#""""multi\rline\r\nnormalized""""#, + r"multi\rline\r\nnormalized", + ), + ( + r#""""unescaped \\n\\r\\b\\t\\f\\u1234""""#, + r"unescaped \\n\\r\\b\\t\\f\\u1234", + ), + ( + r#""""unescaped unicode outside BMP \u{1f600}""""#, + r"unescaped unicode outside BMP \u{1f600}", + ), + (r#""""slashes \\\\ \\/""""#, r"slashes \\\\ \\/"), + ( + r#"""" + + spans + multiple + lines + + """"#, + "spans\n multiple\n lines", + ), + // removes uniform indentation + ( + r#"""" + Hello, + World! + + Yours, + GraphQL.""""#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // removes empty leading and trailing lines + ( + r#"""" + + Hello, + World! + + Yours, + GraphQL. + + """"#, + "Hello,\n World!\n\nYours,\n GraphQL.", + ), + // retains indentation from first line + ( + r#"""" Hello, + World! 
+ + Yours, + GraphQL.""""#, + " Hello,\n World!\n\nYours,\n GraphQL.", + ), + // does not alter trailing spaces + ( + r#"""" + Hello, + World! + + Yours, + GraphQL. """"#, + "Hello,\n World!\n\nYours,\n GraphQL. ", + ), + ] { + let res = >::from_str( + ScalarToken::String(StringLiteral::Block(input)), + ); + assert!(res.is_ok(), "parsing error occurred: {}", res.unwrap_err()); - parse_string("simple", "simple"); - parse_string(" white space ", " white space "); - parse_string(r#"quote \""#, "quote \""); - parse_string(r"escaped \n\r\b\t\f", "escaped \n\r\u{0008}\t\u{000c}"); - parse_string(r"slashes \\ \/", "slashes \\ /"); - parse_string( - r"unicode \u1234\u5678\u90AB\uCDEF", - "unicode \u{1234}\u{5678}\u{90ab}\u{cdef}", - ); + let s: Option = res.unwrap().try_to().ok(); + assert!(s.is_some(), "no string returned"); + assert_eq!(s.unwrap(), expected); + } } #[test]