diff --git a/src/Symfony/Component/JsonPath/JsonCrawler.php b/src/Symfony/Component/JsonPath/JsonCrawler.php index 35ad6a93a080c..0793a5c5d7b14 100644 --- a/src/Symfony/Component/JsonPath/JsonCrawler.php +++ b/src/Symfony/Component/JsonPath/JsonCrawler.php @@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array return []; } - if ('*' === $expr) { + if (str_contains($expr, ',') && (str_starts_with($trimmed = trim($expr), ',') || str_ends_with($trimmed, ','))) { + throw new JsonCrawlerException($expr, 'Expression cannot have leading or trailing commas'); + } + + if ('*' === $expr = JsonPathUtils::normalizeWhitespace($expr)) { return array_values($value); } @@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array return $result; } - // start, end and step - if (preg_match('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/', $expr, $matches)) { + if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $expr, $matches)) { if (!array_is_list($value)) { return []; } @@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array // filter expressions if (preg_match('/^\?(.*)$/', $expr, $matches)) { - $filterExpr = $matches[1]; - - if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) { + if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr = trim($matches[1]))) { $filterExpr = "($filterExpr)"; } if (!str_starts_with($filterExpr, '(')) { - throw new JsonCrawlerException($expr, 'Invalid filter expression'); + $filterExpr = "($filterExpr)"; } // remove outer filter parentheses @@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array // comma-separated values, e.g. 
`['key1', 'key2', 123]` or `[0, 1, 'key']` if (str_contains($expr, ',')) { - $parts = $this->parseCommaSeparatedValues($expr); + $parts = JsonPathUtils::parseCommaSeparatedValues($expr); $result = []; - $keysIndices = array_keys($value); - $isList = array_is_list($value); foreach ($parts as $part) { $part = trim($part); - if (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) { + if ('*' === $part) { + $result = array_merge($result, array_values($value)); + } elseif (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $part, $matches)) { + // slice notation + $sliceResult = $this->evaluateBracket($part, $value); + $result = array_merge($result, $sliceResult); + } elseif (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) { $key = JsonPathUtils::unescapeString($matches[2], $matches[1]); - if ($isList) { + if (array_is_list($value)) { + // for arrays, find ALL objects that contain this key foreach ($value as $item) { if (\is_array($item) && \array_key_exists($key, $item)) { $result[] = $item; - break; } } - - continue; // no results here - } - - if (\array_key_exists($key, $value)) { + } elseif (\array_key_exists($key, $value)) { // for objects, get the value for this key $result[] = $value[$key]; } } elseif (preg_match('/^-?\d+$/', $part)) { @@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array $index = \count($value) + $index; } - if ($isList && \array_key_exists($index, $value)) { + if (array_is_list($value) && \array_key_exists($index, $value)) { $result[] = $value[$index]; - continue; - } - - // numeric index on a hashmap - if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) { - $result[] = $value[$keysIndices[$index]]; + } else { + // numeric index on a hashmap + $keysIndices = array_keys($value); + if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) { + $result[] = $value[$keysIndices[$index]]; + } } } } @@ -310,7 +311,29 @@ private function evaluateFilter(string $expr, 
mixed $value): array private function evaluateFilterExpression(string $expr, mixed $context): bool { - $expr = trim($expr); + $expr = JsonPathUtils::normalizeWhitespace($expr); + + // remove outer parentheses if they wrap the entire expression + if (str_starts_with($expr, '(') && str_ends_with($expr, ')')) { + $depth = 0; + $isWrapped = true; + $i = -1; + while (null !== $char = $expr[++$i] ?? null) { + if ('(' === $char) { + ++$depth; + } elseif (')' === $char && 0 === --$depth && isset($expr[$i + 1])) { + $isWrapped = false; + break; + } + } + if ($isWrapped) { + $expr = trim(substr($expr, 1, -1)); + } + } + + if (str_starts_with($expr, '!')) { + return !$this->evaluateFilterExpression(trim(substr($expr, 1)), $context); + } if (str_contains($expr, '&&')) { $parts = array_map('trim', explode('&&', $expr)); @@ -353,8 +376,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool } // function calls - if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) { - $functionName = $matches[1]; + if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) { + $functionName = trim($matches[1]); if (!isset(self::RFC9535_FUNCTIONS[$functionName])) { throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName)); } @@ -369,8 +392,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool private function evaluateScalar(string $expr, mixed $context): mixed { - if (is_numeric($expr)) { - return str_contains($expr, '.') ? (float) $expr : (int) $expr; + $expr = JsonPathUtils::normalizeWhitespace($expr); + + if (JsonPathUtils::isJsonNumber($expr)) { + return str_contains($expr, '.') || str_contains(strtolower($expr), 'e') ? 
(float) $expr : (int) $expr; + } + + // only validate tokens that look like standalone numbers + if (preg_match('/^[\d+\-.eE]+$/', $expr) && preg_match('/\d/', $expr)) { + throw new JsonCrawlerException($expr, \sprintf('Invalid number format "%s"', $expr)); } if ('@' === $expr) { @@ -404,9 +434,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed } // function calls - if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) { - $functionName = $matches[1]; - if (!isset(self::RFC9535_FUNCTIONS[$functionName])) { + if (preg_match('/^(\w++)\((.*)\)$/', $expr, $matches)) { + if (!isset(self::RFC9535_FUNCTIONS[$functionName = trim($matches[1])])) { throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName)); } @@ -416,14 +445,43 @@ private function evaluateScalar(string $expr, mixed $context): mixed return null; } - private function evaluateFunction(string $name, string $args, array $context): mixed + private function evaluateFunction(string $name, string $args, mixed $context): mixed { - $args = array_map( - fn ($arg) => $this->evaluateScalar(trim($arg), $context), - explode(',', $args) - ); + $argList = []; + $nodelistSizes = []; + if ($args = trim($args)) { + $args = JsonPathUtils::parseCommaSeparatedValues($args); + foreach ($args as $arg) { + $arg = trim($arg); + if (str_starts_with($arg, '$')) { // special handling for absolute paths + $results = $this->evaluate(new JsonPath($arg)); + $argList[] = $results[0] ?? 
null; + $nodelistSizes[] = \count($results); + } elseif (!str_starts_with($arg, '@')) { // special handling for @ to track nodelist size + $argList[] = $this->evaluateScalar($arg, $context); + $nodelistSizes[] = 1; + } elseif ('@' === $arg) { + $argList[] = $context; + $nodelistSizes[] = 1; + } elseif (!\is_array($context)) { + $argList[] = null; + $nodelistSizes[] = 0; + } elseif (str_starts_with($pathPart = substr($arg, 1), '[')) { + // handle bracket expressions like @['a','d'] + $results = $this->evaluateBracket(substr($pathPart, 1, -1), $context); + $argList[] = $results; + $nodelistSizes[] = \count($results); + } else { + // handle dot notation like @.a + $results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context); + $argList[] = $results[0] ?? null; + $nodelistSizes[] = \count($results); + } + } + } - $value = $args[0] ?? null; + $value = $argList[0] ?? null; + $nodelistSize = $nodelistSizes[0] ?? 0; return match ($name) { 'length' => match (true) { @@ -431,16 +489,16 @@ private function evaluateFunction(string $name, string $args, array $context): m \is_array($value) => \count($value), default => 0, }, - 'count' => \is_array($value) ? \count($value) : 0, + 'count' => $nodelistSize, 'match' => match (true) { - \is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/', $args[1]), $value), + \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/u', $this->transformJsonPathRegex($argList[1])), $value), default => false, }, 'search' => match (true) { - \is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match("/$args[1]/", $value), + \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$this->transformJsonPathRegex($argList[1])}/u", $value), default => false, }, - 'value' => $value, + 'value' => 1 < $nodelistSize ? null : (1 === $nodelistSize ? (\is_array($value) ? ($value[0] ?? 
null) : $value) : $value), default => null, }; } @@ -474,43 +532,51 @@ private function compare(mixed $left, mixed $right, string $operator): bool }; } - private function parseCommaSeparatedValues(string $expr): array + /** + * Transforms JSONPath regex patterns to comply with RFC 9535. + * + * The main issue is that '.' should not match \r or \n but should + * match Unicode line separators U+2028 and U+2029. + */ + private function transformJsonPathRegex(string $pattern): string { - $parts = []; - $current = ''; - $inQuotes = false; - $quoteChar = null; - - for ($i = 0; $i < \strlen($expr); ++$i) { - $char = $expr[$i]; - - if ('\\' === $char && $i + 1 < \strlen($expr)) { - $current .= $char.$expr[++$i]; + $result = ''; + $inCharClass = false; + $escaped = false; + $i = -1; + + while (null !== $char = $pattern[++$i] ?? null) { + if ($escaped) { + $result .= $char; + $escaped = false; continue; } - if ('"' === $char || "'" === $char) { - if (!$inQuotes) { - $inQuotes = true; - $quoteChar = $char; - } elseif ($char === $quoteChar) { - $inQuotes = false; - $quoteChar = null; - } - } elseif (!$inQuotes && ',' === $char) { - $parts[] = trim($current); - $current = ''; + if ('\\' === $char) { + $result .= $char; + $escaped = true; + continue; + } + if ('[' === $char && !$inCharClass) { + $inCharClass = true; + $result .= $char; continue; } - $current .= $char; - } + if (']' === $char && $inCharClass) { + $inCharClass = false; + $result .= $char; + continue; + } - if ('' !== $current) { - $parts[] = trim($current); + if ('.' 
=== $char && !$inCharClass) { + $result .= '(?:[^\r\n]|\x{2028}|\x{2029})'; + } else { + $result .= $char; + } } - return $parts; + return $result; } } diff --git a/src/Symfony/Component/JsonPath/JsonPathUtils.php b/src/Symfony/Component/JsonPath/JsonPathUtils.php index 6f971d20115b2..30bf446b6a9d5 100644 --- a/src/Symfony/Component/JsonPath/JsonPathUtils.php +++ b/src/Symfony/Component/JsonPath/JsonPathUtils.php @@ -99,10 +99,10 @@ public static function unescapeString(string $str, string $quoteChar): string } $result = ''; - $length = \strlen($str); + $i = -1; - for ($i = 0; $i < $length; ++$i) { - if ('\\' === $str[$i] && $i + 1 < $length) { + while (null !== $char = $str[++$i] ?? null) { + if ('\\' === $char && isset($str[$i + 1])) { $result .= match ($str[$i + 1]) { '"' => '"', "'" => "'", @@ -113,22 +113,22 @@ public static function unescapeString(string $str, string $quoteChar): string 'n' => "\n", 'r' => "\r", 't' => "\t", - 'u' => self::unescapeUnicodeSequence($str, $length, $i), - default => $str[$i].$str[$i + 1], // keep the backslash + 'u' => self::unescapeUnicodeSequence($str, $i), + default => $char.$str[$i + 1], // keep the backslash }; ++$i; } else { - $result .= $str[$i]; + $result .= $char; } } return $result; } - private static function unescapeUnicodeSequence(string $str, int $length, int &$i): string + private static function unescapeUnicodeSequence(string $str, int &$i): string { - if ($i + 5 >= $length) { + if (!isset($str[$i + 5])) { // not enough characters for Unicode escape, treat as literal return $str[$i]; } @@ -141,7 +141,7 @@ private static function unescapeUnicodeSequence(string $str, int $length, int &$ $codepoint = hexdec($hex); // looks like a valid Unicode codepoint, string length is sufficient and it starts with \u - if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && $i + 11 < $length && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]) { + if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && isset($str[$i + 11]) && '\\' === 
$str[$i + 6] && 'u' === $str[$i + 7]) { $lowHex = substr($str, $i + 8, 4); if (ctype_xdigit($lowHex)) { $lowSurrogate = hexdec($lowHex); @@ -159,4 +159,72 @@ private static function unescapeUnicodeSequence(string $str, int $length, int &$ return mb_chr($codepoint, 'UTF-8'); } + + /** + * @see https://datatracker.ietf.org/doc/rfc9535/, section 2.1.1 + */ + public static function normalizeWhitespace(string $input): string + { + $normalized = strtr($input, [ + "\t" => ' ', + "\n" => ' ', + "\r" => ' ', + ]); + + return trim($normalized); + } + + /** + * Check a number is RFC 9535 compliant using strict JSON number format. + */ + public static function isJsonNumber(string $value): bool + { + return preg_match('/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/', $value); + } + + public static function parseCommaSeparatedValues(string $expr): array + { + $parts = []; + $current = ''; + $inQuotes = false; + $quoteChar = null; + $bracketDepth = 0; + $i = -1; + + while (null !== $char = $expr[++$i] ?? null) { + if ('\\' === $char && isset($expr[$i + 1])) { + $current .= $char.$expr[++$i]; + continue; + } + + if ('"' === $char || "'" === $char) { + if (!$inQuotes) { + $inQuotes = true; + $quoteChar = $char; + } elseif ($char === $quoteChar) { + $inQuotes = false; + $quoteChar = null; + } + } elseif (!$inQuotes) { + if ('[' === $char) { + ++$bracketDepth; + } elseif (']' === $char) { + --$bracketDepth; + } elseif (0 === $bracketDepth && ',' === $char) { + $parts[] = trim($current); + $current = ''; + + continue; + } + } + + $current .= $char; + } + + if ('' !== $current) { + $parts[] = trim($current); + } + + return $parts; + } } diff --git a/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php b/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php index a52d586fac869..1d1eb4be3b431 100644 --- a/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php +++ b/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php @@ -500,6 +500,28 @@ public function 
testLengthFunctionWithOuterParentheses() $this->assertSame('J. R. R. Tolkien', $result[1]['author']); } + public function testMatchFunctionWithMultipleSpacesTrimmed() + { + $result = self::getBookstoreCrawler()->find("$.store.book[?(match(@.title, 'Sword of Honour'))]"); + + $this->assertSame([], $result); + } + + public function testFilterMultiline() + { + $result = self::getBookstoreCrawler()->find( + '$ + .store + .book[? + length(@.author)>12 + ]' + ); + + $this->assertCount(2, $result); + $this->assertSame('Herman Melville', $result[0]['author']); + $this->assertSame('J. R. R. Tolkien', $result[1]['author']); + } + public function testCountFunction() { $result = self::getBookstoreCrawler()->find('$.store.book[?count(@.extra) != 0]'); @@ -577,10 +599,6 @@ public static function provideUnicodeEscapeSequencesProvider(): array '$["tab\there"]', ['with tab'], ], - [ - '$["new\nline"]', - ['with newline'], - ], [ '$["quote\"here"]', ['with quote'], diff --git a/src/Symfony/Component/JsonPath/Tests/JsonPathComplianceTestSuiteTest.php b/src/Symfony/Component/JsonPath/Tests/JsonPathComplianceTestSuiteTest.php index 851a45e275e7c..b39b68abcd463 100644 --- a/src/Symfony/Component/JsonPath/Tests/JsonPathComplianceTestSuiteTest.php +++ b/src/Symfony/Component/JsonPath/Tests/JsonPathComplianceTestSuiteTest.php @@ -18,7 +18,6 @@ final class JsonPathComplianceTestSuiteTest extends TestCase { private const UNSUPPORTED_TEST_CASES = [ - 'basic, multiple selectors, name and index, array data', 'basic, multiple selectors, name and index, object data', 'basic, multiple selectors, index and slice', 'basic, multiple selectors, index and slice, overlapping', @@ -26,25 +25,12 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'basic, multiple selectors, wildcard and name', 'basic, multiple selectors, wildcard and slice', 'basic, multiple selectors, multiple wildcards', - 'basic, selector, leading comma', - 'basic, selector, trailing comma', 'filter, existence, without 
segments', - 'filter, existence', 'filter, existence, present with null', 'filter, absolute existence, without segments', 'filter, absolute existence, with segments', - 'filter, equals string, single quotes', - 'filter, equals numeric string, single quotes', - 'filter, equals string, double quotes', - 'filter, equals numeric string, double quotes', - 'filter, equals number', - 'filter, equals null', 'filter, equals null, absent from data', - 'filter, equals true', - 'filter, equals false', - 'filter, equals self', 'filter, absolute, equals self', - 'filter, equals, absent from index selector equals absent from name selector', 'filter, deep equality, arrays', 'filter, deep equality, objects', 'filter, not-equals string, single quotes', @@ -53,26 +39,12 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'filter, not-equals string, double quotes', 'filter, not-equals numeric string, double quotes', 'filter, not-equals string, double quotes, different types', - 'filter, not-equals number', - 'filter, not-equals number, different types', - 'filter, not-equals null', 'filter, not-equals null, absent from data', - 'filter, not-equals true', - 'filter, not-equals false', - 'filter, less than string, single quotes', - 'filter, less than string, double quotes', 'filter, less than number', 'filter, less than null', 'filter, less than true', 'filter, less than false', - 'filter, less than or equal to string, single quotes', - 'filter, less than or equal to string, double quotes', - 'filter, less than or equal to number', - 'filter, less than or equal to null', 'filter, less than or equal to true', - 'filter, less than or equal to false', - 'filter, greater than string, single quotes', - 'filter, greater than string, double quotes', 'filter, greater than number', 'filter, greater than null', 'filter, greater than true', @@ -88,8 +60,6 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'filter, exists or exists, data false', 'filter, and', 'filter, or', 
- 'filter, not expression', - 'filter, not exists', 'filter, not exists, data null', 'filter, non-singular existence, wildcard', 'filter, non-singular existence, multiple', @@ -131,11 +101,6 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'filter, and binds more tightly than or', 'filter, left to right evaluation', 'filter, group terms, right', - 'filter, string literal, single quote in double quotes', - 'filter, string literal, double quote in single quotes', - 'filter, string literal, escaped single quote in single quotes', - 'filter, string literal, escaped double quote in double quotes', - 'functions, value, multi-value nodelist', 'name selector, double quotes, escaped reverse solidus', 'name selector, single quotes, escaped reverse solidus', 'slice selector, slice selector with everything omitted, long form', @@ -143,130 +108,7 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'slice selector, start, max exact', 'slice selector, end, min exact', 'slice selector, end, max exact', - 'functions, length, arg is special nothing', - 'functions, match, don\'t select match', - 'functions, match, select non-match', - 'functions, match, arg is a function expression', - 'functions, search, don\'t select match', - 'functions, search, select non-match', - 'functions, search, arg is a function expression', - 'whitespace, filter, space between question mark and expression', - 'whitespace, filter, newline between question mark and expression', - 'whitespace, filter, tab between question mark and expression', - 'whitespace, filter, return between question mark and expression', - 'whitespace, filter, space between question mark and parenthesized expression', - 'whitespace, filter, newline between question mark and parenthesized expression', - 'whitespace, filter, tab between question mark and parenthesized expression', - 'whitespace, filter, return between question mark and parenthesized expression', - 'whitespace, filter, space between bracket and 
question mark', - 'whitespace, filter, newline between bracket and question mark', - 'whitespace, filter, tab between bracket and question mark', - 'whitespace, filter, return between bracket and question mark', - 'whitespace, functions, newline between parenthesis and arg', - 'whitespace, functions, newline between arg and comma', - 'whitespace, functions, newline between comma and arg', - 'whitespace, functions, newline between arg and parenthesis', - 'whitespace, functions, newlines in a relative singular selector', - 'whitespace, functions, newlines in an absolute singular selector', - 'whitespace, operators, space before ||', - 'whitespace, operators, newline before ||', - 'whitespace, operators, tab before ||', - 'whitespace, operators, return before ||', - 'whitespace, operators, space after ||', - 'whitespace, operators, newline after ||', - 'whitespace, operators, tab after ||', - 'whitespace, operators, return after ||', - 'whitespace, operators, space before &&', - 'whitespace, operators, newline before &&', - 'whitespace, operators, tab before &&', - 'whitespace, operators, return before &&', - 'whitespace, operators, space after &&', - 'whitespace, operators, newline after &&', - 'whitespace, operators, tab after &&', - 'whitespace, operators, return after &&', - 'whitespace, operators, space before ==', - 'whitespace, operators, newline before ==', - 'whitespace, operators, tab before ==', - 'whitespace, operators, return before ==', - 'whitespace, operators, space after ==', - 'whitespace, operators, newline after ==', - 'whitespace, operators, tab after ==', - 'whitespace, operators, return after ==', - 'whitespace, operators, space before !=', - 'whitespace, operators, newline before !=', - 'whitespace, operators, tab before !=', - 'whitespace, operators, return before !=', - 'whitespace, operators, space after !=', - 'whitespace, operators, newline after !=', - 'whitespace, operators, tab after !=', - 'whitespace, operators, return after !=', - 
'whitespace, operators, space before <', - 'whitespace, operators, newline before <', - 'whitespace, operators, tab before <', - 'whitespace, operators, return before <', - 'whitespace, operators, space after <', - 'whitespace, operators, newline after <', - 'whitespace, operators, tab after <', - 'whitespace, operators, return after <', - 'whitespace, operators, space before >', - 'whitespace, operators, newline before >', - 'whitespace, operators, tab before >', - 'whitespace, operators, return before >', - 'whitespace, operators, space after >', - 'whitespace, operators, newline after >', - 'whitespace, operators, tab after >', - 'whitespace, operators, return after >', - 'whitespace, operators, space before <=', - 'whitespace, operators, newline before <=', - 'whitespace, operators, tab before <=', - 'whitespace, operators, return before <=', - 'whitespace, operators, space after <=', - 'whitespace, operators, newline after <=', - 'whitespace, operators, tab after <=', - 'whitespace, operators, return after <=', - 'whitespace, operators, space before >=', - 'whitespace, operators, newline before >=', - 'whitespace, operators, tab before >=', - 'whitespace, operators, return before >=', - 'whitespace, operators, space after >=', - 'whitespace, operators, newline after >=', - 'whitespace, operators, tab after >=', - 'whitespace, operators, return after >=', - 'whitespace, operators, space between logical not and test expression', - 'whitespace, operators, newline between logical not and test expression', - 'whitespace, operators, tab between logical not and test expression', - 'whitespace, operators, return between logical not and test expression', - 'whitespace, operators, space between logical not and parenthesized expression', - 'whitespace, operators, newline between logical not and parenthesized expression', - 'whitespace, operators, tab between logical not and parenthesized expression', - 'whitespace, operators, return between logical not and parenthesized 
expression', - 'whitespace, selectors, space between bracket and selector', - 'whitespace, selectors, newline between bracket and selector', - 'whitespace, selectors, tab between bracket and selector', - 'whitespace, selectors, return between bracket and selector', - 'whitespace, selectors, space between selector and bracket', - 'whitespace, selectors, tab between selector and bracket', - 'whitespace, selectors, return between selector and bracket', - 'whitespace, selectors, newline between selector and comma', - 'whitespace, selectors, newline between comma and selector', - 'whitespace, slice, space between start and colon', - 'whitespace, slice, newline between start and colon', - 'whitespace, slice, tab between start and colon', - 'whitespace, slice, return between start and colon', - 'whitespace, slice, space between colon and end', - 'whitespace, slice, newline between colon and end', - 'whitespace, slice, tab between colon and end', - 'whitespace, slice, return between colon and end', - 'whitespace, slice, space between end and colon', - 'whitespace, slice, newline between end and colon', - 'whitespace, slice, tab between end and colon', - 'whitespace, slice, return between end and colon', - 'whitespace, slice, space between colon and step', - 'whitespace, slice, newline between colon and step', - 'whitespace, slice, tab between colon and step', - 'whitespace, slice, return between colon and step', 'basic, descendant segment, multiple selectors', - 'basic, descendant segment, object traversal, multiple selectors', 'basic, bald descendant segment', 'filter, relative non-singular query, index, equal', 'filter, relative non-singular query, index, not equal', @@ -306,48 +148,7 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'index selector, leading 0', 'index selector, -0', 'index selector, leading -0', - 'name selector, double quotes, embedded U+0000', - 'name selector, double quotes, embedded U+0001', - 'name selector, double quotes, embedded 
U+0002', - 'name selector, double quotes, embedded U+0003', - 'name selector, double quotes, embedded U+0004', - 'name selector, double quotes, embedded U+0005', - 'name selector, double quotes, embedded U+0006', - 'name selector, double quotes, embedded U+0007', - 'name selector, double quotes, embedded U+0008', - 'name selector, double quotes, embedded U+0009', - 'name selector, double quotes, embedded U+000B', - 'name selector, double quotes, embedded U+000C', - 'name selector, double quotes, embedded U+000D', - 'name selector, double quotes, embedded U+000E', - 'name selector, double quotes, embedded U+000F', - 'name selector, double quotes, embedded U+0010', - 'name selector, double quotes, embedded U+0011', - 'name selector, double quotes, embedded U+0012', - 'name selector, double quotes, embedded U+0013', - 'name selector, double quotes, embedded U+0014', - 'name selector, double quotes, embedded U+0015', - 'name selector, double quotes, embedded U+0016', - 'name selector, double quotes, embedded U+0017', - 'name selector, double quotes, embedded U+0018', - 'name selector, double quotes, embedded U+0019', - 'name selector, double quotes, embedded U+001A', - 'name selector, double quotes, embedded U+001B', - 'name selector, double quotes, embedded U+001C', - 'name selector, double quotes, embedded U+001D', - 'name selector, double quotes, embedded U+001E', - 'name selector, double quotes, embedded U+001F', - 'name selector, double quotes, escaped backspace', - 'name selector, double quotes, escaped form feed', 'name selector, double quotes, escaped line feed', - 'name selector, double quotes, escaped carriage return', - 'name selector, double quotes, escaped tab', - 'name selector, double quotes, escaped ☺, upper case hex', - 'name selector, double quotes, escaped ☺, lower case hex', - 'name selector, double quotes, surrogate pair 𝄞', - 'name selector, double quotes, surrogate pair 😀', - 'name selector, double quotes, before high surrogates', - 'name 
selector, double quotes, after low surrogates', 'name selector, double quotes, invalid escaped single quote', 'name selector, double quotes, question mark escape', 'name selector, double quotes, bell escape', @@ -366,51 +167,10 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'name selector, double quotes, single low surrogate', 'name selector, double quotes, high high surrogate', 'name selector, double quotes, low low surrogate', - 'name selector, double quotes, surrogate non-surrogate', - 'name selector, double quotes, non-surrogate surrogate', - 'name selector, double quotes, surrogate supplementary', 'name selector, double quotes, supplementary surrogate', 'name selector, double quotes, surrogate incomplete low', - 'name selector, single quotes, embedded U+0000', - 'name selector, single quotes, embedded U+0001', - 'name selector, single quotes, embedded U+0002', - 'name selector, single quotes, embedded U+0003', - 'name selector, single quotes, embedded U+0004', - 'name selector, single quotes, embedded U+0005', - 'name selector, single quotes, embedded U+0006', - 'name selector, single quotes, embedded U+0007', - 'name selector, single quotes, embedded U+0008', - 'name selector, single quotes, embedded U+0009', - 'name selector, single quotes, embedded U+000B', - 'name selector, single quotes, embedded U+000C', - 'name selector, single quotes, embedded U+000D', - 'name selector, single quotes, embedded U+000E', - 'name selector, single quotes, embedded U+000F', - 'name selector, single quotes, embedded U+0010', - 'name selector, single quotes, embedded U+0011', - 'name selector, single quotes, embedded U+0012', - 'name selector, single quotes, embedded U+0013', - 'name selector, single quotes, embedded U+0014', - 'name selector, single quotes, embedded U+0015', - 'name selector, single quotes, embedded U+0016', - 'name selector, single quotes, embedded U+0017', - 'name selector, single quotes, embedded U+0018', - 'name selector, single quotes, 
embedded U+0019', - 'name selector, single quotes, embedded U+001A', - 'name selector, single quotes, embedded U+001B', - 'name selector, single quotes, embedded U+001C', - 'name selector, single quotes, embedded U+001D', - 'name selector, single quotes, embedded U+001E', - 'name selector, single quotes, embedded U+001F', 'name selector, single quotes, escaped backspace', - 'name selector, single quotes, escaped form feed', 'name selector, single quotes, escaped line feed', - 'name selector, single quotes, escaped carriage return', - 'name selector, single quotes, escaped tab', - 'name selector, single quotes, escaped ☺, upper case hex', - 'name selector, single quotes, escaped ☺, lower case hex', - 'name selector, single quotes, surrogate pair 𝄞', - 'name selector, single quotes, surrogate pair 😀', 'name selector, single quotes, invalid escaped double quote', 'slice selector, excessively large from value with negative step', 'slice selector, step, min exact - 1', @@ -424,99 +184,6 @@ final class JsonPathComplianceTestSuiteTest extends TestCase 'slice selector, step, leading 0', 'slice selector, step, -0', 'slice selector, step, leading -0', - 'functions, count, count function', - 'functions, count, single-node arg', - 'functions, count, multiple-selector arg', - 'functions, count, non-query arg, number', - 'functions, count, non-query arg, string', - 'functions, count, non-query arg, true', - 'functions, count, non-query arg, false', - 'functions, count, non-query arg, null', - 'functions, count, result must be compared', - 'functions, count, no params', - 'functions, count, too many params', - 'functions, length, string data, unicode', - 'functions, length, result must be compared', - 'functions, length, no params', - 'functions, length, too many params', - 'functions, length, non-singular query arg', - 'functions, length, arg is a function expression', - 'functions, match, regex from the document', - 'functions, match, filter, match function, unicode char class, 
uppercase', - 'functions, match, filter, match function, unicode char class negated, uppercase', - 'functions, match, filter, match function, unicode, surrogate pair', - 'functions, match, dot matcher on \u2028', - 'functions, match, dot matcher on \u2029', - 'functions, match, result cannot be compared', - 'functions, match, too few params', - 'functions, match, too many params', - 'functions, match, dot in character class', - 'functions, match, escaped dot', - 'functions, match, escaped backslash before dot', - 'functions, match, escaped left square bracket', - 'functions, match, escaped right square bracket', - 'functions, match, explicit caret', - 'functions, match, explicit dollar', - 'functions, search, regex from the document', - 'functions, search, filter, search function, unicode char class, uppercase', - 'functions, search, filter, search function, unicode char class negated, uppercase', - 'functions, search, filter, search function, unicode, surrogate pair', - 'functions, search, dot matcher on \u2028', - 'functions, search, dot matcher on \u2029', - 'functions, search, result cannot be compared', - 'functions, search, too few params', - 'functions, search, too many params', - 'functions, search, dot in character class', - 'functions, search, escaped dot', - 'functions, search, escaped backslash before dot', - 'functions, search, escaped left square bracket', - 'functions, search, escaped right square bracket', - 'functions, value, single-value nodelist', - 'functions, value, too few params', - 'functions, value, too many params', - 'functions, value, result must be compared', - 'whitespace, filter, space between parenthesized expression and bracket', - 'whitespace, filter, tab between parenthesized expression and bracket', - 'whitespace, filter, return between parenthesized expression and bracket', - 'whitespace, functions, space between function name and parenthesis', - 'whitespace, functions, tab between function name and parenthesis', - 'whitespace, 
functions, return between function name and parenthesis', - 'whitespace, functions, space between parenthesis and arg', - 'whitespace, functions, tab between parenthesis and arg', - 'whitespace, functions, return between parenthesis and arg', - 'whitespace, functions, space between arg and comma', - 'whitespace, functions, tab between arg and comma', - 'whitespace, functions, return between arg and comma', - 'whitespace, functions, space between comma and arg', - 'whitespace, functions, tab between comma and arg', - 'whitespace, functions, return between comma and arg', - 'whitespace, functions, space between arg and parenthesis', - 'whitespace, functions, tab between arg and parenthesis', - 'whitespace, functions, return between arg and parenthesis', - 'whitespace, functions, spaces in a relative singular selector', - 'whitespace, functions, tabs in a relative singular selector', - 'whitespace, functions, returns in a relative singular selector', - 'whitespace, functions, spaces in an absolute singular selector', - 'whitespace, functions, tabs in an absolute singular selector', - 'whitespace, functions, returns in an absolute singular selector', - 'whitespace, selectors, space between root and bracket', - 'whitespace, selectors, newline between root and bracket', - 'whitespace, selectors, tab between root and bracket', - 'whitespace, selectors, return between root and bracket', - 'whitespace, selectors, space between bracket and bracket', - 'whitespace, selectors, newline between bracket and bracket', - 'whitespace, selectors, tab between bracket and bracket', - 'whitespace, selectors, return between bracket and bracket', - 'whitespace, selectors, space between root and dot', - 'whitespace, selectors, newline between root and dot', - 'whitespace, selectors, tab between root and dot', - 'whitespace, selectors, return between root and dot', - 'whitespace, selectors, space between selector and comma', - 'whitespace, selectors, tab between selector and comma', - 
'whitespace, selectors, return between selector and comma', - 'whitespace, selectors, space between comma and selector', - 'whitespace, selectors, tab between comma and selector', - 'whitespace, selectors, return between comma and selector', ]; /** @@ -539,7 +206,7 @@ public function testComplianceTestCase(string $selector, array $document, array public static function complianceCaseProvider(): iterable { - $data = json_decode(file_get_contents(__DIR__ . '/Fixtures/cts.json'), true, flags: JSON_THROW_ON_ERROR); + $data = json_decode(file_get_contents(__DIR__.'/Fixtures/cts.json'), true, flags: \JSON_THROW_ON_ERROR); foreach ($data['tests'] as $test) { if (\in_array($test['name'], self::UNSUPPORTED_TEST_CASES, true)) { diff --git a/src/Symfony/Component/JsonPath/Tests/Tokenizer/JsonPathTokenizerTest.php b/src/Symfony/Component/JsonPath/Tests/Tokenizer/JsonPathTokenizerTest.php index b6768ff7ac9db..fdbd36d3cbc36 100644 --- a/src/Symfony/Component/JsonPath/Tests/Tokenizer/JsonPathTokenizerTest.php +++ b/src/Symfony/Component/JsonPath/Tests/Tokenizer/JsonPathTokenizerTest.php @@ -355,9 +355,7 @@ public static function provideInvalidUtf8PropertyName(): array 'special char first' => ['#test'], 'start with digit' => ['123test'], 'asterisk' => ['test*test'], - 'space not allowed' => [' test'], 'at sign not allowed' => ['@test'], - 'start control char' => ["\0test"], 'ending control char' => ["test\xFF\xFA"], 'dash sign' => ['-test'], ]; diff --git a/src/Symfony/Component/JsonPath/Tokenizer/JsonPathTokenizer.php b/src/Symfony/Component/JsonPath/Tokenizer/JsonPathTokenizer.php index d7c5fe44457e7..e9ca872f223b9 100644 --- a/src/Symfony/Component/JsonPath/Tokenizer/JsonPathTokenizer.php +++ b/src/Symfony/Component/JsonPath/Tokenizer/JsonPathTokenizer.php @@ -13,6 +13,7 @@ use Symfony\Component\JsonPath\Exception\InvalidJsonPathException; use Symfony\Component\JsonPath\JsonPath; +use Symfony\Component\JsonPath\JsonPathUtils; /** * @author Alexandre Daubois @@ -21,6 +22,9 @@ */ 
final class JsonPathTokenizer { + private const RFC9535_WHITESPACE_CHARS = [' ', "\t", "\n", "\r"]; + private const BARE_LITERAL_REGEX = '(true|false|null|\d+(\.\d+)?([eE][+-]?\d+)?|\'[^\']*\'|"[^"]*")'; + /** * @return JsonPathToken[] */ @@ -34,6 +38,8 @@ public static function tokenize(JsonPath $query): array $inQuote = false; $quoteChar = ''; $filterParenthesisDepth = 0; + $filterBracketDepth = 0; + $hasContentAfterRoot = false; $chars = mb_str_split((string) $query); $length = \count($chars); @@ -42,14 +48,36 @@ public static function tokenize(JsonPath $query): array throw new InvalidJsonPathException('empty JSONPath expression.'); } - if ('$' !== $chars[0]) { + $i = self::skipWhitespace($chars, 0, $length); + if ($i >= $length || '$' !== $chars[$i]) { throw new InvalidJsonPathException('expression must start with $.'); } + $rootIndex = $i; + if ($rootIndex + 1 < $length) { + $hasContentAfterRoot = true; + } + for ($i = 0; $i < $length; ++$i) { $char = $chars[$i]; $position = $i; + if (!$inQuote && !$inBracket && self::isWhitespace($char)) { + if ('' !== $current) { + $tokens[] = new JsonPathToken(TokenType::Name, $current); + $current = ''; + } + + $nextNonWhitespaceIndex = self::skipWhitespace($chars, $i, $length); + if ($nextNonWhitespaceIndex < $length && '[' !== $chars[$nextNonWhitespaceIndex] && '.' 
!== $chars[$nextNonWhitespaceIndex]) { + throw new InvalidJsonPathException('whitespace is not allowed in property names.', $i); + } + + $i = $nextNonWhitespaceIndex - 1; + + continue; + } + if (('"' === $char || "'" === $char) && !$inQuote) { $inQuote = true; $quoteChar = $char; @@ -58,10 +86,32 @@ public static function tokenize(JsonPath $query): array } if ($inQuote) { + // literal control characters (U+0000 through U+001F) in quoted strings + // are not be allowed unless they are part of escape sequences + $ord = \ord($char); + if ($inBracket) { + if ($ord <= 31) { + $isEscapedChar = ($i > 0 && '\\' === $chars[$i - 1]); + + if (!$isEscapedChar) { + throw new InvalidJsonPathException('control characters are not allowed in quoted strings.', $position); + } + } + + if ("\n" === $char && $i > 0 && '\\' === $chars[$i - 1]) { + throw new InvalidJsonPathException('escaped newlines are not allowed in quoted strings.', $position); + } + + if ('u' === $char && $i > 0 && '\\' === $chars[$i - 1]) { + self::validateUnicodeEscape($chars, $i, $position); + } + } + $current .= $char; - if ($char === $quoteChar && '\\' !== $chars[$i - 1]) { + if ($char === $quoteChar && (0 === $i || '\\' !== $chars[$i - 1])) { $inQuote = false; } + if ($i === $length - 1 && $inQuote) { throw new InvalidJsonPathException('unclosed string literal.', $position); } @@ -80,11 +130,22 @@ public static function tokenize(JsonPath $query): array $inBracket = true; ++$bracketDepth; + $i = self::skipWhitespace($chars, $i + 1, $length) - 1; // -1 because loop will increment + + continue; + } + + if ('[' === $char && $inFilter) { + // inside filter expressions, brackets are part of the filter content + ++$filterBracketDepth; + $current .= $char; continue; } if (']' === $char) { - if ($inFilter && $filterParenthesisDepth > 0) { + if ($inFilter && $filterBracketDepth > 0) { + // inside filter expressions, brackets are part of the filter content + --$filterBracketDepth; $current .= $char; continue; } @@ -94,35 
+155,61 @@ public static function tokenize(JsonPath $query): array } if (0 === $bracketDepth) { - if ('' === $current) { + if ('' === $current = trim($current)) { throw new InvalidJsonPathException('empty brackets are not allowed.', $position); } + // validate filter expressions + if (str_starts_with($current, '?')) { + if ($filterParenthesisDepth > 0) { + throw new InvalidJsonPathException('unclosed bracket.', $position); + } + self::validateFilterExpression($current, $position); + } + $tokens[] = new JsonPathToken(TokenType::Bracket, $current); $current = ''; $inBracket = false; $inFilter = false; $filterParenthesisDepth = 0; + $filterBracketDepth = 0; continue; } } if ('?' === $char && $inBracket && !$inFilter) { - if ('' !== $current) { + if ('' !== trim($current)) { throw new InvalidJsonPathException('unexpected characters before filter expression.', $position); } + + $current = '?'; $inFilter = true; $filterParenthesisDepth = 0; + $filterBracketDepth = 0; + + continue; } if ($inFilter) { if ('(' === $char) { + if (preg_match('/\w\s+$/', $current)) { + throw new InvalidJsonPathException('whitespace is not allowed between function name and parenthesis.', $position); + } ++$filterParenthesisDepth; } elseif (')' === $char) { if (--$filterParenthesisDepth < 0) { throw new InvalidJsonPathException('unmatched closing parenthesis in filter.', $position); } } + $current .= $char; + + continue; + } + + if ($inBracket && self::isWhitespace($char)) { + $current .= $char; + + continue; } // recursive descent @@ -158,7 +245,7 @@ public static function tokenize(JsonPath $query): array throw new InvalidJsonPathException('unclosed string literal.', $length - 1); } - if ('' !== $current) { + if ('' !== $current = trim($current)) { // final validation of the whole name if (!preg_match('/^(?:\*|[a-zA-Z_\x{0080}-\x{D7FF}\x{E000}-\x{10FFFF}][a-zA-Z0-9_\x{0080}-\x{D7FF}\x{E000}-\x{10FFFF}]*)$/u', $current)) { throw new InvalidJsonPathException(\sprintf('invalid character in 
property name "%s"', $current)); @@ -167,6 +254,230 @@ public static function tokenize(JsonPath $query): array $tokens[] = new JsonPathToken(TokenType::Name, $current); } + if ($hasContentAfterRoot && !$tokens) { + throw new InvalidJsonPathException('invalid JSONPath expression.'); + } + return $tokens; }

    /**
     * Tells whether the given character is one of the characters listed in
     * self::RFC9535_WHITESPACE_CHARS (space, tab, line feed, carriage return).
     */
    private static function isWhitespace(string $char): bool
    {
        return \in_array($char, self::RFC9535_WHITESPACE_CHARS, true);
    }

    /**
     * Advances $index for as long as it is below $length and points at a
     * whitespace character, then returns the resulting index.
     *
     * @param string[] $chars
     */
    private static function skipWhitespace(array $chars, int $index, int $length): int
    {
        while ($index < $length && self::isWhitespace($chars[$index])) {
            ++$index;
        }

        return $index;
    }

    /**
     * Validates the content of a filter selector (the bracket content starting with "?").
     *
     * Runs the bare-literal/function checks first, then rejects comparisons that
     * involve a non-singular query and numbers that do not follow the JSON number format.
     *
     * @param string $expr     the bracket content, including the leading "?"
     * @param int    $position the position reported in thrown exceptions
     *
     * @throws InvalidJsonPathException when one of the checks fails
     */
    private static function validateFilterExpression(string $expr, int $position): void
    {
        self::validateBareLiterals($expr, $position);

        $filterExpr = ltrim($expr, '?');
        $filterExpr = trim($filterExpr);

        $comparisonOps = ['==', '!=', '>=', '<=', '>', '<'];
        foreach ($comparisonOps as $op) {
            if (str_contains($filterExpr, $op)) {
                // only the first operator found (in the order listed above) is inspected
                [$left, $right] = array_map('trim', explode($op, $filterExpr, 2));

                // check if either side contains non-singular queries
                if (self::isNonSingularQuery($left) || self::isNonSingularQuery($right)) {
                    throw new InvalidJsonPathException('Non-singular query is not comparable.', $position);
                }

                break;
            }
        }

        // look for invalid number formats in filter expressions:
        // split the expression on every comparison and logical operator
        $operators = [...$comparisonOps, '&&', '||'];
        $tokens = [$filterExpr];

        foreach ($operators as $op) {
            // one merge per operator instead of one merge per token
            $tokens = array_merge(...array_map(
                static fn (string $token): array => explode($op, $token),
                $tokens,
            ));
        }

        foreach ($tokens as $token) {
            // skip everything that is not a candidate number: empty chunks, keywords,
            // queries, quoted strings, anything with brackets/parentheses/root, and
            // dotted names that do not look like a decimal number
            if (
                '' === ($token = trim($token))
                || \in_array($token, ['true', 'false', 'null'], true)
                || false !== strpbrk($token[0], '@"\'')
                || false !== strpbrk($token, '()[]$')
                || (str_contains($token, '.') && !preg_match('/^[\d+\-.eE\s]*\./', $token))
            ) {
                continue;
            }

            // strict JSON number format validation
            if (
                preg_match('/^(?=[\d+\-.eE\s]+$)(?=.*\d)/', $token)
                && !preg_match('/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/', $token)
            ) {
                throw new InvalidJsonPathException(\sprintf('Invalid number format "%s" in filter expression.', $token), $position);
            }
        }
    }

    /**
     * Rejects filter expressions that misuse literals or function calls: wrongly
     * capitalized keywords, uncompared function results or literals, wrong argument
     * counts, and function results compared to boolean literals.
     *
     * @param string $expr     the bracket content, including the leading "?"
     * @param int    $position the position reported in thrown exceptions
     *
     * @throws InvalidJsonPathException when one of the checks fails
     */
    private static function validateBareLiterals(string $expr, int $position): void
    {
        $filterExpr = ltrim($expr, '?');
        $filterExpr = trim($filterExpr);

        if (preg_match('/\b(True|False|Null)\b/', $filterExpr)) {
            throw new InvalidJsonPathException('Incorrectly capitalized literal in filter expression.', $position);
        }

        if (preg_match('/^(length|count|value)\s*\([^)]*\)$/', $filterExpr)) {
            throw new InvalidJsonPathException('Function result must be compared.', $position);
        }

        if (preg_match('/\b(length|count|value)\s*\(([^)]*)\)/', $filterExpr, $matches)) {
            $functionName = $matches[1];
            $args = trim($matches[2]);
            // compare against '' explicitly: the string "0" is falsy in PHP, so a
            // truthiness test would report a literal 0 argument as missing
            if ('' === $args) {
                throw new InvalidJsonPathException('Function requires exactly one argument.', $position);
            }

            $argParts = JsonPathUtils::parseCommaSeparatedValues($args);
            if (1 !== \count($argParts)) {
                throw new InvalidJsonPathException('Function requires exactly one argument.', $position);
            }

            $arg = trim($argParts[0]);

            if ('count' === $functionName && preg_match('/^'.self::BARE_LITERAL_REGEX.'$/', $arg)) {
                throw new InvalidJsonPathException('count() function requires a query argument, not a literal.', $position);
            }

            if ('length' === $functionName && preg_match('/@\.\*/', $arg)) {
                throw new InvalidJsonPathException('Function argument must be a singular query.', $position);
            }
        }

        if (preg_match('/\b(match|search)\s*\(([^)]*)\)/', $filterExpr, $matches)) {
            $args = trim($matches[2]);
            // same "0" pitfall as above
            if ('' === $args) {
                throw new InvalidJsonPathException('Function requires exactly two arguments.', $position);
            }

            $argParts = JsonPathUtils::parseCommaSeparatedValues($args);
            if (2 !== \count($argParts)) {
                throw new InvalidJsonPathException('Function requires exactly two arguments.', $position);
            }
        }

        if (preg_match('/^'.self::BARE_LITERAL_REGEX.'$/', $filterExpr)) {
            throw new InvalidJsonPathException('Bare literal in filter expression - literals must be compared.', $position);
        }

        if (preg_match('/\b'.self::BARE_LITERAL_REGEX.'\s*(&&|\|\|)\s*'.self::BARE_LITERAL_REGEX.'\b/', $filterExpr)) {
            throw new InvalidJsonPathException('Bare literals in logical expression - literals must be compared.', $position);
        }

        if (preg_match('/\b(match|search|length|count|value)\s*\([^)]*\)\s*[=!]=\s*(true|false)\b/', $filterExpr)
            || preg_match('/\b(true|false)\s*[=!]=\s*(match|search|length|count|value)\s*\([^)]*\)/', $filterExpr)) {
            throw new InvalidJsonPathException('Function result cannot be compared to boolean literal.', $position);
        }

        if (preg_match('/\b'.self::BARE_LITERAL_REGEX.'\s*(&&|\|\|)/', $filterExpr)
            || preg_match('/(&&|\|\|)\s*'.self::BARE_LITERAL_REGEX.'\b/', $filterExpr)) {
            // check if the literal is not part of a comparison
            if (!preg_match('/(@[^=<>!]*|[^=<>!@]+)\s*[=<>!]+\s*'.self::BARE_LITERAL_REGEX.'/', $filterExpr)
                && !preg_match('/'.self::BARE_LITERAL_REGEX.'\s*[=<>!]+\s*(@[^=<>!]*|[^=<>!@]+)/', $filterExpr)
            ) {
                throw new InvalidJsonPathException('Bare literal in logical expression - literals must be compared.', $position);
            }
        }
    }

    /**
     * Tells whether the given operand is a relative query (starting with "@") that
     * matches one of the patterns treated as non-singular: a leading descendant
     * segment, a wildcard, a slice or a comma-separated selector list.
     */
    private static function isNonSingularQuery(string $query): bool
    {
        if (!str_starts_with($query = trim($query), '@')) {
            return false;
        }

        return 1 === preg_match('/@(\.\.)|(.*\[\*])|(.*\.\*)|(.*\[.*:.*])|(.*\[.*,.*])/', $query);
    }

    /**
     * Checks that a "\uXXXX" escape holding a surrogate code point is correctly paired.
     *
     * Nothing is validated when fewer than four characters follow, or when the four
     * characters after "u" are not hexadecimal digits.
     *
     * @param string[] $chars    the characters of the whole expression
     * @param int      $index    the index of the "u" of the escape sequence
     * @param int      $position the position reported in thrown exceptions
     *
     * @throws InvalidJsonPathException when a surrogate is not part of a valid high/low pair
     */
    private static function validateUnicodeEscape(array $chars, int $index, int $position): void
    {
        $length = \count($chars);

        if ($index + 4 >= $length) {
            return;
        }

        $hexDigits = implode('', \array_slice($chars, $index + 1, 4));

        if (!preg_match('/^[0-9A-Fa-f]{4}$/', $hexDigits)) {
            return;
        }

        $codePoint = hexdec($hexDigits);

        if ($codePoint >= 0xD800 && $codePoint <= 0xDBFF) {
            $nextIndex = $index + 5;

            if ($nextIndex + 1 < $length
                && '\\' === $chars[$nextIndex] && 'u' === $chars[$nextIndex + 1]
            ) {
                // array_slice() never reads past the end of the expression; a truncated
                // escape yields fewer than four characters and fails the hex check below
                $nextHexDigits = implode('', \array_slice($chars, $nextIndex + 2, 4));

                if (preg_match('/^[0-9A-Fa-f]{4}$/', $nextHexDigits)) {
                    $nextCodePoint = hexdec($nextHexDigits);

                    // high surrogate must be followed by low surrogate
                    if ($nextCodePoint < 0xDC00 || $nextCodePoint > 0xDFFF) {
                        throw new InvalidJsonPathException('Invalid Unicode surrogate pair.', $position);
                    }
                }
            } else {
                // high surrogate not followed by low surrogate
                throw new InvalidJsonPathException('Invalid Unicode surrogate pair.', $position);
            }
        } elseif ($codePoint >= 0xDC00 && $codePoint <= 0xDFFF) {
            $prevIndex = $index - 7; // position of \ in previous \uXXXX (7 positions back: u+4hex+\+u)

            if ($prevIndex >= 0
                && '\\' === $chars[$prevIndex] && 'u' === $chars[$prevIndex + 1]
            ) {
                $prevHexDigits = implode('', \array_slice($chars, $prevIndex + 2, 4));

                if (preg_match('/^[0-9A-Fa-f]{4}$/', $prevHexDigits)) {
                    $prevCodePoint = hexdec($prevHexDigits);

                    // low surrogate must be preceded by high surrogate
                    if ($prevCodePoint < 0xD800 || $prevCodePoint > 0xDBFF) {
                        throw new InvalidJsonPathException('Invalid Unicode surrogate pair.', $position);
                    }
                }
            } else {
                // low surrogate not preceded by high surrogate
                throw new InvalidJsonPathException('Invalid Unicode surrogate pair.', $position);
            }
        }
    }
}