diff --git a/src/Symfony/Component/JsonPath/JsonCrawler.php b/src/Symfony/Component/JsonPath/JsonCrawler.php
index b1d7ef0bf94d8..492f56e77bba7 100644
--- a/src/Symfony/Component/JsonPath/JsonCrawler.php
+++ b/src/Symfony/Component/JsonPath/JsonCrawler.php
@@ -230,7 +230,7 @@ private function evaluateBracket(string $expr, mixed $value): array
 
         // quoted strings for object keys
         if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
-            $key = stripslashes($matches[2]);
+            $key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
 
             return \array_key_exists($key, $value) ? [$value[$key]] : [];
         }
@@ -335,7 +335,7 @@ private function evaluateScalar(string $expr, array $context): mixed
 
         // string literals
         if (preg_match('/^([\'"])(.*)\1$/', $expr, $matches)) {
-            return $matches[2];
+            return JsonPathUtils::unescapeString($matches[2], $matches[1]);
         }
 
         // current node references
diff --git a/src/Symfony/Component/JsonPath/JsonCrawlerInterface.php b/src/Symfony/Component/JsonPath/JsonCrawlerInterface.php
index 3e8a222f0ba8e..4859c2bde076b 100644
--- a/src/Symfony/Component/JsonPath/JsonCrawlerInterface.php
+++ b/src/Symfony/Component/JsonPath/JsonCrawlerInterface.php
@@ -25,7 +25,7 @@ interface JsonCrawlerInterface
      * @return list
      *
      * @throws InvalidArgumentException When the JSON string provided to the crawler cannot be decoded
-     * @throws JsonCrawlerException When a syntax error occurs in the provided JSON path
+     * @throws JsonCrawlerException When a syntax error occurs in the provided JSON path
      */
     public function find(string|JsonPath $query): array;
 }
diff --git a/src/Symfony/Component/JsonPath/JsonPath.php b/src/Symfony/Component/JsonPath/JsonPath.php
index e716167eb3f64..e36fc9ffd2ef1 100644
--- a/src/Symfony/Component/JsonPath/JsonPath.php
+++ b/src/Symfony/Component/JsonPath/JsonPath.php
@@ -92,12 +92,12 @@ private function escapeKey(string $key): string
             "\r" => '\\r',
             "\t" => '\\t',
             "\b" => '\\b',
-            "\f" => '\\f'
+            "\f" => '\\f',
         ]);
 
-        for ($i = 0; $i <= 31; $i++) {
+        for ($i = 0; $i <= 31; ++$i) {
             if ($i < 8 || $i > 13) {
-                $key = str_replace(chr($i), sprintf('\\u%04x', $i), $key);
+                $key = str_replace(\chr($i), \sprintf('\\u%04x', $i), $key);
             }
         }
 
diff --git a/src/Symfony/Component/JsonPath/JsonPathUtils.php b/src/Symfony/Component/JsonPath/JsonPathUtils.php
index b5ac2ae6b8d0a..6f971d20115b2 100644
--- a/src/Symfony/Component/JsonPath/JsonPathUtils.php
+++ b/src/Symfony/Component/JsonPath/JsonPathUtils.php
@@ -85,4 +85,92 @@ public static function findSmallestDeserializableStringAndPath(array $tokens, mi
             'tokens' => $remainingTokens,
         ];
     }
+
+    /**
+     * Unescapes the raw content of a quoted string literal.
+     *
+     * Double-quoted content is first handed to json_decode(); if that yields null, or for
+     * any other quote character, escapes are resolved one at a time and unknown escapes
+     * are kept verbatim, backslash included.
+     */
+    public static function unescapeString(string $str, string $quoteChar): string
+    {
+        if ('"' === $quoteChar) {
+            // try JSON decoding first for unicode sequences
+            $jsonStr = '"'.$str.'"';
+            $decoded = json_decode($jsonStr, true);
+
+            if (null !== $decoded) {
+                return $decoded;
+            }
+        }
+
+        $result = '';
+        $length = \strlen($str);
+
+        for ($i = 0; $i < $length; ++$i) {
+            if ('\\' === $str[$i] && $i + 1 < $length) {
+                $result .= match ($str[$i + 1]) {
+                    '"' => '"',
+                    "'" => "'",
+                    '\\' => '\\',
+                    '/' => '/',
+                    'b' => "\b",
+                    'f' => "\f",
+                    'n' => "\n",
+                    'r' => "\r",
+                    't' => "\t",
+                    'u' => self::unescapeUnicodeSequence($str, $length, $i),
+                    default => $str[$i].$str[$i + 1], // keep the backslash
+                };
+
+                ++$i;
+            } else {
+                $result .= $str[$i];
+            }
+        }
+
+        return $result;
+    }
+
+    /**
+     * Resolves the "\uXXXX" escape whose backslash sits at offset $i.
+     *
+     * $i is advanced past the hex digits (and past a paired low surrogate); the caller's
+     * own "++$i" accounts for the "u". Truncated or non-hexadecimal sequences return the
+     * literal "\u" and leave $i untouched, so no input character is dropped.
+     */
+    private static function unescapeUnicodeSequence(string $str, int $length, int &$i): string
+    {
+        if ($i + 5 >= $length) {
+            // not enough characters for Unicode escape: keep "\u" literally, the caller skips the "u"
+            return $str[$i].$str[$i + 1];
+        }
+
+        $hex = substr($str, $i + 2, 4);
+        if (!ctype_xdigit($hex)) {
+            // invalid hex: keep "\u" literally, the caller skips the "u"
+            return $str[$i].$str[$i + 1];
+        }
+
+        $codepoint = hexdec($hex);
+        // looks like a valid Unicode codepoint, string length is sufficient and it starts with \u
+        if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && $i + 11 < $length && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]) {
+            $lowHex = substr($str, $i + 8, 4);
+            if (ctype_xdigit($lowHex)) {
+                $lowSurrogate = hexdec($lowHex);
+                if (0xDC00 <= $lowSurrogate && $lowSurrogate <= 0xDFFF) {
+                    $codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
+                    $i += 10; // skip surrogate pair
+
+                    return mb_chr($codepoint, 'UTF-8');
+                }
+            }
+        }
+
+        // single Unicode character or invalid surrogate, skip the sequence
+        $i += 4;
+
+        return mb_chr($codepoint, 'UTF-8');
+    }
 }
diff --git a/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php b/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php
index 66ccfc2642141..213ae06afa7db 100644
--- a/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php
+++ b/src/Symfony/Component/JsonPath/Tests/JsonCrawlerTest.php
@@ -465,6 +465,251 @@ public function testStarAsKey()
         $this->assertSame(['a' => 1, 'b' => 2], $result[0]);
     }
 
+    /**
+     * @dataProvider provideUnicodeEscapeSequencesProvider
+     */
+    public function testUnicodeEscapeSequences(string $jsonPath, array $expected)
+    {
+        $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
+    }
+
+    public static function provideUnicodeEscapeSequencesProvider(): array
+    {
+        return [
+            [
+                '$["caf\u00e9"]',
+                ['coffee'],
+            ],
+            [
+                '$["\u65e5\u672c"]',
+                ['Japan'],
+            ],
+            [
+                '$["M\u00fcller"]',
+                [],
+            ],
+            [
+                '$["emoji\ud83d\ude00"]',
+                ['smiley'],
+            ],
+            [
+                '$["tab\there"]',
+                ['with tab'],
+            ],
+            [
+                '$["new\nline"]',
+                ['with newline'],
+            ],
+            [
+                '$["quote\"here"]',
+                ['with quote'],
+            ],
+            [
+                '$["backslash\\\\here"]',
+                ['with backslash'],
+            ],
+            [
+                '$["apostrophe\'here"]',
+                ['with apostrophe'],
+            ],
+            [
+                '$["control\u0001char"]',
+                ['with control char'],
+            ],
+            [
+                '$["\u0063af\u00e9"]',
+                ['coffee'],
+            ],
+        ];
+    }
+
+    /**
+     * @dataProvider provideSingleQuotedStringProvider
+     */
+    public function testSingleQuotedStrings(string $jsonPath, array $expected)
+    {
+        $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
+    }
+
+    public static function provideSingleQuotedStringProvider(): array
+    {
+        return [
+            [
+                "$['caf\\u00e9']",
+                ['coffee'],
+            ],
+            [
+                "$['\\u65e5\\u672c']",
+                ['Japan'],
+            ],
+            [
+                "$['quote\"here']",
+                ['with quote'],
+            ],
+            [
+                "$['M\\u00fcller']",
+                [],
+            ],
+            [
+                "$['emoji\\ud83d\\ude00']",
+                ['smiley'],
+            ],
+            [
+                "$['tab\\there']",
+                ['with tab'],
+            ],
+            [
+                "$['quote\\\"here']",
+                ['with quote'],
+            ],
+            [
+                "$['backslash\\\\here']",
+                ['with backslash'],
+            ],
+            [
+                "$['apostrophe\\'here']",
+                ['with apostrophe'],
+            ],
+            [
+                "$['control\\u0001char']",
+                ['with control char'],
+            ],
+            [
+                "$['\\u0063af\\u00e9']",
+                ['coffee'],
+            ],
+        ];
+    }
+
+    /**
+     * @dataProvider provideFilterWithUnicodeProvider
+     */
+    public function testFilterWithUnicodeStrings(string $jsonPath, int $expectedCount, string $expectedCountry)
+    {
+        $result = self::getUnicodeDocumentCrawler()->find($jsonPath);
+
+        $this->assertCount($expectedCount, $result);
+
+        if ($expectedCount > 0) {
+            $this->assertSame($expectedCountry, $result[0]['country']);
+        }
+    }
+
+    public static function provideFilterWithUnicodeProvider(): array
+    {
+        return [
+            [
+                '$.users[?(@.name == "caf\u00e9")]',
+                1,
+                'France',
+            ],
+            [
+                '$.users[?(@.name == "\u65e5\u672c\u592a\u90ce")]',
+                1,
+                'Japan',
+            ],
+            [
+                '$.users[?(@.name == "Jos\u00e9")]',
+                1,
+                'Spain',
+            ],
+            [
+                '$.users[?(@.name == "John")]',
+                1,
+                'USA',
+            ],
+            [
+                '$.users[?(@.name == "NonExistent\u0020Name")]',
+                0,
+                '',
+            ],
+        ];
+    }
+
+    /**
+     * @dataProvider provideInvalidUnicodeSequenceProvider
+     */
+    public function testInvalidUnicodeSequencesAreProcessedAsLiterals(string $jsonPath)
+    {
+        $this->assertIsArray(self::getUnicodeDocumentCrawler()->find($jsonPath), 'invalid unicode sequence should be treated as literal and not throw');
+    }
+
+    public static function provideInvalidUnicodeSequenceProvider(): array
+    {
+        return [
+            [
+                '$["test\uZZZZ"]',
+            ],
+            [
+                '$["test\u123"]',
+            ],
+            [
+                '$["test\u"]',
+            ],
+        ];
+    }
+
+    /**
+     * @dataProvider provideComplexUnicodePath
+     */
+    public function testComplexUnicodePaths(string $jsonPath, array $expected)
+    {
+        $complexJson = [
+            'データ' => [
+                'ユーザー' => [
+                    ['名前' => 'テスト', 'ID' => 1],
+                    ['名前' => 'サンプル', 'ID' => 2],
+                ],
+            ],
+            'special🔑' => [
+                'value💎' => 'treasure',
+            ],
+        ];
+
+        $crawler = new JsonCrawler(json_encode($complexJson));
+
+        $this->assertSame($expected, $crawler->find($jsonPath));
+    }
+
+    public static function provideComplexUnicodePath(): array
+    {
+        return [
+            [
+                '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][0]["\u540d\u524d"]',
+                ['テスト'],
+            ],
+            [
+                '$["special\ud83d\udd11"]["value\ud83d\udc8e"]',
+                ['treasure'],
+            ],
+            [
+                '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][*]["\u540d\u524d"]',
+                ['テスト', 'サンプル'],
+            ],
+        ];
+    }
+
+    public function testSurrogatePairHandling()
+    {
+        $json = ['𝒽𝑒𝓁𝓁𝑜' => 'mathematical script hello'];
+        $crawler = new JsonCrawler(json_encode($json));
+
+        // mathematical script "hello" requires surrogate pairs for each character
+        $result = $crawler->find('$["\ud835\udcbd\ud835\udc52\ud835\udcc1\ud835\udcc1\ud835\udc5c"]');
+        $this->assertSame(['mathematical script hello'], $result);
+    }
+
+    public function testMixedQuoteTypes()
+    {
+        $json = ['key"with"quotes' => 'value1', "key'with'apostrophes" => 'value2'];
+        $crawler = new JsonCrawler(json_encode($json));
+
+        $result = $crawler->find('$[\'key"with"quotes\']');
+        $this->assertSame(['value1'], $result);
+
+        $result = $crawler->find('$["key\'with\'apostrophes"]');
+        $this->assertSame(['value2'], $result);
+    }
 
     private static function getBookstoreCrawler(): JsonCrawler
     {
@@ -515,4 +760,28 @@ private static function getSimpleCollectionCrawler(): JsonCrawler
             {"a": [3, 5, 1, 2, 4, 6]}
         JSON);
     }
+
+    private static function getUnicodeDocumentCrawler(): JsonCrawler
+    {
+        $json = [
+            'café' => 'coffee',
+            '日本' => 'Japan',
+            'emoji😀' => 'smiley',
+            'tab here' => 'with tab',
+            "new\nline" => 'with newline',
+            'quote"here' => 'with quote',
+            'backslash\\here' => 'with backslash',
+            'apostrophe\'here' => 'with apostrophe',
+            "control\x01char" => 'with control char',
+            'users' => [
+                ['name' => 'café', 'country' => 'France'],
+                ['name' => '日本太郎', 'country' => 'Japan'],
+                ['name' => 'John', 'country' => 'USA'],
+                ['name' => 'Müller', 'country' => 'Germany'],
+                ['name' => 'José', 'country' => 'Spain'],
+            ],
+        ];
+
+        return new JsonCrawler(json_encode($json));
+    }
 }
diff --git a/src/Symfony/Component/JsonPath/Tests/Test/JsonPathAssertionsTraitTest.php b/src/Symfony/Component/JsonPath/Tests/Test/JsonPathAssertionsTraitTest.php
index 62d64b53e1e8d..1044e7658672b 100644
--- a/src/Symfony/Component/JsonPath/Tests/Test/JsonPathAssertionsTraitTest.php
+++ b/src/Symfony/Component/JsonPath/Tests/Test/JsonPathAssertionsTraitTest.php
@@ -1,5 +1,14 @@
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
 namespace Symfony\Component\JsonPath\Tests\Test;
 
 use PHPUnit\Framework\AssertionFailedError;
diff --git a/src/Symfony/Component/JsonPath/composer.json b/src/Symfony/Component/JsonPath/composer.json
index fe8ddf84dd82d..feb8158aa5be2 100644
--- a/src/Symfony/Component/JsonPath/composer.json
+++ b/src/Symfony/Component/JsonPath/composer.json
@@ -17,6 +17,7 @@
     ],
     "require": {
         "php": ">=8.2",
+        "symfony/polyfill-ctype": "^1.8",
         "symfony/polyfill-mbstring": "~1.0"
     },
     "require-dev": {