Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 876e3d9

Browse files
[JsonPath] Handle special whitespaces in filters
1 parent c492fc0 commit 876e3d9

File tree

6 files changed

+545
-414
lines changed

6 files changed

+545
-414
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 135 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array
133133
return [];
134134
}
135135

136-
if ('*' === $expr) {
136+
if (str_contains($expr, ',') && (str_starts_with($trimmed = trim($expr), ',') || str_ends_with($trimmed, ','))) {
137+
throw new JsonCrawlerException($expr, 'Expression cannot have leading or trailing commas');
138+
}
139+
140+
if ('*' === $expr = JsonPathUtils::normalizeWhitespace($expr)) {
137141
return array_values($value);
138142
}
139143

@@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array
168172
return $result;
169173
}
170174

171-
// start, end and step
172-
if (preg_match('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/', $expr, $matches)) {
175+
if (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $expr, $matches)) {
173176
if (!array_is_list($value)) {
174177
return [];
175178
}
@@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array
217220

218221
// filter expressions
219222
if (preg_match('/^\?(.*)$/', $expr, $matches)) {
220-
$filterExpr = $matches[1];
221-
222-
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr)) {
223+
if (preg_match('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/', $filterExpr = trim($matches[1]))) {
223224
$filterExpr = "($filterExpr)";
224225
}
225226

226227
if (!str_starts_with($filterExpr, '(')) {
227-
throw new JsonCrawlerException($expr, 'Invalid filter expression');
228+
$filterExpr = "($filterExpr)";
228229
}
229230

230231
// remove outer filter parentheses
@@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array
235236

236237
// comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
237238
if (str_contains($expr, ',')) {
238-
$parts = $this->parseCommaSeparatedValues($expr);
239+
$parts = JsonPathUtils::parseCommaSeparatedValues($expr);
239240

240241
$result = [];
241-
$keysIndices = array_keys($value);
242-
$isList = array_is_list($value);
243242

244243
foreach ($parts as $part) {
245244
$part = trim($part);
246245

247-
if (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
246+
if ('*' === $part) {
247+
$result = array_merge($result, array_values($value));
248+
} elseif (preg_match('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/', $part, $matches)) {
249+
// slice notation
250+
$sliceResult = $this->evaluateBracket($part, $value);
251+
$result = array_merge($result, $sliceResult);
252+
} elseif (preg_match('/^([\'"])(.*)\1$/', $part, $matches)) {
248253
$key = JsonPathUtils::unescapeString($matches[2], $matches[1]);
249254

250-
if ($isList) {
255+
if (array_is_list($value)) {
256+
// for arrays, find ALL objects that contain this key
251257
foreach ($value as $item) {
252258
if (\is_array($item) && \array_key_exists($key, $item)) {
253259
$result[] = $item;
254-
break;
255260
}
256261
}
257-
258-
continue; // no results here
259-
}
260-
261-
if (\array_key_exists($key, $value)) {
262+
} elseif (\array_key_exists($key, $value)) { // for objects, get the value for this key
262263
$result[] = $value[$key];
263264
}
264265
} elseif (preg_match('/^-?\d+$/', $part)) {
@@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array
268269
$index = \count($value) + $index;
269270
}
270271

271-
if ($isList && \array_key_exists($index, $value)) {
272+
if (array_is_list($value) && \array_key_exists($index, $value)) {
272273
$result[] = $value[$index];
273-
continue;
274-
}
275-
276-
// numeric index on a hashmap
277-
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278-
$result[] = $value[$keysIndices[$index]];
274+
} else {
275+
// numeric index on a hashmap
276+
$keysIndices = array_keys($value);
277+
if (isset($keysIndices[$index]) && isset($value[$keysIndices[$index]])) {
278+
$result[] = $value[$keysIndices[$index]];
279+
}
279280
}
280281
}
281282
}
@@ -310,7 +311,28 @@ private function evaluateFilter(string $expr, mixed $value): array
310311

311312
private function evaluateFilterExpression(string $expr, mixed $context): bool
312313
{
313-
$expr = trim($expr);
314+
$expr = JsonPathUtils::normalizeWhitespace($expr);
315+
316+
// remove outer parentheses if they wrap the entire expression
317+
if (str_starts_with($expr, '(') && str_ends_with($expr, ')')) {
318+
$depth = 0;
319+
$isWrapped = true;
320+
for ($i = 0; $i < \strlen($expr); ++$i) {
321+
if ('(' === $expr[$i]) {
322+
++$depth;
323+
} elseif (')' === $expr[$i] && 0 === --$depth && $i < \strlen($expr) - 1) {
324+
$isWrapped = false;
325+
break;
326+
}
327+
}
328+
if ($isWrapped) {
329+
$expr = trim(substr($expr, 1, -1));
330+
}
331+
}
332+
333+
if (str_starts_with($expr, '!')) {
334+
return !$this->evaluateFilterExpression(trim(substr($expr, 1)), $context);
335+
}
314336

315337
if (str_contains($expr, '&&')) {
316338
$parts = array_map('trim', explode('&&', $expr));
@@ -353,8 +375,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
353375
}
354376

355377
// function calls
356-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
357-
$functionName = $matches[1];
378+
if (preg_match('/^(\w++)\s*+\((.*)\)$/', $expr, $matches)) {
379+
$functionName = trim($matches[1]);
358380
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
359381
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
360382
}
@@ -369,8 +391,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
369391

370392
private function evaluateScalar(string $expr, mixed $context): mixed
371393
{
372-
if (is_numeric($expr)) {
373-
return str_contains($expr, '.') ? (float) $expr : (int) $expr;
394+
$expr = JsonPathUtils::normalizeWhitespace($expr);
395+
396+
if (JsonPathUtils::isJsonNumber($expr)) {
397+
return str_contains($expr, '.') || str_contains(strtolower($expr), 'e') ? (float) $expr : (int) $expr;
398+
}
399+
400+
// only validate tokens that look like standalone numbers
401+
if (preg_match('/^[\d+\-.eE]+$/', $expr) && preg_match('/\d/', $expr)) {
402+
throw new JsonCrawlerException($expr, \sprintf('Invalid number format "%s"', $expr));
374403
}
375404

376405
if ('@' === $expr) {
@@ -404,9 +433,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed
404433
}
405434

406435
// function calls
407-
if (preg_match('/^(\w+)\((.*)\)$/', $expr, $matches)) {
408-
$functionName = $matches[1];
409-
if (!isset(self::RFC9535_FUNCTIONS[$functionName])) {
436+
if (preg_match('/^(\w++)\((.*)\)$/', $expr, $matches)) {
437+
if (!isset(self::RFC9535_FUNCTIONS[$functionName = trim($matches[1])])) {
410438
throw new JsonCrawlerException($expr, \sprintf('invalid function "%s"', $functionName));
411439
}
412440

@@ -416,31 +444,60 @@ private function evaluateScalar(string $expr, mixed $context): mixed
416444
return null;
417445
}
418446

419-
private function evaluateFunction(string $name, string $args, array $context): mixed
447+
private function evaluateFunction(string $name, string $args, mixed $context): mixed
420448
{
421-
$args = array_map(
422-
fn ($arg) => $this->evaluateScalar(trim($arg), $context),
423-
explode(',', $args)
424-
);
449+
$argList = [];
450+
$nodelistSizes = [];
451+
if ($args = trim($args)) {
452+
$args = JsonPathUtils::parseCommaSeparatedValues($args);
453+
foreach ($args as $arg) {
454+
$arg = trim($arg);
455+
if (str_starts_with($arg, '$')) { // special handling for absolute paths
456+
$results = $this->evaluate(new JsonPath($arg));
457+
$argList[] = $results[0] ?? null;
458+
$nodelistSizes[] = \count($results);
459+
} elseif (!str_starts_with($arg, '@')) { // special handling for @ to track nodelist size
460+
$argList[] = $this->evaluateScalar($arg, $context);
461+
$nodelistSizes[] = 1;
462+
} elseif ('@' === $arg) {
463+
$argList[] = $context;
464+
$nodelistSizes[] = 1;
465+
} elseif (!\is_array($context)) {
466+
$argList[] = null;
467+
$nodelistSizes[] = 0;
468+
} elseif (str_starts_with($pathPart = substr($arg, 1), '[')) {
469+
// handle bracket expressions like @['a','d']
470+
$results = $this->evaluateBracket(substr($pathPart, 1, -1), $context);
471+
$argList[] = $results;
472+
$nodelistSizes[] = \count($results);
473+
} else {
474+
// handle dot notation like @.a
475+
$results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context);
476+
$argList[] = $results[0] ?? null;
477+
$nodelistSizes[] = \count($results);
478+
}
479+
}
480+
}
425481

426-
$value = $args[0] ?? null;
482+
$value = $argList[0] ?? null;
483+
$nodelistSize = $nodelistSizes[0] ?? 0;
427484

428485
return match ($name) {
429486
'length' => match (true) {
430487
\is_string($value) => mb_strlen($value),
431488
\is_array($value) => \count($value),
432489
default => 0,
433490
},
434-
'count' => \is_array($value) ? \count($value) : 0,
491+
'count' => $nodelistSize,
435492
'match' => match (true) {
436-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/', $args[1]), $value),
493+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^%s$/u', $this->transformJsonPathRegex($argList[1])), $value),
437494
default => false,
438495
},
439496
'search' => match (true) {
440-
\is_string($value) && \is_string($args[1] ?? null) => (bool) @preg_match("/$args[1]/", $value),
497+
\is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match("/{$this->transformJsonPathRegex($argList[1])}/u", $value),
441498
default => false,
442499
},
443-
'value' => $value,
500+
'value' => 1 < $nodelistSize ? null : (1 === $nodelistSize ? (\is_array($value) ? ($value[0] ?? null) : $value) : $value),
444501
default => null,
445502
};
446503
}
@@ -474,43 +531,53 @@ private function compare(mixed $left, mixed $right, string $operator): bool
474531
};
475532
}
476533

477-
private function parseCommaSeparatedValues(string $expr): array
534+
/**
535+
* Transforms JSONPath regex patterns to comply with RFC 9535.
536+
*
537+
* The main issue is that '.' should not match \r or \n but should
538+
* match Unicode line separators U+2028 and U+2029.
539+
*/
540+
private function transformJsonPathRegex(string $pattern): string
478541
{
479-
$parts = [];
480-
$current = '';
481-
$inQuotes = false;
482-
$quoteChar = null;
542+
$result = '';
543+
$inCharClass = false;
544+
$escaped = false;
545+
$length = \strlen($pattern);
483546

484-
for ($i = 0; $i < \strlen($expr); ++$i) {
485-
$char = $expr[$i];
547+
for ($i = 0; $i < $length; ++$i) {
548+
$char = $pattern[$i];
486549

487-
if ('\\' === $char && $i + 1 < \strlen($expr)) {
488-
$current .= $char.$expr[++$i];
550+
if ($escaped) {
551+
$result .= $char;
552+
$escaped = false;
489553
continue;
490554
}
491555

492-
if ('"' === $char || "'" === $char) {
493-
if (!$inQuotes) {
494-
$inQuotes = true;
495-
$quoteChar = $char;
496-
} elseif ($char === $quoteChar) {
497-
$inQuotes = false;
498-
$quoteChar = null;
499-
}
500-
} elseif (!$inQuotes && ',' === $char) {
501-
$parts[] = trim($current);
502-
$current = '';
556+
if ('\\' === $char) {
557+
$result .= $char;
558+
$escaped = true;
559+
continue;
560+
}
503561

562+
if ('[' === $char && !$inCharClass) {
563+
$inCharClass = true;
564+
$result .= $char;
504565
continue;
505566
}
506567

507-
$current .= $char;
508-
}
568+
if (']' === $char && $inCharClass) {
569+
$inCharClass = false;
570+
$result .= $char;
571+
continue;
572+
}
509573

510-
if ('' !== $current) {
511-
$parts[] = trim($current);
574+
if ('.' === $char && !$inCharClass) {
575+
$result .= '(?:[^\r\n]|\x{2028}|\x{2029})';
576+
} else {
577+
$result .= $char;
578+
}
512579
}
513580

514-
return $parts;
581+
return $result;
515582
}
516583
}

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,74 @@ private static function unescapeUnicodeSequence(string $str, int $length, int &$
159159

160160
return mb_chr($codepoint, 'UTF-8');
161161
}
162+
163+
/**
 * Turns every tab, line feed and carriage return of the input into a plain
 * space, then trims the surrounding whitespace of the result.
 *
 * @see https://datatracker.ietf.org/doc/rfc9535/, section 2.1.1
 */
public static function normalizeWhitespace(string $input): string
{
    // each searched character is replaced by a single space; the replacement
    // never contains a searched character, so the order of the list is irrelevant
    $blanks = ["\t", "\n", "\r"];

    return trim(str_replace($blanks, ' ', $input));
}
176+
177+
/**
 * Check a number is RFC 9535 compliant using strict JSON number format.
 *
 * Accepted shape: an optional leading minus, an integer part that is either
 * "0" or starts with a non-zero digit, an optional fraction, and an optional
 * exponent ("e" or "E", optional sign, at least one digit).
 *
 * @return bool true only when the whole string matches the format
 */
public static function isJsonNumber(string $value): bool
{
    // The "D" modifier makes "$" match only at the very end of the subject;
    // without it "1\n" would be reported as a valid number.
    // Comparing with 1 keeps the return value a real boolean, including when
    // preg_match() returns false on an internal error.
    return 1 === preg_match('/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/D', $value);
}
184+
185+
/**
 * Splits an expression on its top-level commas.
 *
 * A comma is a separator only when it sits outside of a quoted string
 * (single or double quotes) and outside of square brackets. A backslash
 * followed by another character is copied verbatim together with that
 * character, so an escaped quote never opens or closes a string.
 *
 * Every part is trimmed. An empty remainder after the last comma is not
 * added to the result.
 *
 * @return string[]
 */
public static function parseCommaSeparatedValues(string $expr): array
{
    $values = [];
    $buffer = '';
    $openQuote = null; // the quote character of the string being read, null outside of strings
    $depth = 0;        // nesting level of square brackets
    $length = \strlen($expr);
    $pos = 0;

    while ($pos < $length) {
        $c = $expr[$pos++];

        // escape sequence: keep the backslash and the escaped character untouched
        if ('\\' === $c && $pos < $length) {
            $buffer .= $c.$expr[$pos++];

            continue;
        }

        // inside a string only the matching quote has a meaning
        if (null !== $openQuote) {
            if ($c === $openQuote) {
                $openQuote = null;
            }
            $buffer .= $c;

            continue;
        }

        if ('"' === $c || "'" === $c) {
            $openQuote = $c;
        } elseif ('[' === $c) {
            ++$depth;
        } elseif (']' === $c) {
            --$depth;
        } elseif (',' === $c && 0 === $depth) {
            // top-level separator: flush the current part and drop the comma
            $values[] = trim($buffer);
            $buffer = '';

            continue;
        }

        $buffer .= $c;
    }

    if ('' !== $buffer) {
        $values[] = trim($buffer);
    }

    return $values;
}
162232
}

0 commit comments

Comments
 (0)