Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ee7bf5c

Browse files
[DomCrawler] Optimize DomCrawler::relativize()
1 parent d4b381d commit ee7bf5c

File tree

1 file changed

+42
-52
lines changed

1 file changed

+42
-52
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 42 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -974,24 +974,48 @@ private function relativize($xpath)
974974
// We cannot simply drop
975975
$nonMatchingExpression = 'a[name() = "b"]';
976976

977-
// Split any unions into individual expressions.
978-
foreach ($this->splitUnionParts($xpath) as $expression) {
979-
$expression = trim($expression);
980-
$parenthesis = '';
981-
982-
// If the union is inside some braces, we need to preserve the opening braces and apply
983-
// the change only inside it.
984-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
985-
$parenthesis = $matches[0];
986-
$expression = substr($expression, strlen($parenthesis));
977+
$xpathLen = strlen($xpath);
978+
$openedBrackets = 0;
979+
$lastUnion = strspn($xpath, " \t\n\r\0\x0B");
980+
981+
for ($i = $lastUnion; $i <= $xpathLen; ++$i) {
982+
$i += strcspn($xpath, '"\'[]|', $i);
983+
984+
if ($i < $xpathLen) {
985+
switch ($xpath[$i]) {
986+
case '"':
987+
case "'":
988+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
989+
return $xpath; // The XPath expression is invalid
990+
}
991+
continue 2;
992+
case '[':
993+
case ']':
994+
$openedBrackets += '[' === $xpath[$i] ? 1 : -1;
995+
continue 2;
996+
}
997+
}
998+
if ($openedBrackets) {
999+
continue;
9871000
}
9881001

1002+
if ('(' === $xpath[$lastUnion]) {
1003+
// If the union is inside some braces, we need to preserve the opening braces and apply
1004+
// the change only inside it.
1005+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $lastUnion + 1);
1006+
$parenthesis = substr($xpath, $lastUnion, $j);
1007+
$lastUnion += $j;
1008+
} else {
1009+
$parenthesis = '';
1010+
}
1011+
$expression = rtrim(substr($xpath, $lastUnion, $i - $lastUnion));
1012+
9891013
if (0 === strpos($expression, 'self::*/')) {
9901014
$expression = './'.substr($expression, 8);
9911015
}
9921016

9931017
// add prefix before absolute element selector
994-
if (empty($expression)) {
1018+
if ('' === $expression) {
9951019
$expression = $nonMatchingExpression;
9961020
} elseif (0 === strpos($expression, '//')) {
9971021
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -1004,58 +1028,24 @@ private function relativize($xpath)
10041028
} elseif ('/' === $expression[0] || '.' === $expression[0] || 0 === strpos($expression, 'self::')) {
10051029
$expression = $nonMatchingExpression;
10061030
} elseif (0 === strpos($expression, 'descendant::')) {
1007-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
1031+
$expression = 'descendant-or-self::'.substr($expression, 12);
10081032
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
10091033
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
10101034
$expression = $nonMatchingExpression;
10111035
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
10121036
$expression = 'self::'.$expression;
10131037
}
10141038
$expressions[] = $parenthesis.$expression;
1015-
}
1016-
1017-
return implode(' | ', $expressions);
1018-
}
10191039

1020-
/**
1021-
* Splits the XPath into parts that are separated by the union operator.
1022-
*
1023-
* @param string $xpath
1024-
*
1025-
* @return string[]
1026-
*/
1027-
private function splitUnionParts($xpath)
1028-
{
1029-
// Split any unions into individual expressions. We need to iterate
1030-
// through the string to correctly parse opening/closing quotes and
1031-
// braces which is not possible with regular expressions.
1032-
$unionParts = array();
1033-
$inSingleQuotedString = false;
1034-
$inDoubleQuotedString = false;
1035-
$openedBrackets = 0;
1036-
$lastUnion = 0;
1037-
$xpathLength = strlen($xpath);
1038-
for ($i = 0; $i < $xpathLength; ++$i) {
1039-
$char = $xpath[$i];
1040-
1041-
if ($char === "'" && !$inDoubleQuotedString) {
1042-
$inSingleQuotedString = !$inSingleQuotedString;
1043-
} elseif ($char === '"' && !$inSingleQuotedString) {
1044-
$inDoubleQuotedString = !$inDoubleQuotedString;
1045-
} elseif (!$inSingleQuotedString && !$inDoubleQuotedString) {
1046-
if ($char === '[') {
1047-
++$openedBrackets;
1048-
} elseif ($char === ']') {
1049-
--$openedBrackets;
1050-
} elseif ($char === '|' && $openedBrackets === 0) {
1051-
$unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion);
1052-
$lastUnion = $i + 1;
1053-
}
1040+
if ($i === $xpathLen) {
1041+
return implode(' | ', $expressions);
10541042
}
1043+
1044+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
1045+
$lastUnion = $i + 1;
10551046
}
1056-
$unionParts[] = substr($xpath, $lastUnion);
10571047

1058-
return $unionParts;
1048+
return $xpath; // The XPath expression is invalid
10591049
}
10601050

10611051
/**

0 commit comments

Comments
 (0)