bug #10207 [DomCrawler] Fixed filterXPath() chaining (robbertkl)

fabpot · fabpot · commit c11c5888f31b · 2014-02-05T17:38:09.000+01:00
This PR was merged into the 2.3 branch. Discussion ---------- [DomCrawler] Fixed filterXPath() chaining | Q | A | ------------- | --- | Bug fix? | yes | New feature? | no | BC breaks? | debatable (see below) | Deprecations? | no | Tests pass? | yes | Fixed tickets | #10206 | License | MIT | Doc PR | As @stof mentions in #10206, each node in the Crawler can belong to a different \DOMDocument. Therefore, I've made each node do its own XPath, relative to itself, and add all the results to a new Crawler. This way, all resulting nodes are still part of their original \DOMDocument and thus can reach all of their parent nodes. No current tests break on this change. I've added a new test for this case, by checking if the number of parents is correct after obtaining a node through chaining of `filterXPath()`. Now for BC: I can think of a number of cases where this change would give a different result. However, it's debatable/unclear if: - the old behavior was a bug in the first place (which would validate this change), or - the old behavior was intended (which would make this a BC breaking change) As an example, consider the following HTML: ```html <div name="a"><div name="b"><div name="c"></div></div></div> ``` What would happen if we run this: ```php echo $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name'); ``` Aside from breaking reachability of the parent nodes by chaining, with the original code it would echo 'a'. With this patch it would echo 'c', which, to me, makes more sense. Commits ------- 43a7716 [DomCrawler] Fixed filterXPath() chaining
diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php
@@ -441,7 +441,7 @@ public function parents()
         $nodes = array();
 
         while ($node = $node->parentNode) {
-            if (1 === $node->nodeType && '_root' !== $node->nodeName) {
+            if (1 === $node->nodeType) {
                 $nodes[] = $node;
             }
         }
@@ -584,15 +584,13 @@ public function extract($attributes)
      */
     public function filterXPath($xpath)
     {
-        $document = new \DOMDocument('1.0', 'UTF-8');
-        $root = $document->appendChild($document->createElement('_root'));
+        $crawler = new static(null, $this->uri);
         foreach ($this as $node) {
-            $root->appendChild($document->importNode($node, true));
+            $domxpath = new \DOMXPath($node->ownerDocument);
+            $crawler->add($domxpath->query($xpath, $node));
         }
 
-        $domxpath = new \DOMXPath($document);
-
-        return new static($domxpath->query($xpath), $this->uri);
+        return $crawler;
     }
 
     /**
diff --git a/src/Symfony/Component/DomCrawler/Field/FormField.php b/src/Symfony/Component/DomCrawler/Field/FormField.php
@@ -52,13 +52,7 @@ public function __construct(\DOMNode $node)
     {
         $this->node = $node;
         $this->name = $node->getAttribute('name');
-
-        $this->document = new \DOMDocument('1.0', 'UTF-8');
-        $this->node = $this->document->importNode($this->node, true);
-
-        $root = $this->document->appendChild($this->document->createElement('_root'));
-        $root->appendChild($this->node);
-        $this->xpath = new \DOMXPath($this->document);
+        $this->xpath = new \DOMXPath($node->ownerDocument);
 
         $this->initialize();
     }
diff --git a/src/Symfony/Component/DomCrawler/Form.php b/src/Symfony/Component/DomCrawler/Form.php
@@ -378,9 +378,7 @@ private function initialize()
     {
         $this->fields = new FormFieldRegistry();
 
-        $document = new \DOMDocument('1.0', 'UTF-8');
-        $xpath = new \DOMXPath($document);
-        $root = $document->appendChild($document->createElement('_root'));
+        $xpath = new \DOMXPath($this->node->ownerDocument);
 
         // add submitted button if it has a valid name
         if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
@@ -390,39 +388,33 @@ private function initialize()
 
                 // temporarily change the name of the input node for the x coordinate
                 $this->button->setAttribute('name', $name.'.x');
-                $this->set(new Field\InputFormField($document->importNode($this->button, true)));
+                $this->set(new Field\InputFormField($this->button));
 
                 // temporarily change the name of the input node for the y coordinate
                 $this->button->setAttribute('name', $name.'.y');
-                $this->set(new Field\InputFormField($document->importNode($this->button, true)));
+                $this->set(new Field\InputFormField($this->button));
 
                 // restore the original name of the input node
                 $this->button->setAttribute('name', $name);
-            }
-            else {
-                $this->set(new Field\InputFormField($document->importNode($this->button, true)));
+            } else {
+                $this->set(new Field\InputFormField($this->button));
             }
         }
 
         // find form elements corresponding to the current form
         if ($this->node->hasAttribute('id')) {
-            // traverse through the whole document
-            $node = $document->importNode($this->node->ownerDocument->documentElement, true);
-            $root->appendChild($node);
-
             // corresponding elements are either descendants or have a matching HTML5 form attribute
             $formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
-            $fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);
+
+            // do the xpath query without $this->node as the context node (i.e. traverse through the whole document)
+            $fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
             foreach ($fieldNodes as $node) {
                 $this->addField($node);
             }
         } else {
-            // parent form has no id, add descendant elements only
-            $node = $document->importNode($this->node, true);
-            $root->appendChild($node);
-
-            // descendant elements with form attribute are not part of this form
-            $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
+            // do the xpath query with $this->node as the context node, to only find descendant elements
+            // however, descendant elements with form attribute are not part of this form
+            $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
             foreach ($fieldNodes as $node) {
                 $this->addField($node);
             }
diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
@@ -378,8 +378,10 @@ public function testFilterXPath()
         $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
 
         $crawler = $this->createTestCrawler()->filterXPath('//ul');
-
         $this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');
+
+        $crawler = $this->createTestCrawler();
+        $this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
     }
 
     /**

Original file line number	Diff line number	Diff line change
`@@ -441,7 +441,7 @@ public function parents()`
`441`	`441`	`$nodes = array();`
`442`	`442`
`443`	`443`	`while ($node = $node->parentNode) {`
`444`		`- if (1 === $node->nodeType && '_root' !== $node->nodeName) {`
	`444`	`+ if (1 === $node->nodeType) {`
`445`	`445`	`$nodes[] = $node;`
`446`	`446`	`}`
`447`	`447`	`}`
`@@ -584,15 +584,13 @@ public function extract($attributes)`
`584`	`584`	`*/`
`585`	`585`	`public function filterXPath($xpath)`
`586`	`586`	`{`
`587`		`- $document = new \DOMDocument('1.0', 'UTF-8');`
`588`		`- $root = $document->appendChild($document->createElement('_root'));`
	`587`	`+ $crawler = new static(null, $this->uri);`
`589`	`588`	`foreach ($this as $node) {`
`590`		`- $root->appendChild($document->importNode($node, true));`
	`589`	`+ $domxpath = new \DOMXPath($node->ownerDocument);`
	`590`	`+ $crawler->add($domxpath->query($xpath, $node));`
`591`	`591`	`}`
`592`	`592`
`593`		`- $domxpath = new \DOMXPath($document);`
`594`		`-`
`595`		`- return new static($domxpath->query($xpath), $this->uri);`
	`593`	`+ return $crawler;`
`596`	`594`	`}`
`597`	`595`
`598`	`596`	`/**`
Original file line number	Diff line number	Diff line change
`@@ -52,13 +52,7 @@ public function __construct(\DOMNode $node)`
`52`	`52`	`{`
`53`	`53`	`$this->node = $node;`
`54`	`54`	`$this->name = $node->getAttribute('name');`
`55`		`-`
`56`		`- $this->document = new \DOMDocument('1.0', 'UTF-8');`
`57`		`- $this->node = $this->document->importNode($this->node, true);`
`58`		`-`
`59`		`- $root = $this->document->appendChild($this->document->createElement('_root'));`
`60`		`- $root->appendChild($this->node);`
`61`		`- $this->xpath = new \DOMXPath($this->document);`
	`55`	`+ $this->xpath = new \DOMXPath($node->ownerDocument);`
`62`	`56`
`63`	`57`	`$this->initialize();`
`64`	`58`	`}`