Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit bb0c214

Browse files
otsch authored and nicolas-grekas committed
[DomCrawler] Add argument $normalizeWhitespace to Crawler::innerText() and make it return the first non-empty text
1 parent 21191d5 commit bb0c214

File tree

3 files changed

+80
-10
lines changed

3 files changed

+80
-10
lines changed

src/Symfony/Component/DomCrawler/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ CHANGELOG
55
---
66

77
* Add `CrawlerSelectorCount` test constraint
8+
* Add argument `$normalizeWhitespace` to `Crawler::innerText()`
9+
* Make `Crawler::innerText()` return the first non-empty text
810

911
6.0
1012
---

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -553,18 +553,34 @@ public function text(string $default = null, bool $normalizeWhitespace = true):
553553
$text = $this->getNode(0)->nodeValue;
554554

555555
if ($normalizeWhitespace) {
556-
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C");
556+
return $this->normalizeWhitespace($text);
557557
}
558558

559559
return $text;
560560
}
561561

562562
/**
563563
* Returns only the inner text that is the direct descendent of the current node, excluding any child nodes.
564+
*
565+
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
564566
*/
565-
public function innerText(): string
567+
public function innerText(/* bool $normalizeWhitespace = true */): string
566568
{
567-
return $this->filterXPath('.//text()')->text();
569+
$normalizeWhitespace = 1 <= \func_num_args() ? func_get_arg(0) : true;
570+
571+
foreach ($this->getNode(0)->childNodes as $childNode) {
572+
if (\XML_TEXT_NODE !== $childNode->nodeType) {
573+
continue;
574+
}
575+
if (!$normalizeWhitespace) {
576+
return $childNode->nodeValue;
577+
}
578+
if ('' !== trim($childNode->nodeValue)) {
579+
return $this->normalizeWhitespace($childNode->nodeValue);
580+
}
581+
}
582+
583+
return '';
568584
}
569585

570586
/**
@@ -1189,4 +1205,9 @@ private function isValidHtml5Heading(string $heading): bool
11891205
{
11901206
return 1 === preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);
11911207
}
1208+
1209+
private function normalizeWhitespace(string $string): string
1210+
{
1211+
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string), " \n\r\t\x0C");
1212+
}
11921213
}

src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php

Lines changed: 54 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -348,12 +348,56 @@ public function testText()
348348
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
349349
}
350350

351-
public function testInnerText()
351+
public function provideInnerTextExamples()
352352
{
353-
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath('//*[@id="complex-element"]'));
353+
return [
354+
[
355+
'//*[@id="complex-elements"]/*[@class="one"]', // XPath query
356+
'Parent text Child text', // Result of Crawler::text()
357+
'Parent text', // Result of Crawler::innerText()
358+
' Parent text ', // Result of Crawler::innerText(false)
359+
],
360+
[
361+
'//*[@id="complex-elements"]/*[@class="two"]',
362+
'Child text Parent text',
363+
'Parent text',
364+
' ',
365+
],
366+
[
367+
'//*[@id="complex-elements"]/*[@class="three"]',
368+
'Parent text Child text Parent text',
369+
'Parent text',
370+
' Parent text ',
371+
],
372+
[
373+
'//*[@id="complex-elements"]/*[@class="four"]',
374+
'Child text',
375+
'',
376+
' ',
377+
],
378+
[
379+
'//*[@id="complex-elements"]/*[@class="five"]',
380+
'Child text Another child',
381+
'',
382+
' ',
383+
],
384+
];
385+
}
386+
387+
/**
388+
* @dataProvider provideInnerTextExamples
389+
*/
390+
public function testInnerText(
391+
string $xPathQuery,
392+
string $expectedText,
393+
string $expectedInnerText,
394+
string $expectedInnerTextNormalizeWhitespaceFalse,
395+
) {
396+
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath($xPathQuery));
354397

355-
self::assertSame('Parent text Child text', $crawler->text());
356-
self::assertSame('Parent text', $crawler->innerText());
398+
self::assertSame($expectedText, $crawler->text());
399+
self::assertSame($expectedInnerText, $crawler->innerText());
400+
self::assertSame($expectedInnerTextNormalizeWhitespaceFalse, $crawler->innerText(false));
357401
}
358402

359403
public function testHtml()
@@ -1265,9 +1309,12 @@ public function createTestCrawler($uri = null)
12651309
<div id="child2" xmlns:foo="http://example.com"></div>
12661310
</div>
12671311
<div id="sibling"><img /></div>
1268-
<div id="complex-element">
1269-
Parent text
1270-
<span>Child text</span>
1312+
<div id="complex-elements">
1313+
<div class="one"> Parent text <span>Child text</span> </div>
1314+
<div class="two"> <span>Child text</span> Parent text </div>
1315+
<div class="three"> Parent text <span>Child text</span> Parent text </div>
1316+
<div class="four"> <span>Child text</span> </div>
1317+
<div class="five"><span>Child text</span> <span>Another child</span></div>
12711318
</div>
12721319
</body>
12731320
</html>

0 commit comments

Comments
 (0)