Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 48dda59

Browse files
[HtmlSanitizer] Use the native HTML5 parser when using PHP 8.4+
1 parent de952d2 commit 48dda59

File tree

14 files changed

+180
-70
lines changed

14 files changed

+180
-70
lines changed

UPGRADE-7.4.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Cache
1616
Console
1717
-------
1818

19-
* Deprecate `Symfony\Component\Console\Application::add()` in favor of `Symfony\Component\Console\Application::addCommand()`
19+
* Deprecate `Symfony\Component\Console\Application::add()` in favor of `addCommand()`
2020

2121
DependencyInjection
2222
-------------------
@@ -32,7 +32,14 @@ DoctrineBridge
3232
FrameworkBundle
3333
---------------
3434

35-
* Deprecate `Symfony\Bundle\FrameworkBundle\Console\Application::add()` in favor of `Symfony\Bundle\FrameworkBundle\Console\Application::addCommand()`
35+
* Deprecate `Symfony\Bundle\FrameworkBundle\Console\Application::add()` in favor of `addCommand()`
36+
37+
HtmlSanitizer
38+
-------------
39+
40+
* Use the native HTML5 parser when using PHP 8.4+
41+
* Deprecate `MastermindsParser`; use `NativeParser` instead
42+
* Deprecate `ParserInterface`; pass a `Closure` instead
3643

3744
HttpClient
3845
----------

src/Symfony/Component/HtmlSanitizer/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
CHANGELOG
22
=========
33

4+
7.4
5+
---
6+
7+
* Use the native HTML5 parser when using PHP 8.4+
8+
* Deprecate `MastermindsParser`; use `NativeParser` instead
9+
* Deprecate `ParserInterface`; pass a `Closure` instead
10+
411
7.2
512
---
613

src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\HtmlSanitizer;
1313

1414
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
15+
use Symfony\Component\HtmlSanitizer\Parser\NativeParser;
1516
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
1617
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
1718
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
@@ -22,19 +23,23 @@
2223
*/
2324
final class HtmlSanitizer implements HtmlSanitizerInterface
2425
{
25-
private ParserInterface $parser;
26+
private \Closure $parser;
2627

2728
/**
2829
* @var array<string, DomVisitor>
2930
*/
3031
private array $domVisitors = [];
3132

33+
/**
34+
* @param \Closure(string):\Dom\Node|\DOMNode|null $parser
35+
*/
3236
public function __construct(
3337
private HtmlSanitizerConfig $config,
34-
?ParserInterface $parser = null,
38+
ParserInterface|\Closure|null $parser = null,
3539
) {
40+
$parser ??= (\PHP_VERSION_ID < 80400 ? new MastermindsParser() : new NativeParser())->parse(...);
3641
$this->config = $config;
37-
$this->parser = $parser ?? new MastermindsParser();
42+
$this->parser = $parser instanceof ParserInterface ? $parser->parse(...) : $parser;
3843
}
3944

4045
public function sanitize(string $input): string
@@ -71,11 +76,11 @@ private function sanitizeWithContext(string $context, string $input): string
7176
return '';
7277
}
7378

74-
// Remove NULL character
75-
$input = str_replace(\chr(0), '', $input);
79+
// Remove NULL character and HTML entities for null byte
80+
$input = str_replace([\chr(0), '&#0;', '&#x00;', '&#X00;', '&#000;'], '', $input);
7681

7782
// Parse as HTML
78-
if (!$parsed = $this->parser->parse($input)) {
83+
if ('' === trim($input) || !$parsed = ($this->parser)($input, $context)) {
7984
return '';
8085
}
8186

src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,17 @@
1414
use Masterminds\HTML5;
1515

1616
/**
17+
* @deprecated since Symfony 7.4, use `NativeParser` instead
18+
*
1719
* @author Titouan Galopin <[email protected]>
1820
*/
1921
final class MastermindsParser implements ParserInterface
2022
{
2123
public function __construct(private array $defaultOptions = [])
2224
{
25+
if (\PHP_VERSION_ID < 80400) {
26+
trigger_deprecation('symfony/html-sanitizer', '7.4', '"%s" is deprecated since Symfony 7.4 and will be removed in 8.0. Use the "NativeParser" instead.', self::class);
27+
}
2328
}
2429

2530
public function parse(string $html): ?\DOMNode
src/Symfony/Component/HtmlSanitizer/Parser/NativeParser.php

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\HtmlSanitizer\Parser;
13+
14+
/**
15+
* Parser using PHP 8.4's new Dom API.
16+
*/
17+
final class NativeParser
18+
{
19+
public function __construct()
20+
{
21+
if (\PHP_VERSION_ID < 80400) {
22+
throw new \LogicException(self::class.' requires PHP 8.4 or higher.');
23+
}
24+
}
25+
26+
public function parse(string $html, string $context = 'body'): ?\Dom\Node
27+
{
28+
$document = @\Dom\HTMLDocument::createFromString(\sprintf('<!DOCTYPE html><%s>%s</%1$s>', $context, $html), \Dom\HTML_NO_DEFAULT_NS);
29+
$fragment = $document->createDocumentFragment();
30+
31+
$context = $document->getElementsByTagName($context)->item(0);
32+
$count1 = $context->childElementCount;
33+
34+
$fragment = $document->createDocumentFragment();
35+
while ($context->firstChild) {
36+
$fragment->appendChild($context->firstChild);
37+
}
38+
39+
$context->innerHTML = $html;
40+
$count2 = $context->childElementCount;
41+
42+
if ($count2 <= $count1) {
43+
return $fragment->hasChildNodes() ? $fragment : null;
44+
}
45+
46+
$fragment = $document->createDocumentFragment();
47+
while ($context->firstChild) {
48+
$fragment->appendChild($context->firstChild);
49+
}
50+
51+
return $fragment;
52+
}
53+
}

src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
* Transforms an untrusted HTML input string into a DOM tree.
1616
*
1717
* @author Titouan Galopin <[email protected]>
18+
*
19+
* @deprecated since Symfony 7.4
1820
*/
1921
interface ParserInterface
2022
{

src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,11 @@ public static function provideSanitizeHead()
6464
}
6565

6666
#[DataProvider('provideSanitizeBody')]
67-
public function testSanitizeBody(string $input, string $expected)
67+
public function testSanitizeBody(string $input, string $expected, ?string $legacyExpected = null)
6868
{
69+
if (\PHP_VERSION_ID < 80400) {
70+
$expected = $legacyExpected ?? $expected;
71+
}
6972
$this->assertSame($expected, $this->createSanitizer()->sanitize($input));
7073
}
7174

@@ -83,6 +86,7 @@ public static function provideSanitizeBody()
8386
],
8487
[
8588
'< Hello',
89+
'&lt; Hello',
8690
' Hello',
8791
],
8892
[
@@ -127,6 +131,7 @@ public static function provideSanitizeBody()
127131
],
128132
[
129133
'<<a href="javascript:evil"/>a href="javascript:evil"/>',
134+
'&lt;<a>a href&#61;&#34;javascript:evil&#34;/&gt;</a>',
130135
'<a>a href&#61;&#34;javascript:evil&#34;/&gt;</a>',
131136
],
132137
[
@@ -163,10 +168,12 @@ public static function provideSanitizeBody()
163168
],
164169
[
165170
'<<img src="javascript:evil"/>iframe src="javascript:evil"/>',
171+
'&lt;<img />iframe src&#61;&#34;javascript:evil&#34;/&gt;',
166172
'<img />iframe src&#61;&#34;javascript:evil&#34;/&gt;',
167173
],
168174
[
169175
'<<img src="javascript:evil"/>img src="javascript:evil"/>',
176+
'&lt;<img />img src&#61;&#34;javascript:evil&#34;/&gt;',
170177
'<img />img src&#61;&#34;javascript:evil&#34;/&gt;',
171178
],
172179
[
@@ -211,10 +218,12 @@ public static function provideSanitizeBody()
211218
],
212219
[
213220
'<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>',
221+
'<img />',
214222
'<img src="&amp;#0000106&amp;#0000097&amp;#0000118&amp;#0000097&amp;#0000115&amp;#0000099&amp;#0000114&amp;#0000105&amp;#0000112&amp;#0000116&amp;#0000058&amp;#0000097&amp;#0000108&amp;#0000101&amp;#0000114&amp;#0000116&amp;#0000040&amp;#0000039&amp;#0000088&amp;#0000083&amp;#0000083&amp;#0000039&amp;#0000041" />',
215223
],
216224
[
217225
'<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>',
226+
'<img />',
218227
'<img src="&amp;#x6A&amp;#x61&amp;#x76&amp;#x61&amp;#x73&amp;#x63&amp;#x72&amp;#x69&amp;#x70&amp;#x74&amp;#x3A&amp;#x61&amp;#x6C&amp;#x65&amp;#x72&amp;#x74&amp;#x28&amp;#x27&amp;#x58&amp;#x53&amp;#x53&amp;#x27&amp;#x29" />',
219228
],
220229
[
@@ -529,6 +538,7 @@ public static function provideSanitizeBody()
529538
],
530539
[
531540
'<table>Lorem ipsum</table>',
541+
'Lorem ipsum<table></table>',
532542
'<table>Lorem ipsum</table>',
533543
],
534544
[

src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111

1212
namespace Symfony\Component\HtmlSanitizer\Tests\Parser;
1313

14+
use PHPUnit\Framework\Attributes\Group;
15+
use PHPUnit\Framework\Attributes\IgnoreDeprecations;
1416
use PHPUnit\Framework\TestCase;
1517
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
1618

19+
#[IgnoreDeprecations]
20+
#[Group('legacy')]
1721
class MastermindsParserTest extends TestCase
1822
{
1923
public function testParseValid()
src/Symfony/Component/HtmlSanitizer/Tests/Parser/NativeParserTest.php

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\HtmlSanitizer\Tests\Parser;
13+
14+
use PHPUnit\Framework\TestCase;
15+
use Symfony\Component\HtmlSanitizer\Parser\NativeParser;
16+
17+
class NativeParserTest extends TestCase
18+
{
19+
public function testParseValid()
20+
{
21+
$node = (new NativeParser())->parse('<div></div>');
22+
$this->assertInstanceOf(\Dom\Node::class, $node);
23+
$this->assertSame('#document-fragment', $node->nodeName);
24+
$this->assertCount(1, $node->childNodes);
25+
$this->assertSame('div', $node->childNodes->item(0)->nodeName);
26+
}
27+
28+
public function testParseHtml()
29+
{
30+
$html = '<div><p>Hello <strong>World</strong>!</p></div>';
31+
$node = (new NativeParser())->parse($html);
32+
$this->assertInstanceOf(\Dom\Node::class, $node);
33+
$this->assertSame('#document-fragment', $node->nodeName);
34+
$this->assertCount(1, $node->childNodes);
35+
$this->assertSame('div', $node->childNodes->item(0)->nodeName);
36+
}
37+
}

src/Symfony/Component/HtmlSanitizer/TextSanitizer/StringSanitizer.php

Lines changed: 19 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,46 +16,24 @@
1616
*/
1717
final class StringSanitizer
1818
{
19-
private const LOWERCASE = [
20-
'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
21-
'abcdefghijklmnopqrstuvwxyz',
22-
];
23-
2419
private const REPLACEMENTS = [
25-
[
26-
// "&#34;" is shorter than "&quot;"
27-
'&quot;',
28-
29-
// Fix several potential issues in how browsers interpret attributes values
30-
'+',
31-
'=',
32-
'@',
33-
'`',
34-
35-
// Some DB engines will transform UTF8 full-width characters their classical version
36-
// if the data is saved in a non-UTF8 field
37-
'＜',
38-
'＞',
39-
'＋',
40-
'＝',
41-
'＠',
42-
'｀',
43-
],
44-
[
45-
'&#34;',
46-
47-
'&#43;',
48-
'&#61;',
49-
'&#64;',
50-
'&#96;',
51-
52-
'&#xFF1C;',
53-
'&#xFF1E;',
54-
'&#xFF0B;',
55-
'&#xFF1D;',
56-
'&#xFF20;',
57-
'&#xFF40;',
58-
],
20+
// "&#34;" is shorter than "&quot;"
21+
'&quot;' => '&#34;',
22+
23+
// Fix several potential issues in how browsers interpret attribute values
24+
'+' => '&#43;',
25+
'=' => '&#61;',
26+
'@' => '&#64;',
27+
'`' => '&#96;',
28+
29+
// Some DB engines will transform UTF8 full-width characters with
30+
// their classical version if the data is saved in a non-UTF8 field
31+
'＜' => '&#xFF1C;',
32+
'＞' => '&#xFF1E;',
33+
'＋' => '&#xFF0B;',
34+
'＝' => '&#xFF1D;',
35+
'＠' => '&#xFF20;',
36+
'｀' => '&#xFF40;',
5937
];
6038

6139
/**
@@ -65,18 +43,14 @@ final class StringSanitizer
6543
*/
6644
public static function htmlLower(string $string): string
6745
{
68-
return strtr($string, self::LOWERCASE[0], self::LOWERCASE[1]);
46+
return strtolower($string);
6947
}
7048

7149
/**
7250
* Encodes the HTML entities in the given string for safe injection in a document's DOM.
7351
*/
7452
public static function encodeHtmlEntities(string $string): string
7553
{
76-
return str_replace(
77-
self::REPLACEMENTS[0],
78-
self::REPLACEMENTS[1],
79-
htmlspecialchars($string, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8')
80-
);
54+
return strtr(htmlspecialchars($string, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8'), self::REPLACEMENTS);
8155
}
8256
}

0 commit comments

Comments
 (0)