-
-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathHtmlSanitizer.php
More file actions
135 lines (110 loc) · 4.92 KB
/
HtmlSanitizer.php
File metadata and controls
135 lines (110 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\HtmlSanitizer;
use Symfony\Component\HtmlSanitizer\Parser\NativeParser;
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;
/**
* @author Titouan Galopin <[email protected]>
*/
final class HtmlSanitizer implements HtmlSanitizerInterface
{
    /**
     * Matches, at the very end of a byte string, a UTF-8 lead byte that is not
     * followed by all of its continuation bytes (i.e. a character cut in half).
     * Deliberately used WITHOUT the "u" modifier: it has to inspect raw bytes.
     */
    private const INCOMPLETE_UTF8_TAIL = '/(?:[\xC2-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF4][\x80-\xBF]{0,2})\z/';

    private ParserInterface $parser;

    /**
     * DOM visitors created so far, indexed by the context they were built for,
     * so that each context is only configured once per sanitizer instance.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config Configuration read for the input length limit and
     *                                     the allowed/blocked/dropped elements
     * @param ParserInterface|null $parser Parser used to turn the input into a DOM tree;
     *                                     a NativeParser is created when null is given
     */
    public function __construct(
        private HtmlSanitizerConfig $config,
        ?ParserInterface $parser = null,
    ) {
        $this->parser = $parser ?? new NativeParser();
    }

    /**
     * Sanitizes the given input using the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeFor(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input for the context associated with an element name.
     *
     * The element name is lower-cased and looked up in W3CReference::CONTEXTS_MAP;
     * unknown elements fall back to the body context. In the text context the
     * input is only HTML-encoded; in every other context it is parsed and the
     * resulting DOM tree is filtered by a DomVisitor.
     *
     * @return string The sanitized markup, or an empty string when the input is not
     *                valid UTF-8, is blank, cannot be parsed or yields no node
     */
    public function sanitizeFor(string $element, string $input): string
    {
        $element = StringSanitizer::htmlLower($element);
        $context = W3CReference::CONTEXTS_MAP[$element] ?? W3CReference::CONTEXT_BODY;
        // Elements that are not known body elements are parsed as their context instead
        $element = isset(W3CReference::BODY_ELEMENTS[$element]) ? $element : $context;

        // Prevent DOS attack induced by extremely long HTML strings
        $input = $this->truncateToMaxInputLength($input);

        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build a DOM visitor
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        // Only operate on valid UTF-8 strings. This is necessary to prevent cross
        // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Replace NULL bytes with the replacement character
        $input = str_replace(\chr(0), '�', $input);

        // Parse as HTML
        if ('' === trim($input) || !$parsed = $this->parser->parse($input, $element)) {
            return '';
        }

        // Visit the DOM tree and render the sanitized nodes
        return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
    }

    /**
     * Cuts the input down to the configured maximum length (in bytes).
     *
     * A limit of -1 disables truncation. Because substr() works on bytes, the cut
     * can land in the middle of a multi-byte UTF-8 character; the dangling partial
     * sequence is dropped so that truncation alone never turns valid UTF-8 into
     * invalid UTF-8 (which would otherwise make the whole input be rejected).
     */
    private function truncateToMaxInputLength(string $input): string
    {
        $maxLength = $this->config->getMaxInputLength();
        if (-1 === $maxLength || \strlen($input) <= $maxLength) {
            return $input;
        }

        $truncated = substr($input, 0, $maxLength);

        // A UTF-8 character is at most 4 bytes long, so an incomplete one at the
        // end is at most 3 bytes: only the last 3 bytes need to be inspected.
        if (1 === preg_match(self::INCOMPLETE_UTF8_TAIL, substr($truncated, -3), $partial)) {
            $truncated = substr($truncated, 0, -\strlen($partial[0]));
        }

        return $truncated;
    }

    /**
     * Tells whether the given string is empty or valid UTF-8.
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('//u', $html);
    }

    /**
     * Builds the DOM visitor for a context from the configured elements.
     *
     * In the head context only elements listed in W3CReference::HEAD_ELEMENTS are
     * kept; in any other context only elements NOT listed there are kept.
     * Allowed elements are registered first, then blocked, then dropped, so a
     * later category overrides an earlier one for the same element name.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        $forHead = W3CReference::CONTEXT_HEAD === $context;
        $elementsConfig = [];

        // An element is relevant when "is a head element" matches "building for head"
        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if ($forHead === \array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$allowedElement] = $allowedAttributes;
            }
        }

        foreach (array_keys($this->config->getBlockedElements()) as $blockedElement) {
            if ($forHead === \array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
            }
        }

        foreach (array_keys($this->config->getDroppedElements()) as $droppedElement) {
            if ($forHead === \array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) {
                $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
            }
        }

        return new DomVisitor($this->config, $elementsConfig);
    }
}