Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 504e092

Browse files
committed
bpo-17239: XML entity expansion limitation
Modelled after https://github.com/GNOME/libxml2/blob/v2.9.8/parser.c#L99 Signed-off-by: Christian Heimes <[email protected]>
1 parent 24b447e commit 504e092

14 files changed

+633
-29
lines changed

Include/pyexpat.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,17 @@
33

44
/* note: you must import expat.h before importing this module! */
55

6-
#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
6+
#include "expat.h"
7+
8+
#define PyExpat_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
9+
10+
#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.2"
711
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"
812

13+
#if PyExpat_COMBINED_VERSION < 20300
/* Fallback declaration used when PyExpat_COMBINED_VERSION is below 20300,
 * so that the SetOption/GetOption members of struct PyExpat_CAPI, which take
 * an enum XML_Option argument, still have a complete type to refer to.
 *
 * C requires an enumeration to list at least one enumerator, so the one
 * option constant referenced by the huge_xml accessors is declared here.
 * NOTE(review): the numeric value is a placeholder -- confirm it is never
 * handed to an expat build that lacks option support (the accessors check
 * the SetOption/GetOption pointers for NULL before using it).
 */
enum XML_Option {
    XML_OPTION_HUGE_XML = 0
};
#endif
16+
917
struct PyExpat_CAPI
1018
{
1119
char* magic; /* set to PyExpat_CAPI_MAGIC */
@@ -50,6 +58,9 @@ struct PyExpat_CAPI
5058
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
5159
/* might be none for expat < 2.1.0 */
5260
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
61+
/* expat >= 2.3.0 */
62+
enum XML_Status (*SetOption)(XML_Parser parser, enum XML_Option option, void *value);
63+
enum XML_Status (*GetOption)(XML_Parser parser, enum XML_Option option, void *rvalue);
5364
/* always add new stuff to the end! */
5465
};
5566

Lib/test/test_sax.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
XMLFilterBase, prepare_input_source
1515
from xml.sax.expatreader import create_parser
1616
from xml.sax.handler import feature_namespaces, feature_external_ges
17+
from xml.sax.handler import feature_huge_xml
18+
from xml.sax.handler import ErrorHandler
1719
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
1820
from io import BytesIO, StringIO
1921
import codecs
@@ -32,6 +34,10 @@
3234
except UnicodeEncodeError:
3335
raise unittest.SkipTest("filename is not encodable to utf8")
3436

37+
TEST_ENTITYTOOLARGE = findfile("entitytoolarge.xml", subdir="xmltestdata")
38+
TEST_EXPANSIONLIMIT = findfile("expansionlimit.xml", subdir="xmltestdata")
39+
TEST_RECURSIONLIMIT = findfile("nestinglimit.xml", subdir="xmltestdata")
40+
3541
supports_nonascii_filenames = True
3642
if not os.path.supports_unicode_filenames:
3743
try:
@@ -1311,6 +1317,65 @@ def test_nsattrs_wattr(self):
13111317
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
13121318

13131319

1320+
class NullSink(StringIO):
    """StringIO subclass that discards everything written to it."""

    def write(self, *args):
        """Accept any positional arguments and drop them (like /dev/null)."""
        return None
1324+
1325+
1326+
class XmlEntityExpansion(unittest.TestCase):
    """Tests for the entity expansion limits and the huge_xml SAX feature.

    Every test first asserts that a parser with default settings rejects
    the input with the expected SAXParseException message, and then parses
    the same input again with feature_huge_xml switched on.
    """

    def get_parser(self, huge_xml=None):
        """Return a SAX parser whose generated output is thrown away.

        huge_xml -- None leaves the feature untouched; any other value is
                    passed to setFeature(feature_huge_xml, ...).
        """
        result = NullSink()
        handler = XMLGenerator(result, 'utf-8')
        parser = create_parser()
        parser.setContentHandler(handler)
        parser.setErrorHandler(ErrorHandler())
        if huge_xml is not None:
            parser.setFeature(feature_huge_xml, huge_xml)
        return parser

    def check_parse(self, source, huge_xml=None):
        """Parse *source* with a fresh parser configured via get_parser()."""
        parser = self.get_parser(huge_xml)
        parser.parse(source)

    def test_entitytoolarge(self):
        header = "<!DOCTYPE he [<!ELEMENT he (#PCDATA)*><!ENTITY e '"
        entity = "0123456789" * 100
        footer = "'>]><he>&e;</he>"

        def feed_declaration(parser):
            # Feed the DOCTYPE prologue followed by the entity text:
            # 1000 chunks of 1000 characters plus one extra character.
            parser.feed(header)
            for _ in range(1000):
                parser.feed(entity)
            parser.feed('-')

        # Default settings: the oversized entity must be rejected.
        parser = self.get_parser()
        feed_declaration(parser)
        with self.assertRaisesRegex(SAXParseException,
                                    "entity text is too large"):
            parser.feed(footer, True)

        # With huge_xml enabled the same input must parse without error.
        parser = self.get_parser(True)
        feed_declaration(parser)
        parser.feed(footer, True)

    def test_expansionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity expansion limit reached"):
            self.check_parse(TEST_EXPANSIONLIMIT)
        self.check_parse(TEST_EXPANSIONLIMIT, True)

    def test_recursionlimit(self):
        with self.assertRaisesRegex(SAXParseException,
                                    "entity nesting limit reached"):
            self.check_parse(TEST_RECURSIONLIMIT)
        # Re-parse the *same* document with the protection disabled; the
        # original re-used TEST_EXPANSIONLIMIT here, so the nesting case
        # was never exercised with huge_xml enabled.
        self.check_parse(TEST_RECURSIONLIMIT, True)
1377+
1378+
13141379
def test_main():
13151380
run_unittest(MakeParserTest,
13161381
ParseTest,
@@ -1323,7 +1388,8 @@ def test_main():
13231388
StreamReaderWriterXmlgenTest,
13241389
ExpatReaderTest,
13251390
ErrorReportingTest,
1326-
XmlReaderTest)
1391+
XmlReaderTest,
1392+
XmlEntityExpansion)
13271393

13281394
if __name__ == "__main__":
13291395
test_main()
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
<!DOCTYPE he [
2+
<!ELEMENT he (#PCDATA)*>
3+
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
4+
<!ENTITY b "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP">]>
5+
<he>
6+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
7+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
8+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
9+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
10+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
11+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
12+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
13+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
14+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
15+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
16+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
17+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
18+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
19+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
20+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
21+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
22+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
23+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
24+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
25+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
26+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
27+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
28+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
29+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
30+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
31+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
32+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
33+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
34+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
35+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
36+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
37+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
38+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
39+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
40+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
41+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
42+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
43+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
44+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
45+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
46+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
47+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
48+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
49+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
50+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
51+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
52+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
53+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
54+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
55+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
56+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
57+
&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;
58+
</he>

Lib/test/xmltestdata/nestinglimit.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<!DOCTYPE he [
2+
<!ELEMENT he (#PCDATA)*>
3+
<!ENTITY e1 '&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;'>
4+
<!ENTITY e2 '&e3;&e3;&e3;&e3;&e3;'>
5+
<!ENTITY e3 'entity'>
6+
]>
7+
<he>&e1;</he>

Lib/xml/dom/expatbuilder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ def getParser(self):
160160
self._parser.buffer_text = True
161161
self._parser.ordered_attributes = True
162162
self._parser.specified_attributes = True
163+
# Forward an explicit huge_xml choice to the parser.  The attribute name
# must be "huge_xml" (as used by the SAX reader and ElementTree); the
# previous spelling "huge_entites" never addressed that attribute.
if self._options.huge_xml is not None:
    self._parser.huge_xml = self._options.huge_xml
163165
self.install(self._parser)
164166
return self._parser
165167

Lib/xml/dom/xmlbuilder.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ class Options:
4141
errorHandler = None
4242
filter = None
4343

44+
# None: keep default, True: disable entity expansion protection
45+
huge_xml = None
46+
4447

4548
class DOMBuilder:
4649
entityResolver = None

Lib/xml/etree/ElementTree.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,14 @@ def close(self):
16261626
del self.parser, self._parser
16271627
del self.target, self._target
16281628

1629+
@property
def huge_xml(self):
    """Value of the ``huge_xml`` attribute of the wrapped parser object."""
    wrapped = self._parser
    return wrapped.huge_xml

@huge_xml.setter
def huge_xml(self, value):
    """Store *value* in the ``huge_xml`` attribute of the wrapped parser."""
    setattr(self._parser, "huge_xml", value)
1636+
16291637

16301638
# Import the C accelerators
16311639
try:

Lib/xml/sax/expatreader.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from xml.sax.handler import feature_validation, feature_namespaces
1010
from xml.sax.handler import feature_namespace_prefixes
1111
from xml.sax.handler import feature_external_ges, feature_external_pes
12-
from xml.sax.handler import feature_string_interning
12+
from xml.sax.handler import feature_string_interning, feature_huge_xml
13+
1314
from xml.sax.handler import property_xml_string, property_interning_dict
1415

1516
# xml.parsers.expat does not raise ImportError in Jython
@@ -97,6 +98,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
9798
self._entity_stack = []
9899
self._external_ges = 0
99100
self._interning = None
101+
self._huge_xml = None
100102

101103
# XMLReader methods
102104

@@ -137,6 +139,8 @@ def getFeature(self, name):
137139
return 0
138140
elif name == feature_external_ges:
139141
return self._external_ges
142+
elif name == feature_huge_xml:
143+
return self._parser.huge_xml
140144
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
141145

142146
def setFeature(self, name, state):
@@ -153,6 +157,8 @@ def setFeature(self, name, state):
153157
self._interning = {}
154158
else:
155159
self._interning = None
160+
elif name == feature_huge_xml:
161+
self._huge_xml = bool(state)
156162
elif name == feature_validation:
157163
if state:
158164
raise SAXNotSupportedException(
@@ -285,7 +291,8 @@ def reset(self):
285291
intern = self._interning)
286292
self._parser.StartElementHandler = self.start_element
287293
self._parser.EndElementHandler = self.end_element
288-
294+
if self._huge_xml is not None:
295+
self._parser.huge_xml = self._huge_xml
289296
self._reset_cont_handler()
290297
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
291298
self._parser.NotationDeclHandler = self.notation_decl

Lib/xml/sax/handler.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,19 @@ def resolveEntity(self, publicId, systemId):
277277
# DTD subset.
278278
# access: (parsing) read-only; (not parsing) read/write
279279

280+
feature_huge_xml = "http://python.org/sax/features/huge-xml"
281+
# true: Allow XML files with huge entities and DTD
282+
# false: Protect against DoS attacks like entity expansion (billion laughs)
283+
# access: (parsing) read-only; (not parsing) read/write
284+
285+
280286
all_features = [feature_namespaces,
281287
feature_namespace_prefixes,
282288
feature_string_interning,
283289
feature_validation,
284290
feature_external_ges,
285-
feature_external_pes]
291+
feature_external_pes,
292+
feature_huge_xml]
286293

287294

288295
#============================================================================

Modules/_elementtree.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3709,6 +3709,37 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
37093709
return PyObject_GenericGetAttr((PyObject*) self, nameobj);
37103710
}
37113711

3712+
static PyObject*
3713+
xmlparser_huge_xml_getter(XMLParserObject *self, void *closure)
3714+
{
3715+
if (EXPAT(GetOption) != NULL) {
3716+
XML_Bool hx = XML_FALSE;
3717+
if (EXPAT(GetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
3718+
PyErr_SetString(PyExc_RuntimeError, "Failed to get option value");
3719+
return NULL;
3720+
}
3721+
return PyBool_FromLong((long)hx);
3722+
} else {
3723+
Py_RETURN_NONE;
3724+
}
3725+
}
3726+
3727+
static int
3728+
xmlparser_huge_xml_setter(XMLParserObject *self, PyObject *value, void *closure)
3729+
{
3730+
if (EXPAT(SetOption) != NULL) {
3731+
XML_Bool hx = PyObject_IsTrue(value) ? XML_TRUE : XML_FALSE;
3732+
if (EXPAT(SetOption)(self->parser, XML_OPTION_HUGE_XML, &hx) != XML_STATUS_OK) {
3733+
PyErr_SetString(PyExc_RuntimeError, "Failed to set option");
3734+
return -1;
3735+
}
3736+
return 0;
3737+
} else {
3738+
PyErr_SetString(PyExc_ValueError, "expat version doesn't support huge XML limit");
3739+
return -1;
3740+
}
3741+
}
3742+
37123743
#include "clinic/_elementtree.c.h"
37133744

37143745
static PyMethodDef element_methods[] = {
@@ -3874,6 +3905,14 @@ static PyMethodDef xmlparser_methods[] = {
38743905
{NULL, NULL}
38753906
};
38763907

3908+
static PyGetSetDef xmlparser_getsetlist[] = {
3909+
{"huge_xml",
3910+
(getter)xmlparser_huge_xml_getter,
3911+
(setter)xmlparser_huge_xml_setter,
3912+
"Allow huge entities and disable entity expansion protection"},
3913+
{NULL},
3914+
};
3915+
38773916
static PyTypeObject XMLParser_Type = {
38783917
PyVarObject_HEAD_INIT(NULL, 0)
38793918
"xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
@@ -3904,7 +3943,7 @@ static PyTypeObject XMLParser_Type = {
39043943
0, /* tp_iternext */
39053944
xmlparser_methods, /* tp_methods */
39063945
0, /* tp_members */
3907-
0, /* tp_getset */
3946+
xmlparser_getsetlist, /* tp_getset */
39083947
0, /* tp_base */
39093948
0, /* tp_dict */
39103949
0, /* tp_descr_get */

0 commit comments

Comments
 (0)