Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 13e41c5

Browse files
Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString().
1 parent aa9563c commit 13e41c5

1 file changed

Lines changed: 123 additions & 1 deletion

File tree

Lib/test/test_sax.py

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
1717
from io import BytesIO, StringIO
1818
import codecs
19+
import gc
1920
import os.path
2021
import shutil
2122
from test import support
22-
from test.support import findfile, run_unittest
23+
from test.support import findfile, run_unittest, TESTFN
2324

2425
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
2526
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
@@ -95,6 +96,126 @@ def verify_attrs_wattr(self, attrs):
9596
self.assertEqual(attrs["attr"], "val")
9697
self.assertEqual(attrs.getQNameByName("attr"), "attr")
9798

99+
100+
def xml_str(doc, encoding=None):
    """Return *doc*, optionally prefixed with an XML declaration.

    doc      -- the XML document body, as a str.
    encoding -- the encoding name to put in the declaration.  When it is
                None, no declaration is added and *doc* is returned as is.
    """
    if encoding is None:
        return doc
    return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
104+
105+
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* as bytes encoded with *encoding*.

    doc           -- the XML document body, as a str.
    encoding      -- the codec used to encode the resulting text.
    decl_encoding -- the encoding name written into the XML declaration.
                     The default (Ellipsis) means "same as *encoding*";
                     None means "emit no declaration at all".  Ellipsis is
                     used as the sentinel because None is a meaningful
                     value here.

    Characters that *encoding* cannot represent are written as XML
    character references ('xmlcharrefreplace').
    """
    if decl_encoding is ...:
        decl_encoding = encoding
    return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
109+
110+
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to the TESTFN file using *encoding*.

    doc           -- the XML document body, as a str.
    encoding      -- the text encoding the file is opened with.
    decl_encoding -- the encoding name written into the XML declaration.
                     The default (Ellipsis) means "same as *encoding*";
                     None means "emit no declaration at all".

    Characters that *encoding* cannot represent are written as XML
    character references ('xmlcharrefreplace').
    """
    if decl_encoding is ...:
        decl_encoding = encoding
    with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
        f.write(xml_str(doc, decl_encoding))
115+
116+
117+
class ParseTest(unittest.TestCase):
    """Tests for xml.sax.parse() and xml.sax.parseString()."""

    # Sample document mixing ASCII, Latin-1, BMP and non-BMP characters in
    # both an attribute value and the element content.
    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        # Remove the scratch file that make_xml_file() may have created.
        support.unlink(TESTFN)

    def check_parse(self, f):
        """Parse *f* with xml.sax.parse() and compare the regenerated XML.

        The parsed events are fed to an XMLGenerator, whose output must
        equal self.data preceded by a UTF-8 XML declaration.
        """
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parse_text(self):
        # Text (already decoded) sources: both in-memory streams and files
        # opened in text mode, with and without an XML declaration.
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parse(StringIO(xml_str(self.data, encoding)))
            make_xml_file(self.data, encoding)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)
            self.check_parse(StringIO(self.data))
            make_xml_file(self.data, encoding, None)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
            make_xml_file(self.data, encoding)
            self.check_parse(TESTFN)
            with open(TESTFN, 'rb') as f:
                self.check_parse(f)
            self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
            make_xml_file(self.data, encoding, None)
            self.check_parse(TESTFN)
            with open(TESTFN, 'rb') as f:
                self.check_parse(f)
        # accept UTF-8 with BOM
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
        make_xml_file(self.data, 'utf-8-sig', None)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # accept data with declared encoding
        self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
        make_xml_file(self.data, 'iso-8859-1')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with support.check_warnings(('unclosed file', ResourceWarning)):
            # XXX Failed parser leaks an opened file.
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            # Collect leaked file.
            gc.collect()
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            # Named "source" rather than "input" to avoid shadowing the
            # builtin.
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def check_parseString(self, s):
        """Parse *s* with xml.sax.parseString() and compare the output.

        The parsed events are fed to an XMLGenerator, whose output must
        equal self.data preceded by a UTF-8 XML declaration.
        """
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_bytes(self.data, encoding))
            self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
218+
98219
class MakeParserTest(unittest.TestCase):
99220
def test_make_parser2(self):
100221
# Creating parsers several times in a row should succeed.
@@ -1082,6 +1203,7 @@ def test_nsattrs_wattr(self):
10821203

10831204
def test_main():
10841205
run_unittest(MakeParserTest,
1206+
ParseTest,
10851207
SaxutilsTest,
10861208
PrepareInputSourceTest,
10871209
StringXmlgenTest,

0 commit comments

Comments
 (0)