|
16 | 16 | from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl |
17 | 17 | from io import BytesIO, StringIO |
18 | 18 | import codecs |
| 19 | +import gc |
19 | 20 | import os.path |
20 | 21 | import shutil |
21 | 22 | from test import support |
22 | | -from test.support import findfile, run_unittest |
| 23 | +from test.support import findfile, run_unittest, TESTFN |
23 | 24 |
|
24 | 25 | TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") |
25 | 26 | TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") |
@@ -95,6 +96,126 @@ def verify_attrs_wattr(self, attrs): |
95 | 96 | self.assertEqual(attrs["attr"], "val") |
96 | 97 | self.assertEqual(attrs.getQNameByName("attr"), "attr") |
97 | 98 |
|
| 99 | + |
def xml_str(doc, encoding=None):
    """Return *doc* as text, optionally prefixed with an XML declaration.

    doc      -- the XML document body, as a str.
    encoding -- encoding name to put in the declaration; when None, no
                declaration is added and *doc* is returned unchanged.
    """
    if encoding is None:
        return doc
    return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
| 104 | + |
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* encoded to bytes, with an optional XML declaration.

    doc           -- the XML document body, as a str.
    encoding      -- codec used to encode the resulting text.
    decl_encoding -- encoding name written in the XML declaration.  When
                     omitted it defaults to *encoding*; pass None to
                     produce a document without any declaration.

    Characters that *encoding* cannot represent are emitted as XML
    numeric character references ('xmlcharrefreplace').
    """
    # Ellipsis is the "not given" sentinel because None is a meaningful
    # value here (it suppresses the declaration).
    if decl_encoding is ...:
        decl_encoding = encoding
    return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
| 109 | + |
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to the TESTFN file, with an optional XML declaration.

    doc           -- the XML document body, as a str.
    encoding      -- encoding the file is written with.
    decl_encoding -- encoding name written in the XML declaration.  When
                     omitted it defaults to *encoding*; pass None to
                     write the document without any declaration.

    Characters that *encoding* cannot represent are written as XML
    numeric character references ('xmlcharrefreplace').
    """
    # Ellipsis is the "not given" sentinel because None is a meaningful
    # value here (it suppresses the declaration).
    if decl_encoding is ...:
        decl_encoding = encoding
    with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
        f.write(xml_str(doc, decl_encoding))
| 115 | + |
| 116 | + |
class ParseTest(unittest.TestCase):
    """Tests for xml.sax.parse() and xml.sax.parseString().

    Each test feeds the same sample document to the parser from a
    different kind of source (text stream, byte stream, file name,
    InputSource, bytes object) and in different encodings, then checks
    that XMLGenerator reproduces the document.
    """

    # Sample document mixing an ASCII character ('$') with Latin-1
    # (U+00A3), BMP (U+20AC) and non-BMP (U+1017B) characters, both in an
    # attribute value and in element content.
    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        # Remove the temporary file created by make_xml_file(), if any.
        support.unlink(TESTFN)

    def check_parse(self, f):
        """Parse *f* with xml.sax.parse() and verify the regenerated output.

        *f* is passed to parse() unchanged; the tests in this class pass
        text streams, byte streams, file names and InputSource objects.
        """
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def _check_parse_bytes_sources(self, encoding, decl_encoding=...):
        """Parse the sample document from every byte-oriented source.

        The document is encoded with *encoding* and declared as
        *decl_encoding* (same conventions as xml_bytes()), then parsed
        from an in-memory byte stream, from a file name and from a file
        object opened in binary mode.
        """
        self.check_parse(BytesIO(xml_bytes(self.data, encoding, decl_encoding)))
        make_xml_file(self.data, encoding, decl_encoding)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)

    def test_parse_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            # with an XML declaration naming the encoding
            self.check_parse(StringIO(xml_str(self.data, encoding)))
            make_xml_file(self.data, encoding)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)
            # without an XML declaration
            self.check_parse(StringIO(self.data))
            make_xml_file(self.data, encoding, None)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self._check_parse_bytes_sources(encoding)
            self._check_parse_bytes_sources(encoding, None)
        # accept UTF-8 with BOM
        self._check_parse_bytes_sources('utf-8-sig', 'utf-8')
        self._check_parse_bytes_sources('utf-8-sig', None)
        # accept data with declared encoding
        self._check_parse_bytes_sources('iso-8859-1')
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with support.check_warnings(('unclosed file', ResourceWarning)):
            # XXX Failed parser leaks an opened file.
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            # Collect leaked file.
            gc.collect()
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            # Named "source" rather than "input" to avoid shadowing the
            # builtin.
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def check_parseString(self, s):
        """Parse *s* with xml.sax.parseString() and verify the output."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_bytes(self.data, encoding))
            self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
| 218 | + |
98 | 219 | class MakeParserTest(unittest.TestCase): |
99 | 220 | def test_make_parser2(self): |
100 | 221 | # Creating parsers several times in a row should succeed. |
@@ -1115,6 +1236,7 @@ def test_nsattrs_wattr(self): |
1115 | 1236 |
|
1116 | 1237 | def test_main(): |
1117 | 1238 | run_unittest(MakeParserTest, |
| 1239 | + ParseTest, |
1118 | 1240 | SaxutilsTest, |
1119 | 1241 | PrepareInputSourceTest, |
1120 | 1242 | StringXmlgenTest, |
|
0 commit comments