Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 61de087

Browse files
Issue #2175: SAX parsers now support a character stream of InputSource object.
1 parent 278ba26 commit 61de087

7 files changed

Lines changed: 64 additions & 12 deletions

File tree

Doc/library/xml.sax.reader.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,10 @@ The :class:`XMLReader` interface supports the following methods:
100100
system identifier (a string identifying the input source -- typically a file
101101
name or a URL), a file-like object, or an :class:`InputSource` object. When
102102
:meth:`parse` returns, the input is completely processed, and the parser object
103-
can be discarded or reset. As a limitation, the current implementation only
104-
accepts byte streams; processing of character streams is for further study.
103+
can be discarded or reset.
104+
105+
.. versionchanged:: 3.5
106+
Added support for character streams.
105107

106108

107109
.. method:: XMLReader.getContentHandler()
@@ -288,8 +290,7 @@ InputSource Objects
288290

289291
.. method:: InputSource.setByteStream(bytefile)
290292

291-
Set the byte stream (a Python file-like object which does not perform
292-
byte-to-character conversion) for this input source.
293+
Set the byte stream (a :term:`binary file`) for this input source.
293294

294295
The SAX parser will ignore this if there is also a character stream specified,
295296
but it will use a byte stream in preference to opening a URI connection itself.
@@ -308,8 +309,7 @@ InputSource Objects
308309

309310
.. method:: InputSource.setCharacterStream(charfile)
310311

311-
Set the character stream for this input source. (The stream must be a Python 1.6
312-
Unicode-wrapped file-like that performs conversion to strings.)
312+
Set the character stream (a :term:`text file`) for this input source.
313313

314314
If there is a character stream specified, the SAX parser will ignore any byte
315315
stream and will not attempt to open a URI connection to the system identifier.

Doc/whatsnew/3.5.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,13 @@ xmlrpc
499499
* :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`.
500500
(Contributed by Claudiu Popa in :issue:`20627`.)
501501

502+
xml.sax
503+
-------
504+
505+
* SAX parsers now support the character stream of an
506+
:class:`~xml.sax.xmlreader.InputSource` object.
507+
(Contributed by Serhiy Storchaka in :issue:`2175`.)
508+
502509
faulthandler
503510
------------
504511

Lib/test/test_sax.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,24 @@ def tearDown(self):
185185
def make_byte_stream(self):
186186
return BytesIO(b"This is a byte stream.")
187187

188+
def make_character_stream(self):
189+
return StringIO("This is a character stream.")
190+
188191
def checkContent(self, stream, content):
189192
self.assertIsNotNone(stream)
190193
self.assertEqual(stream.read(), content)
191194
stream.close()
192195

193196

197+
def test_character_stream(self):
198+
# If the source is an InputSource with a character stream, use it.
199+
src = InputSource(self.file)
200+
src.setCharacterStream(self.make_character_stream())
201+
prep = prepare_input_source(src)
202+
self.assertIsNone(prep.getByteStream())
203+
self.checkContent(prep.getCharacterStream(),
204+
"This is a character stream.")
205+
194206
def test_byte_stream(self):
195207
# If the source is an InputSource that does not have a character
196208
# stream but does have a byte stream, use the byte stream.
@@ -225,6 +237,14 @@ def test_binary_file(self):
225237
self.checkContent(prep.getByteStream(),
226238
b"This is a byte stream.")
227239

240+
def test_text_file(self):
241+
# If the source is a text file-like object, use it as a character
242+
# stream.
243+
prep = prepare_input_source(self.make_character_stream())
244+
self.assertIsNone(prep.getByteStream())
245+
self.checkContent(prep.getCharacterStream(),
246+
"This is a character stream.")
247+
228248

229249
# ===== XMLGenerator
230250

@@ -904,6 +924,19 @@ def test_expat_inpsource_byte_stream(self):
904924

905925
self.assertEqual(result.getvalue(), xml_test_out)
906926

927+
def test_expat_inpsource_character_stream(self):
928+
parser = create_parser()
929+
result = BytesIO()
930+
xmlgen = XMLGenerator(result)
931+
932+
parser.setContentHandler(xmlgen)
933+
inpsrc = InputSource()
934+
with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
935+
inpsrc.setCharacterStream(f)
936+
parser.parse(inpsrc)
937+
938+
self.assertEqual(result.getvalue(), xml_test_out)
939+
907940
# ===== IncrementalParser support
908941

909942
def test_expat_incremental(self):

Lib/xml/sax/expatreader.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,14 @@ def close(self):
219219
self._parsing = 0
220220
# break cycle created by expat handlers pointing to our methods
221221
self._parser = None
222-
bs = self._source.getByteStream()
223-
if bs is not None:
224-
bs.close()
222+
try:
223+
file = self._source.getCharacterStream()
224+
if file is not None:
225+
file.close()
226+
finally:
227+
file = self._source.getByteStream()
228+
if file is not None:
229+
file.close()
225230

226231
def _reset_cont_handler(self):
227232
self._parser.ProcessingInstructionHandler = \

Lib/xml/sax/saxutils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,11 +345,14 @@ def prepare_input_source(source, base=""):
345345
elif hasattr(source, "read"):
346346
f = source
347347
source = xmlreader.InputSource()
348-
source.setByteStream(f)
348+
if isinstance(f.read(0), str):
349+
source.setCharacterStream(f)
350+
else:
351+
source.setByteStream(f)
349352
if hasattr(f, "name") and isinstance(f.name, str):
350353
source.setSystemId(f.name)
351354

352-
if source.getByteStream() is None:
355+
if source.getCharacterStream() is None and source.getByteStream() is None:
353356
sysid = source.getSystemId()
354357
basehead = os.path.dirname(os.path.normpath(base))
355358
sysidfilename = os.path.join(basehead, sysid)

Lib/xml/sax/xmlreader.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ def parse(self, source):
117117
source = saxutils.prepare_input_source(source)
118118

119119
self.prepareParser(source)
120-
file = source.getByteStream()
120+
file = source.getCharacterStream()
121+
if file is None:
122+
file = source.getByteStream()
121123
buffer = file.read(self._bufsize)
122124
while buffer:
123125
self.feed(buffer)

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ Core and Builtins
1616
Library
1717
-------
1818

19+
- Issue #2175: SAX parsers now support the character stream of an InputSource object.
20+
1921
- Issue #16840: Tkinter now supports 64-bit integers added in Tcl 8.4 and
2022
arbitrary precision integers added in Tcl 8.5.
2123

0 commit comments

Comments
 (0)