Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f6c115f

Browse files
committed
Re-write to no longer depend on an old version of PyXML. This now
implements a SAX XMLReader interface instead of the old Builder interface used with PyDOM (now obsolete). This only depends on the standard library, not PyXML.
1 parent a4699a7 commit f6c115f

1 file changed

Lines changed: 273 additions & 46 deletions

File tree

Doc/tools/sgmlconv/esistools.py

Lines changed: 273 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,33 @@
33

44
import re
55
import string
6-
import sys
7-
import xml.dom.core
8-
import xml.dom.esis_builder
96

7+
import xml.dom.pulldom
108

11-
_data_rx = re.compile(r"[^\\][^\\]*")
9+
import xml.sax
10+
import xml.sax.handler
11+
import xml.sax.xmlreader
12+
13+
14+
# Matches a maximal run of characters containing no backslash.
_data_match = re.compile(r"[^\\][^\\]*").match

try:
    _unichr = unichr
except NameError:
    # Python 3 has no unichr(); chr() already covers all code points.
    _unichr = chr


def decode(s):
    """Return *s* with its backslash escape sequences expanded.

    Recognized escapes:

    ``\\\\``   a single backslash
    ``\\n``    a newline
    ``\\%N;``  the character whose code point is the decimal number N

    Raises ValueError for any other escape, for a backslash that ends
    the string, and for a ``\\%`` escape with no terminating ``;`` or a
    non-numeric code.
    """
    pieces = []
    pos = 0
    end = len(s)
    # Walk the string by index instead of repeatedly slicing it, so long
    # inputs are decoded in linear time.
    while pos < end:
        m = _data_match(s, pos)
        if m:
            pieces.append(m.group())
            pos = m.end()
            continue
        # s[pos] is a backslash; slicing yields '' (rather than raising
        # IndexError) when it is the last character of the string.
        code = s[pos + 1:pos + 2]
        if code == "\\":
            pieces.append("\\")
            pos = pos + 2
        elif code == "n":
            pieces.append("\n")
            pos = pos + 2
        elif code == "%":
            semi = s.find(";", pos + 2)
            if semi < 0:
                raise ValueError("can't handle " + repr(s[pos:]))
            pieces.append(_unichr(int(s[pos + 2:semi])))
            pos = semi + 1
        else:
            raise ValueError("can't handle " + repr(s[pos:]))
    return ''.join(pieces)
@@ -35,49 +42,269 @@ def decode(s):
3542
_charmap["\\"] = r"\\"
3643
del c
3744

45+
_null_join = ''.join


def encode(s):
    """Return *s* with each character replaced by its _charmap entry.

    Every character is looked up with ``_charmap.get`` and the results
    are concatenated with no separator.
    """
    escaped = [_charmap.get(ch) for ch in s]
    return _null_join(escaped)
4048

4149

42-
class ExtendedEsisBuilder(xml.dom.esis_builder.EsisBuilder):
43-
def __init__(self, *args, **kw):
44-
self.__empties = {}
45-
self.__is_empty = 0
46-
apply(xml.dom.esis_builder.EsisBuilder.__init__, (self,) + args, kw)
47-
self.buildFragment()
50+
class ESISReader(xml.sax.xmlreader.XMLReader):
    """SAX Reader which reads from an ESIS stream.

    No verification of the document structure is performed by the
    reader; a general verifier could be used as the target
    ContentHandler instance.

    Input can be pushed incrementally with feed() and close(), or pulled
    from an input source with parse().  Each input line is one event:
    the first character is the token type and the rest is its data.
    """
    _decl_handler = None
    _lexical_handler = None

    # Set by 'p' / 's' tokens and consumed by the next 'N' token.
    _public_id = None
    _system_id = None

    _buffer = ""        # unterminated tail of the last feed() call
    _is_empty = 0       # set by an 'e' token, consumed by the next '('
    _lineno = 0         # number of lines processed through feed()
    _started = 0        # true once feed() has issued startDocument()

    def __init__(self, contentHandler=None, errorHandler=None):
        """Create a reader, optionally installing the given handlers."""
        xml.sax.xmlreader.XMLReader.__init__(self)
        # The dictionary is shared with self._attributes, so filling and
        # clearing it in place is what the content handler observes.
        self._attrs = {}
        self._attributes = Attributes(self._attrs)
        self._locator = Locator()
        self._empties = {}
        if contentHandler:
            self.setContentHandler(contentHandler)
        if errorHandler:
            self.setErrorHandler(errorHandler)

    def get_empties(self):
        """Return the names of elements that were preceded by an 'e' token."""
        return list(self._empties.keys())

    #
    #  XMLReader interface
    #

    def parse(self, source):
        """Read the stream from source.getByteStream() to its end.

        Emits startDocument() first, dispatches one token per input
        line, and finishes with endDocument().
        """
        self._locator._public_id = source.getPublicId()
        self._locator._system_id = source.getSystemId()
        fp = source.getByteStream()
        handler = self.getContentHandler()
        if handler:
            handler.startDocument()
        lineno = 0
        while 1:
            # Point the locator at the line about to be read so that
            # errors reported by _get_token() refer to the right line.
            self._locator._lineno = lineno + 1
            token, data = self._get_token(fp)
            if token is None:
                break
            lineno = lineno + 1
            if token:
                self._handle_token(token, data)
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def feed(self, data):
        """Process every complete line in *data*; buffer the remainder.

        The first call issues startDocument() on the content handler.
        """
        if not self._started:
            handler = self.getContentHandler()
            if handler:
                handler.startDocument()
            self._started = 1
        lines = (self._buffer + data).split("\n")
        # Store the unterminated tail before dispatching anything so the
        # buffer is still a valid string if a handler raises.
        self._buffer = lines.pop()
        for line in lines:
            self._process_line(line)

    def close(self):
        """Flush any buffered final line and issue endDocument()."""
        if self._buffer:
            # The input did not end with a newline; the last line is
            # still a token and must not be dropped.
            pending = self._buffer
            self._buffer = ""
            self._process_line(pending)
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()
        # Reset the incremental state so the reader can be fed again.
        self._buffer = ""
        self._lineno = 0
        self._started = 0

    def _process_line(self, line):
        # Advance the line counter and dispatch one line given to feed().
        self._lineno = self._lineno + 1
        self._locator._lineno = self._lineno
        if not line:
            self._report_missing_token()
        else:
            self._handle_token(line[0], line[1:])

    def _report_missing_token(self):
        # Report an input line that has no token type character.
        e = xml.sax.SAXParseException(
            "ESIS input line contains no token type mark",
            None, self._locator)
        self.getErrorHandler().error(e)

    def _get_token(self, fp):
        """Read one line from *fp* and return it as (token, data).

        Returns (None, None) at end of input or after an I/O error has
        been reported, and ("", "") for a line with no token type mark
        (after reporting it), so the result can always be unpacked.
        """
        try:
            line = fp.readline()
        except IOError as e:
            e = xml.sax.SAXException("I/O error reading input stream", e)
            self.getErrorHandler().fatalError(e)
            return None, None
        if not line:
            return None, None
        if line[-1] == "\n":
            line = line[:-1]
        if not line:
            self._report_missing_token()
            return "", ""
        return line[0], line[1:]

    def _handle_token(self, token, data):
        """Dispatch one ESIS event to the installed handlers."""
        handler = self.getContentHandler()
        if token == '-':
            if data and handler:
                handler.characters(decode(data))
        elif token == ')':
            if handler:
                handler.endElement(decode(data))
        elif token == '(':
            if self._is_empty:
                self._empties[data] = 1
            if handler:
                handler.startElement(data, self._attributes)
            # The attributes collected so far belonged to this element.
            self._attrs.clear()
            self._is_empty = 0
        elif token == 'A':
            name, value = data.split(' ', 1)
            if value != "IMPLIED":
                attr_type, value = value.split(' ', 1)
                self._attrs[name] = (decode(value), attr_type)
        elif token == '&':
            # entity reference in SAX?
            pass
        elif token == '?':
            if handler:
                if ' ' in data:
                    # Pad so that data made of a target followed only by
                    # whitespace still unpacks into two fields.
                    parts = data.split(None, 1) + ["", ""]
                    target, data = parts[0], parts[1]
                else:
                    target, data = data, ""
                handler.processingInstruction(target, decode(data))
        elif token == 'N':
            handler = self.getDTDHandler()
            if handler:
                handler.notationDecl(data, self._public_id, self._system_id)
            self._public_id = None
            self._system_id = None
        elif token == 'p':
            self._public_id = decode(data)
        elif token == 's':
            self._system_id = decode(data)
        elif token == 'e':
            self._is_empty = 1
        elif token == 'C':
            pass
        else:
            e = xml.sax.SAXParseException(
                "unknown ESIS token in event stream",
                None, self._locator)
            self.getErrorHandler().error(e)

    def setContentHandler(self, handler):
        """Install *handler*, moving the document locator over to it."""
        old = self.getContentHandler()
        if old:
            old.setDocumentLocator(None)
        if handler:
            handler.setDocumentLocator(self._locator)
        xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)

    def getProperty(self, property):
        """Return the lexical or declaration handler.

        Raises SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            return self._lexical_handler

        elif property == xml.sax.handler.property_declaration_handler:
            return self._decl_handler

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def setProperty(self, property, value):
        """Install the lexical or declaration handler.

        Raises SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            if self._lexical_handler:
                self._lexical_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._lexical_handler = value

        elif property == xml.sax.handler.property_declaration_handler:
            if self._decl_handler:
                self._decl_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._decl_handler = value

        else:
            # The exception constructor requires a message argument.
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def getFeature(self, feature):
        """Report the namespaces feature as on; defer the rest to the base."""
        if feature == xml.sax.handler.feature_namespaces:
            return 1
        else:
            return xml.sax.xmlreader.XMLReader.getFeature(self, feature)

    def setFeature(self, feature, enabled):
        """Accept and ignore the namespaces feature; defer the rest."""
        if feature == xml.sax.handler.feature_namespaces:
            pass
        else:
            xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
253+
254+
255+
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """Attribute collection whose entries carry a type as well as a value.

    self._attrs has the form {name: (value, type)}; the accessors below
    unpack the pair so callers see plain values, and getType() exposes
    the stored type.
    """

    def getType(self, name):
        """Return the type recorded for *name*; KeyError if absent."""
        return self._attrs[name][1]

    def getValue(self, name):
        """Return the value of *name*; KeyError if absent."""
        return self._attrs[name][0]

    def getValueByQName(self, name):
        """Return the value of *name*; KeyError if absent."""
        return self._attrs[name][0]

    def __getitem__(self, name):
        return self._attrs[name][0]

    def get(self, name, default=None):
        """Return the value of *name*, or *default* if it is absent."""
        # A lookup with a KeyError fallback avoids dict.has_key(), which
        # no longer exists in Python 3.
        try:
            return self._attrs[name][0]
        except KeyError:
            return default

    def items(self):
        """Return a list of (name, value) pairs, omitting the types."""
        return [(name, pair[0]) for name, pair in self._attrs.items()]

    def values(self):
        """Return a list of the attribute values, omitting the types."""
        return [pair[0] for pair in self._attrs.values()]
286+
287+
288+
class Locator(xml.sax.xmlreader.Locator):
    """Locator that reports whatever its private attributes hold.

    The attributes are plain class-level defaults; code that owns a
    Locator instance assigns to them directly.
    """

    # Defaults reported until an owner assigns real values.
    _system_id = None
    _public_id = None
    _lineno = -1

    def getSystemId(self):
        """Return the current system identifier (None by default)."""
        return self._system_id

    def getPublicId(self):
        """Return the current public identifier (None by default)."""
        return self._public_id

    def getLineNumber(self):
        """Return the current line number (-1 by default)."""
        return self._lineno
301+
302+
303+
def parse(stream_or_string, parser=None):
    """Return a pulldom DOMEventStream over an ESIS input.

    stream_or_string -- a file name (byte or Unicode string, including
                        subclasses of either) to open, or an object that
                        is used directly as the input stream.
    parser           -- the reader to drive; a new ESISReader is created
                        when none is supplied.

    A file opened here is handed to the event stream and is not closed
    by this function.
    """
    # isinstance() also accepts string subclasses, which an exact
    # type() comparison would wrongly treat as an already-open stream.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)

0 commit comments

Comments
 (0)