33
44import re
55import string
6- import sys
7- import xml .dom .core
8- import xml .dom .esis_builder
96
7+ import xml .dom .pulldom
108
11- _data_rx = re .compile (r"[^\\][^\\]*" )
9+ import xml .sax
10+ import xml .sax .handler
11+ import xml .sax .xmlreader
12+
13+
# Matches a run of one or more characters containing no backslash.
_data_match = re.compile(r"[^\\][^\\]*").match

# ``unichr`` only exists on Python 2; ``chr`` covers the same range on 3.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def decode(s):
    """Return *s* with its backslash escapes expanded.

    Three escapes are recognised:

    * a doubled backslash stands for a single backslash,
    * backslash + ``n`` stands for a newline,
    * backslash + ``%`` + decimal digits + ``;`` stands for the character
      with that code point.

    Raises ValueError for any other escape, for a backslash at the very
    end of the input, and for a ``%`` escape that lacks its terminating
    ``;`` or whose number is not a valid integer.
    """
    parts = []
    pos = 0
    end = len(s)
    while pos < end:
        m = _data_match(s, pos)
        if m:
            # Copy a whole run of ordinary characters in one step.
            parts.append(m.group())
            pos = m.end()
            continue
        # s[pos] is a backslash here.  Slice rather than index so that a
        # trailing backslash yields '' (and a ValueError below) instead of
        # an IndexError.
        code = s[pos + 1:pos + 2]
        if code == "\\":
            parts.append("\\")
            pos += 2
        elif code == "n":
            parts.append("\n")
            pos += 2
        elif code == "%":
            stop = s.find(";", pos + 2)
            if stop < 0:
                raise ValueError("can't handle " + repr(s[pos:]))
            parts.append(_unichr(int(s[pos + 2:stop])))
            pos = stop + 1
        else:
            raise ValueError("can't handle " + repr(s[pos:]))
    return ''.join(parts)
@@ -35,49 +42,269 @@ def decode(s):
3542_charmap ["\\ " ] = r"\\"
3643del c
3744
_null_join = ''.join


def encode(s):
    """Return *s* with every character replaced by its ``_charmap`` entry.

    Each character of *s* is looked up with ``_charmap.get`` and the
    results are concatenated with no separator.
    """
    escaped = [_charmap.get(ch) for ch in s]
    return _null_join(escaped)
4048
4149
class ESISReader(xml.sax.xmlreader.XMLReader):
    """SAX Reader which reads from an ESIS stream.

    No verification of the document structure is performed by the
    reader; a general verifier could be used as the target
    ContentHandler instance.

    Input is line oriented: the first character of each line is a token
    type mark and the rest of the line is that token's data.  Data can be
    supplied incrementally with feed()/close() or pulled from an input
    source with parse().
    """
    _decl_handler = None
    _lexical_handler = None

    # Identifiers collected from 'p' / 's' tokens for the next 'N' token.
    _public_id = None
    _system_id = None

    _buffer = ""        # trailing partial line left over from feed()
    _is_empty = 0       # set by an 'e' token, consumed by the next '('
    _lineno = 0         # number of lines seen by feed()
    _started = 0        # has startDocument() been sent by feed()?

    def __init__(self, contentHandler=None, errorHandler=None):
        xml.sax.xmlreader.XMLReader.__init__(self)
        self._attrs = {}
        # The Attributes object wraps self._attrs, so it always reflects
        # the attributes collected for the upcoming start tag.
        self._attributes = Attributes(self._attrs)
        self._locator = Locator()
        self._empties = {}
        if contentHandler:
            self.setContentHandler(contentHandler)
        if errorHandler:
            self.setErrorHandler(errorHandler)

    def get_empties(self):
        """Return a list of the element names that followed an 'e' token."""
        return list(self._empties)

    #
    #  XMLReader interface
    #

    def parse(self, source):
        """Read the whole of *source* and report it to the handlers.

        *source* must provide getPublicId(), getSystemId() and
        getByteStream().  startDocument() is sent before the first line
        is read and endDocument() once the stream is exhausted.
        """
        self._locator._public_id = source.getPublicId()
        self._locator._system_id = source.getSystemId()
        fp = source.getByteStream()
        handler = self.getContentHandler()
        if handler:
            handler.startDocument()
        lineno = 0
        while 1:
            # Update the locator first so that errors reported while
            # reading the line carry the right line number.
            lineno = lineno + 1
            self._locator._lineno = lineno
            token, data = self._get_token(fp)
            if token is None:
                break
            if token:
                self._handle_token(token, data)
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def feed(self, data):
        """Process another chunk of input.

        Complete lines are handled immediately; a trailing partial line
        is kept until the next feed() or close() call.  startDocument()
        is sent on the first call.
        """
        if not self._started:
            handler = self.getContentHandler()
            if handler:
                handler.startDocument()
            self._started = 1
        lines = (self._buffer + data).split("\n")
        # str.split() always returns at least one item; the last one is
        # the (possibly empty) unfinished line.  Store it before calling
        # any handler so the buffer stays valid even if a handler raises.
        self._buffer = lines.pop()
        for line in lines:
            self._lineno = self._lineno + 1
            self._locator._lineno = self._lineno
            if not line:
                e = xml.sax.SAXParseException(
                    "ESIS input line contains no token type mark",
                    None, self._locator)
                self.getErrorHandler().error(e)
            else:
                self._handle_token(line[0], line[1:])

    def close(self):
        """Finish incremental parsing and send endDocument().

        A final line that was not terminated by a newline is processed
        before the document is ended.
        """
        line = self._buffer
        self._buffer = ""
        if line:
            self._lineno = self._lineno + 1
            self._locator._lineno = self._lineno
            self._handle_token(line[0], line[1:])
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def _get_token(self, fp):
        """Read one line from *fp* and split it into (token, data).

        Returns (None, None) at end of input or after an I/O error has
        been reported, and ('', '') for an empty line (after reporting it
        to the error handler) so that the caller can skip it.
        """
        try:
            line = fp.readline()
        except IOError as exc:
            err = xml.sax.SAXException("I/O error reading input stream", exc)
            self.getErrorHandler().fatalError(err)
            # Only reached if the error handler chose not to raise.
            return None, None
        if not line:
            return None, None
        if line[-1] == "\n":
            line = line[:-1]
        if not line:
            err = xml.sax.SAXParseException(
                "ESIS input line contains no token type mark",
                None, self._locator)
            self.getErrorHandler().error(err)
            return "", ""
        return line[0], line[1:]

    def _handle_token(self, token, data):
        """Dispatch a single token to the appropriate handler call."""
        handler = self.getContentHandler()
        if token == '-':
            if data and handler:
                handler.characters(decode(data))
        elif token == ')':
            if handler:
                handler.endElement(decode(data))
        elif token == '(':
            if self._is_empty:
                self._empties[data] = 1
            if handler:
                handler.startElement(data, self._attributes)
            # The attributes belonged to this start tag only.
            self._attrs.clear()
            self._is_empty = 0
        elif token == 'A':
            name, value = data.split(' ', 1)
            if value != "IMPLIED":
                attr_type, value = value.split(' ', 1)
                self._attrs[name] = (decode(value), attr_type)
        elif token == '&':
            # entity reference in SAX?
            pass
        elif token == '?':
            if handler:
                if ' ' in data:
                    target, data = data.split(None, 1)
                else:
                    target, data = data, ""
                handler.processingInstruction(target, decode(data))
        elif token == 'N':
            handler = self.getDTDHandler()
            if handler:
                handler.notationDecl(data, self._public_id, self._system_id)
            self._public_id = None
            self._system_id = None
        elif token == 'p':
            self._public_id = decode(data)
        elif token == 's':
            self._system_id = decode(data)
        elif token == 'e':
            self._is_empty = 1
        elif token == 'C':
            pass
        else:
            e = xml.sax.SAXParseException(
                "unknown ESIS token in event stream",
                None, self._locator)
            self.getErrorHandler().error(e)

    def setContentHandler(self, handler):
        """Install *handler*, moving the document locator over to it."""
        old = self.getContentHandler()
        if old:
            old.setDocumentLocator(None)
        if handler:
            handler.setDocumentLocator(self._locator)
        xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)

    def getProperty(self, property):
        """Return the lexical or declaration handler property.

        Raises SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            return self._lexical_handler

        elif property == xml.sax.handler.property_declaration_handler:
            return self._decl_handler

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def setProperty(self, property, value):
        """Set the lexical or declaration handler property.

        The locator is detached from the previous handler and attached
        to the new one.  Raises SAXNotRecognizedException for any other
        property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            if self._lexical_handler:
                self._lexical_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._lexical_handler = value

        elif property == xml.sax.handler.property_declaration_handler:
            if self._decl_handler:
                self._decl_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._decl_handler = value

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def getFeature(self, feature):
        """Report the namespaces feature as on; defer others to the base."""
        if feature == xml.sax.handler.feature_namespaces:
            return 1
        else:
            return xml.sax.xmlreader.XMLReader.getFeature(self, feature)

    def setFeature(self, feature, enabled):
        """Accept (and ignore) the namespaces feature; defer others."""
        if feature == xml.sax.handler.feature_namespaces:
            pass
        else:
            xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
254+
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """Attribute collection whose backing dictionary stores pairs.

    self._attrs has the form {name: (value, type)}; the accessors below
    pick the requested half of each pair.
    """

    def getType(self, name):
        """Return the type recorded for attribute *name*."""
        value, kind = self._attrs[name]
        return kind

    def getValue(self, name):
        """Return the value recorded for attribute *name*."""
        value, kind = self._attrs[name]
        return value

    def getValueByQName(self, name):
        """Return the value of attribute *name* (same lookup as getValue)."""
        value, kind = self._attrs[name]
        return value

    def __getitem__(self, name):
        value, kind = self._attrs[name]
        return value

    def get(self, name, default=None):
        """Return the value of *name*, or *default* if it is not present."""
        try:
            entry = self._attrs[name]
        except KeyError:
            return default
        return entry[0]

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(name, pair[0]) for name, pair in self._attrs.items()]

    def values(self):
        """Return a list of the attribute values."""
        return [pair[0] for pair in self._attrs.values()]
286+
287+
class Locator(xml.sax.xmlreader.Locator):
    """Locator whose position fields are plain attributes.

    The owner of the locator assigns ``_lineno``, ``_public_id`` and
    ``_system_id`` directly; the accessors simply report them.
    """

    # Values reported until something else is assigned.
    _public_id = None
    _system_id = None
    _lineno = -1

    def getPublicId(self):
        """Return the current public identifier."""
        return self._public_id

    def getSystemId(self):
        """Return the current system identifier."""
        return self._system_id

    def getLineNumber(self):
        """Return the current line number."""
        return self._lineno
301+
302+
def parse(stream_or_string, parser=None):
    """Return a pulldom event stream for the given input.

    *stream_or_string* is either a file name (a byte or unicode string,
    or an instance of a subclass of either), which is opened for
    reading, or an already open stream, which is used as is.  *parser*
    is the reader to drive; a new ESISReader is created when it is None.
    """
    # isinstance() rather than an exact type comparison, so that string
    # subclasses are treated as file names too.
    if isinstance(stream_or_string, (type(""), type(u""))):
        # The returned event stream keeps reading from this file, so it
        # cannot be closed here.
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if parser is None:
        parser = ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)
0 commit comments