11import minidom
2- import xml .sax
3-
4- #todo: namespace handling
2+ import xml .sax ,xml .sax .handler
53
# Event-type tags. The handler stores one of these as the first item of
# each (tag, node) event pair; every constant's value is its own name.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
1412
15- class PullDOM :
13+ class PullDOM ( xml . sax . ContentHandler ) :
1614 def __init__ (self ):
1715 self .firstEvent = [None , None ]
1816 self .lastEvent = self .firstEvent
17+ self ._ns_contexts = [{}] # contains uri -> prefix dicts
18+ self ._current_context = self ._ns_contexts [- 1 ]
1919
def setDocumentLocator(self, locator):
    """Accept the parser's locator and do nothing with it."""
    return None
2121
22- def startElement (self , name , attrs ):
23- if not hasattr (self , "curNode" ):
24- # FIXME: hack!
25- self .startDocument ()
26-
27- node = self .document .createElement (name )
28- for (attr , value ) in attrs .items ():
29- node .setAttribute (attr , attrs [attr ])
30-
def startPrefixMapping(self, prefix, uri):
    """Record that *uri* is bound to *prefix*.

    A copy of the current uri -> prefix dict, taken before the new
    binding is added, is pushed onto ``_ns_contexts``; the binding itself
    is then written into ``_current_context``.
    """
    snapshot = self._current_context.copy()
    self._ns_contexts.append(snapshot)
    self._current_context[uri] = prefix
25+
def endPrefixMapping(self, prefix):
    """Leave the scope of the most recently started prefix mapping.

    ``startPrefixMapping`` pushes a copy of the uri -> prefix dict, taken
    before it adds its binding, onto ``_ns_contexts``.  Here that saved
    dict is popped and made the current context again, so the binding
    stops being visible once its scope has ended.

    *prefix* is not consulted: mappings are undone strictly in the
    reverse order in which they were pushed.

    Raises IndexError if the context stack is empty.
    """
    # Merely deleting the top entry would leave _current_context holding
    # the expired binding; restore the saved snapshot instead.
    self._current_context = self._ns_contexts.pop()
28+
29+ def startElementNS (self , name , tagName , attrs ):
30+ if name [0 ]:
31+ # When using namespaces, the reader may or may not
32+ # provide us with the original name. If not, create
33+ # *a* valid tagName from the current context.
34+ if tagName is None :
35+ tagName = self ._current_context [name [0 ]] + ":" + name [1 ]
36+ node = self .document .createElementNS (name [0 ], tagName )
37+ else :
38+ # When the tagname is not prefixed, it just appears as
39+ # name[1]
40+ node = self .document .createElement (name [1 ])
41+
42+ for aname ,value in attrs .items ():
43+ if aname [0 ]:
44+ qname = self ._current_context [name [0 ]] + ":" + aname [1 ]
45+ attr = self .document .createAttributeNS (name [0 ], qname )
46+ else :
47+ attr = self .document .createAttribute (name [0 ], name [1 ])
48+ attr .value = value
49+ node .setAttributeNode (qname , attr )
50+
3151 parent = self .curNode
3252 node .parentNode = parent
3353 if parent .childNodes :
@@ -39,7 +59,7 @@ def startElement(self, name, attrs):
3959 self .lastEvent = self .lastEvent [1 ]
4060 #self.events.append((START_ELEMENT, node))
4161
42- def endElement (self , name ):
62+ def endElementNS (self , name , tagName ):
4363 node = self .curNode
4464 self .lastEvent [1 ] = [(END_ELEMENT , node ), None ]
4565 self .lastEvent = self .lastEvent [1 ]
@@ -122,6 +142,8 @@ def __init__(self, stream, parser, bufsize):
122142
def reset(self):
    """Create a fresh PullDOM handler and attach it to the parser."""
    handler = PullDOM()
    self.pulldom = handler
    # The handler relies on namespace support, so the feature is switched
    # on before the handler is installed.
    sax_parser = self.parser
    sax_parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    sax_parser.setContentHandler(handler)
126148
127149 def __getitem__ (self , pos ):
@@ -154,18 +176,6 @@ def getEvent(self):
154176 self .pulldom .firstEvent [1 ] = self .pulldom .firstEvent [1 ][1 ]
155177 return rc
156178
157- # FIXME: sax2
158- #def _getParser( ):
159- # from xml.sax.saxexts import make_parser
160- # expat doesn't report errors properly! Figure it out
161- # return make_parser()
162- # return make_parser("xml.sax.drivers.drv_xmllib")
163-
164-
165-
166- def _getParser ():
167- return xml .sax .make_parser ()
168-
# Default buffer size: 20 less than 2**14.
default_bufsize = 2 ** 14 - 20
170180
171181# FIXME: move into sax package for common usage
@@ -175,7 +185,7 @@ def parse(stream_or_string, parser=None, bufsize=default_bufsize):
175185 else :
176186 stream = stream_or_string
177187 if not parser :
178- parser = _getParser ()
188+ parser = xml . sax . make_parser ()
179189 return DOMEventStream (stream , parser , bufsize )
180190
181191def parseString (string , parser = None ):
@@ -186,5 +196,6 @@ def parseString(string, parser=None):
186196
187197 bufsize = len (string )
188198 buf = StringIO (string )
189- parser = _getParser ()
199+ if not parser :
200+ parser = xml .sax .make_parser ()
190201 return DOMEventStream (buf , parser , bufsize )
0 commit comments