Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 55c3819

Browse files
committed
Paul Prescod <[email protected]>:
W3C DOM implementation for Python.
1 parent 3f6a7b5 commit 55c3819

2 files changed

Lines changed: 652 additions & 0 deletions

File tree

Lib/xml/dom/minidom.py

Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
import pulldom
2+
import string
3+
from StringIO import StringIO
4+
import types
5+
6+
"""
7+
minidom.py -- a lightweight DOM implementation based on SAX.
8+
9+
Todo:
10+
=====
11+
* convenience methods for getting elements and text.
12+
* more testing
13+
* bring some of the writer and linearizer code into conformance with this
14+
interface
15+
* SAX 2 namespaces
16+
"""
17+
18+
class Node:
    """Base class shared by every DOM node type in this module.

    Besides the W3C node-type constants it provides:

    * attribute/accessor mapping through __getattr__: reading ``node.foo``
      falls back to ``node._get_foo()``, and ``node._get_foo`` falls back
      to a callable that returns ``node.foo``;
    * basic child-list manipulation (appendChild, insertBefore);
    * toxml(), which relies on a ``writexml(writer)`` method that the
      concrete node classes are expected to supply;
    * unlink(), which breaks the references held by a subtree.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # One "<id><class repr>" string per node that has been constructed and
    # not yet unlinked (presumably a debugging aid for spotting leaks).
    allnodes = []

    def __init__(self):
        self.childNodes = []
        Node.allnodes.append(repr(id(self)) + repr(self.__class__))

    def __getattr__(self, key):
        """Resolve attributes that normal lookup did not find.

        ``_get_<name>`` yields a zero-argument callable returning
        ``self.<name>``; any other name is answered by calling
        ``self._get_<name>()``.  Raises AttributeError(key) when neither
        form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  A set flag means this call was
        # triggered by one of the probes below, i.e. the probed name is
        # missing; consume the flag and report the failure.
        if state.get("inGetAttr"):
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            state["inGetAttr"] = 1
            found = hasattr(self, attrname)
            # A failed probe re-entered this method and already removed the
            # flag; only a successful probe leaves it behind.
            if state.get("inGetAttr"):
                del state["inGetAttr"]
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))

        state["inGetAttr"] = 1
        try:
            func = getattr(self, "_get_" + key)
        except AttributeError:
            if state.get("inGetAttr"):
                del state["inGetAttr"]
            raise AttributeError(key)
        del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        return 1

    def toxml(self):
        """Return the markup produced by self.writexml() as one string."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if this node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild and return newChild.

        A refChild of None appends newChild at the end.  Raises ValueError
        when refChild is not one of this node's children.
        """
        if refChild is None:
            self.childNodes.append(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
        return newChild

    def appendChild(self, node):
        """Add node as the last child and return it."""
        self.childNodes.append(node)
        return node

    def unlink(self):
        """Recursively drop the references held by this subtree.

        After the call parentNode, childNodes and attributes are all None.
        Calling unlink() again on the same node is harmless.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1]  # probably not most efficient!
        self.childNodes = None
        if self.attributes:
            for attr in self.attributes.values():
                attr.unlink()
        self.attributes = None
        try:
            Node.allnodes.remove(repr(id(self)) + repr(self.__class__))
        except ValueError:
            # Already removed by an earlier unlink() of this node.
            pass
94+
95+
def _write_data( writer, data):
96+
"Writes datachars to writer."
97+
data=string.replace(data,"&","&amp;")
98+
data=string.replace(data,"<","&lt;")
99+
data=string.replace(data,"\"","&quot;")
100+
data=string.replace(data,">","&gt;")
101+
writer.write(data)
102+
103+
def _closeElement( element ):
104+
del element.parentNode
105+
for node in element.elements:
106+
_closeElement( node )
107+
108+
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect descendant elements of parent whose tagName equals name.

    A name of "*" matches every element.  Matches are appended to rc in
    document (pre-order) order, and rc itself is returned.
    """
    match_all = (name == "*")
    pending = list(parent.childNodes)
    pending.reverse()
    while pending:
        candidate = pending.pop()
        if candidate.nodeType == Node.ELEMENT_NODE:
            if match_all or candidate.tagName == name:
                rc.append(candidate)
        # Reverse the children so the first child is the next one popped,
        # which keeps the traversal order of the recursive formulation.
        below = list(candidate.childNodes)
        below.reverse()
        pending.extend(below)
    return rc
115+
116+
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect descendant elements of parent matching a namespace/local name.

    An element matches when its ``localName`` equals localName (or
    localName is "*") and its ``namespaceURI`` equals nsURI (or nsURI is
    "*").  Matches are appended to rc in document order; rc is returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against localName, not tagName: tagName carries the
            # prefix of a qualified name and would never match "local".
            if ((localName == "*" or node.localName == localName) and
                    (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
123+
124+
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` hold the qualified name.  ``value`` and
    ``nodeValue`` are not set by the constructor; assigning either one
    stores the same object under both names.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        assert qName
        # Write straight into the instance dictionary: skips __setattr__
        # for performance.
        fields = self.__dict__
        fields["nodeName"] = fields["name"] = qName
        fields["localName"] = localName or qName
        fields["prefix"] = prefix
        fields["namespaceURI"] = namespaceURI
        # nodeValue and value are set elsewhere
        self.attributes = None

    def __setattr__(self, name, value):
        """Store an attribute, mirroring ``value`` and ``nodeValue``."""
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = fields["nodeValue"] = value
            return
        fields[name] = value
143+
144+
class AttributeList:
    """Read-only, dictionary-like view of an element's attributes.

    The attribute list is a transient interface to the underlying
    dictionaries: ``attrs`` maps qualified names to attribute nodes and
    ``attrsNS`` maps ``(namespaceURI, localName)`` tuples to the same
    nodes.  Indexing returns attribute *values*, not nodes.  ``length`` is
    computed once, when the view is created.
    """

    def __init__(self, attrs, attrsNS):
        self.__attrs = attrs
        self.__attrsNS = attrsNS
        self.length = len(self.__attrs.keys())

    def item(self, index):
        """Return the value of the index-th attribute, or None if out of range."""
        try:
            # list() so that indexing also works when keys() is not a list.
            return self[list(self.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self.__attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self.__attrs.values()]

    def keys(self):
        """Return the qualified attribute names."""
        return self.__attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self.__attrsNS.keys()

    def values(self):
        """Return the attribute nodes themselves."""
        return self.__attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two views over the very same dictionary compare equal.
        if self.__attrs is other.__attrs:
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Return an attribute value by name or (namespaceURI, localName)."""
        if type(attname_or_tuple) == type((1, 2)):
            return self.__attrsNS[attname_or_tuple].value
        else:
            return self.__attrs[attname_or_tuple].value

    def __setitem__(self, attname, value=None):
        """Reject item assignment; always raises TypeError."""
        raise TypeError("object does not support item assignment")
193+
194+
class Element(Node):
    """An element node with its attributes and child nodes.

    ``tagName``/``nodeName`` hold the qualified name.  Attribute nodes are
    kept in two private dictionaries that always reference the same
    objects; the public view of them is the ``attributes`` attribute,
    produced on demand by _get_attributes().
    """
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self.__attrs = {}    # attributes are double-indexed:
        self.__attrsNS = {}  #    tagName -> Attribute
                             #    URI,localName -> Attribute
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def getAttribute(self, attname):
        """Return the value of the attribute named attname.

        Raises KeyError when the element has no such attribute.
        """
        return self.__attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute (namespaceURI, localName).

        Raises KeyError when the element has no such attribute.
        """
        return self.__attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute called attname holding value and attach it."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute holding value and attach it.

        qualifiedName is split at its first colon into prefix and local
        name; a name without a colon gets a prefix of None.
        """
        # An element holds no reference to a document, so the attribute
        # node is built here rather than through a document factory.
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = None, qualifiedName
        attr = Attr(qualifiedName, namespaceURI, prefix, localName)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNode(self, attr):
        """Attach the attribute node attr, indexing it under both keys."""
        self.__attrs[attr.name] = attr
        self.__attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        """Remove the attribute called name; KeyError if it is absent."""
        attr = self.__attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute (namespaceURI, localName); KeyError if absent."""
        attr = self.__attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Remove the attribute node from both indexes."""
        del self.__attrs[node.name]
        del self.__attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of descendant elements whose tagName is name ("*" = all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching namespace and local name."""
        # Keep our own reference to the accumulator so the result does not
        # depend on what the helper returns.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Serialize this element and its subtree to writer.

        Attributes are written sorted by name with their values escaped by
        _write_data(); an element without children is written as an empty
        tag (``<name/>``).
        """
        writer.write("<" + self.tagName)

        # Build the attribute view once instead of once per attribute.
        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            _write_data(writer, attrs[a_name])
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList view over this element's attributes."""
        return AttributeList(self.__attrs, self.__attrsNS)
275+
276+
class Comment(Node):
    """A comment node; ``data`` and ``nodeValue`` both hold the comment text."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.nodeName = "#comment"
        self.attributes = None
        self.data = self.nodeValue = data

    def writexml(self, writer):
        """Write the comment to writer as ``<!--data-->`` without escaping."""
        opening = "<!--" + self.data
        writer.write(opening + "-->")
286+
287+
class ProcessingInstruction(Node):
    """A processing-instruction node.

    ``target`` doubles as ``nodeName`` and ``data`` as ``nodeValue``.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction to writer as ``<?target data?>``."""
        head = "<?" + self.target + " "
        writer.write(head + self.data + "?>")
297+
298+
class Text(Node):
    """A text node; ``data`` and ``nodeValue`` both hold the character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most the first ten characters, flagging any truncation.
        text = self.data
        tail = ""
        if len(text) > 10:
            tail = "..."
        shown = "<DOM Text node \"" + text[0:10]
        return shown + tail + "\">"

    def writexml(self, writer):
        """Write the text to writer, escaped by _write_data()."""
        _write_data(writer, self.data)
315+
316+
class Document(Node):
    """The root of a DOM tree and the factory for its nodes.

    The plain ``create*`` factories are aliases for the node classes, so
    they take exactly the constructor arguments of those classes.
    """
    nodeType = Node.DOCUMENT_NODE

    def __init__(self):
        Node.__init__(self)
        self.documentElement = None
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for a namespace-qualified name.

        qualifiedName is split at its first colon into prefix and local
        name; a name without a colon gets an empty prefix.
        """
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = "", qualifiedName
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for a namespace-qualified name.

        qualifiedName is split at its first colon into prefix and local
        name; a name without a colon gets a prefix of None.
        """
        if ":" in qualifiedName:
            prefix, localName = qualifiedName.split(":", 1)
        else:
            prefix, localName = None, qualifiedName
        return Attr(qualifiedName, namespaceURI, prefix, localName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of elements matching namespace and local name."""
        # Keep our own reference to the accumulator so the result does not
        # depend on what the helper returns.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def close(self):
        # NOTE(review): nothing in this module defines ``elements`` on a
        # Document; confirm whether this method is still reachable.
        for node in self.elements:
            _closeElement(node)

    def unlink(self):
        """Drop documentElement, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return a list of elements whose tagName is name ("*" = all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Serialize every child of the document to writer, in order."""
        for node in self.childNodes:
            node.writexml(writer)
374+
375+
def _doparse( func, args, kwargs ):
376+
events=apply( func, args, kwargs )
377+
(toktype, rootNode)=events.getEvent()
378+
events.expandNode( rootNode )
379+
return rootNode
380+
381+
def parse(*args, **kwargs):
    """Return the root node built by feeding pulldom.parse() to _doparse().

    All positional and keyword arguments are forwarded to pulldom.parse().
    """
    factory = pulldom.parse
    return _doparse(factory, args, kwargs)
383+
384+
def parseString(*args, **kwargs):
    """Return the root node built by feeding pulldom.parseString() to _doparse().

    All positional and keyword arguments are forwarded to
    pulldom.parseString().
    """
    factory = pulldom.parseString
    return _doparse(factory, args, kwargs)

0 commit comments

Comments
 (0)