1010
1111class Outputter :
1212 def StartElementHandler (self , name , attrs ):
13- print 'Start element:\n \t ' , name , attrs
13+ print 'Start element:\n \t ' , repr ( name ) , attrs
1414
1515 def EndElementHandler (self , name ):
16- print 'End element:\n \t ' , name
16+ print 'End element:\n \t ' , repr ( name )
1717
1818 def CharacterDataHandler (self , data ):
1919 data = string .strip (data )
@@ -22,13 +22,13 @@ def CharacterDataHandler(self, data):
2222 print '\t ' , repr (data )
2323
2424 def ProcessingInstructionHandler (self , target , data ):
25- print 'PI:\n \t ' , target , data
25+ print 'PI:\n \t ' , repr ( target ), repr ( data )
2626
2727 def StartNamespaceDeclHandler (self , prefix , uri ):
28- print 'NS decl:\n \t ' , prefix , uri
28+ print 'NS decl:\n \t ' , repr ( prefix ), repr ( uri )
2929
3030 def EndNamespaceDeclHandler (self , prefix ):
31- print 'End of NS decl:\n \t ' , prefix
31+ print 'End of NS decl:\n \t ' , repr ( prefix )
3232
3333 def StartCdataSectionHandler (self ):
3434 print 'Start of CDATA section'
@@ -51,8 +51,9 @@ def NotStandaloneHandler(self, userData):
5151 print 'Not standalone'
5252 return 1
5353
54- def ExternalEntityRefHandler (self , context , base , sysId , pubId ):
55- print 'External entity ref:' , context , base , sysId , pubId
54+ def ExternalEntityRefHandler (self , * args ):
55+ context , base , sysId , pubId = args
56+ print 'External entity ref:' , args
5657 return 1
5758
5859 def DefaultHandler (self , userData ):
@@ -64,7 +65,14 @@ def DefaultHandlerExpand(self, userData):
6465
6566out = Outputter ()
6667parser = pyexpat .ParserCreate (namespace_separator = '!' )
67- for name in ['StartElementHandler' , 'EndElementHandler' ,
68+
69+ # Test getting/setting returns_unicode
70+ parser .returns_unicode = 0 ; assert parser .returns_unicode == 0
71+ parser .returns_unicode = 1 ; assert parser .returns_unicode == 1
72+ parser .returns_unicode = 2 ; assert parser .returns_unicode == 1
73+ parser .returns_unicode = 0 ; assert parser .returns_unicode == 0
74+
75+ HANDLER_NAMES = ['StartElementHandler' , 'EndElementHandler' ,
6876 'CharacterDataHandler' , 'ProcessingInstructionHandler' ,
6977 'UnparsedEntityDeclHandler' , 'NotationDeclHandler' ,
7078 'StartNamespaceDeclHandler' , 'EndNamespaceDeclHandler' ,
@@ -73,7 +81,8 @@ def DefaultHandlerExpand(self, userData):
7381 'DefaultHandler' , 'DefaultHandlerExpand' ,
7482 #'NotStandaloneHandler',
7583 'ExternalEntityRefHandler'
76- ]:
84+ ]
85+ for name in HANDLER_NAMES :
7786 setattr (parser , name , getattr (out , name ) )
7887
7988data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
@@ -88,7 +97,7 @@ def DefaultHandlerExpand(self, userData):
8897%unparsed_entity;
8998]>
9099
91- <root>
100+ <root attr1="value1" attr2="value2&#8000;" >
92101<myns:subelement xmlns:myns="http://www.python.org/namespace">
93102 Contents of subelements
94103</myns:subelement>
@@ -97,6 +106,8 @@ def DefaultHandlerExpand(self, userData):
97106</root>
98107"""
99108
109+ # Produce UTF-8 output
110+ parser .returns_unicode = 0
100111try :
101112 parser .Parse (data , 1 )
102113except pyexpat .error :
@@ -105,3 +116,33 @@ def DefaultHandlerExpand(self, userData):
105116 print '** Column' , parser .ErrorColumnNumber
106117 print '** Byte' , parser .ErrorByteIndex
107118
119+ # Try the parse again, this time producing Unicode output
120+ parser = pyexpat .ParserCreate (namespace_separator = '!' )
121+ parser .returns_unicode = 1
122+
123+ for name in HANDLER_NAMES :
124+ setattr (parser , name , getattr (out , name ) )
125+ try :
126+ parser .Parse (data , 1 )
127+ except pyexpat .error :
128+ print '** Error' , parser .ErrorCode , pyexpat .ErrorString ( parser .ErrorCode )
129+ print '** Line' , parser .ErrorLineNumber
130+ print '** Column' , parser .ErrorColumnNumber
131+ print '** Byte' , parser .ErrorByteIndex
132+
133+ # Try parsing a file
134+ parser = pyexpat .ParserCreate (namespace_separator = '!' )
135+ parser .returns_unicode = 1
136+
137+ for name in HANDLER_NAMES :
138+ setattr (parser , name , getattr (out , name ) )
139+ import StringIO
140+ file = StringIO .StringIO (data )
141+ try :
142+ parser .ParseFile (file )
143+ except pyexpat .error :
144+ print '** Error' , parser .ErrorCode , pyexpat .ErrorString ( parser .ErrorCode )
145+ print '** Line' , parser .ErrorLineNumber
146+ print '** Column' , parser .ErrorColumnNumber
147+ print '** Byte' , parser .ErrorByteIndex
148+
0 commit comments