55This is limited, but seems sufficient for the ESIS generated by the
66latex2esis.py script when run over the Python documentation.
77"""
8+
9+ # This should have an explicit option to indicate whether the *INPUT* was
10+ # generated from an SGML or an XML application.
11+
812__version__ = '$Revision$'
913
1014import errno
1620from xml .utils import escape
1721
1822
23+ AUTOCLOSE = ()
24+
1925EMPTIES_FILENAME = "../sgml/empties.dat"
2026LIST_EMPTIES = 0
2127
2228
29+ _elem_map = {}
30+ _attr_map = {}
31+ _token_map = {}
32+
# Case-normalization hook applied to every identifier before it is looked
# up in a name map.  The default, str, leaves names untouched.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling for the identifier *sgmlgi*.

    The identifier is normalized with _normalize_case and looked up in
    *map* (a dictionary keyed by normalized name).  If there is no entry
    yet, *sgmlgi* itself is recorded under the normalized key and
    returned, so later occurrences reuse the first spelling seen.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A missing dictionary key raises KeyError, not IndexError.
        map[uncased] = sgmlgi
        return sgmlgi
42+
def null_map_gi(sgmlgi, map):
    """Return *sgmlgi* unchanged; *map* is accepted but never consulted.

    Takes the same arguments as map_gi() so it can stand in for it when
    no name mapping is wanted.
    """
    return sgmlgi
45+
46+
def format_attrs(attrs, xml=0):
    """Format the attribute dictionary *attrs* for inclusion in a start tag.

    Attributes are emitted in sorted name order.  When *xml* is true every
    attribute is written as name="value" with the value escaped.  Otherwise
    SGML minimization is applied:

    - if the value equals the name and is a name token, only the value is
      written;
    - if the value is a token equal to "no" + name, only the value is
      written;
    - any other token value is written unquoted as name=value;
    - everything else is written as name="value" with the value escaped.

    Returns '' when there are no attributes; otherwise the result starts
    with a single space so it can be appended directly to the element name.
    """
    # Copy into a real list before sorting so this works whether items()
    # returns a list or a view object.
    items = list(attrs.items())
    items.sort()
    parts = []
    append = parts.append
    for name, value in items:
        if xml:
            append('%s="%s"' % (name, escape(value)))
        else:
            # this is a little bogus, but should do for now
            if name == value and isnmtoken(value):
                append(value)
            elif istoken(value):
                if value == "no" + name:
                    append(value)
                else:
                    append("%s=%s" % (name, value))
            else:
                append('%s="%s"' % (name, escape(value)))
    if parts:
        # The empty first element yields the leading space after joining.
        parts.insert(0, '')
    return " ".join(parts)
4269
4370
4471_nmtoken_rx = re .compile ("[a-z][-._a-z0-9]*$" , re .IGNORECASE )
@@ -78,6 +105,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
78105 if data == "COMMENT" :
79106 ofp .write ("<!--" )
80107 continue
108+ data = map_gi (data , _elem_map )
81109 if knownempty and xml :
82110 ofp .write ("<%s%s/>" % (data , format_attrs (attrs , xml )))
83111 else :
@@ -93,6 +121,7 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
93121 if data == "COMMENT" :
94122 ofp .write ("-->" )
95123 continue
124+ data = map_gi (data , _elem_map )
96125 if xml :
97126 if not lastempty :
98127 ofp .write ("</%s>" % data )
@@ -107,19 +136,24 @@ def do_convert(ifp, ofp, xml=0, autoclose=()):
107136 lastempty = 0
108137 elif type == "A" :
109138 name , type , value = string .split (data , " " , 2 )
139+ name = map_gi (name , _attr_map )
110140 attrs [name ] = esistools .decode (value )
111141 elif type == "e" :
112142 knownempty = 1
113143
114144 if LIST_EMPTIES :
115- knownempties .append ("" )
116- if os .path .isfile (EMPTIES_FILENAME ):
117- mode = "a"
118- else :
119- mode = "w"
120- fp = open (EMPTIES_FILENAME , mode )
121- fp .write (string .join (knownempties , "\n " ))
122- fp .close ()
145+ dump_empty_element_names (knownempties )
146+
147+
def dump_empty_element_names(knownempties):
    """Write the names in *knownempties* to the file EMPTIES_FILENAME.

    The names are joined with the separator and followed by one trailing
    separator.  The file is opened for appending when it already exists
    and for writing otherwise.  *knownempties* itself is not modified.
    """
    if os.path.isfile(EMPTIES_FILENAME):
        mode = "a"
    else:
        mode = "w"
    # Joining a copy with an extra empty entry yields the trailing
    # separator without appending to the caller's list.
    text = "\n ".join(list(knownempties) + [""])
    fp = open(EMPTIES_FILENAME, mode)
    try:
        fp.write(text)
    finally:
        # Close the file even if the write fails.
        fp.close()
123157
124158
125159def sgml_convert (ifp , ofp , autoclose ):
@@ -130,27 +164,53 @@ def xml_convert(ifp, ofp, autoclose):
130164 return do_convert (ifp , ofp , xml = 1 , autoclose = autoclose )
131165
132166
133- AUTOCLOSE = ("para" , "term" ,)
def update_gi_map(map, names, fromsgml=1):
    """Record the preferred spelling of each name in *map*.

    *names* is either a comma-separated string or a sequence of
    individual names.  Each name is stored as the value; the key is the
    lower-cased name when *fromsgml* is true, otherwise the name itself.
    Empty names (for example from an empty string) are skipped.
    """
    if isinstance(names, str):
        names = names.split(",")
    for name in names:
        if not name:
            # Splitting '' gives ['']; don't register an empty identifier.
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
134174
135175
136176def main ():
137177 import getopt
138178 import sys
139179 #
140180 autoclose = AUTOCLOSE
141- convert = sgml_convert
142- xml = 0
181+ convert = xml_convert
182+ xml = 1
143183 xmldecl = 0
144- opts , args = getopt .getopt (sys .argv [1 :], "adx" ,
145- ["autoclose" , "declare" , "xml" ])
184+ elem_names = ''
185+ attr_names = ''
186+ value_names = ''
187+ opts , args = getopt .getopt (sys .argv [1 :], "adesx" ,
188+ ["autoclose=" , "declare" , "sgml" , "xml" ,
189+ "elements-map=" , "attributes-map" ,
190+ "values-map=" ])
146191 for opt , arg in opts :
147192 if opt in ("-d" , "--declare" ):
148193 xmldecl = 1
194+ elif opt == "-e" :
195+ global LIST_EMPTIES
196+ LIST_EMPTIES = 1
197+ elif opt in ("-s" , "--sgml" ):
198+ xml = 0
199+ convert = sgml_convert
149200 elif opt in ("-x" , "--xml" ):
150201 xml = 1
151202 convert = xml_convert
152203 elif opt in ("-a" , "--autoclose" ):
153204 autoclose = string .split (arg , "," )
205+ elif opt == "--elements-map" :
206+ elem_names = ("%s,%s" % (elem_names , arg ))[1 :]
207+ elif opt == "--attributes-map" :
208+ attr_names = ("%s,%s" % (attr_names , arg ))[1 :]
209+ elif opt == "--values-map" :
210+ value_names = ("%s,%s" % (value_names , arg ))[1 :]
211+ #
212+ # open input streams:
213+ #
154214 if len (args ) == 0 :
155215 ifp = sys .stdin
156216 ofp = sys .stdout
@@ -163,7 +223,23 @@ def main():
163223 else :
164224 usage ()
165225 sys .exit (2 )
166- # knownempties is ignored in the XML version
226+ #
227+ # setup the name maps:
228+ #
229+ if elem_names or attr_names or value_names :
230+ # assume the origin was SGML; ignore case of the names from the ESIS
231+ # stream but set up conversion tables to get the case right on output
232+ global _normalize_case
233+ _normalize_case = string .lower
234+ update_gi_map (_elem_map , string .split (elem_names , "," ))
235+ update_gi_map (_attr_map , string .split (attr_names , "," ))
236+ update_gi_map (_values_map , string .split (value_names , "," ))
237+ else :
238+ global map_gi
239+ map_gi = null_map_gi
240+ #
241+ # run the conversion:
242+ #
167243 try :
168244 if xml and xmldecl :
169245 opf .write ('<?xml version="1.0" encoding="iso8859-1"?>\n ' )
0 commit comments