11"""A lexical analyzer class for simple shell-like syntaxes."""
22
33# Module and documentation by Eric S. Raymond, 21 Dec 1998
4+ # Input stacking and error message cleanup added by ESR, March 2000
45
56import sys
67
78class shlex :
89 "A lexical analyzer class for simple shell-like syntaxes."
9- def __init__ (self , instream = None ):
10+ def __init__ (self , instream = None , infile = None ):
1011 if instream :
1112 self .instream = instream
13+ self .infile = infile
1214 else :
1315 self .instream = sys .stdin
16+ self .infile = None
1417 self .commenters = '#'
1518 self .wordchars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
1619 self .whitespace = ' \t \r \n '
@@ -20,37 +23,76 @@ def __init__(self, instream=None):
2023 self .lineno = 1
2124 self .debug = 0
2225 self .token = ''
26+ self .filestack = []
27+ self .source = None
28+ if self .debug :
29+ print 'shlex: reading from %s, line %d' % (self .instream ,self .lineno )
2330
def push_token(self, tok):
    """Push a token onto the stack popped by the get_token method.

    tok -- the token to hand back; it becomes the first entry of
           self.pushback, so the next get_token() call returns it.

    When self.debug is at least 1, the pushed token is echoed to stdout.
    """
    if self.debug >= 1:
        # repr() call instead of backticks: same output, and consistent
        # with the repr() calls used by the character-level debug trace.
        print("shlex: pushing token " + repr(tok))
    # Bind a fresh list instead of mutating the existing one in place.
    self.pushback = [tok] + self.pushback
2936
def get_token(self):
    """Get a token from the input stream (or from stack if it's nonempty).

    Steps, in order:

    1. If self.pushback holds anything, its first entry is removed and
       returned immediately.
    2. Otherwise a raw token is read with read_token().  While that token
       equals self.source, the next raw token is passed to sourcehook(),
       the current (infile, instream, lineno) triple is saved at the front
       of self.filestack, and reading continues from the stream the hook
       returned, with lineno restarted at 1.
    3. While the token is "" (end of the current input), the current
       stream is closed and the most recently saved triple is restored;
       if nothing is saved, "" is returned.

    Returns the token, or "" once every input is exhausted.
    """
    if self.pushback:
        tok = self.pushback[0]
        self.pushback = self.pushback[1:]
        if self.debug >= 1:
            print("shlex: popping token " + repr(tok))
        return tok

    # No pushback.  Get a token.
    raw = self.read_token()

    # Handle inclusions
    while raw == self.source:
        newfile, newstream = self.sourcehook(self.read_token())
        saved = (self.infile, self.instream, self.lineno)
        self.filestack = [saved] + self.filestack
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            print('shlex: pushing to file %s' % (self.infile,))
        raw = self.get_token()

    # Maybe we got EOF instead?
    while raw == "":
        if not self.filestack:
            return ""
        self.instream.close()
        self.infile, self.instream, self.lineno = self.filestack[0]
        self.filestack = self.filestack[1:]
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        # Restart in whitespace state so the restored stream is
        # tokenized again.
        self.state = ' '
        raw = self.get_token()

    # Neither inclusion nor EOF.  "" cannot reach this point (the loop
    # above only exits on a different value), so the EOF message is only
    # printed for some other false token value.
    if self.debug >= 1:
        if raw:
            print("shlex: token=" + repr(raw))
        else:
            print("shlex: token=EOF")
    return raw
76+
77+ def read_token (self ):
78+ "Read a token from the input stream (no pushback or inclusions)"
3879 tok = ''
3980 while 1 :
4081 nextchar = self .instream .read (1 );
4182 if nextchar == '\n ' :
4283 self .lineno = self .lineno + 1
4384 if self .debug >= 3 :
44- print "In state " + repr (self .state ) + " I see character: " + repr (nextchar )
85+ print "shlex: in state " + repr (self .state ) + " I see character: " + repr (nextchar )
4586 if self .state == None :
46- return ''
87+ self .token = '' ; # past end of file
88+ break
4789 elif self .state == ' ' :
4890 if not nextchar :
4991 self .state = None ; # end of file
5092 break
5193 elif nextchar in self .whitespace :
5294 if self .debug >= 2 :
53- print "I see whitespace in whitespace state"
95+ print "shlex: I see whitespace in whitespace state"
5496 if self .token :
5597 break # emit current token
5698 else :
@@ -81,7 +123,7 @@ def get_token(self):
81123 break
82124 elif nextchar in self .whitespace :
83125 if self .debug >= 2 :
84- print "I see whitespace in word state"
126+ print "shlex: I see whitespace in word state"
85127 self .state = ' '
86128 if self .token :
87129 break # emit current token
@@ -95,26 +137,33 @@ def get_token(self):
95137 else :
96138 self .pushback = [nextchar ] + self .pushback
97139 if self .debug >= 2 :
98- print "I see punctuation in word state"
140+ print "shlex: I see punctuation in word state"
99141 self .state = ' '
100142 if self .token :
101143 break # emit current token
102144 else :
103145 continue
104-
105146 result = self .token
106147 self .token = ''
107- if self .debug >= 1 :
108- print "Token: " + result
148+ if self .debug > 1 :
149+ if result :
150+ print "shlex: raw token=" + `result`
151+ else :
152+ print "shlex: raw token=EOF"
109153 return result
110154
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    newfile -- the raw token naming the file.  If it starts with a double
               quote, its first and last characters are dropped before
               the name is used.

    Returns a (filename, file object) tuple; the file is opened for
    reading.  Any error from open() propagates to the caller, including
    the one raised for an empty name.
    """
    # Slice instead of indexing: an empty token (source keyword as the
    # last token of the input) must not fail with IndexError before
    # open() gets the chance to report the real problem.
    if newfile[:1] == '"':
        newfile = newfile[1:-1]
    return (newfile, open(newfile, "r"))
160+
if __name__ == '__main__':
    # Self-test driver: tokenize standard input and echo every token.
    # The empty token that signals end of input is printed too, and then
    # the loop stops.
    lexer = shlex()
    while 1:
        tt = lexer.get_token()
        print("Token: " + repr(tt))
        if not tt:
            break
120169
0 commit comments