44# minimally patched to make it even more xgettext compatible
55# by Peter Funk <[email protected] > 66
7- # for selftesting
8- try :
9- import fintl
10- _ = fintl .gettext
11- except ImportError :
12- def _ (s ): return s
13-
14-
15- __doc__ = _ ("""pygettext -- Python equivalent of xgettext(1)
7+ """pygettext -- Python equivalent of xgettext(1)
168
179Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
1810internationalization of C programs. Most of these tools are independent of
@@ -65,7 +57,13 @@ def _(s): return s
6557
6658 -E
6759 --escape
68- replace non-ASCII characters with octal escape sequences.
60+ Replace non-ASCII characters with octal escape sequences.
61+
62+ -D
63+ --docstrings
64+ Extract module, class, method, and function docstrings. These do not
65+ need to be wrapped in _() markers, and in fact cannot be for Python to
66+ consider them docstrings.
6967
7068 -h
7169 --help
@@ -93,6 +91,15 @@ def _(s): return s
9391 each msgid. The style of comments is controlled by the -S/--style
9492 option. This is the default.
9593
94+ -o filename
95+ --output=filename
96+ Rename the default output file from messages.pot to filename. If
97+ filename is `-' then the output is sent to standard out.
98+
99+ -p dir
100+ --output-dir=dir
101+ Output files will be placed in directory dir.
102+
96103 -S stylename
97104 --style stylename
98105 Specify which style to use for location comments. Two styles are
@@ -103,15 +110,6 @@ def _(s): return s
103110
104111 The style name is case insensitive. GNU style is the default.
105112
106- -o filename
107- --output=filename
108- Rename the default output file from messages.pot to filename. If
109- filename is `-' then the output is sent to standard out.
110-
111- -p dir
112- --output-dir=dir
113- Output files will be placed in directory dir.
114-
115113 -v
116114 --verbose
117115 Print the names of the files being processed.
@@ -132,15 +130,22 @@ def _(s): return s
132130
133131If `inputfile' is -, standard input is read.
134132
135- """ )
133+ """
136134
137135import os
138136import sys
139137import time
140138import getopt
141139import tokenize
142140
143- __version__ = '1.1'
141+ # for selftesting
142+ try :
143+ import fintl
144+ _ = fintl .gettext
145+ except ImportError :
146+ def _ (s ): return s
147+
148+ __version__ = '1.2'
144149
145150default_keywords = ['_' ]
146151DEFAULTKEYWORDS = ', ' .join (default_keywords )
@@ -171,9 +176,9 @@ def _(s): return s
171176
172177
173178def usage (code , msg = '' ):
174- print __doc__ % globals ()
179+ print >> sys . stderr , _ ( __doc__ ) % globals ()
175180 if msg :
176- print msg
181+ print >> sys . stderr , msg
177182 sys .exit (code )
178183
179184
@@ -239,15 +244,48 @@ def __init__(self, options):
239244 self .__state = self .__waiting
240245 self .__data = []
241246 self .__lineno = - 1
247+ self .__freshmodule = 1
242248
243249 def __call__ (self , ttype , tstring , stup , etup , line ):
244250 # dispatch
251+ ## import token
252+ ## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
253+ ## 'tstring:', tstring
245254 self .__state (ttype , tstring , stup [0 ])
246255
247256 def __waiting (self , ttype , tstring , lineno ):
257+ # Do docstring extractions, if enabled
258+ if self .__options .docstrings :
259+ # module docstring?
260+ if self .__freshmodule :
261+ if ttype == tokenize .STRING :
262+ self .__addentry (safe_eval (tstring ), lineno )
263+ self .__freshmodule = 0
264+ elif ttype not in (tokenize .COMMENT , tokenize .NL ):
265+ self .__freshmodule = 0
266+ return
267+ # class docstring?
268+ if ttype == tokenize .NAME and tstring in ('class' , 'def' ):
269+ self .__state = self .__suiteseen
270+ return
248271 if ttype == tokenize .NAME and tstring in self .__options .keywords :
249272 self .__state = self .__keywordseen
250273
274+ def __suiteseen (self , ttype , tstring , lineno ):
275+ # ignore anything until we see the colon
276+ if ttype == tokenize .OP and tstring == ':' :
277+ self .__state = self .__suitedocstring
278+
279+ def __suitedocstring (self , ttype , tstring , lineno ):
280+ # ignore any intervening noise
281+ if ttype == tokenize .STRING :
282+ self .__addentry (safe_eval (tstring ), lineno )
283+ self .__state = self .__waiting
284+ elif ttype not in (tokenize .NEWLINE , tokenize .INDENT ,
285+ tokenize .COMMENT ):
286+ # there was no class docstring
287+ self .__state = self .__waiting
288+
251289 def __keywordseen (self , ttype , tstring , lineno ):
252290 if ttype == tokenize .OP and tstring == '(' :
253291 self .__data = []
@@ -263,29 +301,28 @@ def __openseen(self, ttype, tstring, lineno):
263301 # of messages seen. Reset state for the next batch. If there
264302 # were no strings inside _(), then just ignore this entry.
265303 if self .__data :
266- msg = EMPTYSTRING .join (self .__data )
267- if not msg in self .__options .toexclude :
268- entry = (self .__curfile , self .__lineno )
269- linenos = self .__messages .get (msg )
270- if linenos is None :
271- self .__messages [msg ] = [entry ]
272- else :
273- linenos .append (entry )
304+ self .__addentry (EMPTYSTRING .join (self .__data ))
274305 self .__state = self .__waiting
275306 elif ttype == tokenize .STRING :
276307 self .__data .append (safe_eval (tstring ))
277308 # TBD: should we warn if we see anything else?
278309
310+ def __addentry (self , msg , lineno = None ):
311+ if lineno is None :
312+ lineno = self .__lineno
313+ if not msg in self .__options .toexclude :
314+ entry = (self .__curfile , lineno )
315+ self .__messages .setdefault (msg , []).append (entry )
316+
279317 def set_filename (self , filename ):
280318 self .__curfile = filename
281319
282320 def write (self , fp ):
283321 options = self .__options
284322 timestamp = time .ctime (time .time ())
285- # common header
286- # The time stamp in the header doesn't have the same format
287- # as that generated by xgettext...
288- print >> fp , pot_header % {'time' : timestamp , 'version' : __version__ }
323+ # The time stamp in the header doesn't have the same format as that
324+ # generated by xgettext...
325+ print >> fp , pot_header % {'time' : timestamp , 'version' : __version__ }
289326 for k , v in self .__messages .items ():
290327 if not options .writelocations :
291328 pass
@@ -304,25 +341,27 @@ def write(self, fp):
304341 if len (locline ) + len (s ) <= options .width :
305342 locline = locline + s
306343 else :
307- print >> fp , locline
344+ print >> fp , locline
308345 locline = "#:" + s
309346 if len (locline ) > 2 :
310- print >> fp , locline
347+ print >> fp , locline
311348 # TBD: sorting, normalizing
312- print >> fp , 'msgid' , normalize (k )
313- print >> fp , 'msgstr ""\n '
349+ print >> fp , 'msgid' , normalize (k )
350+ print >> fp , 'msgstr ""\n '
351+
314352
315353
316354def main ():
317355 global default_keywords
318356 try :
319357 opts , args = getopt .getopt (
320358 sys .argv [1 :],
321- 'ad:Ehk :Kno:p:S:Vvw:x:' ,
359+ 'ad:DEhk :Kno:p:S:Vvw:x:' ,
322360 ['extract-all' , 'default-domain' , 'escape' , 'help' ,
323361 'keyword=' , 'no-default-keywords' ,
324362 'add-location' , 'no-location' , 'output=' , 'output-dir=' ,
325363 'style=' , 'verbose' , 'version' , 'width=' , 'exclude-file=' ,
364+ 'docstrings' ,
326365 ])
327366 except getopt .error , msg :
328367 usage (1 , msg )
@@ -343,6 +382,7 @@ class Options:
343382 verbose = 0
344383 width = 78
345384 excludefilename = ''
385+ docstrings = 0
346386
347387 options = Options ()
348388 locations = {'gnu' : options .GNU ,
@@ -359,6 +399,8 @@ class Options:
359399 options .outfile = arg + '.pot'
360400 elif opt in ('-E' , '--escape' ):
361401 options .escape = 1
402+ elif opt in ('-D' , '--docstrings' ):
403+ options .docstrings = 1
362404 elif opt in ('-k' , '--keyword' ):
363405 options .keywords .append (arg )
364406 elif opt in ('-K' , '--no-default-keywords' ):
0 commit comments