Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 08a8a35

Browse files
committed
Added the -D/--docstrings option for extraction of unmarked module,
class, method, and function docstrings.
1 parent 3aecfc9 commit 08a8a35

1 file changed

Lines changed: 82 additions & 40 deletions

File tree

Tools/i18n/pygettext.py

Lines changed: 82 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,7 @@
44
# minimally patched to make it even more xgettext compatible
55
# by Peter Funk <[email protected]>
66

7-
# for selftesting
8-
try:
9-
import fintl
10-
_ = fintl.gettext
11-
except ImportError:
12-
def _(s): return s
13-
14-
15-
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
7+
"""pygettext -- Python equivalent of xgettext(1)
168
179
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
1810
internationalization of C programs. Most of these tools are independent of
@@ -65,7 +57,13 @@ def _(s): return s
6557
6658
-E
6759
--escape
68-
replace non-ASCII characters with octal escape sequences.
60+
Replace non-ASCII characters with octal escape sequences.
61+
62+
-D
63+
--docstrings
64+
Extract module, class, method, and function docstrings. These do not
65+
need to be wrapped in _() markers, and in fact cannot be for Python to
66+
consider them docstrings.
6967
7068
-h
7169
--help
@@ -93,6 +91,15 @@ def _(s): return s
9391
each msgid. The style of comments is controlled by the -S/--style
9492
option. This is the default.
9593
94+
-o filename
95+
--output=filename
96+
Rename the default output file from messages.pot to filename. If
97+
filename is `-' then the output is sent to standard out.
98+
99+
-p dir
100+
--output-dir=dir
101+
Output files will be placed in directory dir.
102+
96103
-S stylename
97104
--style stylename
98105
Specify which style to use for location comments. Two styles are
@@ -103,15 +110,6 @@ def _(s): return s
103110
104111
The style name is case insensitive. GNU style is the default.
105112
106-
-o filename
107-
--output=filename
108-
Rename the default output file from messages.pot to filename. If
109-
filename is `-' then the output is sent to standard out.
110-
111-
-p dir
112-
--output-dir=dir
113-
Output files will be placed in directory dir.
114-
115113
-v
116114
--verbose
117115
Print the names of the files being processed.
@@ -132,15 +130,22 @@ def _(s): return s
132130
133131
If `inputfile' is -, standard input is read.
134132
135-
""")
133+
"""
136134

137135
import os
138136
import sys
139137
import time
140138
import getopt
141139
import tokenize
142140

143-
__version__ = '1.1'
141+
# for selftesting
142+
try:
143+
import fintl
144+
_ = fintl.gettext
145+
except ImportError:
146+
def _(s): return s
147+
148+
__version__ = '1.2'
144149

145150
default_keywords = ['_']
146151
DEFAULTKEYWORDS = ', '.join(default_keywords)
@@ -171,9 +176,9 @@ def _(s): return s
171176

172177

173178
def usage(code, msg=''):
174-
print __doc__ % globals()
179+
print >> sys.stderr, _(__doc__) % globals()
175180
if msg:
176-
print msg
181+
print >> sys.stderr, msg
177182
sys.exit(code)
178183

179184

@@ -239,15 +244,48 @@ def __init__(self, options):
239244
self.__state = self.__waiting
240245
self.__data = []
241246
self.__lineno = -1
247+
self.__freshmodule = 1
242248

243249
def __call__(self, ttype, tstring, stup, etup, line):
    """Hand one token to whichever state handler is currently active.

    The handler receives the token type, the token text, and the first
    element of ``stup`` (passed on as the handler's ``lineno``).  ``etup``
    and ``line`` are accepted but not used here.
    """
    ## Uncomment to trace the token stream while debugging:
    ## import token
    ## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
    ##       'tstring:', tstring
    handler = self.__state
    lineno = stup[0]
    handler(ttype, tstring, lineno)
246255

247256
def __waiting(self, ttype, tstring, lineno):
    """Idle state: watch for docstrings (if enabled) and keyword calls.

    ttype, tstring -- token type and token text from the tokenizer
    lineno         -- line number recorded for any extracted string

    When ``options.docstrings`` is set, a string that appears before any
    other significant token is recorded as the module docstring, and a
    ``class`` or ``def`` name switches to the suite-header state.  A name
    listed in ``options.keywords`` switches to the keyword-seen state.
    """
    options = self.__options
    # Do docstring extractions, if enabled
    if options.docstrings:
        # module docstring?
        if self.__freshmodule:
            if ttype == tokenize.STRING:
                self.__addentry(safe_eval(tstring), lineno)
                self.__freshmodule = 0
                return
            if ttype in (tokenize.COMMENT, tokenize.NL):
                # Comments and blank lines may precede the docstring.
                return
            # The first significant token is not a string, so the module
            # has no docstring.  Do NOT return here: this token may itself
            # be `class', `def' or a keyword and must still be examined
            # below, otherwise it would be silently dropped.
            self.__freshmodule = 0
        # class or function docstring?
        if ttype == tokenize.NAME and tstring in ('class', 'def'):
            self.__state = self.__suiteseen
            return
    if ttype == tokenize.NAME and tstring in options.keywords:
        self.__state = self.__keywordseen
250273

274+
def __suiteseen(self, ttype, tstring, lineno):
    """Skip the rest of a ``class``/``def`` header up to its closing colon.

    Only a colon outside every (), [] and {} pair ends the header; colons
    inside brackets (dict displays, lambdas or slices in default values)
    are ignored.  On the real colon the state moves on to look for the
    suite's docstring.  ``lineno`` is unused.

    The nesting depth is kept in ``self._enclosure_depth``, which is
    created on first use because the initializer is not part of this block.
    """
    if ttype != tokenize.OP:
        return
    depth = getattr(self, '_enclosure_depth', 0)
    if tstring in ('(', '[', '{'):
        self._enclosure_depth = depth + 1
    elif tstring in (')', ']', '}'):
        # Never go negative, even on unbalanced input.
        self._enclosure_depth = max(depth - 1, 0)
    elif tstring == ':' and depth == 0:
        self._enclosure_depth = 0
        self.__state = self.__suitedocstring
278+
279+
def __suitedocstring(self, ttype, tstring, lineno):
    """Record the docstring, if any, that opens a class or function suite.

    ttype, tstring -- token type and token text from the tokenizer
    lineno         -- line number recorded with an extracted docstring

    Line ends, blank lines, indentation and comments between the header
    and the first statement are skipped.  A string in that position is
    recorded as the docstring.  Any other token means there is no
    docstring; that token is handed back to the waiting state so that a
    nested ``class``/``def`` or a keyword call is not lost.
    """
    # ignore any intervening noise
    if ttype == tokenize.STRING:
        self.__addentry(safe_eval(tstring), lineno)
        self.__state = self.__waiting
    elif ttype not in (tokenize.NEWLINE, tokenize.NL, tokenize.INDENT,
                       tokenize.COMMENT):
        # there was no class or function docstring; re-examine this token
        # in the waiting state instead of swallowing it
        self.__state = self.__waiting
        self.__waiting(ttype, tstring, lineno)
288+
251289
def __keywordseen(self, ttype, tstring, lineno):
252290
if ttype == tokenize.OP and tstring == '(':
253291
self.__data = []
@@ -263,29 +301,28 @@ def __openseen(self, ttype, tstring, lineno):
263301
# of messages seen. Reset state for the next batch. If there
264302
# were no strings inside _(), then just ignore this entry.
265303
if self.__data:
266-
msg = EMPTYSTRING.join(self.__data)
267-
if not msg in self.__options.toexclude:
268-
entry = (self.__curfile, self.__lineno)
269-
linenos = self.__messages.get(msg)
270-
if linenos is None:
271-
self.__messages[msg] = [entry]
272-
else:
273-
linenos.append(entry)
304+
self.__addentry(EMPTYSTRING.join(self.__data))
274305
self.__state = self.__waiting
275306
elif ttype == tokenize.STRING:
276307
self.__data.append(safe_eval(tstring))
277308
# TBD: should we warn if we see anything else?
278309

310+
def __addentry(self, msg, lineno=None):
    """Remember that ``msg`` occurs in the current file.

    msg    -- the extracted message text
    lineno -- line to record; when omitted, the line number saved in
              ``self.__lineno`` is used

    Messages listed in ``options.toexclude`` are ignored.  Otherwise a
    ``(filename, lineno)`` pair is appended to the list of locations kept
    for ``msg``.
    """
    if lineno is None:
        lineno = self.__lineno
    if msg in self.__options.toexclude:
        return
    location = (self.__curfile, lineno)
    occurrences = self.__messages.setdefault(msg, [])
    occurrences.append(location)
316+
279317
def set_filename(self, filename):
280318
self.__curfile = filename
281319

282320
def write(self, fp):
283321
options = self.__options
284322
timestamp = time.ctime(time.time())
285-
# common header
286-
# The time stamp in the header doesn't have the same format
287-
# as that generated by xgettext...
288-
print >>fp, pot_header % {'time': timestamp, 'version': __version__}
323+
# The time stamp in the header doesn't have the same format as that
324+
# generated by xgettext...
325+
print >> fp, pot_header % {'time': timestamp, 'version': __version__}
289326
for k, v in self.__messages.items():
290327
if not options.writelocations:
291328
pass
@@ -304,25 +341,27 @@ def write(self, fp):
304341
if len(locline) + len(s) <= options.width:
305342
locline = locline + s
306343
else:
307-
print >>fp, locline
344+
print >> fp, locline
308345
locline = "#:" + s
309346
if len(locline) > 2:
310-
print >>fp, locline
347+
print >> fp, locline
311348
# TBD: sorting, normalizing
312-
print >>fp, 'msgid', normalize(k)
313-
print >>fp, 'msgstr ""\n'
349+
print >> fp, 'msgid', normalize(k)
350+
print >> fp, 'msgstr ""\n'
351+
314352

315353

316354
def main():
317355
global default_keywords
318356
try:
319357
opts, args = getopt.getopt(
320358
sys.argv[1:],
321-
'ad:Ehk:Kno:p:S:Vvw:x:',
359+
'ad:DEhk:Kno:p:S:Vvw:x:',
322360
['extract-all', 'default-domain', 'escape', 'help',
323361
'keyword=', 'no-default-keywords',
324362
'add-location', 'no-location', 'output=', 'output-dir=',
325363
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
364+
'docstrings',
326365
])
327366
except getopt.error, msg:
328367
usage(1, msg)
@@ -343,6 +382,7 @@ class Options:
343382
verbose = 0
344383
width = 78
345384
excludefilename = ''
385+
docstrings = 0
346386

347387
options = Options()
348388
locations = {'gnu' : options.GNU,
@@ -359,6 +399,8 @@ class Options:
359399
options.outfile = arg + '.pot'
360400
elif opt in ('-E', '--escape'):
361401
options.escape = 1
402+
elif opt in ('-D', '--docstrings'):
403+
options.docstrings = 1
362404
elif opt in ('-k', '--keyword'):
363405
options.keywords.append(arg)
364406
elif opt in ('-K', '--no-default-keywords'):

0 commit comments

Comments
 (0)