Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e27db5a

Browse files
committed
Initial implementation
1 parent 31e5e37 commit e27db5a

1 file changed

Lines changed: 268 additions & 0 deletions

File tree

Tools/i18n/pygettext.py

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,269 @@
11
#! /usr/bin/env python
2+
3+
"""pygettext -- Python equivalent of xgettext(1)
4+
5+
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
6+
internationalization of C programs. Most of these tools are independent of
7+
the programming language and can be used from within Python programs. Martin
8+
von Loewis' work[1] helps considerably in this regard.
9+
10+
There's one hole though; xgettext is the program that scans source code
11+
looking for message strings, but it groks only C (or C++). Python introduces
12+
a few wrinkles, such as dual quoting characters, triple quoted strings, and
13+
raw strings. xgettext understands none of this.
14+
15+
Enter pygettext, which uses Python's standard tokenize module to scan Python
16+
source code, generating .pot files identical to what GNU xgettext[2] generates
17+
for C and C++ code. From there, the standard GNU tools can be used.
18+
19+
A word about marking Python strings as candidates for translation. GNU
20+
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
21+
gettext_noop. But those can be a lot of text to include all over your code.
22+
C and C++ have a trick: they use the C preprocessor. Most internationalized C
23+
source includes a #define for gettext() to _() so that what has to be written
24+
in the source is much less. Thus these are both translatable strings:
25+
26+
gettext("Translatable String")
27+
_("Translatable String")
28+
29+
Python of course has no preprocessor so this doesn't work so well. Thus,
30+
pygettext searches only for _() by default, but see the -k/--keyword flag
31+
below for how to augment this.
32+
33+
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
34+
[2] http://www.gnu.org/software/gettext/gettext.html
35+
36+
37+
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
38+
wherever possible.
39+
40+
Usage: pygettext [options] filename ...
41+
42+
Options:
43+
44+
-a
45+
--extract-all
46+
Extract all strings (documented for xgettext compatibility; this version's option parser does not accept it yet)
47+
48+
-d default-domain
49+
--default-domain=default-domain
50+
Rename the default output file from messages.pot to default-domain.pot
51+
52+
-k [word]
53+
--keyword[=word]
54+
Additional keywords to look for. Without `word' means not to use the
55+
default keywords. The default keywords, which are always looked for
56+
if not explicitly disabled: _
57+
58+
The default keyword list is different from GNU xgettext's. You can have
59+
multiple -k flags on the command line.
60+
61+
--no-location
62+
Do not write filename/lineno location comments
63+
64+
-n [style]
65+
--add-location[=style]
66+
Write filename/lineno location comments indicating where each
67+
extracted string is found in the source. These lines appear before
68+
each msgid. Two styles are supported:
69+
70+
Solaris # File: filename, line: line-number
71+
Gnu #: filename:line
72+
73+
If style is omitted, Gnu is used. The style name is case
74+
insensitive. By default, locations are included.
75+
76+
--help
77+
-h
78+
print this help message and exit
79+
80+
"""
81+
82+
import os
83+
import sys
84+
import string
85+
import time
86+
import getopt
87+
import tokenize
88+
89+
# Version string; TokenEater.write() puts it in the first header line of the
# generated .pot file.
__version__ = '0.1'
90+
91+
92+
93+
def usage(code, msg=''):
    """Print the module help text, then an optional message, and exit.

    code is handed to sys.exit() as the exit status.  msg, when true, is
    printed on its own line after the help text; main() passes either a
    plain string or the getopt error object.
    """
    # The module docstring doubles as the help text; it is run through the
    # % operator with the module globals before being printed.
    help_text = __doc__ % globals()
    print(help_text)
    if msg:
        print(msg)
    sys.exit(code)
98+
99+
100+
101+
def normalize(s):
    """Convert a Python string literal into a double-quoted .po string.

    s is the source text of a string token (any quote style, raw or not,
    single or triple quoted).  The literal is evaluated to recover the
    string value, and that value is re-quoted C style: backslash, double
    quote, newline, tab and carriage return are each written as a
    two-character escape sequence, and the result is wrapped in double
    quotes.

    Returns the quoted text, e.g. the token 'say "hi"' becomes
    "say \\"hi\\"".
    """
    # Unwrap the quotes by evaluating the literal with no builtins reachable.
    # NOTE(security): an empty __builtins__ narrows what eval() can touch but
    # is not a sandbox; only pass text that is known to be a string literal.
    value = eval(s, {'__builtins__': {}}, {})
    # Backslash must be escaped first, otherwise the backslashes introduced
    # by the later replacements would themselves be doubled.
    escapes = (('\\', '\\\\'),
               ('"', '\\"'),
               ('\n', '\\n'),
               ('\t', '\\t'),
               ('\r', '\\r'))
    for raw, escaped in escapes:
        value = value.replace(raw, escaped)
    return '"' + value + '"'
132+
133+
134+
135+
class TokenEater:
    """Token callback that collects the strings passed to keyword calls.

    An instance is called once per token with the five-argument signature
    (ttype, tstring, stup, etup, line).  It runs a three-state machine:

      waiting      -- looking for a NAME token listed in options.keywords
      keywordseen  -- a keyword was seen; the next token must be '('
      openseen     -- inside the parentheses; STRING tokens are collected
                      until the first ')' token

    Collected messages are kept in a dictionary mapping the message text to
    a list of (filename, lineno) pairs, and are written out by write().
    """

    def __init__(self, options):
        """Remember options (read: .keywords, .location, .GNU, .SOLARIS)."""
        self.__options = options
        # message text -> list of (filename, lineno) occurrences
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        # Set by set_filename(); initialised here so that recording a message
        # before set_filename() is called does not raise AttributeError.
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        """Dispatch one token to the handler for the current state.

        Only the token type, the token text and the starting row (stup[0])
        are used; etup and line are accepted and ignored.
        """
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        # Any NAME token that is one of the configured keywords arms the
        # machine; everything else is ignored.
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            # Start a fresh batch and remember the line of the open paren.
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            # The keyword was not used as a call; go back to scanning.
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the
            # list of messages seen.  Reset state for the next batch.  If
            # there were no strings inside _(), then just ignore this entry.
            if self.__data:
                msg = ''.join(self.__data)
                entry = (self.__curfile, self.__lineno)
                self.__messages.setdefault(msg, []).append(entry)
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(normalize(tstring))
        # TBD: should we warn if we have seen anything else?

    def set_filename(self, filename):
        """Set the file name recorded with subsequently collected messages."""
        self.__curfile = filename

    def write(self, fp):
        """Write the collected messages to fp as a .pot file.

        fp is any object with a write() method.  A three-line comment header
        is written first, then one entry per distinct message: its location
        comments (in the style selected by options.location, or none when it
        matches neither GNU nor SOLARIS), a msgid line and an empty msgstr.
        """
        options = self.__options
        timestamp = time.ctime(time.time())
        # Write to fp directly instead of temporarily rebinding sys.stdout,
        # so that any redirection the caller has in place is left untouched.
        emit = fp.write
        # common header
        emit('# POT file generated by pygettext.py %s\n' % __version__)
        emit('# %s\n' % timestamp)
        emit('#\n')
        for msgid, occurrences in self.__messages.items():
            for filename, lineno in occurrences:
                # location comments are different b/w Solaris and GNU
                if options.location == options.SOLARIS:
                    emit('# File: %s, line: %d\n' % (filename, lineno))
                elif options.location == options.GNU:
                    emit('#: %s:%d\n' % (filename, lineno))
            # TBD: sorting, normalizing
            emit('msgid %s\n' % msgid)
            # An untranslated entry carries an explicit empty string.
            emit('msgstr ""\n')
            emit('\n')
203+
204+
205+
def main():
    """Parse the command line, scan the named files and write the .pot file.

    Options are read from sys.argv[1:].  Each remaining argument is opened
    as a source file and tokenized through a single TokenEater; the
    collected messages are then written to options.outfile
    ('messages.pot' unless -d/--default-domain is given).

    Any getopt error, or an unknown --add-location style, is reported
    through usage(), which exits with status 1.

    NOTE: -a/--extract-all appears in the help text but is not accepted by
    the option specification below.
    """
    default_keywords = ['_']
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'k:d:n:h',
            # Options that carry a value need the trailing '=', otherwise
            # getopt rejects the --option=value spelling.
            ['keyword=', 'default-domain=', 'help',
             'add-location=', 'no-location'])
    except getopt.error:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so that one spelling parses on every interpreter.
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        outfile = 'messages.pot'
        location = GNU

    options = Options()
    # Kept on the instance so the list is not shared through the class.
    options.keywords = []
    locations = {'gnu': options.GNU,
                 'solaris': options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-k', '--keyword'):
            # getopt reports a missing value as '', never None.  An empty
            # value (-k '' or --keyword=) switches off the default keywords.
            if arg:
                options.keywords.append(arg)
            else:
                default_keywords = []
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-n', '--add-location'):
            # An empty style falls back to Gnu; names are case insensitive.
            try:
                options.location = locations[(arg or 'gnu').lower()]
            except KeyError:
                usage(1, 'Invalid value for --add-location: ' + arg)
        elif opt == '--no-location':
            options.location = 0

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        fp = open(filename)
        try:
            eater.set_filename(filename)
            tokenize.tokenize(fp.readline, eater)
        finally:
            # Close the source file even when tokenizing raises.
            fp.close()

    fp = open(options.outfile, 'w')
    try:
        eater.write(fp)
    finally:
        fp.close()
265+
266+
267+
268+
# Script entry point: run the extractor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)