|
1 | 1 | #! /usr/bin/env python |
| 2 | +# -*- coding: iso-8859-1 -*- |
2 | 3 | # Originally written by Barry Warsaw <[email protected]> |
3 | 4 | # |
4 | 5 | # Minimally patched to make it even more xgettext compatible |
5 | 6 | # by Peter Funk <[email protected]> |
| 7 | +# |
| 8 | +# 2002-11-22 Jürgen Hermann <[email protected]> |
| 9 | +# Added checks that _() only contains string literals, and |
| 10 | +# command line args are resolved to module lists, i.e. you |
| 11 | +# can now pass a filename, a module or package name, or a |
| 12 | +# directory (including globbing chars, important for Win32). |
| 13 | +# Made docstring fit in 80 chars wide displays using pydoc. |
| 14 | +# |
6 | 15 |
|
7 | | -"""pygettext -- Python equivalent of xgettext(1) |
| 16 | +# for selftesting |
| 17 | +try: |
| 18 | + import fintl |
| 19 | + _ = fintl.gettext |
| 20 | +except ImportError: |
| 21 | + _ = lambda s: s |
| 22 | + |
| 23 | +__doc__ = _("""pygettext -- Python equivalent of xgettext(1) |
8 | 24 |
|
9 | 25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the |
10 | | -internationalization of C programs. Most of these tools are independent of |
11 | | -the programming language and can be used from within Python programs. Martin |
12 | | -von Loewis' work[1] helps considerably in this regard. |
| 26 | +internationalization of C programs. Most of these tools are independent of |
| 27 | +the programming language and can be used from within Python programs. |
| 28 | +Martin von Loewis' work[1] helps considerably in this regard. |
13 | 29 |
|
14 | 30 | There's one problem though; xgettext is the program that scans source code |
15 | | -looking for message strings, but it groks only C (or C++). Python introduces |
16 | | -a few wrinkles, such as dual quoting characters, triple quoted strings, and |
17 | | -raw strings. xgettext understands none of this. |
18 | | -
|
19 | | -Enter pygettext, which uses Python's standard tokenize module to scan Python |
20 | | -source code, generating .pot files identical to what GNU xgettext[2] generates |
21 | | -for C and C++ code. From there, the standard GNU tools can be used. |
22 | | -
|
23 | | -A word about marking Python strings as candidates for translation. GNU |
24 | | -xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and |
25 | | -gettext_noop. But those can be a lot of text to include all over your code. |
26 | | -C and C++ have a trick: they use the C preprocessor. Most internationalized C |
27 | | -source includes a #define for gettext() to _() so that what has to be written |
28 | | -in the source is much less. Thus these are both translatable strings: |
| 31 | +looking for message strings, but it groks only C (or C++). Python |
| 32 | +introduces a few wrinkles, such as dual quoting characters, triple quoted |
| 33 | +strings, and raw strings. xgettext understands none of this. |
| 34 | +
|
| 35 | +Enter pygettext, which uses Python's standard tokenize module to scan |
| 36 | +Python source code, generating .pot files identical to what GNU xgettext[2] |
| 37 | +generates for C and C++ code. From there, the standard GNU tools can be |
| 38 | +used. |
| 39 | +
|
| 40 | +A word about marking Python strings as candidates for translation. GNU |
| 41 | +xgettext recognizes the following keywords: gettext, dgettext, dcgettext, |
| 42 | +and gettext_noop. But those can be a lot of text to include all over your |
| 43 | +code. C and C++ have a trick: they use the C preprocessor. Most |
| 44 | +internationalized C source includes a #define for gettext() to _() so that |
| 45 | +what has to be written in the source is much less. Thus these are both |
| 46 | +translatable strings: |
29 | 47 |
|
30 | 48 | gettext("Translatable String") |
31 | 49 | _("Translatable String") |
|
37 | 55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html |
38 | 56 | [2] http://www.gnu.org/software/gettext/gettext.html |
39 | 57 |
|
40 | | -NOTE: pygettext attempts to be option and feature compatible with GNU xgettext |
41 | | -where ever possible. However some options are still missing or are not fully |
42 | | -implemented. Also, xgettext's use of command line switches with option |
43 | | -arguments is broken, and in these cases, pygettext just defines additional |
44 | | -switches. |
| 58 | +NOTE: pygettext attempts to be option and feature compatible with GNU |
| 59 | +xgettext where ever possible. However some options are still missing or are |
| 60 | +not fully implemented. Also, xgettext's use of command line switches with |
| 61 | +option arguments is broken, and in these cases, pygettext just defines |
| 62 | +additional switches. |
45 | 63 |
|
46 | 64 | Usage: pygettext [options] inputfile ... |
47 | 65 |
|
|
61 | 79 |
|
62 | 80 | -D |
63 | 81 | --docstrings |
64 | | - Extract module, class, method, and function docstrings. These do not |
65 | | - need to be wrapped in _() markers, and in fact cannot be for Python to |
66 | | - consider them docstrings. (See also the -X option). |
| 82 | + Extract module, class, method, and function docstrings. These do |
| 83 | + not need to be wrapped in _() markers, and in fact cannot be for |
| 84 | + Python to consider them docstrings. (See also the -X option). |
67 | 85 |
|
68 | 86 | -h |
69 | 87 | --help |
|
135 | 153 | conjunction with the -D option above. |
136 | 154 |
|
137 | 155 | If `inputfile' is -, standard input is read. |
138 | | -""" |
| 156 | +""") |
139 | 157 |
|
140 | 158 | import os |
141 | 159 | import sys |
142 | 160 | import time |
143 | 161 | import getopt |
| 162 | +import token |
144 | 163 | import tokenize |
145 | 164 | import operator |
146 | 165 |
|
147 | | -# for selftesting |
148 | | -try: |
149 | | - import fintl |
150 | | - _ = fintl.gettext |
151 | | -except ImportError: |
152 | | - def _(s): return s |
153 | | - |
154 | | -__version__ = '1.4' |
| 166 | +__version__ = '1.5' |
155 | 167 |
|
156 | 168 | default_keywords = ['_'] |
157 | 169 | DEFAULTKEYWORDS = ', '.join(default_keywords) |
@@ -183,7 +195,7 @@ def _(s): return s |
183 | 195 |
|
184 | 196 |
|
def usage(code, msg=''):
    """Print the usage text to stderr and terminate the program.

    The module's __doc__ is interpolated with globals() and printed to
    sys.stderr; when 'msg' is true it is printed on a line of its own
    afterwards.  This function does not return: it ends by calling
    sys.exit(code).
    """
    # Build the usage text first, so an interpolation error surfaces
    # before anything has been written.
    output = [__doc__ % globals()]
    if msg:
        output.append(msg)
    for item in output:
        print >> sys.stderr, item
    sys.exit(code)
@@ -242,6 +254,103 @@ def normalize(s): |
242 | 254 | s = '""\n"' + lineterm.join(lines) + '"' |
243 | 255 | return s |
244 | 256 |
|
| 257 | + |
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    str -- the string (or other container) to search in
    set -- iterable of the items to look for

    Returns True as soon as the first item of 'set' is found in 'str',
    and False when none is found (including when 'set' is empty).
    """
    # Stop at the first hit instead of testing every item up front.
    for char in set:
        if char in str:
            return True
    return False
| 262 | + |
| 263 | + |
| 264 | +def _visit_pyfiles(list, dirname, names): |
| 265 | + """ Helper for getFilesForName(). |
| 266 | + """ |
| 267 | + # get extension for python source files |
| 268 | + if not globals().has_key('_py_ext'): |
| 269 | + import imp |
| 270 | + global _py_ext |
| 271 | + _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0] |
| 272 | + |
| 273 | + # don't recurse into CVS directories |
| 274 | + if 'CVS' in names: |
| 275 | + names.remove('CVS') |
| 276 | + |
| 277 | + # add all *.py files to list |
| 278 | + list.extend( |
| 279 | + [os.path.join(dirname, file) |
| 280 | + for file in names |
| 281 | + if os.path.splitext(file)[1] == _py_ext]) |
| 282 | + |
| 283 | + |
| 284 | +def _get_modpkg_path(dotted_name, pathlist=None): |
| 285 | + """ Get the filesystem path for a module or a package. |
| 286 | +
|
| 287 | + Return the file system path to a file for a module, |
| 288 | + and to a directory for a package. Return None if |
| 289 | + the name is not found, or is a builtin or extension module. |
| 290 | + """ |
| 291 | + import imp |
| 292 | + |
| 293 | + # split off top-most name |
| 294 | + parts = dotted_name.split('.', 1) |
| 295 | + |
| 296 | + if len(parts) > 1: |
| 297 | + # we have a dotted path, import top-level package |
| 298 | + try: |
| 299 | + file, pathname, description = imp.find_module(parts[0], pathlist) |
| 300 | + if file: file.close() |
| 301 | + except ImportError: |
| 302 | + return None |
| 303 | + |
| 304 | + # check if it's indeed a package |
| 305 | + if description[2] == imp.PKG_DIRECTORY: |
| 306 | + # recursively handle the remaining name parts |
| 307 | + pathname = _get_modpkg_path(parts[1], [pathname]) |
| 308 | + else: |
| 309 | + pathname = None |
| 310 | + else: |
| 311 | + # plain name |
| 312 | + try: |
| 313 | + file, pathname, description = imp.find_module(dotted_name, pathlist) |
| 314 | + if file: file.close() |
| 315 | + if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: |
| 316 | + pathname = None |
| 317 | + except ImportError: |
| 318 | + pathname = None |
| 319 | + |
| 320 | + return pathname |
| 321 | + |
| 322 | + |
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    'name' is resolved in this order:

    - an existing directory is walked and all python files below it are
      returned;
    - any other existing path is returned as a one-element list;
    - a non-existing name containing one of the chars *?[] is expanded
      with glob.glob() and every match is resolved recursively;
    - any other non-existing name is looked up as a module or package
      name via _get_modpkg_path().

    An empty list is returned when nothing can be found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            import glob
            found = []
            for match in glob.glob(name):
                found.extend(getFilesForName(match))
            return found

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        found = []
        os.path.walk(name, _visit_pyfiles, found)
        return found
    elif os.path.exists(name):
        # a single file
        return [name]

    return []
245 | 354 |
|
246 | 355 |
|
247 | 356 | class TokenEater: |
@@ -314,7 +423,12 @@ def __openseen(self, ttype, tstring, lineno): |
314 | 423 | self.__state = self.__waiting |
315 | 424 | elif ttype == tokenize.STRING: |
316 | 425 | self.__data.append(safe_eval(tstring)) |
317 | | - # TBD: should we warn if we seen anything else? |
| 426 | + elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, |
| 427 | + token.NEWLINE, tokenize.NL]: |
| 428 | + # warn if we see anything else than STRING or whitespace |
| 429 | + print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % { |
| 430 | + 'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno} |
| 431 | + self.__state = self.__waiting |
318 | 432 |
|
319 | 433 | def __addentry(self, msg, lineno=None, isdocstring=0): |
320 | 434 | if lineno is None: |
@@ -495,6 +609,15 @@ class Options: |
495 | 609 | else: |
496 | 610 | options.toexclude = [] |
497 | 611 |
|
| 612 | + # resolve args to module lists |
| 613 | + expanded = [] |
| 614 | + for arg in args: |
| 615 | + if arg == '-': |
| 616 | + expanded.append(arg) |
| 617 | + else: |
| 618 | + expanded.extend(getFilesForName(arg)) |
| 619 | + args = expanded |
| 620 | + |
498 | 621 | # slurp through all the files |
499 | 622 | eater = TokenEater(options) |
500 | 623 | for filename in args: |
@@ -539,3 +662,6 @@ class Options: |
539 | 662 | main() |
540 | 663 | # some more test strings |
541 | 664 | _(u'a unicode string') |
| 665 | + _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning |
| 666 | + _('more' 'than' 'one' 'string') |
| 667 | + |
0 commit comments