Patch #494845: Support string concatenation, detect non-string data,

loewis · loewis · commit 0d1fdea8efc4 · 2002-11-22T08:36:54.000Z
add globbing support, find modules by name instead of by file.
diff --git a/Misc/ACKS b/Misc/ACKS
@@ -220,6 +220,7 @@ Jonathan Hendry
 James Henstridge
 Chris Herborth
 Ivan Herman
+J�rgen Hermann
 Gary Herron
 Bernhard Herzog
 Magnus L. Hetland
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -621,6 +621,9 @@ Library
 Tools/Demos
 -----------
 
+- pygettext.py now supports globbing on Windows, and accepts module
+  names in addition to accepting file names.
+
 - The SGI demos (Demo/sgi) have been removed.  Nobody thought they
   were interesting any more.  (The SGI library modules and extensions
   are still there; it is believed that at least some of these are
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
@@ -1,31 +1,49 @@
 #! /usr/bin/env python
+# -*- coding: iso-8859-1 -*-
 # Originally written by Barry Warsaw <barry@zope.com>
 #
 # Minimally patched to make it even more xgettext compatible 
 # by Peter Funk <pf@artcom-gmbh.de>
+#
+# 2002-11-22 J�rgen Hermann <jh@web.de>
+# Added checks that _() only contains string literals, and
+# command line args are resolved to module lists, i.e. you
+# can now pass a filename, a module or package name, or a
+# directory (including globbing chars, important for Win32).
+# Made docstring fit in 80 chars wide displays using pydoc.
+#
 
-"""pygettext -- Python equivalent of xgettext(1)
+# for selftesting
+try:
+    import fintl
+    _ = fintl.gettext
+except ImportError:
+    _ = lambda s: s
+
+__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
 
 Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
-internationalization of C programs.  Most of these tools are independent of
-the programming language and can be used from within Python programs.  Martin
-von Loewis' work[1] helps considerably in this regard.
+internationalization of C programs. Most of these tools are independent of
+the programming language and can be used from within Python programs.
+Martin von Loewis' work[1] helps considerably in this regard. 
 
 There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++).  Python introduces
-a few wrinkles, such as dual quoting characters, triple quoted strings, and
-raw strings.  xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan Python
-source code, generating .pot files identical to what GNU xgettext[2] generates
-for C and C++ code.  From there, the standard GNU tools can be used.
-
-A word about marking Python strings as candidates for translation.  GNU
-xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
-gettext_noop.  But those can be a lot of text to include all over your code.
-C and C++ have a trick: they use the C preprocessor.  Most internationalized C
-source includes a #define for gettext() to _() so that what has to be written
-in the source is much less.  Thus these are both translatable strings:
+looking for message strings, but it groks only C (or C++). Python
+introduces a few wrinkles, such as dual quoting characters, triple quoted
+strings, and raw strings. xgettext understands none of this. 
+
+Enter pygettext, which uses Python's standard tokenize module to scan
+Python source code, generating .pot files identical to what GNU xgettext[2]
+generates for C and C++ code. From there, the standard GNU tools can be
+used. 
+
+A word about marking Python strings as candidates for translation. GNU
+xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
+and gettext_noop. But those can be a lot of text to include all over your
+code. C and C++ have a trick: they use the C preprocessor. Most
+internationalized C source includes a #define for gettext() to _() so that
+what has to be written in the source is much less. Thus these are both
+translatable strings: 
 
     gettext("Translatable String")
     _("Translatable String")
@@ -37,11 +55,11 @@
  [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
  [2] http://www.gnu.org/software/gettext/gettext.html
 
-NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible.  However some options are still missing or are not fully
-implemented.  Also, xgettext's use of command line switches with option
-arguments is broken, and in these cases, pygettext just defines additional
-switches.
+NOTE: pygettext attempts to be option and feature compatible with GNU
+xgettext where ever possible. However some options are still missing or are
+not fully implemented. Also, xgettext's use of command line switches with
+option arguments is broken, and in these cases, pygettext just defines
+additional switches. 
 
 Usage: pygettext [options] inputfile ...
 
@@ -61,9 +79,9 @@
 
     -D
     --docstrings
-        Extract module, class, method, and function docstrings.  These do not
-        need to be wrapped in _() markers, and in fact cannot be for Python to
-        consider them docstrings. (See also the -X option).
+        Extract module, class, method, and function docstrings.  These do
+        not need to be wrapped in _() markers, and in fact cannot be for
+        Python to consider them docstrings. (See also the -X option).
 
     -h
     --help
@@ -135,23 +153,17 @@
         conjunction with the -D option above.
 
 If `inputfile' is -, standard input is read.
-"""
+""")
 
 import os
 import sys
 import time
 import getopt
+import token
 import tokenize
 import operator
 
-# for selftesting
-try:
-    import fintl
-    _ = fintl.gettext
-except ImportError:
-    def _(s): return s
-
-__version__ = '1.4'
+__version__ = '1.5'
 
 default_keywords = ['_']
 DEFAULTKEYWORDS = ', '.join(default_keywords)
@@ -183,7 +195,7 @@ def _(s): return s
 
 
 def usage(code, msg=''):
-    print >> sys.stderr, _(__doc__) % globals()
+    print >> sys.stderr, __doc__ % globals()
     if msg:
         print >> sys.stderr, msg
     sys.exit(code)
@@ -242,6 +254,103 @@ def normalize(s):
         s = '""\n"' + lineterm.join(lines) + '"'
     return s
 
+
+def containsAny(str, set):
+    """ Check whether 'str' contains ANY of the chars in 'set'
+    """
+    return 1 in [c in str for c in set]
+
+
+def _visit_pyfiles(list, dirname, names):
+    """ Helper for getFilesForName().
+    """
+    # get extension for python source files
+    if not globals().has_key('_py_ext'):
+        import imp
+        global _py_ext
+        _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
+
+    # don't recurse into CVS directories
+    if 'CVS' in names:
+        names.remove('CVS')
+
+    # add all *.py files to list
+    list.extend(
+        [os.path.join(dirname, file)
+            for file in names
+                if os.path.splitext(file)[1] == _py_ext])
+
+
+def _get_modpkg_path(dotted_name, pathlist=None):
+    """ Get the filesystem path for a module or a package.
+
+        Return the file system path to a file for a module,
+        and to a directory for a package. Return None if
+        the name is not found, or is a builtin or extension module.
+    """
+    import imp
+
+    # split off top-most name
+    parts = dotted_name.split('.', 1)
+
+    if len(parts) > 1:
+        # we have a dotted path, import top-level package
+        try:
+            file, pathname, description = imp.find_module(parts[0], pathlist)
+            if file: file.close()
+        except ImportError:
+            return None
+
+        # check if it's indeed a package
+        if description[2] == imp.PKG_DIRECTORY:
+            # recursively handle the remaining name parts
+            pathname = _get_modpkg_path(parts[1], [pathname])
+        else:
+            pathname = None
+    else:
+        # plain name
+        try:
+            file, pathname, description = imp.find_module(dotted_name, pathlist)
+            if file: file.close()
+            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+                pathname = None
+        except ImportError:
+            pathname = None
+
+    return pathname
+
+
+def getFilesForName(name):
+    """ Get a list of module files for a filename, a module or package name,
+        or a directory.
+    """
+    import imp
+
+    if not os.path.exists(name):
+        # check for glob chars
+        if containsAny(name, "*?[]"):
+            import glob
+            files = glob.glob(name)
+            list = []
+            for file in files:
+                list.extend(getFilesForName(file))
+            return list
+
+        # try to find module or package
+        name = _get_modpkg_path(name)
+        if not name:
+            return []
+
+    if os.path.isdir(name):
+        # find all python files in directory
+        list = []
+        os.path.walk(name, _visit_pyfiles, list)
+        return list
+    elif os.path.exists(name):
+        # a single file
+        return [name]
+
+    return []
 
 
 class TokenEater:
@@ -314,7 +423,12 @@ def __openseen(self, ttype, tstring, lineno):
             self.__state = self.__waiting
         elif ttype == tokenize.STRING:
             self.__data.append(safe_eval(tstring))
-        # TBD: should we warn if we seen anything else?
+        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
+                           token.NEWLINE, tokenize.NL]:
+            # warn if we see anything else than STRING or whitespace
+            print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
+                'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
+            self.__state = self.__waiting
 
     def __addentry(self, msg, lineno=None, isdocstring=0):
         if lineno is None:
@@ -495,6 +609,15 @@ class Options:
     else:
         options.toexclude = []
 
+    # resolve args to module lists
+    expanded = []
+    for arg in args:
+        if arg == '-':
+            expanded.append(arg)
+        else:
+            expanded.extend(getFilesForName(arg))
+    args = expanded
+
     # slurp through all the files
     eater = TokenEater(options)
     for filename in args:
@@ -539,3 +662,6 @@ class Options:
     main()
     # some more test strings
     _(u'a unicode string')
+    _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
+    _('more' 'than' 'one' 'string')
+