@@ -73,17 +73,15 @@ def test(condition, true, false):
7373
7474
7575def c2py (plural ):
76- """
77- Gets a C expression as used in PO files for plural forms and
78- returns a Python lambda function that implements an equivalent
79- expression.
76+ """Gets a C expression as used in PO files for plural forms and returns a
77+ Python lambda function that implements an equivalent expression.
8078 """
8179 # Security check, allow only the "n" identifier
8280 from StringIO import StringIO
8381 import token , tokenize
8482 tokens = tokenize .generate_tokens (StringIO (plural ).readline )
8583 try :
86- danger = [ x for x in tokens if x [0 ] == token .NAME and x [1 ] != 'n' ]
84+ danger = [x for x in tokens if x [0 ] == token .NAME and x [1 ] != 'n' ]
8785 except tokenize .TokenError :
8886 raise ValueError , \
8987 'plural forms expression error, maybe unbalanced parenthesis'
@@ -218,7 +216,7 @@ def info(self):
218216 def charset (self ):
219217 return self ._charset
220218
221- def install (self , unicode = 0 ):
219+ def install (self , unicode = False ):
222220 import __builtin__
223221 __builtin__ .__dict__ ['_' ] = unicode and self .ugettext or self .gettext
224222
@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):
228226 LE_MAGIC = 0x950412deL
229227 BE_MAGIC = 0xde120495L
230228
231- def __init__ (self , fp = None , coerce = False ):
232- # Set this attribute before calling the base class constructor, since
233- # the latter calls _parse() which depends on self._coerce.
234- self ._coerce = coerce
235- NullTranslations .__init__ (self , fp )
236-
237229 def _parse (self , fp ):
238230 """Override this method to support alternative .mo formats."""
239231 unpack = struct .unpack
@@ -281,21 +273,28 @@ def _parse(self, fp):
281273 self ._charset = v .split ('charset=' )[1 ]
282274 elif k == 'plural-forms' :
283275 v = v .split (';' )
284- ## nplurals = v[0].split('nplurals=')[1]
285- ## nplurals = int(nplurals.strip())
286276 plural = v [1 ].split ('plural=' )[1 ]
287277 self .plural = c2py (plural )
278+ # Note: we unconditionally convert both msgids and msgstrs to
279+ # Unicode using the character encoding specified in the charset
280+ # parameter of the Content-Type header. The gettext documentation
280+ # strongly encourages msgids to be us-ascii, but some applications
282+ # require alternative encodings (e.g. Zope's ZCML and ZPT). For
283+ # traditional gettext applications, the msgid conversion will
284+ # cause no problems since us-ascii should always be a subset of
285+ # the charset encoding. We may want to fall back to 8-bit msgids
286+ # if the Unicode conversion fails.
288287 if msg .find ('\x00 ' ) >= 0 :
289288 # Plural forms
290289 msgid1 , msgid2 = msg .split ('\x00 ' )
291290 tmsg = tmsg .split ('\x00 ' )
292- if self ._coerce :
291+ if self ._charset :
293292 msgid1 = unicode (msgid1 , self ._charset )
294293 tmsg = [unicode (x , self ._charset ) for x in tmsg ]
295294 for i in range (len (tmsg )):
296295 catalog [(msgid1 , i )] = tmsg [i ]
297296 else :
298- if self ._coerce :
297+ if self ._charset :
299298 msg = unicode (msg , self ._charset )
300299 tmsg = unicode (tmsg , self ._charset )
301300 catalog [msg ] = tmsg
@@ -304,16 +303,23 @@ def _parse(self, fp):
304303 transidx += 8
305304
306305 def gettext (self , message ):
307- try :
308- return self ._catalog [ message ]
309- except KeyError :
306+ missing = object ()
307+ tmsg = self ._catalog . get ( message , missing )
308+ if tmsg is missing :
310309 if self ._fallback :
311310 return self ._fallback .gettext (message )
312311 return message
312+ # Encode the Unicode tmsg back to an 8-bit string, if possible
313+ if self ._charset :
314+ return tmsg .encode (self ._charset )
315+ return tmsg
313316
314317 def ngettext (self , msgid1 , msgid2 , n ):
315318 try :
316- return self ._catalog [(msgid1 , self .plural (n ))]
319+ tmsg = self ._catalog [(msgid1 , self .plural (n ))]
320+ if self ._charset :
321+ return tmsg .encode (self ._charset )
322+ return tmsg
317323 except KeyError :
318324 if self ._fallback :
319325 return self ._fallback .ngettext (msgid1 , msgid2 , n )
@@ -328,10 +334,7 @@ def ugettext(self, message):
328334 if tmsg is missing :
329335 if self ._fallback :
330336 return self ._fallback .ugettext (message )
331- tmsg = message
332- if not self ._coerce :
333- return unicode (tmsg , self ._charset )
334- # The msgstr is already coerced to Unicode
337+ return unicode (message )
335338 return tmsg
336339
337340 def ungettext (self , msgid1 , msgid2 , n ):
@@ -341,12 +344,9 @@ def ungettext(self, msgid1, msgid2, n):
341344 if self ._fallback :
342345 return self ._fallback .ungettext (msgid1 , msgid2 , n )
343346 if n == 1 :
344- tmsg = msgid1
347+ tmsg = unicode ( msgid1 )
345348 else :
346- tmsg = msgid2
347- if not self ._coerce :
348- return unicode (tmsg , self ._charset )
349- # The msgstr is already coerced to Unicode
349+ tmsg = unicode (msgid2 )
350350 return tmsg
351351
352352
@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):
392392_translations = {}
393393
394394def translation (domain , localedir = None , languages = None ,
395- class_ = None , fallback = 0 ):
395+ class_ = None , fallback = False ):
396396 if class_ is None :
397397 class_ = GNUTranslations
398398 mofiles = find (domain , localedir , languages , all = 1 )
399- if len ( mofiles ) == 0 :
399+ if not mofiles :
400400 if fallback :
401401 return NullTranslations ()
402402 raise IOError (ENOENT , 'No translation file found for domain' , domain )
@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,
419419 return result
420420
421421
422- def install (domain , localedir = None , unicode = 0 ):
423- translation (domain , localedir , fallback = 1 ).install (unicode )
422+ def install (domain , localedir = None , unicode = False ):
423+ translation (domain , localedir , fallback = True ).install (unicode )
424424
425425
426426
0 commit comments