Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c4acc2b

Browse files
committed
GNUTranslations:
__init__(): Removed since we no longer need the coerce flag. Message ids and strings are now always coerced to Unicode, /if/ the catalog specified a charset parameter. gettext(), ngettext(): Since the message strings are stored as Unicode in the catalog, encode them back to 8-bit strings on return. ugettext(), ungettext(): Coerce the message ids to Unicode when there's no entry for the id in the catalog. Minor code cleanups; use booleans where appropriate.
1 parent edb155f commit c4acc2b

1 file changed

Lines changed: 33 additions & 33 deletions

File tree

Lib/gettext.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,15 @@ def test(condition, true, false):
7373

7474

7575
def c2py(plural):
76-
"""
77-
Gets a C expression as used in PO files for plural forms and
78-
returns a Python lambda function that implements an equivalent
79-
expression.
76+
"""Gets a C expression as used in PO files for plural forms and returns a
77+
Python lambda function that implements an equivalent expression.
8078
"""
8179
# Security check, allow only the "n" identifier
8280
from StringIO import StringIO
8381
import token, tokenize
8482
tokens = tokenize.generate_tokens(StringIO(plural).readline)
8583
try:
86-
danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
84+
danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
8785
except tokenize.TokenError:
8886
raise ValueError, \
8987
'plural forms expression error, maybe unbalanced parenthesis'
@@ -218,7 +216,7 @@ def info(self):
218216
def charset(self):
219217
return self._charset
220218

221-
def install(self, unicode=0):
219+
def install(self, unicode=False):
222220
import __builtin__
223221
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
224222

@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):
228226
LE_MAGIC = 0x950412deL
229227
BE_MAGIC = 0xde120495L
230228

231-
def __init__(self, fp=None, coerce=False):
232-
# Set this attribute before calling the base class constructor, since
233-
# the latter calls _parse() which depends on self._coerce.
234-
self._coerce = coerce
235-
NullTranslations.__init__(self, fp)
236-
237229
def _parse(self, fp):
238230
"""Override this method to support alternative .mo formats."""
239231
unpack = struct.unpack
@@ -281,21 +273,28 @@ def _parse(self, fp):
281273
self._charset = v.split('charset=')[1]
282274
elif k == 'plural-forms':
283275
v = v.split(';')
284-
## nplurals = v[0].split('nplurals=')[1]
285-
## nplurals = int(nplurals.strip())
286276
plural = v[1].split('plural=')[1]
287277
self.plural = c2py(plural)
278+
# Note: we unconditionally convert both msgids and msgstrs to
279+
# Unicode using the character encoding specified in the charset
280+
# parameter of the Content-Type header. The gettext documentation
281+
# strongly encourages msgids to be us-ascii, but some applications
282+
# require alternative encodings (e.g. Zope's ZCML and ZPT). For
283+
# traditional gettext applications, the msgid conversion will
284+
# cause no problems since us-ascii should always be a subset of
285+
# the charset encoding. We may want to fall back to 8-bit msgids
286+
# if the Unicode conversion fails.
288287
if msg.find('\x00') >= 0:
289288
# Plural forms
290289
msgid1, msgid2 = msg.split('\x00')
291290
tmsg = tmsg.split('\x00')
292-
if self._coerce:
291+
if self._charset:
293292
msgid1 = unicode(msgid1, self._charset)
294293
tmsg = [unicode(x, self._charset) for x in tmsg]
295294
for i in range(len(tmsg)):
296295
catalog[(msgid1, i)] = tmsg[i]
297296
else:
298-
if self._coerce:
297+
if self._charset:
299298
msg = unicode(msg, self._charset)
300299
tmsg = unicode(tmsg, self._charset)
301300
catalog[msg] = tmsg
@@ -304,16 +303,23 @@ def _parse(self, fp):
304303
transidx += 8
305304

306305
def gettext(self, message):
307-
try:
308-
return self._catalog[message]
309-
except KeyError:
306+
missing = object()
307+
tmsg = self._catalog.get(message, missing)
308+
if tmsg is missing:
310309
if self._fallback:
311310
return self._fallback.gettext(message)
312311
return message
312+
# Encode the Unicode tmsg back to an 8-bit string, if possible
313+
if self._charset:
314+
return tmsg.encode(self._charset)
315+
return tmsg
313316

314317
def ngettext(self, msgid1, msgid2, n):
315318
try:
316-
return self._catalog[(msgid1, self.plural(n))]
319+
tmsg = self._catalog[(msgid1, self.plural(n))]
320+
if self._charset:
321+
return tmsg.encode(self._charset)
322+
return tmsg
317323
except KeyError:
318324
if self._fallback:
319325
return self._fallback.ngettext(msgid1, msgid2, n)
@@ -328,10 +334,7 @@ def ugettext(self, message):
328334
if tmsg is missing:
329335
if self._fallback:
330336
return self._fallback.ugettext(message)
331-
tmsg = message
332-
if not self._coerce:
333-
return unicode(tmsg, self._charset)
334-
# The msgstr is already coerced to Unicode
337+
return unicode(message)
335338
return tmsg
336339

337340
def ungettext(self, msgid1, msgid2, n):
@@ -341,12 +344,9 @@ def ungettext(self, msgid1, msgid2, n):
341344
if self._fallback:
342345
return self._fallback.ungettext(msgid1, msgid2, n)
343346
if n == 1:
344-
tmsg = msgid1
347+
tmsg = unicode(msgid1)
345348
else:
346-
tmsg = msgid2
347-
if not self._coerce:
348-
return unicode(tmsg, self._charset)
349-
# The msgstr is already coerced to Unicode
349+
tmsg = unicode(msgid2)
350350
return tmsg
351351

352352

@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):
392392
_translations = {}
393393

394394
def translation(domain, localedir=None, languages=None,
395-
class_=None, fallback=0):
395+
class_=None, fallback=False):
396396
if class_ is None:
397397
class_ = GNUTranslations
398398
mofiles = find(domain, localedir, languages, all=1)
399-
if len(mofiles)==0:
399+
if not mofiles:
400400
if fallback:
401401
return NullTranslations()
402402
raise IOError(ENOENT, 'No translation file found for domain', domain)
@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,
419419
return result
420420

421421

422-
def install(domain, localedir=None, unicode=0):
423-
translation(domain, localedir, fallback=1).install(unicode)
422+
def install(domain, localedir=None, unicode=False):
423+
translation(domain, localedir, fallback=True).install(unicode)
424424

425425

426426

0 commit comments

Comments
 (0)