188188"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\ n"
189189"Language-Team: LANGUAGE <[email protected] >\\ n" 190190"MIME-Version: 1.0\\ n"
191- "Content-Type: text/plain; charset=CHARSET \\ n"
192- "Content-Transfer-Encoding: ENCODING \\ n"
191+ "Content-Type: text/plain; charset=%(charset)s \\ n"
192+ "Content-Transfer-Encoding: %(encoding)s \\ n"
193193"Generated-By: pygettext.py %(version)s\\ n"
194194
195195''' )
@@ -203,54 +203,51 @@ def usage(code, msg=''):
203203
204204
205205
206- escapes = []
207-
208- def make_escapes (pass_iso8859 ):
209- global escapes
210- if pass_iso8859 :
211- # Allow iso-8859 characters to pass through so that e.g. 'msgid
206+ def make_escapes (pass_nonascii ):
207+ global escapes , escape
208+ if pass_nonascii :
209+ # Allow non-ascii characters to pass through so that e.g. 'msgid
212210 # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
213211 # escape any character outside the 32..126 range.
214212 mod = 128
213+ escape = escape_ascii
215214 else :
216215 mod = 256
217- for i in range ( 256 ):
218- if 32 <= ( i % mod ) <= 126 :
219- escapes . append ( chr ( i ))
220- else :
221- escapes . append ( " \\ %03o" % i )
222- escapes [ord ('\\ ' )] = ' \\ \\ '
223- escapes [ord ('\t ' )] = ' \\ t '
224- escapes [ord ('\r ' )] = ' \\ r '
225- escapes [ord ('\n ' )] = ' \\ n '
226- escapes [ ord ( ' \" ' )] = ' \\ "'
216+ escape = escape_nonascii
217+ escapes = [ r"\%03o" % i for i in range ( mod )]
218+ for i in range ( 32 , 127 ):
219+ escapes [ i ] = chr ( i )
220+ escapes [ ord ( ' \\ ' )] = r'\\'
221+ escapes [ord ('\t ' )] = r'\t '
222+ escapes [ord ('\r ' )] = r'\r '
223+ escapes [ord ('\n ' )] = r'\n '
224+ escapes [ord ('\" ' )] = r'\" '
225+
227226
227+ def escape_ascii (s , encoding ):
228+ return '' .join (escapes [ord (c )] if ord (c ) < 128 else c for c in s )
228229
229- def escape (s ):
230- global escapes
231- s = list (s )
232- for i in range (len (s )):
233- s [i ] = escapes [ord (s [i ])]
234- return EMPTYSTRING .join (s )
230+ def escape_nonascii (s , encoding ):
231+ return '' .join (escapes [b ] for b in s .encode (encoding ))
235232
236233
237234def safe_eval (s ):
238235 # unwrap quotes, safely
239236 return eval (s , {'__builtins__' :{}}, {})
240237
241238
242- def normalize (s ):
239+ def normalize (s , encoding ):
243240 # This converts the various Python string types into a format that is
244241 # appropriate for .po files, namely much closer to C style.
245242 lines = s .split ('\n ' )
246243 if len (lines ) == 1 :
247- s = '"' + escape (s ) + '"'
244+ s = '"' + escape (s , encoding ) + '"'
248245 else :
249246 if not lines [- 1 ]:
250247 del lines [- 1 ]
251248 lines [- 1 ] = lines [- 1 ] + '\n '
252249 for i in range (len (lines )):
253- lines [i ] = escape (lines [i ])
250+ lines [i ] = escape (lines [i ], encoding )
254251 lineterm = '\\ n"\n "'
255252 s = '""\n "' + lineterm .join (lines ) + '"'
256253 return s
@@ -447,7 +444,10 @@ def write(self, fp):
447444 timestamp = time .strftime ('%Y-%m-%d %H:%M+%Z' )
448445 # The time stamp in the header doesn't have the same format as that
449446 # generated by xgettext...
450- print (pot_header % {'time' : timestamp , 'version' : __version__ }, file = fp )
447+ encoding = fp .encoding if fp .encoding else 'UTF-8'
448+ print (pot_header % {'time' : timestamp , 'version' : __version__ ,
449+ 'charset' : encoding ,
450+ 'encoding' : '8bit' }, file = fp )
451451 # Sort the entries. First sort each particular entry's keys, then
452452 # sort all the entries by their first item.
453453 reverse = {}
@@ -491,7 +491,7 @@ def write(self, fp):
491491 print (locline , file = fp )
492492 if isdocstring :
493493 print ('#, docstring' , file = fp )
494- print ('msgid' , normalize (k ), file = fp )
494+ print ('msgid' , normalize (k , encoding ), file = fp )
495495 print ('msgstr ""\n ' , file = fp )
496496
497497
@@ -587,7 +587,7 @@ class Options:
587587 fp .close ()
588588
589589 # calculate escapes
590- make_escapes (options .escape )
590+ make_escapes (not options .escape )
591591
592592 # calculate all keywords
593593 options .keywords .extend (default_keywords )
@@ -620,17 +620,17 @@ class Options:
620620 if filename == '-' :
621621 if options .verbose :
622622 print (_ ('Reading standard input' ))
623- fp = sys .stdin
623+ fp = sys .stdin . buffer
624624 closep = 0
625625 else :
626626 if options .verbose :
627627 print (_ ('Working on %s' ) % filename )
628- fp = open (filename )
628+ fp = open (filename , 'rb' )
629629 closep = 1
630630 try :
631631 eater .set_filename (filename )
632632 try :
633- tokens = tokenize .generate_tokens (fp .readline )
633+ tokens = tokenize .tokenize (fp .readline )
634634 for _token in tokens :
635635 eater (* _token )
636636 except tokenize .TokenError as e :
0 commit comments