189189"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\ n"
190190"Language-Team: LANGUAGE <LL@li.org>\\ n"
191191"MIME-Version: 1.0\\ n"
192- "Content-Type: text/plain; charset=CHARSET \\ n"
193- "Content-Transfer-Encoding: ENCODING \\ n"
192+ "Content-Type: text/plain; charset=%(charset)s \\ n"
193+ "Content-Transfer-Encoding: %(encoding)s \\ n"
194194"Generated-By: pygettext.py %(version)s\\ n"
195195
196196''' )
@@ -204,54 +204,51 @@ def usage(code, msg=''):
204204
205205
206206
207- escapes = []
208-
209- def make_escapes (pass_iso8859 ):
210- global escapes
211- if pass_iso8859 :
212- # Allow iso-8859 characters to pass through so that e.g. 'msgid
207+ def make_escapes (pass_nonascii ):
208+ global escapes , escape
209+ if pass_nonascii :
210+ # Allow non-ascii characters to pass through so that e.g. 'msgid
213211 # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
214212 # escape any character outside the 32..126 range.
215213 mod = 128
214+ escape = escape_ascii
216215 else :
217216 mod = 256
218- for i in range ( 256 ):
219- if 32 <= ( i % mod ) <= 126 :
220- escapes . append ( chr ( i ))
221- else :
222- escapes . append ( " \\ %03o" % i )
223- escapes [ord ('\\ ' )] = ' \\ \\ '
224- escapes [ord ('\t ' )] = ' \\ t '
225- escapes [ord ('\r ' )] = ' \\ r '
226- escapes [ord ('\n ' )] = ' \\ n '
227- escapes [ ord ( ' \" ' )] = ' \\ "'
217+ escape = escape_nonascii
218+ escapes = [ r"\%03o" % i for i in range ( mod )]
219+ for i in range ( 32 , 127 ):
220+ escapes [ i ] = chr ( i )
221+ escapes [ ord ( ' \\ ' )] = r'\\'
222+ escapes [ord ('\t ' )] = r'\t '
223+ escapes [ord ('\r ' )] = r'\r '
224+ escapes [ord ('\n ' )] = r'\n '
225+ escapes [ord ('\" ' )] = r'\" '
226+
228227
228+ def escape_ascii (s , encoding ):
229+ return '' .join (escapes [ord (c )] if ord (c ) < 128 else c for c in s )
229230
230- def escape (s ):
231- global escapes
232- s = list (s )
233- for i in range (len (s )):
234- s [i ] = escapes [ord (s [i ])]
235- return EMPTYSTRING .join (s )
231+ def escape_nonascii (s , encoding ):
232+ return '' .join (escapes [b ] for b in s .encode (encoding ))
236233
237234
238235def safe_eval (s ):
239236 # unwrap quotes, safely
240237 return eval (s , {'__builtins__' :{}}, {})
241238
242239
243- def normalize (s ):
240+ def normalize (s , encoding ):
244241 # This converts the various Python string types into a format that is
245242 # appropriate for .po files, namely much closer to C style.
246243 lines = s .split ('\n ' )
247244 if len (lines ) == 1 :
248- s = '"' + escape (s ) + '"'
245+ s = '"' + escape (s , encoding ) + '"'
249246 else :
250247 if not lines [- 1 ]:
251248 del lines [- 1 ]
252249 lines [- 1 ] = lines [- 1 ] + '\n '
253250 for i in range (len (lines )):
254- lines [i ] = escape (lines [i ])
251+ lines [i ] = escape (lines [i ], encoding )
255252 lineterm = '\\ n"\n "'
256253 s = '""\n "' + lineterm .join (lines ) + '"'
257254 return s
@@ -448,7 +445,10 @@ def write(self, fp):
448445 timestamp = time .strftime ('%Y-%m-%d %H:%M+%Z' )
449446 # The time stamp in the header doesn't have the same format as that
450447 # generated by xgettext...
451- print (pot_header % {'time' : timestamp , 'version' : __version__ }, file = fp )
448+ encoding = fp .encoding if fp .encoding else 'UTF-8'
449+ print (pot_header % {'time' : timestamp , 'version' : __version__ ,
450+ 'charset' : encoding ,
451+ 'encoding' : '8bit' }, file = fp )
452452 # Sort the entries. First sort each particular entry's keys, then
453453 # sort all the entries by their first item.
454454 reverse = {}
@@ -492,7 +492,7 @@ def write(self, fp):
492492 print (locline , file = fp )
493493 if isdocstring :
494494 print ('#, docstring' , file = fp )
495- print ('msgid' , normalize (k ), file = fp )
495+ print ('msgid' , normalize (k , encoding ), file = fp )
496496 print ('msgstr ""\n ' , file = fp )
497497
498498
@@ -588,7 +588,7 @@ class Options:
588588 fp .close ()
589589
590590 # calculate escapes
591- make_escapes (options .escape )
591+ make_escapes (not options .escape )
592592
593593 # calculate all keywords
594594 options .keywords .extend (default_keywords )
@@ -621,17 +621,17 @@ class Options:
621621 if filename == '-' :
622622 if options .verbose :
623623 print (_ ('Reading standard input' ))
624- fp = sys .stdin
624+ fp = sys .stdin . buffer
625625 closep = 0
626626 else :
627627 if options .verbose :
628628 print (_ ('Working on %s' ) % filename )
629- fp = open (filename )
629+ fp = open (filename , 'rb' )
630630 closep = 1
631631 try :
632632 eater .set_filename (filename )
633633 try :
634- tokens = tokenize .generate_tokens (fp .readline )
634+ tokens = tokenize .tokenize (fp .readline )
635635 for _token in tokens :
636636 eater (* _token )
637637 except tokenize .TokenError as e :
0 commit comments