1
1
"""
2
2
Process raw qstr file and output qstr data with length, hash and data bytes.
3
3
4
- This script works with Python 2.6, 2.7, 3.3 and 3.4.
4
+ This script works with Python 2.7, 3.3 and 3.4.
5
+
6
+ For documentation about the format of compressed translated strings, see
7
+ supervisor/shared/translate.h
5
8
"""
6
9
7
10
from __future__ import print_function
@@ -132,19 +135,37 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
132
135
print ("// estimated total memory size" , len (lengths ) + 2 * len (values ) + sum (len (cb [u ]) for u in all_strings_concat ))
133
136
print ("//" , values , lengths )
134
137
values_type = "uint16_t" if max (ord (u ) for u in values ) > 255 else "uint8_t"
138
+ max_translation_encoded_length = max (len (translation .encode ("utf-8" )) for original ,translation in translations )
135
139
with open (compression_filename , "w" ) as f :
136
140
f .write ("const uint8_t lengths[] = {{ {} }};\n " .format (", " .join (map (str , lengths ))))
137
141
f .write ("const {} values[] = {{ {} }};\n " .format (values_type , ", " .join (str (ord (u )) for u in values )))
142
+ f .write ("#define compress_max_length_bits ({})\n " .format (max_translation_encoded_length .bit_length ()))
138
143
return values , lengths
139
144
140
- def decompress (encoding_table , length , encoded ):
145
+ def decompress (encoding_table , encoded , encoded_length_bits ):
141
146
values , lengths = encoding_table
142
- #print(l, encoded)
143
147
dec = []
144
148
this_byte = 0
145
149
this_bit = 7
146
150
b = encoded [this_byte ]
147
- for i in range (length ):
151
+ bits = 0
152
+ for i in range (encoded_length_bits ):
153
+ bits <<= 1
154
+ if 0x80 & b :
155
+ bits |= 1
156
+
157
+ b <<= 1
158
+ if this_bit == 0 :
159
+ this_bit = 7
160
+ this_byte += 1
161
+ if this_byte < len (encoded ):
162
+ b = encoded [this_byte ]
163
+ else :
164
+ this_bit -= 1
165
+ length = bits
166
+
167
+ i = 0
168
+ while i < length :
148
169
bits = 0
149
170
bit_length = 0
150
171
max_code = lengths [0 ]
@@ -170,10 +191,11 @@ def decompress(encoding_table, length, encoded):
170
191
searched_length += lengths [bit_length ]
171
192
172
193
v = values [searched_length + bits - max_code ]
194
+ i += len (v .encode ('utf-8' ))
173
195
dec .append (v )
174
196
return '' .join (dec )
175
197
176
- def compress (encoding_table , decompressed ):
198
+ def compress (encoding_table , decompressed , encoded_length_bits , len_translation_encoded ):
177
199
if not isinstance (decompressed , str ):
178
200
raise TypeError ()
179
201
values , lengths = encoding_table
@@ -182,6 +204,19 @@ def compress(encoding_table, decompressed):
182
204
#print(lengths)
183
205
current_bit = 7
184
206
current_byte = 0
207
+
208
+ code = len_translation_encoded
209
+ bits = encoded_length_bits + 1
210
+ for i in range (bits - 1 , 0 , - 1 ):
211
+ if len_translation_encoded & (1 << (i - 1 )):
212
+ enc [current_byte ] |= 1 << current_bit
213
+ if current_bit == 0 :
214
+ current_bit = 7
215
+ #print("packed {0:0{width}b}".format(enc[current_byte], width=8))
216
+ current_byte += 1
217
+ else :
218
+ current_bit -= 1
219
+
185
220
for c in decompressed :
186
221
#print()
187
222
#print("char", c, values.index(c))
@@ -342,14 +377,17 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
342
377
343
378
total_text_size = 0
344
379
total_text_compressed_size = 0
380
+ max_translation_encoded_length = max (len (translation .encode ("utf-8" )) for original , translation in i18ns )
381
+ encoded_length_bits = max_translation_encoded_length .bit_length ()
345
382
for original , translation in i18ns :
346
383
translation_encoded = translation .encode ("utf-8" )
347
- compressed = compress (encoding_table , translation )
384
+ compressed = compress (encoding_table , translation , encoded_length_bits , len ( translation_encoded ) )
348
385
total_text_compressed_size += len (compressed )
349
- decompressed = decompress (encoding_table , len (translation_encoded ), compressed )
386
+ decompressed = decompress (encoding_table , compressed , encoded_length_bits )
387
+ assert decompressed == translation
350
388
for c in C_ESCAPES :
351
389
decompressed = decompressed .replace (c , C_ESCAPES [c ])
352
- print ("TRANSLATION(\" {}\" , {}, {{ {} }} ) // {}" .format (original , len ( translation_encoded ) + 1 , ", " .join (["0x{:02x }" .format (x ) for x in compressed ]), decompressed ))
390
+ print ("TRANSLATION(\" {}\" , {}) // {}" .format (original , ", " .join (["{:d }" .format (x ) for x in compressed ]), decompressed ))
353
391
total_text_size += len (translation .encode ("utf-8" ))
354
392
355
393
print ()
@@ -385,6 +423,7 @@ def print_qstr_enums(qstrs):
385
423
386
424
qcfgs , qstrs , i18ns = parse_input_headers (args .infiles )
387
425
if args .translation :
426
+ i18ns = sorted (i18ns )
388
427
translations = translate (args .translation , i18ns )
389
428
encoding_table = compute_huffman_coding (translations , qstrs , args .compression_filename )
390
429
print_qstr_data (encoding_table , qcfgs , qstrs , translations )
0 commit comments