|
1 | 1 | #! /usr/bin/env python3 |
2 | 2 |
|
3 | | -"""RFC 3548: Base16, Base32, Base64 Data Encodings""" |
| 3 | +"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" |
4 | 4 |
|
5 | 5 | # Modified 04-Oct-1995 by Jack Jansen to use binascii module |
6 | 6 | # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support |
|
9 | 9 | import re |
10 | 10 | import struct |
11 | 11 | import binascii |
| 12 | +import itertools |
12 | 13 |
|
13 | 14 |
|
14 | 15 | __all__ = [ |
|
17 | 18 | # Generalized interface for other encodings |
18 | 19 | 'b64encode', 'b64decode', 'b32encode', 'b32decode', |
19 | 20 | 'b16encode', 'b16decode', |
| 21 | + # Base85 and Ascii85 encodings |
| 22 | + 'b85encode', 'b85decode', 'a85encode', 'a85decode', |
20 | 23 | # Standard Base64 encoding |
21 | 24 | 'standard_b64encode', 'standard_b64decode', |
22 | 25 | # Some common Base64 alternatives. As referenced by RFC 3548, see thread |
@@ -268,7 +271,193 @@ def b16decode(s, casefold=False): |
268 | 271 | raise binascii.Error('Non-base16 digit found') |
269 | 272 | return binascii.unhexlify(s) |
270 | 273 |
|
| 274 | +# |
| 275 | +# Ascii85 encoding/decoding |
| 276 | +# |
271 | 277 |
|
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared base-85 encoder used by a85encode() and b85encode().

    b is the data to encode.  Anything that is not an instance of
    bytes_types is copied to a bytes object through memoryview().

    chars is a sequence holding the 85 one-digit byte strings of the
    alphabet; chars2 is the matching 85 * 85 table of two-digit byte strings,
    indexed by ``high_digit * 85 + low_digit``.

    The input is zero-padded to a multiple of 4 bytes and encoded one
    big-endian 32-bit word at a time, 5 output digits per word.  If pad is
    false, the digits that correspond to the padding bytes are removed from
    the end of the output again.

    If foldnuls is true, an all-zero word is emitted as b'z'.  If foldspaces
    is true, a word of four spaces (0x20202020) is emitted as b'y'.

    The encoded data is returned as a bytes object.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    # Number of NUL bytes needed to reach a multiple of 4.
    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    # 614125 == 85 ** 3 and 7225 == 85 ** 2: the first lookup yields the two
    # most significant digits, the second the next two, the third the last.
    chunks = [b'z' if foldnuls and not word else
              b'y' if foldspaces and word == 0x20202020 else
              (chars2[word // 614125] +
               chars2[word // 85 % 7225] +
               chars[word % 85])
              for word in words]

    if padding and not pad:
        if chunks[-1] == b'z':
            # A folded group cannot be truncated; expand it back to five
            # zero digits first.
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
| 303 | + |
# Framing markers used by the Adobe flavour of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
# The 85 Ascii85 digits: the byte values 33 ('!') through 117 ('u').
_a85chars = [bytes((code,)) for code in range(33, 118)]
# Every two-digit combination, ordered so that index == high * 85 + low.
_a85chars2 = [high + low
              for high, low in itertools.product(_a85chars, repeat=2)]
| 308 | + |
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode a byte string using Ascii85 and return the encoded byte string.

    b is the byte string to encode.  Groups of four zero bytes are always
    written as the short sequence 'z'.

    foldspaces is an optional flag: when true, the short sequence 'y' is
    written instead of 4 consecutive spaces (ASCII 0x20), as supported by
    'btoa'.  The "standard" Adobe encoding does not support this.

    wrapcol, when non-zero, makes the output get newline characters inserted
    so that each output line is at most this many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, as done by the Adobe implementation.
    """
    encoded = _85encode(b, _a85chars, _a85chars2,
                        pad=pad, foldnuls=True, foldspaces=foldspaces)

    if adobe:
        encoded = _A85START + encoded

    if wrapcol:
        # The Adobe end marker is two bytes wide, so a line must hold at
        # least that much when framing is requested.
        width = max(2 if adobe else 1, wrapcol)
        lines = []
        for start in range(0, len(encoded), width):
            lines.append(encoded[start: start + width])
        # Push the end marker onto a line of its own if it would not fit.
        if adobe and len(lines[-1]) + 2 > width:
            lines.append(b'')
        encoded = b'\n'.join(lines)

    if adobe:
        return encoded + _A85END
    return encoded
| 344 | + |
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode an Ascii85 encoded byte string and return the decoded bytes.

    b is the encoded data to decode; it is normalised with
    _bytes_from_decode_data() before decoding.

    foldspaces is a flag that specifies whether the 'y' short sequence should
    be accepted as shorthand for 4 consecutive spaces (ASCII 0x20).  This
    feature is not supported by the "standard" Adobe encoding.

    adobe controls whether the input sequence is in Adobe Ascii85 format
    (i.e. is framed with <~ and ~>).

    ignorechars should be a byte string containing characters to ignore from
    the input.  This should only contain whitespace characters, and by
    default contains all whitespace characters in ASCII.

    ValueError is raised if adobe is true and the framing markers are
    missing, if a 5-digit group does not fit in 32 bits, if 'z' or 'y'
    appears inside a group, or if any other unexpected byte is found.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not (b.startswith(_A85START) and b.endswith(_A85END)):
            # Use {!r}: formatting bytes through str() triggers a
            # BytesWarning under "python -b"; the resulting text is the same.
            raise ValueError("Ascii85 encoded byte sequences must be bracketed "
                             "by {!r} and {!r}".format(_A85START, _A85END))
        b = b[2:-2]  # Strip off start/end markers
    #
    # We have to go through this stepwise, so as to ignore spaces and handle
    # special short sequences
    #
    packI = struct.Struct('!I').pack
    first_digit = b'!'[0]
    last_digit = b'u'[0]
    fold_nul = b'z'[0]
    fold_space = b'y'[0]
    decoded = []
    decoded_append = decoded.append
    curr = []
    curr_append = curr.append
    curr_clear = curr.clear
    # Four trailing 'u' digits (the highest digit value) complete any partial
    # final group; the surplus output bytes are cut off below.
    for x in b + b'u' * 4:
        if first_digit <= x <= last_digit:
            curr_append(x)
            if len(curr) == 5:
                acc = 0
                for digit in curr:
                    acc = 85 * acc + (digit - 33)
                try:
                    decoded_append(packI(acc))
                except struct.error:
                    raise ValueError('Ascii85 overflow') from None
                curr_clear()
        elif x == fold_nul:
            if curr:
                raise ValueError('z inside Ascii85 5-tuple')
            decoded_append(b'\0\0\0\0')
        elif foldspaces and x == fold_space:
            if curr:
                raise ValueError('y inside Ascii85 5-tuple')
            decoded_append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(decoded)
    # curr now holds only the padding digits that did not complete a group;
    # the rest of the four were decoded into bytes that must be discarded.
    padding = 4 - len(curr)
    if padding:
        # Throw away the extra padding
        result = result[:-padding]
    return result
| 409 | + |
| 410 | +# The following code is originally taken (with permission) from Mercurial |
| 411 | + |
# The 85 digits of the base85 alphabet, one single-byte string per digit.
_b85chars = [
    bytes((code,))
    for code in (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                 b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
]
# Every two-digit combination, ordered so that index == high * 85 + low.
_b85chars2 = [high + low for high in _b85chars for low in _b85chars]
# Reverse lookup table (byte value -> digit value); filled in by b85decode().
_b85dec = None
| 417 | + |
def b85encode(b, pad=False):
    """Encode the bytes in b using base85 and return the encoded bytes.

    If pad is true, the input is padded with NUL bytes so that its length is
    a multiple of 4 bytes before encoding.
    """
    encoded = _85encode(b, _b85chars, _b85chars2, pad=pad)
    return encoded
| 425 | + |
def b85decode(b):
    """Decode base85-encoded data and return the decoded bytes.

    b is the encoded data; it is normalised with _bytes_from_decode_data()
    before decoding.

    ValueError is raised if the input contains a byte that is not part of
    the base85 alphabet, or if a 5-digit group does not fit in 32 bits.
    """
    global _b85dec
    b = _bytes_from_decode_data(b)
    if _b85dec is None:
        # Build the reverse table in a local and publish it only once it is
        # complete, so a concurrent caller never sees a half-filled table.
        table = [None] * 256
        for i, c in enumerate(_b85chars):
            table[c[0]] = i
        _b85dec = table
    dec = _b85dec

    # Complete the last group with '~' (the highest digit); the surplus
    # output bytes are cut off at the end.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + dec[c]
        except TypeError:
            # A None table entry marks a byte outside the alphabet; find it
            # so the error can report its position.
            for j, c in enumerate(chunk):
                if dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result
272 | 461 |
|
273 | 462 | # Legacy interface. This code could be cleaned up since I don't believe |
274 | 463 | # binascii has any line length limitations. It just doesn't seem worth it |
|
0 commit comments