Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d6a91a7

Browse files
committed
Issue #20879: Delay the initialization of encoding and decoding tables for
base32, ascii85 and base85 codecs in the base64 module, and delay the initialization of the unquote_to_bytes() table of the urllib.parse module, to not waste memory if these modules are not used.
1 parent 2a60534 commit d6a91a7

3 files changed

Lines changed: 54 additions & 19 deletions

File tree

Lib/base64.py

Lines changed: 42 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -138,15 +138,22 @@ def urlsafe_b64decode(s):
138138

139139
# Base32 encoding/decoding must be done in Python
140140
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
141-
_b32tab = [bytes([i]) for i in _b32alphabet]
142-
_b32tab2 = [a + b for a in _b32tab for b in _b32tab]
143-
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
141+
_b32tab2 = None
142+
_b32rev = None
144143

145144
def b32encode(s):
146145
"""Encode a byte string using Base32.
147146
148147
s is the byte string to encode. The encoded byte string is returned.
149148
"""
149+
global _b32tab2
150+
# Delay the initialization of the table to not waste memory
151+
# if the function is never called
152+
if _b32tab2 is None:
153+
b32tab = [bytes((i,)) for i in _b32alphabet]
154+
_b32tab2 = [a + b for a in b32tab for b in b32tab]
155+
b32tab = None
156+
150157
if not isinstance(s, bytes_types):
151158
s = memoryview(s).tobytes()
152159
leftover = len(s) % 5
@@ -193,6 +200,11 @@ def b32decode(s, casefold=False, map01=None):
193200
the input is incorrectly padded or if there are non-alphabet
194201
characters present in the input.
195202
"""
203+
global _b32rev
204+
# Delay the initialization of the table to not waste memory
205+
# if the function is never called
206+
if _b32rev is None:
207+
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
196208
s = _bytes_from_decode_data(s)
197209
if len(s) % 8:
198210
raise binascii.Error('Incorrect padding')
@@ -274,6 +286,11 @@ def b16decode(s, casefold=False):
274286
# Ascii85 encoding/decoding
275287
#
276288

289+
_a85chars = None
290+
_a85chars2 = None
291+
_A85START = b"<~"
292+
_A85END = b"~>"
293+
277294
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
278295
# Helper function for a85encode and b85encode
279296
if not isinstance(b, bytes_types):
@@ -284,8 +301,6 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
284301
b = b + b'\0' * padding
285302
words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
286303

287-
a85chars2 = _a85chars2
288-
a85chars = _a85chars
289304
chunks = [b'z' if foldnuls and not word else
290305
b'y' if foldspaces and word == 0x20202020 else
291306
(chars2[word // 614125] +
@@ -300,11 +315,6 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
300315

301316
return b''.join(chunks)
302317

303-
_A85START = b"<~"
304-
_A85END = b"~>"
305-
_a85chars = [bytes([i]) for i in range(33, 118)]
306-
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
307-
308318
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
309319
"""Encode a byte string using Ascii85.
310320
@@ -324,6 +334,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
324334
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
325335
which is used by the Adobe implementation.
326336
"""
337+
global _a85chars, _a85chars2
338+
# Delay the initialization of tables to not waste memory
339+
# if the function is never called
340+
if _a85chars is None:
341+
_a85chars = [bytes((i,)) for i in range(33, 118)]
342+
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
343+
327344
result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
328345

329346
if adobe:
@@ -408,10 +425,10 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
408425

409426
# The following code is originally taken (with permission) from Mercurial
410427

411-
_b85chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
412-
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
413-
_b85chars = [bytes([i]) for i in _b85chars]
414-
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
428+
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
429+
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
430+
_b85chars = None
431+
_b85chars2 = None
415432
_b85dec = None
416433

417434
def b85encode(b, pad=False):
@@ -420,17 +437,25 @@ def b85encode(b, pad=False):
420437
If pad is true, the input is padded with "\0" so its length is a multiple of
421438
4 characters before encoding.
422439
"""
440+
global _b85chars, _b85chars2
441+
# Delay the initialization of tables to not waste memory
442+
# if the function is never called
443+
if _b85chars is None:
444+
_b85chars = [bytes((i,)) for i in _b85alphabet]
445+
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
423446
return _85encode(b, _b85chars, _b85chars2, pad)
424447

425448
def b85decode(b):
426449
"""Decode base85-encoded byte array"""
427-
b = _bytes_from_decode_data(b)
428450
global _b85dec
451+
# Delay the initialization of tables to not waste memory
452+
# if the function is never called
429453
if _b85dec is None:
430454
_b85dec = [None] * 256
431-
for i, c in enumerate(_b85chars):
432-
_b85dec[c[0]] = i
455+
for i, c in enumerate(_b85alphabet):
456+
_b85dec[c] = i
433457

458+
b = _bytes_from_decode_data(b)
434459
padding = (-len(b)) % 5
435460
b = b + b'~' * padding
436461
out = []

Lib/urllib/parse.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,7 @@ def urldefrag(url):
472472
return _coerce_result(DefragResult(defrag, frag))
473473

474474
_hexdig = '0123456789ABCDEFabcdef'
475-
_hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
476-
for a in _hexdig for b in _hexdig}
475+
_hextobyte = None
477476

478477
def unquote_to_bytes(string):
479478
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -490,6 +489,12 @@ def unquote_to_bytes(string):
490489
return string
491490
res = [bits[0]]
492491
append = res.append
492+
# Delay the initialization of the table to not waste memory
493+
# if the function is never called
494+
global _hextobyte
495+
if _hextobyte is None:
496+
_hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
497+
for a in _hexdig for b in _hexdig}
493498
for item in bits[1:]:
494499
try:
495500
append(_hextobyte[item[:2]])

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ Core and Builtins
2121
Library
2222
-------
2323

24+
- Issue #20879: Delay the initialization of encoding and decoding tables for
25+
base32, ascii85 and base85 codecs in the base64 module, and delay the
26+
initialization of the unquote_to_bytes() table of the urllib.parse module, to
27+
not waste memory if these modules are not used.
28+
2429
- Issue #19157: Include the broadcast address in the usable hosts for IPv6
2530
in ipaddress.
2631

0 commit comments

Comments
 (0)