Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ad5b9de

Browse files
committed
Change normalize_encoding() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's specification is unchanged.
1 parent c3b6ac7 commit ad5b9de

1 file changed

Lines changed: 11 additions & 14 deletions

File tree

Lib/encodings/__init__.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@
3434
_cache = {}
3535
_unknown = '--unknown--'
3636
_import_tail = ['*']
37-
_norm_encoding_map = (' . '
38-
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
39-
' abcdefghijklmnopqrstuvwxyz '
40-
' '
41-
' '
42-
' ')
4337
_aliases = aliases.aliases
4438

4539
class CodecRegistryError(LookupError, SystemError):
@@ -58,14 +52,17 @@ def normalize_encoding(encoding):
5852
non-ASCII characters, these must be Latin-1 compatible.
5953
6054
"""
61-
# Make sure we have an 8-bit string, because .translate() works
62-
# differently for Unicode strings.
63-
if isinstance(encoding, str):
64-
# Note that .encode('latin-1') does *not* use the codec
65-
# registry, so this call doesn't recurse. (See unicodeobject.c
66-
# PyUnicode_AsEncodedString() for details)
67-
encoding = encoding.encode('latin-1')
68-
return '_'.join(encoding.translate(_norm_encoding_map).split())
55+
chars = []
56+
punct = False
57+
for c in encoding:
58+
if c.isalnum() or c == '.':
59+
if punct and chars:
60+
chars.append('_')
61+
chars.append(c)
62+
punct = False
63+
else:
64+
punct = True
65+
return ''.join(chars)
6966

7067
def search_function(encoding):
7168

0 commit comments

Comments
 (0)