Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f4fdff7

Browse files
committed
Header.__init__(), .append(): Add an optional argument `errors`, which is passed straight through to the unicode() and ustr.encode() calls.
I think this is the best we can do to address the UnicodeErrors raised by badly encoded headers, such as those described in SF bug #648119.
1 parent 72261c9 commit f4fdff7

1 file changed

Lines changed: 11 additions & 6 deletions

File tree

Lib/email/Header.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
127127

128128
class Header:
129129
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
130-
continuation_ws=' '):
130+
continuation_ws=' ', errors='strict'):
131131
"""Create a MIME-compliant header that can contain many character sets.
132132
133133
Optional s is the initial header value. If None, the initial header
@@ -150,6 +150,8 @@ def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
150150
continuation_ws must be RFC 2822 compliant folding whitespace (usually
151151
either a space or a hard tab) which will be prepended to continuation
152152
lines.
153+
154+
errors is passed through to the .append() call.
153155
"""
154156
if charset is None:
155157
charset = USASCII
@@ -161,7 +163,7 @@ def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
161163
# BAW: I believe `chunks' and `maxlinelen' should be non-public.
162164
self._chunks = []
163165
if s is not None:
164-
self.append(s, charset)
166+
self.append(s, charset, errors)
165167
if maxlinelen is None:
166168
maxlinelen = MAXLINELEN
167169
if header_name is None:
@@ -196,7 +198,7 @@ def __eq__(self, other):
196198
def __ne__(self, other):
197199
return not self == other
198200

199-
def append(self, s, charset=None):
201+
def append(self, s, charset=None, errors='strict'):
200202
"""Append a string to the MIME header.
201203
202204
Optional charset, if given, should be a Charset instance or the name
@@ -213,6 +215,9 @@ def append(self, s, charset=None):
213215
using RFC 2047 rules, the Unicode string will be encoded using the
214216
following charsets in order: us-ascii, the charset hint, utf-8. The
215217
first character set not to provoke a UnicodeError is used.
218+
219+
Optional `errors' is passed as the third argument to any unicode() or
220+
ustr.encode() call.
216221
"""
217222
if charset is None:
218223
charset = self._charset
@@ -227,20 +232,20 @@ def append(self, s, charset=None):
227232
# Possibly raise UnicodeError if the byte string can't be
228233
# converted to a unicode with the input codec of the charset.
229234
incodec = charset.input_codec or 'us-ascii'
230-
ustr = unicode(s, incodec)
235+
ustr = unicode(s, incodec, errors)
231236
# Now make sure that the unicode could be converted back to a
232237
# byte string with the output codec, which may be different
233238
# than the input codec. Still, use the original byte string.
234239
outcodec = charset.output_codec or 'us-ascii'
235-
ustr.encode(outcodec)
240+
ustr.encode(outcodec, errors)
236241
elif isinstance(s, UnicodeType):
237242
# Now we have to be sure the unicode string can be converted
238243
# to a byte string with a reasonable output codec. We want to
239244
# use the byte string in the chunk.
240245
for charset in USASCII, charset, UTF8:
241246
try:
242247
outcodec = charset.output_codec or 'us-ascii'
243-
s = s.encode(outcodec)
248+
s = s.encode(outcodec, errors)
244249
break
245250
except UnicodeError:
246251
pass

0 commit comments

Comments
 (0)