Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4a44293

Browse files
committed
Fixes to support CJKCodecs as per SF bug #852347. Actually, this
patch removes dependencies on the old unsupported KoreanCodecs package and the alternative JapaneseCodecs package. Since both of those provide aliases for their codecs, this removal just makes the generic codec names work. We needed to make slight changes to __init__() as well. This will be backported to Python 2.3 when its branch freeze is over.
1 parent 339270e commit 4a44293

1 file changed

Lines changed: 16 additions & 19 deletions

File tree

Lib/email/Charset.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
# Copyright (C) 2001,2002 Python Software Foundation
22
# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw)
33

4+
# Python 2.3 doesn't come with any Asian codecs by default. Two packages are
5+
# currently available and supported as of this writing (30-Dec-2003):
6+
#
7+
# CJKCodecs
8+
# http://cjkpython.i18n.org
9+
# This package contains Chinese, Japanese, and Korean codecs
10+
11+
# JapaneseCodecs
12+
# http://www.asahi-net.or.jp/~rd6t-kjym/python
13+
# Some Japanese users prefer this codec package
14+
415
from types import UnicodeType
516
from email.Encoders import encode_7or8bit
617
import email.base64MIME
@@ -88,27 +99,11 @@ def _isunicode(s):
8899
'ascii': 'us-ascii',
89100
}
90101

91-
# Map charsets to their Unicode codec strings. Note that Python doesn't come
92-
# with any Asian codecs by default. Here's where to get them:
93-
#
94-
# Japanese -- http://www.asahi-net.or.jp/~rd6t-kjym/python
95-
# Korean -- http://sf.net/projects/koco
96-
# Chinese -- http://sf.net/projects/python-codecs
97-
#
98-
# Note that these codecs have their own lifecycle and may be in varying states
99-
# of stability and useability.
100102

103+
# Map charsets to their Unicode codec strings.
101104
CODEC_MAP = {
102-
'euc-jp': 'japanese.euc-jp',
103-
'iso-2022-jp': 'japanese.iso-2022-jp',
104-
'shift_jis': 'japanese.shift_jis',
105-
'euc-kr': 'korean.euc-kr',
106-
'ks_c_5601-1987': 'korean.cp949',
107-
'iso-2022-kr': 'korean.iso-2022-kr',
108-
'johab': 'korean.johab',
109-
'gb2132': 'eucgb2312_cn',
105+
'gb2312': 'eucgb2312_cn',
110106
'big5': 'big5_tw',
111-
'utf-8': 'utf-8',
112107
# Hack: We don't want *any* conversion for stuff marked us-ascii, as all
113108
# sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
114109
# Let that stuff pass through without conversion to/from Unicode.
@@ -220,6 +215,8 @@ def __init__(self, input_charset=DEFAULT_CHARSET):
220215
# it.
221216
henc, benc, conv = CHARSETS.get(self.input_charset,
222217
(SHORTEST, BASE64, None))
218+
if not conv:
219+
conv = self.input_charset
223220
# Set the attributes, allowing the arguments to override the default.
224221
self.header_encoding = henc
225222
self.body_encoding = benc
@@ -229,7 +226,7 @@ def __init__(self, input_charset=DEFAULT_CHARSET):
229226
self.input_codec = CODEC_MAP.get(self.input_charset,
230227
self.input_charset)
231228
self.output_codec = CODEC_MAP.get(self.output_charset,
232-
self.input_codec)
229+
self.output_charset)
233230

234231
def __str__(self):
235232
return self.input_charset.lower()

0 commit comments

Comments
 (0)