Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 92b550c

Browse files
committed
This patch by Martin v. Loewis changes the UTF-16 codec to write a BOM (byte order mark) only at the start of the stream, and to interpret a BOM as such only when it is read at the start of a stream. Any BOM code point read or written later in the stream is treated as an ordinary ZWNBSP (zero-width no-break space) character. This is in sync with the Unicode specifications. Note that UTF-16 files will now *have* to start with a BOM in order to be readable by the codec.
1 parent 8c78d3a commit 92b550c

1 file changed

Lines changed: 33 additions & 3 deletions

File tree

Lib/encodings/utf_16.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
77
88
"""
9-
import codecs
9+
import codecs, sys
1010

1111
### Codec APIs
1212

@@ -18,10 +18,40 @@ class Codec(codecs.Codec):
1818
decode = codecs.utf_16_decode
1919

2020
class StreamWriter(Codec, codecs.StreamWriter):
    """UTF-16 stream writer that changes encoder after its first write.

    The first call to write() goes through whatever ``encode`` the
    instance inherits (defined on Codec, outside this block -- presumably
    the BOM-emitting codecs.utf_16_encode; confirm against the Codec
    class).  After that call an explicit native-byte-order encoder is
    installed on the instance, so later writes go through
    codecs.utf_16_le_encode or codecs.utf_16_be_encode instead.
    """

    def __init__(self, stream, errors='strict'):
        # Kept ahead of the base-class initialiser, as in the original
        # statement order: 0 means "first write has not happened yet".
        self.bom_written = 0
        codecs.StreamWriter.__init__(self, stream, errors)

    def write(self, data):
        """Encode *data* and write it to the stream.

        Returns whatever codecs.StreamWriter.write() returns.
        """
        output = codecs.StreamWriter.write(self, data)
        if self.bom_written:
            return output

        # First write just completed: from now on use the encoder that
        # matches this machine's byte order.
        self.bom_written = 1
        if sys.byteorder == 'little':
            native_encode = codecs.utf_16_le_encode
        else:
            native_encode = codecs.utf_16_be_encode
        # Instance attribute shadows the class-level encode.
        self.encode = native_encode
        return output
2234

2335
class StreamReader(Codec, codecs.StreamReader):
    """UTF-16 stream reader that requires a BOM at the start of the stream.

    On the first read() the leading two bytes are consumed and compared
    against codecs.BOM_BE / codecs.BOM_LE to pick the byte-order-specific
    decoder.  Once that has happened, read() delegates straight to
    codecs.StreamReader.read() using the selected decoder.
    """

    def __init__(self, stream, errors='strict'):
        # Set before the base-class initialiser, preserving the original
        # statement order: 0 means "signature not consumed yet".
        self.bom_read = 0
        codecs.StreamReader.__init__(self, stream, errors)

    def read(self, size=-1):
        """Read and decode data from the stream.

        size -- read budget passed on to codecs.StreamReader.read(); the
                two signature bytes consumed on the first call are
                deducted from a non-negative budget.  A negative value
                is passed through unchanged.

        Raises UnicodeError if the first two bytes of the stream are
        neither codecs.BOM_BE nor codecs.BOM_LE.
        """
        if not self.bom_read:
            signature = self.stream.read(2)
            if signature == codecs.BOM_BE:
                self.decode = codecs.utf_16_be_decode
            elif signature == codecs.BOM_LE:
                self.decode = codecs.utf_16_le_decode
            else:
                # Call-form raise: valid on both Python 2 and Python 3,
                # unlike the comma form ``raise UnicodeError, "..."``.
                raise UnicodeError("UTF-16 stream does not start with BOM")
            # Account for the two bytes already taken from the stream.
            if size > 2:
                size -= 2
            elif size >= 0:
                size = 0
            # Only marked as read once a valid signature was recognised,
            # so a failed attempt leaves the flag at 0.
            self.bom_read = 1
        return codecs.StreamReader.read(self, size)
2555

2656
### encodings module API
2757

0 commit comments

Comments
 (0)