Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d624f18

Browse files
committed
Added much functionality to the bytes type.
Change file.readinto() to require binary mode.
1 parent e06b6b8 commit d624f18

5 files changed

Lines changed: 717 additions & 206 deletions

File tree

Include/bytesobject.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ extern "C" {
2121
/* Object layout */
2222
typedef struct {
2323
PyObject_VAR_HEAD
24-
char *ob_sval;
24+
char *ob_bytes;
2525
} PyBytesObject;
2626

2727
/* Type object */
@@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
3232
#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
3333

3434
/* Direct API functions */
35+
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
3536
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
3637
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
3738
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
3839
PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
3940

4041
/* Macros, trading safety for speed */
41-
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
42+
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
4243
#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
4344

4445
#ifdef __cplusplus

Lib/test/test_bytes.py

Lines changed: 232 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
"""Unit tests for the bytes type."""
22

3+
import os
4+
import re
35
import sys
6+
import tempfile
47
import unittest
58
import test.test_support
69

@@ -45,7 +48,7 @@ def __index__(self):
4548
self.assertRaises(ValueError, bytes, [C(256)])
4649

4750
def test_constructor_type_errors(self):
48-
self.assertRaises(TypeError, bytes, 0)
51+
self.assertRaises(TypeError, bytes, 0.0)
4952
class C:
5053
pass
5154
self.assertRaises(TypeError, bytes, ["0"])
@@ -100,42 +103,239 @@ def test_doc(self):
100103
self.failUnless(bytes.__doc__ != None)
101104
self.failUnless(bytes.__doc__.startswith("bytes("))
102105

103-
# XXX More stuff to test and build (TDD):
104-
# constructor from str: bytes(<str>) == bytes(map(ord, <str>))?
105-
# encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]])
106-
# default encoding Latin-1? (Matching ord)
107-
# slicing
108-
# extended slicing?
109-
# item assignment
110-
# slice assignment
111-
# extended slice assignment?
112-
# __contains__ with simple int arg
113-
# __contains__ with another bytes arg?
114-
# find/index? (int or bytes arg?)
115-
# count? (int arg)
116-
# concatenation (+)
117-
# repeat?
118-
# extend?
119-
# append?
120-
# insert?
121-
# pop?
122-
# __reversed__?
123-
# reverse? (inplace)
124-
# NOT sort!
106+
def test_buffer_api(self):
107+
short_sample = "Hello world\n"
108+
sample = short_sample + "x"*(20 - len(short_sample))
109+
tfn = tempfile.mktemp()
110+
try:
111+
# Prepare
112+
with open(tfn, "wb") as f:
113+
f.write(short_sample)
114+
# Test readinto
115+
with open(tfn, "rb") as f:
116+
b = bytes([ord('x')]*20)
117+
n = f.readinto(b)
118+
self.assertEqual(n, len(short_sample))
119+
self.assertEqual(list(b), map(ord, sample))
120+
# Test writing in binary mode
121+
with open(tfn, "wb") as f:
122+
f.write(b)
123+
with open(tfn, "rb") as f:
124+
self.assertEqual(f.read(), sample)
125+
# Test writing in text mode
126+
with open(tfn, "w") as f:
127+
f.write(b)
128+
with open(tfn, "r") as f:
129+
self.assertEqual(f.read(), sample)
130+
# Can't use readinto in text mode
131+
with open(tfn, "r") as f:
132+
self.assertRaises(TypeError, f.readinto, b)
133+
finally:
134+
try:
135+
os.remove(tfn)
136+
except os.error:
137+
pass
138+
139+
def test_reversed(self):
140+
input = map(ord, "Hello")
141+
b = bytes(input)
142+
output = list(reversed(b))
143+
input.reverse()
144+
self.assertEqual(output, input)
145+
146+
def test_getslice(self):
147+
def by(s):
148+
return bytes(map(ord, s))
149+
b = by("Hello, world")
150+
151+
self.assertEqual(b[:5], by("Hello"))
152+
self.assertEqual(b[1:5], by("ello"))
153+
self.assertEqual(b[5:7], by(", "))
154+
self.assertEqual(b[7:], by("world"))
155+
self.assertEqual(b[7:12], by("world"))
156+
self.assertEqual(b[7:100], by("world"))
157+
158+
self.assertEqual(b[:-7], by("Hello"))
159+
self.assertEqual(b[-11:-7], by("ello"))
160+
self.assertEqual(b[-7:-5], by(", "))
161+
self.assertEqual(b[-5:], by("world"))
162+
self.assertEqual(b[-5:12], by("world"))
163+
self.assertEqual(b[-5:100], by("world"))
164+
self.assertEqual(b[-100:5], by("Hello"))
165+
166+
def test_regexps(self):
167+
def by(s):
168+
return bytes(map(ord, s))
169+
b = by("Hello, world")
170+
self.assertEqual(re.findall(r"\w+", b), [by("Hello"), by("world")])
171+
172+
def test_setitem(self):
173+
b = bytes([1, 2, 3])
174+
b[1] = 100
175+
self.assertEqual(b, bytes([1, 100, 3]))
176+
b[-1] = 200
177+
self.assertEqual(b, bytes([1, 100, 200]))
178+
class C:
179+
def __init__(self, i=0):
180+
self.i = i
181+
def __index__(self):
182+
return self.i
183+
b[0] = C(10)
184+
self.assertEqual(b, bytes([10, 100, 200]))
185+
try:
186+
b[3] = 0
187+
self.fail("Didn't raise IndexError")
188+
except IndexError:
189+
pass
190+
try:
191+
b[-10] = 0
192+
self.fail("Didn't raise IndexError")
193+
except IndexError:
194+
pass
195+
try:
196+
b[0] = 256
197+
self.fail("Didn't raise ValueError")
198+
except ValueError:
199+
pass
200+
try:
201+
b[0] = C(-1)
202+
self.fail("Didn't raise ValueError")
203+
except ValueError:
204+
pass
205+
try:
206+
b[0] = None
207+
self.fail("Didn't raise TypeError")
208+
except TypeError:
209+
pass
210+
211+
def test_delitem(self):
212+
b = bytes(range(10))
213+
del b[0]
214+
self.assertEqual(b, bytes(range(1, 10)))
215+
del b[-1]
216+
self.assertEqual(b, bytes(range(1, 9)))
217+
del b[4]
218+
self.assertEqual(b, bytes([1, 2, 3, 4, 6, 7, 8]))
219+
220+
def test_setslice(self):
221+
b = bytes(range(10))
222+
self.assertEqual(list(b), list(range(10)))
223+
224+
b[0:5] = bytes([1, 1, 1, 1, 1])
225+
self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))
226+
227+
del b[0:-5]
228+
self.assertEqual(b, bytes([5, 6, 7, 8, 9]))
229+
230+
b[0:0] = bytes([0, 1, 2, 3, 4])
231+
self.assertEqual(b, bytes(range(10)))
232+
233+
b[-7:-3] = bytes([100, 101])
234+
self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9]))
235+
236+
b[3:5] = [3, 4, 5, 6]
237+
self.assertEqual(b, bytes(range(10)))
238+
239+
def test_setslice_trap(self):
240+
# This test verifies that we correctly handle assigning self
241+
# to a slice of self (the old Lambert Meertens trap).
242+
b = bytes(range(256))
243+
b[8:] = b
244+
self.assertEqual(b, bytes(list(range(8)) + list(range(256))))
245+
246+
def test_encoding(self):
247+
sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
248+
for enc in ("utf8", "utf16"):
249+
b = bytes(sample, enc)
250+
self.assertEqual(b, bytes(map(ord, sample.encode(enc))))
251+
self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
252+
b = bytes(sample, "latin1", "ignore")
253+
self.assertEqual(b, bytes(sample[:-4]))
254+
255+
def test_decode(self):
256+
sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
257+
for enc in ("utf8", "utf16"):
258+
b = bytes(sample, enc)
259+
self.assertEqual(b.decode(enc), sample)
260+
sample = u"Hello world\n\x80\x81\xfe\xff"
261+
b = bytes(sample, "latin1")
262+
self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
263+
self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
264+
265+
def test_from_buffer(self):
266+
sample = "Hello world\n\x80\x81\xfe\xff"
267+
buf = buffer(sample)
268+
b = bytes(buf)
269+
self.assertEqual(b, bytes(map(ord, sample)))
270+
271+
def test_to_str(self):
272+
sample = "Hello world\n\x80\x81\xfe\xff"
273+
b = bytes(sample)
274+
self.assertEqual(str(b), sample)
275+
276+
def test_from_int(self):
277+
b = bytes(0)
278+
self.assertEqual(b, bytes())
279+
b = bytes(10)
280+
self.assertEqual(b, bytes([0]*10))
281+
b = bytes(10000)
282+
self.assertEqual(b, bytes([0]*10000))
283+
284+
def test_concat(self):
285+
b1 = bytes("abc")
286+
b2 = bytes("def")
287+
self.assertEqual(b1 + b2, bytes("abcdef"))
288+
self.assertRaises(TypeError, lambda: b1 + "def")
289+
self.assertRaises(TypeError, lambda: "abc" + b2)
290+
291+
def test_repeat(self):
292+
b = bytes("abc")
293+
self.assertEqual(b * 3, bytes("abcabcabc"))
294+
self.assertEqual(b * 0, bytes())
295+
self.assertEqual(b * -1, bytes())
296+
self.assertRaises(TypeError, lambda: b * 3.14)
297+
self.assertRaises(TypeError, lambda: 3.14 * b)
298+
self.assertRaises(MemoryError, lambda: b * sys.maxint)
299+
self.assertEqual(bytes('x')*100, bytes('x'*100))
300+
301+
# Optimizations:
125302
# __iter__? (optimization)
126-
# __str__? (could return "".join(map(chr, self))
127-
# decode
128-
# buffer API
129-
# check that regexp searches work
130-
# (I suppose re.sub() returns a string)
131-
# file.readinto
132-
# file.write
303+
# __reversed__? (optimization)
304+
305+
# XXX Some list methods?
306+
# extended slicing
307+
# extended slice assignment
308+
# extend (same as b[len(b):] = src)
309+
# reverse (in-place)
310+
# remove
311+
# pop
312+
# NOT sort!
313+
# With int arg:
314+
# __contains__
315+
# index
316+
# count
317+
# append
318+
# insert
319+
320+
# XXX Some string methods? (Those that don't use character properties)
321+
# startswith
322+
# endswith
323+
# find, rfind
324+
# __contains__ (bytes arg)
325+
# index, rindex (bytes arg)
326+
# join
327+
# replace
328+
# translate
329+
# split, rsplit
330+
# lstrip, rstrip, strip??
331+
332+
# XXX pickle and marshal support?
133333

134334

135335
def test_main():
136336
test.test_support.run_unittest(BytesTest)
137337

138338

139339
if __name__ == "__main__":
140-
##test_main()
141-
unittest.main()
340+
test_main()
341+
##unittest.main()

Lib/test/test_file.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,17 @@
6767
f.close()
6868
verify(buf == a.tostring()[:n])
6969

70+
# verify readinto refuses text files
71+
a = array('c', 'x'*10)
72+
f = open(TESTFN, 'r')
73+
try:
74+
f.readinto(a)
75+
raise TestFailed("readinto shouldn't work in text mode")
76+
except TypeError:
77+
pass
78+
finally:
79+
f.close()
80+
7081
# verify writelines with integers
7182
f = open(TESTFN, 'wb')
7283
try:
@@ -261,13 +272,13 @@ def bug801631():
261272

262273
try:
263274
# Prepare the testfile
264-
bag = open(TESTFN, "w")
275+
bag = open(TESTFN, "wb")
265276
bag.write(filler * nchunks)
266277
bag.writelines(testlines)
267278
bag.close()
268279
# Test for appropriate errors mixing read* and iteration
269280
for methodname, args in methods:
270-
f = open(TESTFN)
281+
f = open(TESTFN, 'rb')
271282
if f.next() != filler:
272283
raise TestFailed, "Broken testfile"
273284
meth = getattr(f, methodname)
@@ -286,7 +297,7 @@ def bug801631():
286297
# Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
287298
# 4096 lines of that should get us exactly on the buffer boundary for
288299
# any power-of-2 buffersize between 4 and 16384 (inclusive).
289-
f = open(TESTFN)
300+
f = open(TESTFN, 'rb')
290301
for i in range(nchunks):
291302
f.next()
292303
testline = testlines.pop(0)
@@ -328,7 +339,7 @@ def bug801631():
328339
raise TestFailed("readlines() after next() with empty buffer "
329340
"failed. Got %r, expected %r" % (line, testline))
330341
# Reading after iteration hit EOF shouldn't hurt either
331-
f = open(TESTFN)
342+
f = open(TESTFN, 'rb')
332343
try:
333344
for line in f:
334345
pass

0 commit comments

Comments
 (0)