SF patch #1397960: When mixing file-iteration and

Yhg1s · Yhg1s · commit c45251a48542 · 2006-02-12T11:53:32.000Z
readline/readlines/read/readinto, loudly break by raising ValueError, rather
than silently deliver data out of order or hitting EOF prematurely.

Probably not a bugfix candidate, even though it affects no 'working' code.
diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py
@@ -3,7 +3,7 @@
 from array import array
 from weakref import proxy
 
-from test.test_support import verify, TESTFN, TestFailed
+from test.test_support import verify, TESTFN, TestFailed, findfile
 from UserList import UserList
 
 # verify weak references
@@ -228,3 +228,113 @@ def bug801631():
     bug801631()
 finally:
     os.unlink(TESTFN)
+
+# Test the complex interaction when mixing file-iteration and the various
+# read* methods. Ostensibly, the mixture could just be tested to work
+# when it should work according to the Python language, instead of fail
+# when it should fail according to the current CPython implementation.
+# People don't always program Python the way they should, though, and the
+# implemenation might change in subtle ways, so we explicitly test for
+# errors, too; the test will just have to be updated when the
+# implementation changes.
+dataoffset = 16384
+filler = "ham\n"
+assert not dataoffset % len(filler), \
+    "dataoffset must be multiple of len(filler)"
+nchunks = dataoffset // len(filler)
+testlines = [
+    "spam, spam and eggs\n",
+    "eggs, spam, ham and spam\n",
+    "saussages, spam, spam and eggs\n",
+    "spam, ham, spam and eggs\n",
+    "spam, spam, spam, spam, spam, ham, spam\n",
+    "wonderful spaaaaaam.\n"
+]
+methods = [("readline", ()), ("read", ()), ("readlines", ()),
+           ("readinto", (array("c", " "*100),))]
+
+try:
+    # Prepare the testfile
+    bag = open(TESTFN, "w")
+    bag.write(filler * nchunks)
+    bag.writelines(testlines)
+    bag.close()
+    # Test for appropriate errors mixing read* and iteration
+    for methodname, args in methods:
+        f = open(TESTFN)
+        if f.next() != filler:
+            raise TestFailed, "Broken testfile"
+        meth = getattr(f, methodname)
+        try:
+            meth(*args)
+        except ValueError:
+            pass
+        else:
+            raise TestFailed("%s%r after next() didn't raise ValueError" %
+                             (methodname, args))
+        f.close()
+
+    # Test to see if harmless (by accident) mixing of read* and iteration
+    # still works. This depends on the size of the internal iteration
+    # buffer (currently 8192,) but we can test it in a flexible manner.
+    # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
+    # 4096 lines of that should get us exactly on the buffer boundary for
+    # any power-of-2 buffersize between 4 and 16384 (inclusive).
+    f = open(TESTFN)
+    for i in range(nchunks):
+        f.next()
+    testline = testlines.pop(0)
+    try:
+        line = f.readline()
+    except ValueError:
+        raise TestFailed("readline() after next() with supposedly empty "
+                         "iteration-buffer failed anyway")
+    if line != testline:
+        raise TestFailed("readline() after next() with empty buffer "
+                         "failed. Got %r, expected %r" % (line, testline))
+    testline = testlines.pop(0)
+    buf = array("c", "\x00" * len(testline))
+    try:
+        f.readinto(buf)
+    except ValueError:
+        raise TestFailed("readinto() after next() with supposedly empty "
+                         "iteration-buffer failed anyway")
+    line = buf.tostring()
+    if line != testline:
+        raise TestFailed("readinto() after next() with empty buffer "
+                         "failed. Got %r, expected %r" % (line, testline))
+
+    testline = testlines.pop(0)
+    try:
+        line = f.read(len(testline))
+    except ValueError:
+        raise TestFailed("read() after next() with supposedly empty "
+                         "iteration-buffer failed anyway")
+    if line != testline:
+        raise TestFailed("read() after next() with empty buffer "
+                         "failed. Got %r, expected %r" % (line, testline))
+    try:
+        lines = f.readlines()
+    except ValueError:
+        raise TestFailed("readlines() after next() with supposedly empty "
+                         "iteration-buffer failed anyway")
+    if lines != testlines:
+        raise TestFailed("readlines() after next() with empty buffer "
+                         "failed. Got %r, expected %r" % (line, testline))
+    # Reading after iteration hit EOF shouldn't hurt either
+    f = open(TESTFN)
+    for line in f:
+        pass
+    try:
+        f.readline()
+        f.readinto(buf)
+        f.read()
+        f.readlines()
+    except ValueError:
+        raise TestFailed("read* failed after next() consumed file")
+finally:
+    # Bare 'except' so as not to mask errors in the test
+    try:
+        os.unlink(TESTFN)
+    except:
+        pass
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
@@ -344,6 +344,17 @@ err_closed(void)
 	return NULL;
 }
 
+/* Refuse regular file I/O if there's data in the iteration-buffer.
+ * Mixing them would cause data to arrive out of order, as the read*
+ * methods don't use the iteration buffer. */
+static PyObject *
+err_iterbuffered(void)
+{
+	PyErr_SetString(PyExc_ValueError,
+		"Mixing iteration and read methods would lose data");
+	return NULL;
+}
+
 static void drop_readahead(PyFileObject *);
 
 /* Methods */
@@ -795,6 +806,11 @@ file_read(PyFileObject *f, PyObject *args)
 
 	if (f->f_fp == NULL)
 		return err_closed();
+	/* refuse to mix with f.next() */
+	if (f->f_buf != NULL &&
+	    (f->f_bufend - f->f_bufptr) > 0 &&
+	    f->f_buf[0] != '\0')
+		return err_iterbuffered();
 	if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 		return NULL;
 	if (bytesrequested < 0)
@@ -858,6 +874,11 @@ file_readinto(PyFileObject *f, PyObject *args)
 
 	if (f->f_fp == NULL)
 		return err_closed();
+	/* refuse to mix with f.next() */
+	if (f->f_buf != NULL &&
+	    (f->f_bufend - f->f_bufptr) > 0 &&
+	    f->f_buf[0] != '\0')
+		return err_iterbuffered();
 	if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
 		return NULL;
 	ndone = 0;
@@ -1211,9 +1232,15 @@ PyFile_GetLine(PyObject *f, int n)
 	}
 
 	if (PyFile_Check(f)) {
-		if (((PyFileObject*)f)->f_fp == NULL)
+		PyFileObject *fo = (PyFileObject *)f;
+		if (fo->f_fp == NULL)
 			return err_closed();
-		result = get_line((PyFileObject *)f, n);
+		/* refuse to mix with f.next() */
+		if (fo->f_buf != NULL &&
+		    (fo->f_bufend - fo->f_bufptr) > 0 &&
+		    fo->f_buf[0] != '\0')
+			return err_iterbuffered();
+		result = get_line(fo, n);
 	}
 	else {
 		PyObject *reader;
@@ -1296,6 +1323,11 @@ file_readline(PyFileObject *f, PyObject *args)
 
 	if (f->f_fp == NULL)
 		return err_closed();
+	/* refuse to mix with f.next() */
+	if (f->f_buf != NULL &&
+	    (f->f_bufend - f->f_bufptr) > 0 &&
+	    f->f_buf[0] != '\0')
+		return err_iterbuffered();
 	if (!PyArg_ParseTuple(args, "|i:readline", &n))
 		return NULL;
 	if (n == 0)
@@ -1324,6 +1356,11 @@ file_readlines(PyFileObject *f, PyObject *args)
 
 	if (f->f_fp == NULL)
 		return err_closed();
+	/* refuse to mix with f.next() */
+	if (f->f_buf != NULL &&
+	    (f->f_bufend - f->f_bufptr) > 0 &&
+	    f->f_buf[0] != '\0')
+		return err_iterbuffered();
 	if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 		return NULL;
 	if ((list = PyList_New(0)) == NULL)