1-
21/* File object implementation */
32
43#include "Python.h"
@@ -116,6 +115,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
116115 f -> f_close = close ;
117116 f -> f_softspace = 0 ;
118117 f -> f_binary = strchr (mode ,'b' ) != NULL ;
118+ f -> f_buf = NULL ;
119119#ifdef WITH_UNIVERSAL_NEWLINES
120120 f -> f_univ_newline = (strchr (mode , 'U' ) != NULL );
121121 f -> f_newlinetypes = NEWLINE_UNKNOWN ;
@@ -271,6 +271,8 @@ err_closed(void)
271271 return NULL ;
272272}
273273
274+ void drop_readahead (PyFileObject * );
275+
274276/* Methods */
275277
276278static void
@@ -283,6 +285,7 @@ file_dealloc(PyFileObject *f)
283285 }
284286 Py_XDECREF (f -> f_name );
285287 Py_XDECREF (f -> f_mode );
288+ drop_readahead (f );
286289 f -> ob_type -> tp_free ((PyObject * )f );
287290}
288291
@@ -405,6 +408,7 @@ file_seek(PyFileObject *f, PyObject *args)
405408
406409 if (f -> f_fp == NULL )
407410 return err_closed ();
411+ drop_readahead (f );
408412 whence = 0 ;
409413 if (!PyArg_ParseTuple (args , "O|i:seek" , & offobj , & whence ))
410414 return NULL ;
@@ -1177,28 +1181,6 @@ file_readline(PyFileObject *f, PyObject *args)
11771181 return get_line (f , n );
11781182}
11791183
1180- static PyObject *
1181- file_xreadlines (PyFileObject * f )
1182- {
1183- static PyObject * xreadlines_function = NULL ;
1184-
1185- if (f -> f_fp == NULL )
1186- return err_closed ();
1187- if (!xreadlines_function ) {
1188- PyObject * xreadlines_module =
1189- PyImport_ImportModule ("xreadlines" );
1190- if (!xreadlines_module )
1191- return NULL ;
1192-
1193- xreadlines_function = PyObject_GetAttrString (xreadlines_module ,
1194- "xreadlines" );
1195- Py_DECREF (xreadlines_module );
1196- if (!xreadlines_function )
1197- return NULL ;
1198- }
1199- return PyObject_CallFunction (xreadlines_function , "(O)" , f );
1200- }
1201-
12021184static PyObject *
12031185file_readlines (PyFileObject * f , PyObject * args )
12041186{
@@ -1462,6 +1444,15 @@ file_writelines(PyFileObject *f, PyObject *seq)
14621444#undef CHUNKSIZE
14631445}
14641446
1447+ static PyObject *
1448+ file_getiter (PyFileObject * f )
1449+ {
1450+ if (f -> f_fp == NULL )
1451+ return err_closed ();
1452+ Py_INCREF (f );
1453+ return (PyObject * )f ;
1454+ }
1455+
14651456PyDoc_STRVAR (readline_doc ,
14661457"readline([size]) -> next line from the file, as a string.\n"
14671458"\n"
@@ -1517,10 +1508,10 @@ PyDoc_STRVAR(readlines_doc,
15171508"total number of bytes in the lines returned." );
15181509
15191510PyDoc_STRVAR (xreadlines_doc ,
1520- "xreadlines() -> next line from the file, as a string .\n"
1511+ "xreadlines() -> returns self .\n"
15211512"\n"
1522- "Equivalent to xreadlines.xreadlines(file). This is like readline(), but \n"
1523- "often quicker, due to reading ahead internally ." );
1513+ "For backward compatibility. File objects now include the performance \n"
1514+ "optimizations previously implemented in the xreadlines module ." );
15241515
15251516PyDoc_STRVAR (writelines_doc ,
15261517"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
@@ -1554,7 +1545,7 @@ static PyMethodDef file_methods[] = {
15541545 {"tell" , (PyCFunction )file_tell , METH_NOARGS , tell_doc },
15551546 {"readinto" , (PyCFunction )file_readinto , METH_VARARGS , readinto_doc },
15561547 {"readlines" , (PyCFunction )file_readlines , METH_VARARGS , readlines_doc },
1557- {"xreadlines" , (PyCFunction )file_xreadlines , METH_NOARGS , xreadlines_doc },
1548+ {"xreadlines" , (PyCFunction )file_getiter , METH_NOARGS , xreadlines_doc },
15581549 {"writelines" , (PyCFunction )file_writelines , METH_O , writelines_doc },
15591550 {"flush" , (PyCFunction )file_flush , METH_NOARGS , flush_doc },
15601551 {"close" , (PyCFunction )file_close , METH_NOARGS , close_doc },
@@ -1617,12 +1608,120 @@ static PyGetSetDef file_getsetlist[] = {
16171608 {0 },
16181609};
16191610
1611+ void
1612+ drop_readahead (PyFileObject * f )
1613+ {
1614+ if (f -> f_buf != NULL ) {
1615+ PyMem_Free (f -> f_buf );
1616+ f -> f_buf = NULL ;
1617+ }
1618+ }
1619+
1620+ /* Make sure that file has a readahead buffer with at least one byte
1621+ (unless at EOF) and no more than bufsize. Returns negative value on
1622+ error */
1623+ int readahead (PyFileObject * f , int bufsize ) {
1624+ int chunksize ;
1625+
1626+ if (f -> f_buf != NULL ) {
1627+ if ( (f -> f_bufend - f -> f_bufptr ) >= 1 )
1628+ return 0 ;
1629+ else
1630+ drop_readahead (f );
1631+ }
1632+ if ((f -> f_buf = PyMem_Malloc (bufsize )) == NULL ) {
1633+ return -1 ;
1634+ }
1635+ Py_BEGIN_ALLOW_THREADS
1636+ errno = 0 ;
1637+ chunksize = Py_UniversalNewlineFread (
1638+ f -> f_buf , bufsize , f -> f_fp , (PyObject * )f );
1639+ Py_END_ALLOW_THREADS
1640+ if (chunksize == 0 ) {
1641+ if (ferror (f -> f_fp )) {
1642+ PyErr_SetFromErrno (PyExc_IOError );
1643+ clearerr (f -> f_fp );
1644+ drop_readahead (f );
1645+ return -1 ;
1646+ }
1647+ }
1648+ f -> f_bufptr = f -> f_buf ;
1649+ f -> f_bufend = f -> f_buf + chunksize ;
1650+ return 0 ;
1651+ }
1652+
1653+ /* Used by file_iternext. The returned string will start with 'skip'
1654+ uninitialized bytes followed by the remainder of the line. Don't be
1655+ horrified by the recursive call: maximum recursion depth is limited by
1656+ logarithmic buffer growth to about 50 even when reading a 1gb line. */
1657+
1658+ PyStringObject *
1659+ readahead_get_line_skip (PyFileObject * f , int skip , int bufsize ) {
1660+ PyStringObject * s ;
1661+ char * bufptr ;
1662+ char * buf ;
1663+ int len ;
1664+
1665+ if (f -> f_buf == NULL )
1666+ if (readahead (f , bufsize ) < 0 )
1667+ return NULL ;
1668+
1669+ len = f -> f_bufend - f -> f_bufptr ;
1670+ if (len == 0 )
1671+ return (PyStringObject * )
1672+ PyString_FromStringAndSize (NULL , skip );
1673+ bufptr = memchr (f -> f_bufptr , '\n' , len );
1674+ if (bufptr != NULL ) {
1675+ bufptr ++ ; /* Count the '\n' */
1676+ len = bufptr - f -> f_bufptr ;
1677+ s = (PyStringObject * )
1678+ PyString_FromStringAndSize (NULL , skip + len );
1679+ if (s == NULL )
1680+ return NULL ;
1681+ memcpy (PyString_AS_STRING (s )+ skip , f -> f_bufptr , len );
1682+ f -> f_bufptr = bufptr ;
1683+ if (bufptr == f -> f_bufend )
1684+ drop_readahead (f );
1685+ } else {
1686+ bufptr = f -> f_bufptr ;
1687+ buf = f -> f_buf ;
1688+ f -> f_buf = NULL ; /* Force new readahead buffer */
1689+ s = readahead_get_line_skip (
1690+ f , skip + len , bufsize + (bufsize >>2 ) );
1691+ if (s == NULL ) {
1692+ PyMem_Free (buf );
1693+ return NULL ;
1694+ }
1695+ memcpy (PyString_AS_STRING (s )+ skip , bufptr , len );
1696+ PyMem_Free (buf );
1697+ }
1698+ return s ;
1699+ }
1700+
1701+ /* A larger buffer size may actually decrease performance. */
1702+ #define READAHEAD_BUFSIZE 8192
1703+
16201704static PyObject *
1621- file_getiter ( PyObject * f )
1705+ file_iternext ( PyFileObject * f )
16221706{
1623- return PyObject_CallMethod (f , "xreadlines" , "" );
1707+ PyStringObject * l ;
1708+
1709+ int i ;
1710+
1711+ if (f -> f_fp == NULL )
1712+ return err_closed ();
1713+
1714+ i = f -> f_softspace ;
1715+
1716+ l = readahead_get_line_skip (f , 0 , READAHEAD_BUFSIZE );
1717+ if (l == NULL || PyString_GET_SIZE (l ) == 0 ) {
1718+ Py_XDECREF (l );
1719+ return NULL ;
1720+ }
1721+ return (PyObject * )l ;
16241722}
16251723
1724+
16261725static PyObject *
16271726file_new (PyTypeObject * type , PyObject * args , PyObject * kwds )
16281727{
@@ -1742,8 +1841,8 @@ PyTypeObject PyFile_Type = {
17421841 0 , /* tp_clear */
17431842 0 , /* tp_richcompare */
17441843 0 , /* tp_weaklistoffset */
1745- file_getiter , /* tp_iter */
1746- 0 , /* tp_iternext */
1844+ ( getiterfunc ) file_getiter , /* tp_iter */
1845+ ( iternextfunc ) file_iternext , /* tp_iternext */
17471846 file_methods , /* tp_methods */
17481847 file_memberlist , /* tp_members */
17491848 file_getsetlist , /* tp_getset */
0 commit comments