Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f29b64d

Browse files
committed
Use the "MS" getline hack (fgets()) by default on non-getc_unlocked
platforms. See NEWS for details.
1 parent e119006 commit f29b64d

2 files changed

Lines changed: 64 additions & 35 deletions

File tree

Misc/NEWS

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,23 @@ Core language, builtins, and interpreter
2525

2626
- Even if you don't use file.xreadlines(), you may expect a speedup on
2727
line-by-line input. The file.readline() method has been optimized
28-
quite a bit in platform-specific ways, both on Windows (using an
29-
incredibly complex, but nevertheless thread-safe), and on systems
30-
(like Linux) that support flockfile(), getc_unlocked(), and
31-
funlockfile(). In addition, the fileinput module, while still slow,
32-
has been sped up too, by using file.readlines(sizehint).
28+
quite a bit in platform-specific ways: on systems (like Linux) that
29+
support flockfile(), getc_unlocked(), and funlockfile(), those are
30+
used by default. On systems (like Windows) without getc_unlocked(),
31+
a complicated (but still thread-safe) method using fgets() is used by
32+
default.
33+
34+
You can force use of the fgets() method by #define'ing
35+
USE_FGETS_IN_GETLINE at build time (it may be faster than
36+
getc_unlocked()).
37+
38+
You can force fgets() not to be used by #define'ing
39+
DONT_USE_FGETS_IN_GETLINE (this is the first thing to try if std test
40+
test_bufio.py fails -- and let us know if it does!).
41+
42+
- In addition, the fileinput module, while still slower than the other
43+
methods on most platforms, has been sped up too, by using
44+
file.readlines(sizehint).
3345

3446
- Support for run-time warnings has been added, including a new
3547
command line option (-W) to specify the disposition of warnings.

Objects/fileobject.c

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ file_readinto(PyFileObject *f, PyObject *args)
635635
}
636636

637637
/**************************************************************************
638-
Win32 MS routine to get next line.
638+
Routine to get next line using platform fgets().
639639
640640
Under MSVC 6:
641641
@@ -651,23 +651,41 @@ So we use fgets for speed(!), despite that it's painful.
651651
652652
MS realloc is also slow.
653653
654-
In the usual case, we have one pleasantly small line already sitting in a
655-
stdio buffer, and we optimize heavily for that case.
656-
657-
CAUTION: This routine cheats, relying on that MSVC 6 fgets doesn't overwrite
658-
any buffer positions to the right of the terminating null byte. Seems
659-
unlikely that will change in the future, but ... std test test_bufio should
660-
catch it if that changes.
654+
Reports from other platforms on this method vs getc_unlocked (which MS doesn't
655+
have):
656+
Linux a wash
657+
Solaris a wash
658+
Tru64 Unix getline_via_fgets significantly faster
659+
660+
CAUTION: The C std isn't clear about this: in those cases where fgets
661+
writes something into the buffer, can it write into any position beyond the
662+
required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
663+
known on which it does; and it would be a strange way to code fgets. Still,
664+
getline_via_fgets may not work correctly if it does. The std test
665+
test_bufio.py should fail if platform fgets() routinely writes beyond the
666+
trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
661667
**************************************************************************/
662668

663-
/* if Win32 and MS's compiler */
664-
#if defined(MS_WIN32) && defined(_MSC_VER)
665-
#define USE_MS_GETLINE_HACK
669+
/* Use this routine if told to, or by default on non-getc_unlocked()
670+
* platforms unless told not to. Yikes! Let's spell that out:
671+
* On a platform with getc_unlocked():
672+
* By default, use getc_unlocked().
673+
* If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
674+
* On a platform without getc_unlocked():
675+
* By default, use fgets().
676+
* If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
677+
*/
678+
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
679+
#define USE_FGETS_IN_GETLINE
680+
#endif
681+
682+
#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
683+
#undef USE_FGETS_IN_GETLINE
666684
#endif
667685

668-
#ifdef USE_MS_GETLINE_HACK
686+
#ifdef USE_FGETS_IN_GETLINE
669687
static PyObject*
670-
ms_getline_hack(FILE *fp)
688+
getline_via_fgets(FILE *fp)
671689
{
672690
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
673691
* no-realloc path. get_line uses 100 for its initial size, but isn't trying
@@ -686,14 +704,14 @@ ms_getline_hack(FILE *fp)
686704
char* pvfree; /* address of next free slot */
687705
char* pvend; /* address one beyond last free slot */
688706
char* p; /* temp */
689-
char msbuf[INITBUFSIZE];
707+
char buf[INITBUFSIZE];
690708

691709
/* Optimize for normal case: avoid _PyString_Resize if at all
692-
* possible via first reading into auto msbuf.
710+
* possible via first reading into auto buf.
693711
*/
694712
Py_BEGIN_ALLOW_THREADS
695-
memset(msbuf, '\n', INITBUFSIZE);
696-
p = fgets(msbuf, INITBUFSIZE, fp);
713+
memset(buf, '\n', INITBUFSIZE);
714+
p = fgets(buf, INITBUFSIZE, fp);
697715
Py_END_ALLOW_THREADS
698716

699717
if (p == NULL) {
@@ -704,42 +722,42 @@ ms_getline_hack(FILE *fp)
704722
return v;
705723
}
706724
/* fgets read *something* */
707-
p = memchr(msbuf, '\n', INITBUFSIZE);
725+
p = memchr(buf, '\n', INITBUFSIZE);
708726
if (p != NULL) {
709727
/* Did the \n come from fgets or from us?
710728
* Since fgets stops at the first \n, and then writes \0, if
711729
* it's from fgets a \0 must be next. But if that's so, it
712730
* could not have come from us, since the \n's we filled the
713731
* buffer with have only more \n's to the right.
714732
*/
715-
pvend = msbuf + INITBUFSIZE;
733+
pvend = buf + INITBUFSIZE;
716734
if (p+1 < pvend && *(p+1) == '\0') {
717735
/* It's from fgets: we win! In particular, we
718736
* haven't done any mallocs yet, and can build the
719737
* final result on the first try.
720738
*/
721-
v = PyString_FromStringAndSize(msbuf, p - msbuf + 1);
739+
v = PyString_FromStringAndSize(buf, p - buf + 1);
722740
return v;
723741
}
724742
/* Must be from us: fgets didn't fill the buffer and didn't
725743
* find a newline, so it must be the last and newline-free
726744
* line of the file.
727745
*/
728-
assert(p > msbuf && *(p-1) == '\0');
729-
v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
746+
assert(p > buf && *(p-1) == '\0');
747+
v = PyString_FromStringAndSize(buf, p - buf - 1);
730748
return v;
731749
}
732750
/* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
733751
* So this line isn't over yet, or maybe it is but we're exactly at
734752
* EOF; in either case, we're tired <wink>.
735753
*/
736-
assert(msbuf[INITBUFSIZE-1] == '\0');
754+
assert(buf[INITBUFSIZE-1] == '\0');
737755
total_v_size = INITBUFSIZE + INCBUFSIZE;
738756
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
739757
if (v == NULL)
740758
return v;
741759
/* copy over everything except the last null byte */
742-
memcpy(BUF(v), msbuf, INITBUFSIZE-1);
760+
memcpy(BUF(v), buf, INITBUFSIZE-1);
743761
pvfree = BUF(v) + INITBUFSIZE - 1;
744762

745763
/* Keep reading stuff into v; if it ever ends successfully, break
@@ -798,7 +816,7 @@ ms_getline_hack(FILE *fp)
798816
#undef INITBUFSIZE
799817
#undef INCBUFSIZE
800818
}
801-
#endif /* ifdef USE_MS_GETLINE_HACK */
819+
#endif /* ifdef USE_FGETS_IN_GETLINE */
802820

803821
/* Internal routine to get a line.
804822
Size argument interpretation:
@@ -825,10 +843,9 @@ get_line(PyFileObject *f, int n)
825843
size_t n1, n2;
826844
PyObject *v;
827845

828-
#ifdef USE_MS_GETLINE_HACK
829-
846+
#ifdef USE_FGETS_IN_GETLINE
830847
if (n <= 0)
831-
return ms_getline_hack(fp);
848+
return getline_via_fgets(fp);
832849
#endif
833850
n2 = n > 0 ? n : 100;
834851
v = PyString_FromStringAndSize((char *)NULL, n2);
@@ -967,10 +984,10 @@ static PyObject *
967984
file_xreadlines(PyFileObject *f, PyObject *args)
968985
{
969986
static PyObject* xreadlines_function = NULL;
970-
987+
971988
if (!PyArg_ParseTuple(args, ":xreadlines"))
972989
return NULL;
973-
990+
974991
if (!xreadlines_function) {
975992
PyObject *xreadlines_module =
976993
PyImport_ImportModule("xreadlines");

0 commit comments

Comments
 (0)