77 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
88*/
99
/* Values stored in the `state` field of a stringio object. */
#define STATE_REALIZED      1   /* contents are held in the internal buffer */
#define STATE_ACCUMULATING  2   /* contents are held in the _PyAccu member */
12+
1013typedef struct {
1114 PyObject_HEAD
1215 Py_UCS4 * buf ;
1316 Py_ssize_t pos ;
1417 Py_ssize_t string_size ;
1518 size_t buf_size ;
1619
20+ /* The stringio object can be in two states: accumulating or realized.
21+ In accumulating state, the internal buffer contains nothing and
22+ the contents are given by the embedded _PyAccu structure.
23+ In realized state, the internal buffer is meaningful and the
24+ _PyAccu is destroyed.
25+ */
26+ int state ;
27+ _PyAccu accu ;
28+
1729 char ok ; /* initialized? */
1830 char closed ;
1931 char readuniversal ;
@@ -40,6 +52,11 @@ typedef struct {
4052 return NULL; \
4153 }
4254
/* Make sure the internal buffer holds the object's contents before the
   caller touches it.  Expands to a statement that returns NULL from the
   enclosing function when realize() reports an error, so it may only be
   used in functions whose return type accepts NULL. */
#define ENSURE_REALIZED(self) \
    if (realize((self)) < 0) { \
        return NULL; \
    }
59+
4360PyDoc_STRVAR (stringio_doc ,
4461 "Text I/O implementation using an in-memory buffer.\n"
4562 "\n"
@@ -102,6 +119,54 @@ resize_buffer(stringio *self, size_t size)
102119 return -1 ;
103120}
104121
122+ static PyObject *
123+ make_intermediate (stringio * self )
124+ {
125+ PyObject * intermediate = _PyAccu_Finish (& self -> accu );
126+ self -> state = STATE_REALIZED ;
127+ if (intermediate == NULL )
128+ return NULL ;
129+ if (_PyAccu_Init (& self -> accu ) ||
130+ _PyAccu_Accumulate (& self -> accu , intermediate )) {
131+ Py_DECREF (intermediate );
132+ return NULL ;
133+ }
134+ self -> state = STATE_ACCUMULATING ;
135+ return intermediate ;
136+ }
137+
138+ static int
139+ realize (stringio * self )
140+ {
141+ Py_ssize_t len ;
142+ PyObject * intermediate ;
143+
144+ if (self -> state == STATE_REALIZED )
145+ return 0 ;
146+ assert (self -> state == STATE_ACCUMULATING );
147+ self -> state = STATE_REALIZED ;
148+
149+ intermediate = _PyAccu_Finish (& self -> accu );
150+ if (intermediate == NULL )
151+ return -1 ;
152+
153+ /* Append the intermediate string to the internal buffer.
154+ The length should be equal to the current cursor position.
155+ */
156+ len = PyUnicode_GET_LENGTH (intermediate );
157+ if (resize_buffer (self , len ) < 0 ) {
158+ Py_DECREF (intermediate );
159+ return -1 ;
160+ }
161+ if (!PyUnicode_AsUCS4 (intermediate , self -> buf , len , 0 )) {
162+ Py_DECREF (intermediate );
163+ return -1 ;
164+ }
165+
166+ Py_DECREF (intermediate );
167+ return 0 ;
168+ }
169+
105170/* Internal routine for writing a whole PyUnicode object to the buffer of a
106171 StringIO object. Returns 0 on success, or -1 on error. */
107172static Py_ssize_t
@@ -136,7 +201,6 @@ write_str(stringio *self, PyObject *obj)
136201 return -1 ;
137202 }
138203 len = PyUnicode_GET_LENGTH (decoded );
139-
140204 assert (len >= 0 );
141205
142206 /* This overflow check is not strictly necessary. However, it avoids us to
@@ -147,6 +211,17 @@ write_str(stringio *self, PyObject *obj)
147211 "new position too large" );
148212 goto fail ;
149213 }
214+
215+ if (self -> state == STATE_ACCUMULATING ) {
216+ if (self -> string_size == self -> pos ) {
217+ if (_PyAccu_Accumulate (& self -> accu , decoded ))
218+ goto fail ;
219+ goto success ;
220+ }
221+ if (realize (self ))
222+ goto fail ;
223+ }
224+
150225 if (self -> pos + len > self -> string_size ) {
151226 if (resize_buffer (self , self -> pos + len ) < 0 )
152227 goto fail ;
@@ -174,6 +249,7 @@ write_str(stringio *self, PyObject *obj)
174249 0 ))
175250 goto fail ;
176251
252+ success :
177253 /* Set the new length of the internal string if it has changed. */
178254 self -> pos += len ;
179255 if (self -> string_size < self -> pos )
@@ -195,6 +271,8 @@ stringio_getvalue(stringio *self)
195271{
196272 CHECK_INITIALIZED (self );
197273 CHECK_CLOSED (self );
274+ if (self -> state == STATE_ACCUMULATING )
275+ return make_intermediate (self );
198276 return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND , self -> buf ,
199277 self -> string_size );
200278}
@@ -251,6 +329,14 @@ stringio_read(stringio *self, PyObject *args)
251329 size = 0 ;
252330 }
253331
332+ /* Optimization for seek(0); read() */
333+ if (self -> state == STATE_ACCUMULATING && self -> pos == 0 && size == n ) {
334+ PyObject * result = make_intermediate (self );
335+ self -> pos = self -> string_size ;
336+ return result ;
337+ }
338+
339+ ENSURE_REALIZED (self );
254340 output = self -> buf + self -> pos ;
255341 self -> pos += size ;
256342 return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND , output , size );
@@ -301,6 +387,7 @@ stringio_readline(stringio *self, PyObject *args)
301387 if (!PyArg_ParseTuple (args , "|O:readline" , & arg ))
302388 return NULL ;
303389 CHECK_CLOSED (self );
390+ ENSURE_REALIZED (self );
304391
305392 if (PyNumber_Check (arg )) {
306393 limit = PyNumber_AsSsize_t (arg , PyExc_OverflowError );
@@ -322,6 +409,7 @@ stringio_iternext(stringio *self)
322409
323410 CHECK_INITIALIZED (self );
324411 CHECK_CLOSED (self );
412+ ENSURE_REALIZED (self );
325413
326414 if (Py_TYPE (self ) == & PyStringIO_Type ) {
327415 /* Skip method call overhead for speed */
@@ -392,6 +480,7 @@ stringio_truncate(stringio *self, PyObject *args)
392480 }
393481
394482 if (size < self -> string_size ) {
483+ ENSURE_REALIZED (self );
395484 if (resize_buffer (self , size ) < 0 )
396485 return NULL ;
397486 self -> string_size = size ;
@@ -492,6 +581,7 @@ stringio_close(stringio *self)
492581 /* Free up some memory */
493582 if (resize_buffer (self , 0 ) < 0 )
494583 return NULL ;
584+ _PyAccu_Destroy (& self -> accu );
495585 Py_CLEAR (self -> readnl );
496586 Py_CLEAR (self -> writenl );
497587 Py_CLEAR (self -> decoder );
@@ -521,6 +611,7 @@ stringio_dealloc(stringio *self)
521611 PyMem_Free (self -> buf );
522612 self -> buf = NULL ;
523613 }
614+ _PyAccu_Destroy (& self -> accu );
524615 Py_CLEAR (self -> readnl );
525616 Py_CLEAR (self -> writenl );
526617 Py_CLEAR (self -> decoder );
@@ -559,6 +650,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
559650 PyObject * value = NULL ;
560651 PyObject * newline_obj = NULL ;
561652 char * newline = "\n" ;
653+ Py_ssize_t value_len ;
562654
563655 if (!PyArg_ParseTupleAndKeywords (args , kwds , "|OO:__init__" , kwlist ,
564656 & value , & newline_obj ))
@@ -600,6 +692,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
600692
601693 self -> ok = 0 ;
602694
695+ _PyAccu_Destroy (& self -> accu );
603696 Py_CLEAR (self -> readnl );
604697 Py_CLEAR (self -> writenl );
605698 Py_CLEAR (self -> decoder );
@@ -636,19 +729,27 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
636729 /* Now everything is set up, resize buffer to size of initial value,
637730 and copy it */
638731 self -> string_size = 0 ;
639- if (value && value != Py_None ) {
640- Py_ssize_t len = PyUnicode_GetSize (value );
732+ if (value && value != Py_None )
733+ value_len = PyUnicode_GetSize (value );
734+ else
735+ value_len = 0 ;
736+ if (value_len > 0 ) {
641737 /* This is a heuristic, for newline translation might change
642738 the string length. */
643- if (resize_buffer (self , len ) < 0 )
739+ if (resize_buffer (self , 0 ) < 0 )
644740 return -1 ;
741+ self -> state = STATE_REALIZED ;
645742 self -> pos = 0 ;
646743 if (write_str (self , value ) < 0 )
647744 return -1 ;
648745 }
649746 else {
747+ /* Empty stringio object, we can start by accumulating */
650748 if (resize_buffer (self , 0 ) < 0 )
651749 return -1 ;
750+ if (_PyAccu_Init (& self -> accu ))
751+ return -1 ;
752+ self -> state = STATE_ACCUMULATING ;
652753 }
653754 self -> pos = 0 ;
654755
0 commit comments