@@ -647,6 +647,8 @@ typedef struct
647647 char telling ;
648648 /* Specialized encoding func (see below) */
649649 encodefunc_t encodefunc ;
650+ /* Whether or not it's the start of the stream */
651+ char encoding_start_of_stream ;
650652
651653 /* Reads and writes are internally buffered in order to speed things up.
652654 However, any read will first flush the write buffer if it isn't empty.
@@ -707,21 +709,50 @@ utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
707709static PyObject *
708710utf16_encode (PyTextIOWrapperObject * self , PyObject * text )
709711{ /* Encode text as UTF-16, choosing the path from self->encoding_start_of_stream. */
710- PyObject * res ;
711- res = PyUnicode_EncodeUTF16 (PyUnicode_AS_UNICODE (text ),
712- PyUnicode_GET_SIZE (text ),
713- PyBytes_AS_STRING (self -> errors ), 0 );
714- if (res == NULL )
715- return NULL ;
716- /* Next writes will skip the BOM and use native byte ordering */
712+ if (!self -> encoding_start_of_stream ) {
713+ /* Not at the start of the stream: skip the BOM and encode in native byte order */
717714#if defined(WORDS_BIGENDIAN )
718- self -> encodefunc = ( encodefunc_t ) utf16be_encode ;
715+ return utf16be_encode ( self , text ) ;
719716#else
720- self -> encodefunc = ( encodefunc_t ) utf16le_encode ;
717+ return utf16le_encode ( self , text ) ;
721718#endif
722- return res ;
719+ }
720+ return PyUnicode_EncodeUTF16 (PyUnicode_AS_UNICODE (text ),
721+ PyUnicode_GET_SIZE (text ),
722+ PyBytes_AS_STRING (self -> errors ), 0 ); /* start of stream: byteorder 0 (presumably native order with a BOM -- confirm against the C-API docs) */
723723}
724724
725+ static PyObject *
726+ utf32be_encode (PyTextIOWrapperObject * self , PyObject * text )
727+ { /* Encode text as UTF-32 using the error handler stored in self->errors. */
728+ return PyUnicode_EncodeUTF32 (PyUnicode_AS_UNICODE (text ),
729+ PyUnicode_GET_SIZE (text ),
730+ PyBytes_AS_STRING (self -> errors ), 1 ); /* byteorder 1: the big-endian variant (presumably no BOM -- confirm against the C-API docs) */
731+ }
732+
733+ static PyObject *
734+ utf32le_encode (PyTextIOWrapperObject * self , PyObject * text )
735+ { /* Encode text as UTF-32 using the error handler stored in self->errors. */
736+ return PyUnicode_EncodeUTF32 (PyUnicode_AS_UNICODE (text ),
737+ PyUnicode_GET_SIZE (text ),
738+ PyBytes_AS_STRING (self -> errors ), -1 ); /* byteorder -1: the little-endian variant (presumably no BOM -- confirm against the C-API docs) */
739+ }
740+
741+ static PyObject *
742+ utf32_encode (PyTextIOWrapperObject * self , PyObject * text )
743+ { /* Encode text as UTF-32, choosing the path from self->encoding_start_of_stream. */
744+ if (!self -> encoding_start_of_stream ) {
745+ /* Not at the start of the stream: skip the BOM and encode in native byte order */
746+ #if defined(WORDS_BIGENDIAN )
747+ return utf32be_encode (self , text );
748+ #else
749+ return utf32le_encode (self , text );
750+ #endif
751+ }
752+ return PyUnicode_EncodeUTF32 (PyUnicode_AS_UNICODE (text ),
753+ PyUnicode_GET_SIZE (text ),
754+ PyBytes_AS_STRING (self -> errors ), 0 ); /* start of stream: byteorder 0 (presumably native order with a BOM -- confirm against the C-API docs) */
755+ }
725756
726757static PyObject *
727758utf8_encode (PyTextIOWrapperObject * self , PyObject * text )
@@ -749,10 +780,13 @@ typedef struct {
749780static encodefuncentry encodefuncs [] = { /* encoding name paired with its specialized encode function */
750781 {"ascii" , (encodefunc_t ) ascii_encode },
751782 {"iso8859-1" , (encodefunc_t ) latin1_encode },
783+ {"utf-8" , (encodefunc_t ) utf8_encode },
752784 {"utf-16-be" , (encodefunc_t ) utf16be_encode },
753785 {"utf-16-le" , (encodefunc_t ) utf16le_encode },
754786 {"utf-16" , (encodefunc_t ) utf16_encode },
755- {"utf-8" , (encodefunc_t ) utf8_encode },
787+ {"utf-32-be" , (encodefunc_t ) utf32be_encode },
788+ {"utf-32-le" , (encodefunc_t ) utf32le_encode },
789+ {"utf-32" , (encodefunc_t ) utf32_encode },
756790 {NULL , NULL } /* all-NULL last entry; presumably the end marker for lookups -- confirm against the lookup code */
757791};
758792
@@ -978,6 +1012,33 @@ TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
9781012 self -> seekable = self -> telling = PyObject_IsTrue (res );
9791013 Py_DECREF (res );
9801014
1015+ self -> encoding_start_of_stream = 0 ;
1016+ if (self -> seekable && self -> encoder ) {
1017+ PyObject * cookieObj ;
1018+ int cmp ;
1019+
1020+ self -> encoding_start_of_stream = 1 ;
1021+
1022+ cookieObj = PyObject_CallMethodObjArgs (buffer , _PyIO_str_tell , NULL );
1023+ if (cookieObj == NULL )
1024+ goto error ;
1025+
1026+ cmp = PyObject_RichCompareBool (cookieObj , _PyIO_zero , Py_EQ );
1027+ Py_DECREF (cookieObj );
1028+ if (cmp < 0 ) {
1029+ goto error ;
1030+ }
1031+
1032+ if (cmp == 0 ) {
1033+ self -> encoding_start_of_stream = 0 ;
1034+ res = PyObject_CallMethodObjArgs (self -> encoder , _PyIO_str_setstate ,
1035+ _PyIO_zero , NULL );
1036+ if (res == NULL )
1037+ goto error ;
1038+ Py_DECREF (res );
1039+ }
1040+ }
1041+
9811042 self -> ok = 1 ;
9821043 return 0 ;
9831044
@@ -1192,8 +1253,10 @@ TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
11921253 needflush = 1 ;
11931254
11941255 /* XXX What if we were just reading? */
1195- if (self -> encodefunc != NULL )
1256+ if (self -> encodefunc != NULL ) {
11961257 b = (* self -> encodefunc )((PyObject * ) self , text );
1258+ self -> encoding_start_of_stream = 0 ;
1259+ }
11971260 else
11981261 b = PyObject_CallMethodObjArgs (self -> encoder ,
11991262 _PyIO_str_encode , text , NULL );
@@ -1847,24 +1910,38 @@ _TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
18471910 return 0 ;
18481911}
18491912
1913+ static int
1914+ _TextIOWrapper_encoder_setstate (PyTextIOWrapperObject * self ,
1915+ CookieStruct * cookie )
1916+ { /* Bring the encoder and the start-of-stream flag in line with a seek cookie; returns 0 on success, -1 if the encoder call fails. */
1917+ PyObject * res ;
1918+ /* Mirrors _TextIOWrapper_decoder_setstate() above: start_pos == 0 with dec_flags == 0 is treated as the start of the stream. */
1919+ if (cookie -> start_pos == 0 && cookie -> dec_flags == 0 ) {
1920+ res = PyObject_CallMethodObjArgs (self -> encoder , _PyIO_str_reset , NULL );
1921+ self -> encoding_start_of_stream = 1 ; /* utf16_encode/utf32_encode will take their start-of-stream path on the next call */
1922+ }
1923+ else {
1924+ res = PyObject_CallMethodObjArgs (self -> encoder , _PyIO_str_setstate ,
1925+ _PyIO_zero , NULL );
1926+ self -> encoding_start_of_stream = 0 ; /* mid-stream: utf16_encode/utf32_encode will take their BOM-skipping path */
1927+ }
1928+ if (res == NULL )
1929+ return -1 ; /* note: encoding_start_of_stream has already been updated at this point */
1930+ Py_DECREF (res );
1931+ return 0 ;
1932+ }
1933+
18501934static PyObject *
18511935TextIOWrapper_seek (PyTextIOWrapperObject * self , PyObject * args )
18521936{
18531937 PyObject * cookieObj , * posobj ;
18541938 CookieStruct cookie ;
18551939 int whence = 0 ;
1856- static PyObject * zero = NULL ;
18571940 PyObject * res ;
18581941 int cmp ;
18591942
18601943 CHECK_INITIALIZED (self );
18611944
1862- if (zero == NULL ) {
1863- zero = PyLong_FromLong (0L );
1864- if (zero == NULL )
1865- return NULL ;
1866- }
1867-
18681945 if (!PyArg_ParseTuple (args , "O|i:seek" , & cookieObj , & whence ))
18691946 return NULL ;
18701947 CHECK_CLOSED (self );
@@ -1879,7 +1956,7 @@ TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
18791956
18801957 if (whence == 1 ) {
18811958 /* seek relative to current position */
1882- cmp = PyObject_RichCompareBool (cookieObj , zero , Py_EQ );
1959+ cmp = PyObject_RichCompareBool (cookieObj , _PyIO_zero , Py_EQ );
18831960 if (cmp < 0 )
18841961 goto fail ;
18851962
@@ -1900,7 +1977,7 @@ TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
19001977 else if (whence == 2 ) {
19011978 /* seek relative to end of file */
19021979
1903- cmp = PyObject_RichCompareBool (cookieObj , zero , Py_EQ );
1980+ cmp = PyObject_RichCompareBool (cookieObj , _PyIO_zero , Py_EQ );
19041981 if (cmp < 0 )
19051982 goto fail ;
19061983
@@ -1934,7 +2011,7 @@ TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
19342011 goto fail ;
19352012 }
19362013
1937- cmp = PyObject_RichCompareBool (cookieObj , zero , Py_LT );
2014+ cmp = PyObject_RichCompareBool (cookieObj , _PyIO_zero , Py_LT );
19382015 if (cmp < 0 )
19392016 goto fail ;
19402017
@@ -2013,6 +2090,11 @@ TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
20132090 goto fail ;
20142091 }
20152092
2093+ /* Finally, reset the encoder (merely useful for proper BOM handling) */
2094+ if (self -> encoder ) {
2095+ if (_TextIOWrapper_encoder_setstate (self , & cookie ) < 0 )
2096+ goto fail ;
2097+ }
20162098 return cookieObj ;
20172099 fail :
20182100 Py_XDECREF (cookieObj );
0 commit comments