@@ -132,6 +132,23 @@ char _PyIO_get_console_type(PyObject *path_or_fd) {
132
132
return m ;
133
133
}
134
134
135
/* Return the largest length <= len at which the first bytes of buf can be
 * cut without splitting a multibyte UTF-8 sequence at the end.
 *
 * buf: UTF-8 encoded bytes; only the last (up to) three bytes are inspected.
 * len: number of bytes of buf under consideration.
 *
 * The result is len itself when the data already ends on a character
 * boundary (ASCII byte, or a complete 2-, 3- or 4-byte sequence), and
 * len minus the size of the trailing partial sequence when the data ends
 * in the middle of a character.
 *
 * This function never returns 0 for a non-zero len: if removing the partial
 * sequence would leave nothing, or if the tail is not valid UTF-8, the
 * original len is returned unchanged and the consumer of the bytes is left
 * to deal with it.
 */
static DWORD
_find_last_utf8_boundary(const char *buf, DWORD len)
{
    /* Inspect raw byte values; plain char may be signed. */
    const unsigned char *bytes = (const unsigned char *)buf;

    /* A lead byte of an incomplete sequence can be at most 3 bytes from the
     * end.  "count < len" keeps the first byte of the buffer out of the
     * scan so that the result can never become 0. */
    for (DWORD count = 1; count <= 3 && count < len; count++) {
        unsigned char c = bytes[len - count];
        DWORD expected;

        if (c < 0x80) {
            /* ASCII byte: either it is the last byte (clean boundary) or
             * it is followed by stray continuation bytes (invalid). */
            return len;
        }
        if (c < 0xc0) {
            /* Continuation byte (10xxxxxx): keep looking for the lead. */
            continue;
        }

        /* Lead byte: derive how long the sequence is supposed to be. */
        if (c < 0xe0) {
            expected = 2;
        }
        else if (c < 0xf0) {
            expected = 3;
        }
        else if (c < 0xf8) {
            expected = 4;
        }
        else {
            /* 0xf8..0xff never appear in UTF-8; treat as invalid. */
            return len;
        }

        if (count < expected) {
            /* Trailing sequence is incomplete: cut just before it. */
            return len - count;
        }
        /* Sequence is complete (or over-long, hence invalid). */
        return len;
    }

    /* Three continuation bytes at the end (a complete 4-byte sequence or
     * invalid data), or the buffer is too short to shorten further. */
    return len;
}
135
152
136
153
/*[clinic input]
137
154
module _io
@@ -954,7 +971,7 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, Py_buffer *b)
954
971
{
955
972
BOOL res = TRUE;
956
973
wchar_t * wbuf ;
957
- DWORD len , wlen , orig_len , n = 0 ;
974
+ DWORD len , wlen , n = 0 ;
958
975
HANDLE handle ;
959
976
960
977
if (self -> fd == -1 )
@@ -984,21 +1001,8 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, Py_buffer *b)
984
1001
have to reduce and recalculate. */
985
1002
while (wlen > 32766 / sizeof (wchar_t )) {
986
1003
len /= 2 ;
987
- orig_len = len ;
988
- /* Reduce the length until we hit the final byte of a UTF-8 sequence
989
- * (top bit is unset). Fix for github issue 82052.
990
- */
991
- while (len > 0 && (((char * )b -> buf )[len - 1 ] & 0x80 ) != 0 )
992
- -- len ;
993
- /* If we hit a length of 0, something has gone wrong. This shouldn't
994
- * be possible, as valid UTF-8 can have at most 3 non-final bytes
995
- * before a final one, and our buffer is way longer than that.
996
- * But to be on the safe side, if we hit this issue we just restore
997
- * the original length and let the console API sort it out.
998
- */
999
- if (len == 0 ) {
1000
- len = orig_len ;
1001
- }
1004
+ /* Fix for github issues gh-110913 and gh-82052. */
1005
+ len = _find_last_utf8_boundary (b -> buf , len );
1002
1006
wlen = MultiByteToWideChar (CP_UTF8 , 0 , b -> buf , len , NULL , 0 );
1003
1007
}
1004
1008
Py_END_ALLOW_THREADS
0 commit comments