@@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
512512 else
513513 Py_UNREACHABLE ();
514514 }
515- #endif
515+ #endif /* USE_TCL_UNICODE */
516516 const char * s = Tcl_GetStringFromObj (value , & len );
517517 return unicodeFromTclStringAndSize (s , len );
518518}
@@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
10181018 PyErr_SetString (PyExc_OverflowError , "string is too long" );
10191019 return NULL ;
10201020 }
1021- if (PyUnicode_IS_ASCII (value )) {
1021+ if (PyUnicode_IS_ASCII (value ) &&
1022+ strlen (PyUnicode_DATA (value )) == (size_t )PyUnicode_GET_LENGTH (value ))
1023+ {
10221024 return Tcl_NewStringObj ((const char * )PyUnicode_DATA (value ),
10231025 (int )size );
10241026 }
@@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
10331035 "surrogatepass" , NATIVE_BYTEORDER );
10341036 else
10351037 Py_UNREACHABLE ();
1036- #else
1037- encoded = _PyUnicode_AsUTF8String (value , "surrogateescape" );
1038- #endif
10391038 if (!encoded ) {
10401039 return NULL ;
10411040 }
@@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
10451044 PyErr_SetString (PyExc_OverflowError , "string is too long" );
10461045 return NULL ;
10471046 }
1048- #if USE_TCL_UNICODE
10491047 result = Tcl_NewUnicodeObj ((const Tcl_UniChar * )PyBytes_AS_STRING (encoded ),
10501048 (int )(size / sizeof (Tcl_UniChar )));
10511049#else
1050+ encoded = _PyUnicode_AsUTF8String (value , "surrogateescape" );
1051+ if (!encoded ) {
1052+ return NULL ;
1053+ }
1054+ size = PyBytes_GET_SIZE (encoded );
1055+ if (strlen (PyBytes_AS_STRING (encoded )) != (size_t )size ) {
1056+ /* The string contains embedded null characters.
1057+ * Tcl needs a null character to be represented as \xc0\x80 in
1058+ * the Modified UTF-8 encoding. Otherwise the string can be
1059+ * truncated in some internal operations.
1060+ *
1061+ * NOTE: stringlib_replace() could be used here, but optimizing
1062+ * this obscure case isn't worth it unless stringlib_replace()
1063+ * was already exposed in the C API for other reasons. */
1064+ Py_SETREF (encoded ,
1065+ PyObject_CallMethod (encoded , "replace" , "y#y#" ,
1066+ "\0" , (Py_ssize_t )1 ,
1067+ "\xc0\x80" , (Py_ssize_t )2 ));
1068+ if (!encoded ) {
1069+ return NULL ;
1070+ }
1071+ size = PyBytes_GET_SIZE (encoded );
1072+ }
1073+ if (size > INT_MAX ) {
1074+ Py_DECREF (encoded );
1075+ PyErr_SetString (PyExc_OverflowError , "string is too long" );
1076+ return NULL ;
1077+ }
10521078 result = Tcl_NewStringObj (PyBytes_AS_STRING (encoded ), (int )size );
1053- #endif
1079+ #endif /* USE_TCL_UNICODE */
10541080 Py_DECREF (encoded );
10551081 return result ;
10561082 }
0 commit comments