@@ -3073,6 +3073,140 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
30733073 return NULL ;
30743074}
30753075
/* Locate the first wide character of wstr that wcstombs() refuses to
   convert.

   The string is fed to wcstombs() one character at a time (one surrogate
   pair at a time when wchar_t is 16 bits wide) through a small
   NUL-terminated scratch buffer.

   Return the index, counted in wchar_t units from the start of wstr, of the
   first unit for which wcstombs() returns (size_t)-1.  Return 0 when every
   unit converts, so a result of 0 is ambiguous: it means either "error at
   index 0" or "no error found".

   errno is restored to the value it had on entry before returning. */
static size_t
wcstombs_errorpos(const wchar_t *wstr)
{
#if SIZEOF_WCHAR_T == 2
    wchar_t unit[3];            /* surrogate pair + terminator */
#else
    wchar_t unit[2];            /* one character + terminator */
#endif
    char scratch[MB_LEN_MAX];
    const wchar_t *cursor = wstr;
    const int entry_errno = errno;
    size_t position = 0;

    while (*cursor != L'\0') {
        const wchar_t *mark = cursor;

        unit[0] = *cursor++;
        unit[1] = L'\0';
#if SIZEOF_WCHAR_T == 2
        /* unit[0] is not the terminator, so *cursor is still inside the
           string (at worst it is the terminating NUL). */
        if (Py_UNICODE_IS_HIGH_SURROGATE(unit[0])
            && Py_UNICODE_IS_LOW_SURROGATE(*cursor))
        {
            unit[1] = *cursor++;
            unit[2] = L'\0';
        }
#endif
        if (wcstombs(scratch, unit, sizeof(scratch)) == (size_t)-1) {
            position = (size_t)(mark - wstr);
            break;
        }
    }

    /* The probing calls above may have modified errno; put it back so the
       caller still sees the value from its own failed conversion. */
    errno = entry_errno;
    return position;
}
3127+
3128+ PyObject *
3129+ PyUnicode_EncodeLocale (PyObject * unicode , int surrogateescape )
3130+ {
3131+ Py_ssize_t wlen , wlen2 ;
3132+ wchar_t * wstr ;
3133+ PyObject * bytes = NULL ;
3134+ char * errmsg ;
3135+ PyObject * exc ;
3136+ size_t error_pos ;
3137+
3138+ wstr = PyUnicode_AsWideCharString (unicode , & wlen );
3139+ if (wstr == NULL )
3140+ return NULL ;
3141+
3142+ wlen2 = wcslen (wstr );
3143+ if (wlen2 != wlen ) {
3144+ PyMem_Free (wstr );
3145+ PyErr_SetString (PyExc_TypeError , "embedded null character" );
3146+ return NULL ;
3147+ }
3148+
3149+ if (surrogateescape ) {
3150+ /* locale encoding with surrogateescape */
3151+ char * str ;
3152+
3153+ str = _Py_wchar2char (wstr , & error_pos );
3154+ if (str == NULL ) {
3155+ if (error_pos == (size_t )-1 ) {
3156+ PyErr_NoMemory ();
3157+ PyMem_Free (wstr );
3158+ return NULL ;
3159+ }
3160+ else {
3161+ goto encode_error ;
3162+ }
3163+ }
3164+ PyMem_Free (wstr );
3165+
3166+ bytes = PyBytes_FromString (str );
3167+ PyMem_Free (str );
3168+ }
3169+ else {
3170+ size_t len , len2 ;
3171+
3172+ len = wcstombs (NULL , wstr , 0 );
3173+ if (len == (size_t )-1 ) {
3174+ error_pos = wcstombs_errorpos (wstr );
3175+ goto encode_error ;
3176+ }
3177+
3178+ bytes = PyBytes_FromStringAndSize (NULL , len );
3179+ if (bytes == NULL ) {
3180+ PyMem_Free (wstr );
3181+ return NULL ;
3182+ }
3183+
3184+ len2 = wcstombs (PyBytes_AS_STRING (bytes ), wstr , len + 1 );
3185+ if (len2 == (size_t )-1 || len2 > len ) {
3186+ error_pos = wcstombs_errorpos (wstr );
3187+ goto encode_error ;
3188+ }
3189+ PyMem_Free (wstr );
3190+ }
3191+ return bytes ;
3192+
3193+ encode_error :
3194+ errmsg = strerror (errno );
3195+ assert (errmsg != NULL );
3196+ if (errmsg == NULL )
3197+ errmsg = "wcstombs() encountered an unencodable wide character" ;
3198+ PyMem_Free (wstr );
3199+ Py_XDECREF (bytes );
3200+
3201+ exc = NULL ;
3202+ raise_encode_exception (& exc ,
3203+ "locale" , unicode ,
3204+ error_pos , error_pos + 1 ,
3205+ errmsg );
3206+ Py_XDECREF (exc );
3207+ return NULL ;
3208+ }
3209+
30763210PyObject *
30773211PyUnicode_EncodeFSDefault (PyObject * unicode )
30783212{
@@ -3097,38 +3231,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
30973231 "surrogateescape" );
30983232 }
30993233 else {
3100- /* locale encoding with surrogateescape */
3101- wchar_t * wchar ;
3102- char * bytes ;
3103- PyObject * bytes_obj ;
3104- size_t error_pos ;
3105-
3106- wchar = PyUnicode_AsWideCharString (unicode , NULL );
3107- if (wchar == NULL )
3108- return NULL ;
3109- bytes = _Py_wchar2char (wchar , & error_pos );
3110- if (bytes == NULL ) {
3111- if (error_pos != (size_t )-1 ) {
3112- char * errmsg = strerror (errno );
3113- PyObject * exc = NULL ;
3114- if (errmsg == NULL )
3115- errmsg = "Py_wchar2char() failed" ;
3116- raise_encode_exception (& exc ,
3117- "filesystemencoding" , unicode ,
3118- error_pos , error_pos + 1 ,
3119- errmsg );
3120- Py_XDECREF (exc );
3121- }
3122- else
3123- PyErr_NoMemory ();
3124- PyMem_Free (wchar );
3125- return NULL ;
3126- }
3127- PyMem_Free (wchar );
3128-
3129- bytes_obj = PyBytes_FromString (bytes );
3130- PyMem_Free (bytes );
3131- return bytes_obj ;
3234+ return PyUnicode_EncodeLocale (unicode , 1 );
31323235 }
31333236#endif
31343237}
0 commit comments