@@ -105,20 +105,21 @@ FILE *
105105_Py_wfopen (const wchar_t * path , const wchar_t * mode )
106106{
107107#ifndef MS_WINDOWS
108- char cpath [PATH_MAX ];
108+ FILE * f ;
109+ char * cpath ;
109110 char cmode [10 ];
110111 size_t r ;
111- r = wcstombs (cpath , path , PATH_MAX );
112- if (r == (size_t )-1 || r >= PATH_MAX ) {
113- errno = EINVAL ;
114- return NULL ;
115- }
116112 r = wcstombs (cmode , mode , 10 );
117113 if (r == (size_t )-1 || r >= 10 ) {
118114 errno = EINVAL ;
119115 return NULL ;
120116 }
121- return fopen (cpath , cmode );
117+ cpath = _Py_wchar2char (path );
118+ if (cpath == NULL )
119+ return NULL ;
120+ f = fopen (cpath , cmode );
121+ PyMem_Free (cpath );
122+ return f ;
122123#else
123124 return _wfopen (path , mode );
124125#endif
@@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
734735}
735736
736737
/* Encode a wide character string to a byte string using the locale encoding
   (via wcstombs()), applying the surrogateescape convention: each character
   in the range U+DC80..U+DCFF is emitted as the single byte 0x80..0xFF
   instead of being passed to wcstombs().

   This function is the reverse of _Py_char2wchar().

   Return a newly allocated, nul-terminated byte string that the caller must
   release with PyMem_Free(). Return NULL if a character cannot be converted
   by wcstombs() or if the memory allocation fails. */
char *
_Py_wchar2char(const wchar_t *text)
{
    const size_t nchars = wcslen(text);
    wchar_t single[2];      /* one character + terminator, fed to wcstombs() */
    char *buffer = NULL;    /* start of the output, allocated before pass 2 */
    char *out = NULL;       /* write cursor inside buffer during pass 2 */
    size_t remaining = 0;   /* pass 1: bytes needed; pass 2: bytes left */
    size_t idx, nbytes;
    int writing;

    single[1] = 0;

    /* Two passes over the input:
         writing == 0: only measure how many bytes the output requires;
         writing == 1: allocate the buffer and emit the bytes. */
    for (writing = 0; writing <= 1; writing++) {
        if (writing) {
            remaining += 1;     /* room for the trailing nul byte */
            buffer = PyMem_Malloc(remaining);
            if (buffer == NULL)
                return NULL;
            out = buffer;
        }

        for (idx = 0; idx < nchars; idx++) {
            const wchar_t ch = text[idx];

            if (ch >= 0xdc80 && ch <= 0xdcff) {
                /* Escaped byte (UTF-8b surrogate): always exactly one
                   output byte, obtained by dropping the 0xDC00 offset. */
                if (writing) {
                    *out++ = ch - 0xdc00;
                    remaining--;
                }
                else {
                    remaining++;
                }
                continue;
            }

            /* Ordinary character: convert it alone so that its encoded
               length is known. */
            single[0] = ch;
            if (writing)
                nbytes = wcstombs(out, single, remaining);
            else
                nbytes = wcstombs(NULL, single, 0);

            if (nbytes == (size_t)-1) {
                /* Unencodable character; the buffer only exists during
                   the writing pass. */
                if (buffer != NULL)
                    PyMem_Free(buffer);
                return NULL;
            }

            if (writing) {
                out += nbytes;
                remaining -= nbytes;
            }
            else {
                remaining += nbytes;
            }
        }
    }

    *out = 0;
    return buffer;
}
804+
805+
806+ /* Decode a byte string from the locale encoding with the
807+ surrogateescape error handler (undecodable bytes are decoded as characters
808+ in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
809+ character, escape the bytes using the surrogateescape error handler instead
810+ of decoding them.
811+
812+ Use _Py_wchar2char() to encode the character string back to a byte string.
813+
814+ Return a pointer to a newly allocated (wide) character string (use
815+ PyMem_Free() to free the memory), or NULL on error (conversion error or
816+ memory error). */
737817wchar_t *
738818_Py_char2wchar (char * arg )
739819{
0 commit comments