Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f2e08b3

Browse files
author
Victor Stinner
committed
Create _Py_wchar2char() function, reverse of _Py_char2wchar()
* Use _Py_wchar2char() in _wstat() and _Py_wfopen()
* Document _Py_char2wchar()
1 parent e9b428f commit f2e08b3

3 files changed

Lines changed: 97 additions & 11 deletions

File tree

Include/Python.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,15 @@
126126
#ifdef __cplusplus
127127
extern "C" {
128128
#endif
129+
129130
/* _Py_Mangle is defined in compile.c */
130131
PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
131132

132133
/* These functions live in main.c */
133134
PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
135+
PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text);
134136
PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode);
137+
135138
#ifdef __cplusplus
136139
}
137140
#endif

Modules/getpath.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION;
139139
static int
140140
_wstat(const wchar_t* path, struct stat *buf)
141141
{
142-
char fname[PATH_MAX];
143-
size_t res = wcstombs(fname, path, sizeof(fname));
144-
if (res == (size_t)-1) {
142+
int err;
143+
char *fname;
144+
fname = _Py_wchar2char(path);
145+
if (fname == NULL) {
145146
errno = EINVAL;
146147
return -1;
147148
}
148-
return stat(fname, buf);
149+
err = stat(fname, buf);
150+
PyMem_Free(fname);
151+
return err;
149152
}
150153
#endif
151154

Modules/main.c

Lines changed: 87 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,21 @@ FILE *
105105
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
106106
{
107107
#ifndef MS_WINDOWS
108-
char cpath[PATH_MAX];
108+
FILE *f;
109+
char *cpath;
109110
char cmode[10];
110111
size_t r;
111-
r = wcstombs(cpath, path, PATH_MAX);
112-
if (r == (size_t)-1 || r >= PATH_MAX) {
113-
errno = EINVAL;
114-
return NULL;
115-
}
116112
r = wcstombs(cmode, mode, 10);
117113
if (r == (size_t)-1 || r >= 10) {
118114
errno = EINVAL;
119115
return NULL;
120116
}
121-
return fopen(cpath, cmode);
117+
cpath = _Py_wchar2char(path);
118+
if (cpath == NULL)
119+
return NULL;
120+
f = fopen(cpath, cmode);
121+
PyMem_Free(cpath);
122+
return f;
122123
#else
123124
return _wfopen(path, mode);
124125
#endif
@@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
734735
}
735736

736737

738+
/* Encode a (wide) character string to the locale encoding with the
739+
surrogateescape error handler (characters in range U+DC80..U+DCFF are
740+
converted to bytes 0x80..0xFF).
741+
742+
This function is the reverse of _Py_char2wchar().
743+
744+
Return a pointer to a newly allocated byte string (use PyMem_Free() to free
745+
the memory), or NULL on error (conversion error or memory error). */
746+
char*
747+
_Py_wchar2char(const wchar_t *text)
748+
{
749+
const size_t len = wcslen(text);
750+
char *result = NULL, *bytes = NULL;
751+
size_t i, size, converted;
752+
wchar_t c, buf[2];
753+
754+
/* The function works in two steps:
755+
1. compute the length of the output buffer in bytes (size)
756+
2. output the bytes */
757+
size = 0;
758+
buf[1] = 0;
759+
while (1) {
760+
for (i=0; i < len; i++) {
761+
c = text[i];
762+
if (c >= 0xdc80 && c <= 0xdcff) {
763+
/* UTF-8b surrogate */
764+
if (bytes != NULL) {
765+
*bytes++ = c - 0xdc00;
766+
size--;
767+
}
768+
else
769+
size++;
770+
continue;
771+
}
772+
else {
773+
buf[0] = c;
774+
if (bytes != NULL)
775+
converted = wcstombs(bytes, buf, size);
776+
else
777+
converted = wcstombs(NULL, buf, 0);
778+
if (converted == (size_t)-1) {
779+
if (result != NULL)
780+
PyMem_Free(result);
781+
return NULL;
782+
}
783+
if (bytes != NULL) {
784+
bytes += converted;
785+
size -= converted;
786+
}
787+
else
788+
size += converted;
789+
}
790+
}
791+
if (result != NULL) {
792+
*bytes = 0;
793+
break;
794+
}
795+
796+
size += 1; /* nul byte at the end */
797+
result = PyMem_Malloc(size);
798+
if (result == NULL)
799+
return NULL;
800+
bytes = result;
801+
}
802+
return result;
803+
}
804+
805+
806+
/* Decode a byte string from the locale encoding with the
807+
surrogateescape error handler (undecodable bytes are decoded as characters
808+
in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
809+
character, escape the bytes using the surrogateescape error handler instead
810+
of decoding them.
811+
812+
Use _Py_wchar2char() to encode the character string back to a byte string.
813+
814+
Return a pointer to a newly allocated (wide) character string (use
815+
PyMem_Free() to free the memory), or NULL on error (conversion error or
816+
memory error). */
737817
wchar_t*
738818
_Py_char2wchar(char* arg)
739819
{

0 commit comments

Comments
 (0)