Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 47fcb5b

Browse files
author
Victor Stinner
committed
Issue #9542: Create PyUnicode_FSDecoder() function
It's a ParseTuple converter: decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. * Don't mention the surrogateescape error handler in the comments or the documentation; refer to PyUnicode_DecodeFSDefaultAndSize() and PyUnicode_EncodeFSDefault() instead, because these functions use the strict error handler for the mbcs encoding (on Windows). * Remove the PyUnicode_FSConverter() comment in unicodeobject.c to avoid inconsistency with unicodeobject.h.
1 parent f2e08b3 commit 47fcb5b

4 files changed

Lines changed: 70 additions & 10 deletions

File tree

Doc/c-api/unicode.rst

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function:
380380

381381
.. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result)
382382

383-
Convert *obj* into *result*, using :cdata:`Py_FileSystemDefaultEncoding`,
384-
and the ``"surrogateescape"`` error handler. *result* must be a
385-
``PyObject*``, return a :func:`bytes` object which must be released if it
386-
is no longer used.
383+
ParseTuple converter: encode :class:`str` objects to :class:`bytes` using
384+
:cfunc:`PyUnicode_EncodeFSDefault`; :class:`bytes` objects are output as-is.
385+
*result* must be a :ctype:`PyBytesObject*` which must be released when it is
386+
no longer used.
387387

388388
.. versionadded:: 3.1
389389

390+
To decode file names during argument parsing, the ``"O&"`` converter should be
391+
used, passing :func:`PyUnicode_FSDecoder` as the conversion function:
392+
393+
.. cfunction:: int PyUnicode_FSDecoder(PyObject* obj, void* result)
394+
395+
ParseTuple converter: decode :class:`bytes` objects to :class:`str` using
396+
:cfunc:`PyUnicode_DecodeFSDefaultAndSize`; :class:`str` objects are output
397+
as-is. *result* must be a :ctype:`PyUnicodeObject*` which must be released
398+
when it is no longer used.
399+
400+
.. versionadded:: 3.2
401+
390402
.. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
391403

392404
Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding`

Include/unicodeobject.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
200200
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
201201
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
202202
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
203+
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
203204
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
204205
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
205206
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
300301
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
301302
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
302303
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
304+
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
303305
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
304306
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
305307
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
12391241

12401242
/* --- File system encoding ---------------------------------------------- */
12411243

1242-
/* ParseTuple converter which converts a Unicode object into the file
1243-
system encoding as a bytes object, using the "surrogateescape" error
1244-
handler; bytes objects are output as-is. */
1244+
/* ParseTuple converter: encode str objects to bytes using
1245+
PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
12451246

12461247
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
12471248

1249+
/* ParseTuple converter: decode bytes objects to unicode using
1250+
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1251+
1252+
PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1253+
12481254
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
12491255
and the "surrogateescape" error handler.
12501256

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #9542: Create PyUnicode_FSDecoder() function, a ParseTuple converter:
16+
decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize();
17+
str objects are output as-is.
18+
1519
- Issue #9203: Computed gotos are now enabled by default on supported
1620
compilers (which are detected by the configure script). They can still
1721
be disabled selectively by specifying --without-computed-gotos.

Objects/unicodeobject.c

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
16521652
}
16531653
}
16541654

1655-
/* Convert the argument to a bytes object, according to the file
1656-
system encoding. The addr param must be a PyObject**.
1657-
This is designed to be used with "O&" in PyArg_Parse APIs. */
16581655

16591656
int
16601657
PyUnicode_FSConverter(PyObject* arg, void* addr)
@@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
16961693
}
16971694

16981695

1696+
int
1697+
PyUnicode_FSDecoder(PyObject* arg, void* addr)
1698+
{
1699+
PyObject *output = NULL;
1700+
Py_ssize_t size;
1701+
void *data;
1702+
if (arg == NULL) {
1703+
Py_DECREF(*(PyObject**)addr);
1704+
return 1;
1705+
}
1706+
if (PyUnicode_Check(arg)) {
1707+
output = arg;
1708+
Py_INCREF(output);
1709+
}
1710+
else {
1711+
arg = PyBytes_FromObject(arg);
1712+
if (!arg)
1713+
return 0;
1714+
output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg),
1715+
PyBytes_GET_SIZE(arg));
1716+
Py_DECREF(arg);
1717+
if (!output)
1718+
return 0;
1719+
if (!PyUnicode_Check(output)) {
1720+
Py_DECREF(output);
1721+
PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
1722+
return 0;
1723+
}
1724+
}
1725+
size = PyUnicode_GET_SIZE(output);
1726+
data = PyUnicode_AS_UNICODE(output);
1727+
if (size != Py_UNICODE_strlen(data)) {
1728+
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
1729+
Py_DECREF(output);
1730+
return 0;
1731+
}
1732+
*(PyObject**)addr = output;
1733+
return Py_CLEANUP_SUPPORTED;
1734+
}
1735+
1736+
16991737
char*
17001738
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
17011739
{

0 commit comments

Comments
 (0)