Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 612344f

Browse files
committed
Change UnicodeDecodeError objects so that the 'object' attribute
is a bytes object. Add 'y' and 'y#' format specifiers that work like 's' and 's#' but only accept bytes objects.
1 parent c2b87a6 commit 612344f

5 files changed

Lines changed: 98 additions & 8 deletions

File tree

Doc/api/utilities.tex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,18 @@ \section{Parsing arguments and building values
424424
compatible objects pass back a reference to the raw internal data
425425
representation.
426426

427+
\item[\samp{y} (bytes object)
428+
{[const char *]}]
429+
This variant on \samp{s} converts a Python bytes object to a C pointer to a
430+
character string. The bytes object must not contain embedded NUL bytes;
431+
if it does, a \exception{TypeError} exception is raised.
432+
433+
\item[\samp{y\#} (bytes object)
434+
{[const char *, int]}]
435+
This variant on \samp{s\#} stores into two C variables, the first one
436+
a pointer to a character string, the second one its length. This only
437+
accepts bytes objects.
438+
427439
\item[\samp{z} (string or \code{None}) {[const char *]}]
428440
Like \samp{s}, but the Python object may also be \code{None}, in
429441
which case the C pointer is set to \NULL.

Doc/ext/extending.tex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,8 +802,10 @@ \section{Building Arbitrary Values
802802
Py_BuildValue("i", 123) 123
803803
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
804804
Py_BuildValue("s", "hello") 'hello'
805+
Py_BuildValue("y", "hello") b'hello'
805806
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
806807
Py_BuildValue("s#", "hello", 4) 'hell'
808+
Py_BuildValue("y#", "hello", 4) b'hell'
807809
Py_BuildValue("()") ()
808810
Py_BuildValue("(i)", 123) (123,)
809811
Py_BuildValue("(ii)", 123, 456) (123, 456)

Objects/exceptions.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value)
12421242
}
12431243

12441244

1245+
static PyObject *
1246+
get_bytes(PyObject *attr, const char *name)
1247+
{
1248+
if (!attr) {
1249+
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
1250+
return NULL;
1251+
}
1252+
1253+
if (!PyBytes_Check(attr)) {
1254+
PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
1255+
return NULL;
1256+
}
1257+
Py_INCREF(attr);
1258+
return attr;
1259+
}
1260+
12451261
static PyObject *
12461262
get_unicode(PyObject *attr, const char *name)
12471263
{
@@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
12801296
PyObject *
12811297
PyUnicodeDecodeError_GetObject(PyObject *exc)
12821298
{
1283-
return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
1299+
return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
12841300
}
12851301

12861302
PyObject *
@@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
13141330
{
13151331
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
13161332
Py_ssize_t size;
1317-
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
1333+
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
13181334
"object");
13191335
if (!obj) return -1;
1320-
size = PyString_GET_SIZE(obj);
1336+
size = PyBytes_GET_SIZE(obj);
13211337
if (*start<0)
13221338
*start = 0;
13231339
if (*start>=size)
@@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
13821398
{
13831399
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
13841400
Py_ssize_t size;
1385-
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
1401+
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
13861402
"object");
13871403
if (!obj) return -1;
1388-
size = PyString_GET_SIZE(obj);
1404+
size = PyBytes_GET_SIZE(obj);
13891405
if (*end<1)
13901406
*end = 1;
13911407
if (*end>size)
@@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
16291645
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
16301646
return -1;
16311647
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
1632-
kwds, &PyString_Type);
1648+
kwds, &PyBytes_Type);
16331649
}
16341650

16351651
static PyObject *
@@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
16481664
/* FromFormat does not support %02x, so format that separately */
16491665
char byte[4];
16501666
PyOS_snprintf(byte, sizeof(byte), "%02x",
1651-
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
1667+
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
16521668
return PyString_FromFormat(
16531669
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
16541670
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
@@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
16891705
assert(length < INT_MAX);
16901706
assert(start < INT_MAX);
16911707
assert(end < INT_MAX);
1692-
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
1708+
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
16931709
encoding, object, length, start, end, reason);
16941710
}
16951711

Python/getargs.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
819819
break;
820820
}
821821

822+
case 'y': {/* bytes */
823+
if (*format == '#') {
824+
void **p = (void **)va_arg(*p_va, char **);
825+
FETCH_SIZE;
826+
827+
if (PyBytes_Check(arg)) {
828+
*p = PyBytes_AS_STRING(arg);
829+
STORE_SIZE(PyBytes_GET_SIZE(arg));
830+
}
831+
else
832+
return converterr("bytes", arg, msgbuf, bufsize);
833+
format++;
834+
} else {
835+
char **p = va_arg(*p_va, char **);
836+
837+
if (PyBytes_Check(arg))
838+
*p = PyBytes_AS_STRING(arg);
839+
else
840+
return converterr("bytes", arg, msgbuf, bufsize);
841+
if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
842+
return converterr("bytes without null bytes",
843+
arg, msgbuf, bufsize);
844+
}
845+
break;
846+
}
847+
822848
case 'z': {/* string, may be NULL (None) */
823849
if (*format == '#') { /* any buffer-like object */
824850
void **p = (void **)va_arg(*p_va, char **);
@@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
15951621

15961622
case 's': /* string */
15971623
case 'z': /* string or None */
1624+
case 'y': /* bytes */
15981625
case 'u': /* unicode string */
15991626
case 't': /* buffer, read-only */
16001627
case 'w': /* buffer, read-write */

Python/modsupport.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
424424
return v;
425425
}
426426

427+
case 'y':
428+
{
429+
PyObject *v;
430+
char *str = va_arg(*p_va, char *);
431+
Py_ssize_t n;
432+
if (**p_format == '#') {
433+
++*p_format;
434+
if (flags & FLAG_SIZE_T)
435+
n = va_arg(*p_va, Py_ssize_t);
436+
else
437+
n = va_arg(*p_va, int);
438+
}
439+
else
440+
n = -1;
441+
if (str == NULL) {
442+
v = Py_None;
443+
Py_INCREF(v);
444+
}
445+
else {
446+
if (n < 0) {
447+
size_t m = strlen(str);
448+
if (m > PY_SSIZE_T_MAX) {
449+
PyErr_SetString(PyExc_OverflowError,
450+
"string too long for Python bytes");
451+
return NULL;
452+
}
453+
n = (Py_ssize_t)m;
454+
}
455+
v = PyBytes_FromStringAndSize(str, n);
456+
}
457+
return v;
458+
}
459+
427460
case 'N':
428461
case 'S':
429462
case 'O':

0 commit comments

Comments
 (0)