Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-129813: Add PyBytesWriter C API
* Replace usage of the old private _PyBytesWriter with the new public
  PyBytesWriter C API.
* Remove the old private _PyBytesWriter C API.
* Add a freelist for PyBytesWriter_Create().
* TODO: write doc
* TODO: document new functions in What's New and Changelog
  • Loading branch information
vstinner committed Feb 7, 2025
commit 490222fac88fcabf3da47a6831008d65b7ebd4c7
31 changes: 31 additions & 0 deletions Include/cpython/bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,34 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable)
{
return PyBytes_Join(sep, iterable);
}


// --- PyBytesWriter API -----------------------------------------------------

// Writer handle. Only the struct tag is declared here; the layout of
// struct PyBytesWriter is not defined in this section, so callers work with
// PyBytesWriter* pointers only.
typedef struct PyBytesWriter PyBytesWriter;

// Create a writer and store it in *writer.
// Callers use the returned pointer as their initial write position and treat
// a NULL return as failure.
// NOTE(review): 'alloc' is presumably the number of bytes to preallocate, and
// an exception is presumably set on failure -- confirm against the
// implementation.
PyAPI_FUNC(void*) PyBytesWriter_Create(
PyBytesWriter **writer,
Py_ssize_t alloc);
// Dispose of a writer without building a result object (callers use this on
// their error paths instead of PyBytesWriter_Finish()).
// NOTE(review): confirm whether a NULL 'writer' is accepted, and what
// PyBytesWriter_Create() leaves in *writer when it fails.
PyAPI_FUNC(void) PyBytesWriter_Discard(
PyBytesWriter *writer);
// Build the final object from the writer's content.
// 'buf' is the caller's current write position, i.e. the address just past
// the last byte written.
// NOTE(review): presumably returns a new bytes object, or NULL with an
// exception set on error, and consumes the writer -- confirm.
PyAPI_FUNC(PyObject*) PyBytesWriter_Finish(
PyBytesWriter *writer,
void *buf);

// NOTE(review): presumably returns the number of bytes currently allocated
// for the writer's buffer -- confirm against the implementation.
PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetAllocated(
PyBytesWriter *writer);
// Ask for room to write 'extend' additional bytes.
// 'buf' is the caller's current write position; callers replace it with the
// returned pointer and treat a NULL return as failure.
// NOTE(review): presumably the buffer may move, which is why the position
// has to be refreshed from the return value -- confirm.
PyAPI_FUNC(void*) PyBytesWriter_Extend(
PyBytesWriter *writer,
void *buf,
Py_ssize_t extend);
// NOTE(review): presumably copies 'size' bytes from 'bytes' to the write
// position 'buf' (growing the buffer as needed) and returns the updated
// position, or NULL on error. The test suite passes size == -1 through a
// _testcapi wrapper to write a whole string; whether this C function itself
// accepts -1 is not visible here -- confirm.
PyAPI_FUNC(void*) PyBytesWriter_WriteBytes(
PyBytesWriter *writer,
void *buf,
const char *bytes,
Py_ssize_t size);
// NOTE(review): presumably formats the variadic arguments according to the
// printf-style 'format' string at write position 'buf' and returns the
// updated position, or NULL on error -- confirm which format specifiers are
// supported.
PyAPI_FUNC(void*) PyBytesWriter_Format(
PyBytesWriter *writer,
void *buf,
const char *format,
...);
83 changes: 1 addition & 82 deletions Include/internal/pycore_bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,88 +59,7 @@ PyAPI_FUNC(void)
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src);

/* --- _PyBytesWriter ----------------------------------------------------- */

/* State of a private bytes writer.

   The structure is big because it embeds a 512-byte "stack buffer"
   (small_buffer). A _PyBytesWriter variable must be declared at the end of
   the variables in a function to optimize the memory allocation on the
   stack. */
typedef struct {
/* Object holding the data: a bytes object, a bytearray object, or NULL
   when the embedded small buffer is used instead. */
PyObject *buffer;

/* Allocated size.
   NOTE(review): presumably counted in bytes, like min_size -- confirm. */
Py_ssize_t allocated;

/* Minimum number of allocated bytes,
   incremented by _PyBytesWriter_Prepare(). */
Py_ssize_t min_size;

/* If non-zero, use a bytearray instead of a bytes object for buffer. */
int use_bytearray;

/* If non-zero, overallocate the buffer (default: 0).
   This flag must be zero if use_bytearray is non-zero. */
int overallocate;

/* Embedded small buffer ("stack buffer").
   NOTE(review): use_small_buffer is presumably non-zero while the data
   lives in small_buffer rather than in 'buffer' -- confirm. */
int use_small_buffer;
char small_buffer[512];
} _PyBytesWriter;

/* Initialize a bytes writer.

   Callers initialize the writer before calling _PyBytesWriter_Alloc().

   By default, the overallocation is disabled. Set the overallocate attribute
   to control the allocation of the buffer.

   Export _PyBytesWriter API for '_pickle' shared extension. */
PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);

/* Get the buffer content and reset the writer.

   str is the current pointer inside the buffer (callers pass the position
   just past the last byte they wrote).

   Return a bytes object, or a bytearray object if use_bytearray is non-zero.
   Raise an exception and return NULL on error. */
PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
void *str);

/* Deallocate memory of a writer (clear its internal buffer).
   Callers use this on error paths, instead of _PyBytesWriter_Finish(). */
PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer);

/* Allocate the buffer to write size bytes.
   Return the pointer to the beginning of buffer data.
   Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer,
Py_ssize_t size);

/* Ensure that the buffer is large enough to write *size* more bytes.
   Add size to the writer minimum size (min_size attribute).

   str is the current pointer inside the buffer.
   Return the updated current pointer inside the buffer; callers replace
   their pointer with the returned one.
   Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);

/* Resize the buffer to make it larger.
   The new buffer may be larger than size bytes because of overallocation.

   str is the current pointer inside the buffer.
   Return the updated current pointer inside the buffer.
   Raise an exception and return NULL on error.

   Note: size must be greater than the number of allocated bytes in the
   writer.

   This function doesn't use the writer minimum size (min_size attribute).

   See also _PyBytesWriter_Prepare().
*/
PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);

/* Write bytes.
   NOTE(review): presumably copies size bytes from 'bytes' to the position
   'str' and returns the updated pointer inside the buffer -- confirm.
   Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
/* Internal helper operating on the public PyBytesWriter type; declared with
   plain extern rather than PyAPI_FUNC.
   NOTE(review): presumably returns a pointer to the beginning of the
   writer's buffer data -- confirm against the definition, which is not
   shown here. */
extern char* _PyBytesWriter_Start(PyBytesWriter *writer);

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_freelist_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ extern "C" {
# define Py_futureiters_MAXFREELIST 255
# define Py_object_stack_chunks_MAXFREELIST 4
# define Py_unicode_writers_MAXFREELIST 1
# define Py_bytes_writers_MAXFREELIST 1
# define Py_pymethodobjects_MAXFREELIST 20

// A generic freelist of either PyObjects or other data structures.
Expand Down Expand Up @@ -53,6 +54,7 @@ struct _Py_freelists {
struct _Py_freelist futureiters;
struct _Py_freelist object_stack_chunks;
struct _Py_freelist unicode_writers;
struct _Py_freelist bytes_writers;
struct _Py_freelist pymethodobjects;
};

Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ extern int _PyLong_FormatWriter(
int alternate);

extern char* _PyLong_FormatBytesWriter(
_PyBytesWriter *writer,
PyBytesWriter *writer,
char *str,
PyObject *obj,
int base,
Expand Down
59 changes: 59 additions & 0 deletions Lib/test/test_capi/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,5 +290,64 @@ def test_join(self):
bytes_join(b'', NULL)


class PyBytesWriterTest(unittest.TestCase):
    """Exercise the PyBytesWriter C API through the _testcapi wrapper type."""

    def create_writer(self, alloc):
        """Return a new _testcapi.PyBytesWriter created with *alloc*."""
        return _testcapi.PyBytesWriter(alloc)

    def test_empty(self):
        # Test PyBytesWriter_Create()
        # A writer that receives no data must finish as an empty bytes object.
        empty = self.create_writer(0)
        result = empty.finish()
        self.assertEqual(result, b'')

    def test_write_bytes(self):
        # Test PyBytesWriter_WriteBytes()
        expected = b'Hello World!'

        # size=-1: the whole bytes object is expected in the output.
        w = self.create_writer(0)
        w.write_bytes(b'Hello World!', -1)
        self.assertEqual(w.finish(), expected)

        # An explicit size (6) keeps only the first 6 bytes of the second
        # argument, so the "<truncated>" suffix must not show up.
        w = self.create_writer(0)
        w.write_bytes(b'Hello ', -1)
        w.write_bytes(b'World! <truncated>', 6)
        self.assertEqual(w.finish(), expected)

    def test_extend(self):
        # Test PyBytesWriter_Extend()
        # Each scenario is a sequence of (extend, data) calls applied to a
        # fresh writer; all of them must produce the same final bytes.
        scenarios = (
            ((20, b'number=123456'), (0, b'')),
            ((0, b''), (20, b'number=123456')),
            ((10, b'number='), (10, b'123456')),
            ((10, b'number='), (0, b''), (10, b'123456')),
            ((10, b'number'), (10, b'='), (10, b'123'), (10, b'456')),
        )
        for steps in scenarios:
            w = self.create_writer(0)
            for extend, data in steps:
                w.extend(extend, data)
            self.assertEqual(w.finish(), b'number=123456')

    def test_format(self):
        # Test PyBytesWriter_Format()
        w = self.create_writer(0)
        w.format_i(123456)
        self.assertEqual(w.finish(), b'123456')


if __name__ == "__main__":
unittest.main()
33 changes: 13 additions & 20 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -2601,29 +2601,22 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj)
static PyObject *
raw_unicode_escape(PyObject *obj)
{
char *p;
Py_ssize_t i, size;
const void *data;
int kind;
_PyBytesWriter writer;
Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
const void *data = PyUnicode_DATA(obj);
int kind = PyUnicode_KIND(obj);

_PyBytesWriter_Init(&writer);

size = PyUnicode_GET_LENGTH(obj);
data = PyUnicode_DATA(obj);
kind = PyUnicode_KIND(obj);

p = _PyBytesWriter_Alloc(&writer, size);
if (p == NULL)
goto error;
writer.overallocate = 1;
PyBytesWriter *writer;
char *p = PyBytesWriter_Create(&writer, size);
if (p == NULL) {
return NULL;
}

for (i=0; i < size; i++) {
for (Py_ssize_t i=0; i < size; i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
/* Map 32-bit characters to '\Uxxxxxxxx' */
if (ch >= 0x10000) {
/* -1: subtract 1 preallocated byte */
p = _PyBytesWriter_Prepare(&writer, p, 10-1);
p = PyBytesWriter_Extend(writer, p, 10-1);
if (p == NULL)
goto error;

Expand All @@ -2644,7 +2637,7 @@ raw_unicode_escape(PyObject *obj)
ch == 0x1a)
{
/* -1: subtract 1 preallocated byte */
p = _PyBytesWriter_Prepare(&writer, p, 6-1);
p = PyBytesWriter_Extend(writer, p, 6-1);
if (p == NULL)
goto error;

Expand All @@ -2660,10 +2653,10 @@ raw_unicode_escape(PyObject *obj)
*p++ = (char) ch;
}

return _PyBytesWriter_Finish(&writer, p);
return PyBytesWriter_Finish(writer, p);

error:
_PyBytesWriter_Dealloc(&writer);
PyBytesWriter_Discard(writer);
return NULL;
}

Expand Down
21 changes: 11 additions & 10 deletions Modules/_struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -2259,7 +2259,6 @@ strings.");
static PyObject *
s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
char *buf;
PyStructObject *soself;
_structmodulestate *state = get_struct_state_structinst(self);

Expand All @@ -2275,21 +2274,23 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
}

/* Allocate a new string */
_PyBytesWriter writer;
_PyBytesWriter_Init(&writer);
buf = _PyBytesWriter_Alloc(&writer, soself->s_size);
PyBytesWriter *writer;
char *buf = PyBytesWriter_Create(&writer, soself->s_size);
if (buf == NULL) {
_PyBytesWriter_Dealloc(&writer);
return NULL;
goto error;
}

/* Call the guts */
if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) {
_PyBytesWriter_Dealloc(&writer);
return NULL;
if (s_pack_internal(soself, args, 0, buf, state) != 0) {
goto error;
}
buf += soself->s_size;

return _PyBytesWriter_Finish(&writer, buf + soself->s_size);
return PyBytesWriter_Finish(writer, buf);

error:
PyBytesWriter_Discard(writer);
return NULL;
}

PyDoc_STRVAR(s_pack_into__doc__,
Expand Down
Loading