Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-139871: Optimize bytearray unique bytes iconcat
If the bytearray is empty and a uniquely referenced bytes object is
being concatenated (ex. one just received from read), just use its
storage as the backing for the bytearray rather than copying it.

build_bytes_unique: Mean +- std dev: [base] 383 ns +- 11 ns -> [iconcat_opt] 342 ns +- 5 ns: 1.12x faster
build_bytearray: Mean +- std dev: [base] 496 ns +- 8 ns -> [iconcat_opt] 471 ns +- 13 ns: 1.05x faster
encode: Mean +- std dev: [base] 482 us +- 2 us -> [iconcat_opt] 13.8 us +- 0.1 us: 34.78x faster

Benchmark hidden because not significant (1): build_bytes

Geometric mean: 2.53x faster

note: Performance of build_bytes is expected to stay constant.
```python
import pyperf

runner = pyperf.Runner()

count1 = 1_000
count2 = 100
count3 = 10_000

CHUNK_A = b'a' * count1
CHUNK_B = b'b' * count2
CHUNK_C = b'c' * count3

def build_bytes():
    # Bytes not uniquely referenced.
    ba = bytearray()
    ba += CHUNK_A
    ba += CHUNK_B
    ba += CHUNK_C

def build_bytes_unique():
    ba = bytearray()
    # Repeat inline results in uniquely referenced bytes.
    ba += b'a' * count1
    ba += b'b' * count2
    ba += b'c' * count3

def build_bytearray():
    # Each bytearray appended is uniquely referenced.
    ba = bytearray()
    ba += bytearray(CHUNK_A)
    ba += bytearray(CHUNK_B)
    ba += bytearray(CHUNK_C)

runner.bench_func('build_bytes', build_bytes)
runner.bench_func('build_bytes_unique', build_bytes_unique)
runner.bench_func('build_bytearray', build_bytearray)
runner.timeit(
    name="encode",
    setup="a = 'a' * 1_000_000",
    stmt="bytearray(a, encoding='utf8')")
```
  • Loading branch information
cmaloney committed Nov 22, 2025
commit db4c09aa661d03958ce523727e44bad6e2e3d31d
55 changes: 42 additions & 13 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,43 @@ bytearray_iconcat_lock_held(PyObject *op, PyObject *other)
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
PyByteArrayObject *self = _PyByteArray_CAST(op);

// optimization: Avoid copying the bytes coming in when possible.
if (self->ob_alloc == 0 && _PyObject_IsUniquelyReferenced(other)) {
// note: ob_bytes_object is always the immortal empty bytes here.
Comment thread
cmaloney marked this conversation as resolved.
Outdated
if (!_canresize(self)) {
Comment thread
vstinner marked this conversation as resolved.
return NULL;
}

/* Get the bytes out of the temporary bytearray.

Just returning other doesn't work as __init__ calls this and can't
change self. */
Comment thread
cmaloney marked this conversation as resolved.
Outdated
if (PyByteArray_CheckExact(other)) {
PyObject *taken = PyObject_CallMethodNoArgs(other,
&_Py_ID(take_bytes));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks unsafe to me. If you call a method, you may invalidate the assumptions you verified earlier

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe call bytearray_take_bytes_impl() directly to reduce the risk of side effects? And you can check again _PyObject_IsUniquelyReferenced() in an assertion.

if (taken == NULL) {
return NULL;
}
// Avoid Py_INCREF needed for argument case.
Py_ssize_t size = Py_SIZE(taken);
self->ob_bytes_object = taken;
bytearray_reinit_from_bytes(self, size, size);
return Py_NewRef(self);
}

if (PyBytes_CheckExact(other)) {
Py_ssize_t size = Py_SIZE(other);
self->ob_bytes_object = other;
bytearray_reinit_from_bytes(self, size, size);
Py_INCREF(self->ob_bytes_object);

// Caller has a reference still and its decref will return
// bytes to be uniquely referenced.
assert(Py_REFCNT(self->ob_bytes_object) == 2);
return Py_NewRef(self);
}
}

Py_buffer vo;
if (PyObject_GetBuffer(other, &vo, PyBUF_SIMPLE) != 0) {
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Expand Down Expand Up @@ -977,22 +1014,14 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
}
}

/* Use the buffer API */
/* Use the buffer API. Defer to iconcat which optimizes. */
if (PyObject_CheckBuffer(arg)) {
Py_ssize_t size;
Py_buffer view;
if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
PyObject *new = bytearray_iconcat((PyObject *)self, arg);
if (new == NULL) {
return -1;
size = view.len;
if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
if (PyBuffer_ToContiguous(PyByteArray_AS_STRING(self),
&view, size, 'C') < 0)
goto fail;
PyBuffer_Release(&view);
}
Py_DECREF(new);
return 0;
fail:
PyBuffer_Release(&view);
return -1;
}

if (PyList_CheckExact(arg) || PyTuple_CheckExact(arg)) {
Expand Down
Loading