gh-139871: Optimize bytearray unique bytes iconcat

If the bytearray is empty and a uniquely referenced bytes object is being concatenated (e.g. one just received from read), just use its storage as the backing for the bytearray rather than copying it.

build_bytes_unique: Mean +- std dev: [base] 383 ns +- 11 ns -> [iconcat_opt] 342 ns +- 5 ns: 1.12x faster
build_bytearray: Mean +- std dev: [base] 496 ns +- 8 ns -> [iconcat_opt] 471 ns +- 13 ns: 1.05x faster
encode: Mean +- std dev: [base] 482 us +- 2 us -> [iconcat_opt] 13.8 us +- 0.1 us: 34.78x faster

Benchmark hidden because not significant (1): build_bytes

Geometric mean: 2.53x faster

note: Performance of build_bytes is expected to stay constant.

```python
import pyperf

runner = pyperf.Runner()

count1 = 1_000
count2 = 100
count3 = 10_000

CHUNK_A = b'a' * count1
CHUNK_B = b'b' * count2
CHUNK_C = b'c' * count3

def build_bytes():
    # Bytes not uniquely referenced.
    ba = bytearray()
    ba += CHUNK_A
    ba += CHUNK_B
    ba += CHUNK_C

def build_bytes_unique():
    ba = bytearray()
    # Repeat inline results in uniquely referenced bytes.
    ba += b'a' * count1
    ba += b'b' * count2
    ba += b'c' * count3

def build_bytearray():
    # Each bytearray appended is uniquely referenced.
    ba = bytearray()
    ba += bytearray(CHUNK_A)
    ba += bytearray(CHUNK_B)
    ba += bytearray(CHUNK_C)

runner.bench_func('build_bytes', build_bytes)
runner.bench_func('build_bytes_unique', build_bytes_unique)
runner.bench_func('build_bytearray', build_bytearray)
runner.timeit(
    name="encode",
    setup="a = 'a' * 1_000_000",
    stmt="bytearray(a, encoding='utf8')")
```
python · cmaloney · Nov 18, 2025 · Nov 24, 2025 · Nov 24, 2025 · Dec 3, 2025
commit db4c09aa661d03958ce523727e44bad6e2e3d31d
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
@@ -333,6 +333,43 @@ bytearray_iconcat_lock_held(PyObject *op, PyObject *other)
     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
     PyByteArrayObject *self = _PyByteArray_CAST(op);
 
+    // optimization: Avoid copying the bytes coming in when possible.
+    if (self->ob_alloc == 0 && _PyObject_IsUniquelyReferenced(other)) {
+        // note: ob_bytes_object is always the immortal empty bytes here.
+        if (!_canresize(self)) {
+            return NULL;
+        }
+
+        /* Get the bytes out of the temporary bytearray.
+
+           Just returning other doesn't work as __init__ calls this and can't
+           change self. */
+        if (PyByteArray_CheckExact(other)) {
+            PyObject *taken = PyObject_CallMethodNoArgs(other,
+                                                        &_Py_ID(take_bytes));
+            if (taken == NULL) {
+                return NULL;
+            }
+            // Avoid Py_INCREF needed for argument case.
+            Py_ssize_t size = Py_SIZE(taken);
+            self->ob_bytes_object = taken;
+            bytearray_reinit_from_bytes(self, size, size);
+            return Py_NewRef(self);
+        }
+
+        if (PyBytes_CheckExact(other)) {
+            Py_ssize_t size = Py_SIZE(other);
+            self->ob_bytes_object = other;
+            bytearray_reinit_from_bytes(self, size, size);
+            Py_INCREF(self->ob_bytes_object);
+
+            // Caller has a reference still and its decref will return
+            // bytes to be uniquely referenced.
+            assert(Py_REFCNT(self->ob_bytes_object) == 2);
+            return Py_NewRef(self);
+        }
+    }
+
     Py_buffer vo;
     if (PyObject_GetBuffer(other, &vo, PyBUF_SIMPLE) != 0) {
         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
@@ -977,22 +1014,14 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
         }
     }
 
-    /* Use the buffer API */
+    /* Use the buffer API. Defer to iconcat which optimizes. */
     if (PyObject_CheckBuffer(arg)) {
-        Py_ssize_t size;
-        Py_buffer view;
-        if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
+        PyObject *new = bytearray_iconcat((PyObject *)self, arg);
+        if (new == NULL) {
             return -1;
-        size = view.len;
-        if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
-        if (PyBuffer_ToContiguous(PyByteArray_AS_STRING(self),
-            &view, size, 'C') < 0)
-            goto fail;
-        PyBuffer_Release(&view);
+        }
+        Py_DECREF(new);
         return 0;
-    fail:
-        PyBuffer_Release(&view);
-        return -1;
     }
 
     if (PyList_CheckExact(arg) || PyTuple_CheckExact(arg)) {