Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 6e2ee86

Browse files
committed
Update the array overallocation scheme to match the approach used for
lists. Speeds append() operations and reduces memory requirements (because of more conservative overallocation). Paves the way for the feature request for array.extend() to support arbitrary iterable arguments.
1 parent 118e127 commit 6e2ee86

2 files changed

Lines changed: 79 additions & 62 deletions

File tree

Misc/NEWS

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,9 @@ Core and builtins
180180
Extension modules
181181
-----------------
182182

183-
- array objects now support the copy module
183+
- array objects now support the copy module. Also, their resizing
184+
scheme has been updated to match the one used for list objects. This improves
185+
performance for append() operations.
184186

185187
- cStringIO.writelines() now accepts any iterable argument and writes
186188
the lines one at a time rather than joining them and writing once.

Modules/arraymodule.c

Lines changed: 76 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -13,52 +13,6 @@
1313
#endif /* DONT_HAVE_SYS_TYPES_H */
1414
#endif /* !STDC_HEADERS */
1515

16-
/* Shamelessly stolen from listobject.c */
17-
static int
18-
roundupsize(int n)
19-
{
20-
unsigned int nbits = 0;
21-
unsigned int n2 = (unsigned int)n >> 5;
22-
23-
/* Round up:
24-
* If n < 256, to a multiple of 8.
25-
* If n < 2048, to a multiple of 64.
26-
* If n < 16384, to a multiple of 512.
27-
* If n < 131072, to a multiple of 4096.
28-
* If n < 1048576, to a multiple of 32768.
29-
* If n < 8388608, to a multiple of 262144.
30-
* If n < 67108864, to a multiple of 2097152.
31-
* If n < 536870912, to a multiple of 16777216.
32-
* ...
33-
* If n < 2**(5+3*i), to a multiple of 2**(3*i).
34-
*
35-
* This over-allocates proportional to the list size, making room
36-
* for additional growth. The over-allocation is mild, but is
37-
* enough to give linear-time amortized behavior over a long
38-
* sequence of appends() in the presence of a poorly-performing
39-
* system realloc() (which is a reality, e.g., across all flavors
40-
* of Windows, with Win9x behavior being particularly bad -- and
41-
* we've still got address space fragmentation problems on Win9x
42-
* even with this scheme, although it requires much longer lists to
43-
* provoke them than it used to).
44-
*/
45-
do {
46-
n2 >>= 3;
47-
nbits += 3;
48-
} while (n2);
49-
return ((n >> nbits) + 1) << nbits;
50-
}
51-
52-
#define NRESIZE(var, type, nitems) \
53-
do { \
54-
size_t _new_size = roundupsize(nitems); \
55-
if (_new_size <= ((~(size_t)0) / sizeof(type))) \
56-
PyMem_RESIZE(var, type, _new_size); \
57-
else \
58-
var = NULL; \
59-
} while (0)
60-
/* END SHAMELESSLY STOLEN CODE */
61-
6216
struct arrayobject; /* Forward */
6317

6418
/* All possible arraydescr values are defined in the vector "descriptors"
@@ -76,6 +30,7 @@ typedef struct arrayobject {
7630
PyObject_HEAD
7731
int ob_size;
7832
char *ob_item;
33+
int allocated;
7934
struct arraydescr *ob_descr;
8035
} arrayobject;
8136

@@ -84,6 +39,54 @@ static PyTypeObject Arraytype;
8439
#define array_Check(op) PyObject_TypeCheck(op, &Arraytype)
8540
#define array_CheckExact(op) ((op)->ob_type == &Arraytype)
8641

42+
static int
43+
array_resize(arrayobject *self, int newsize)
44+
{
45+
char *items;
46+
size_t _new_size;
47+
48+
/* Bypass realloc() when a previous overallocation is large enough
49+
to accommodate the newsize. If the newsize is at least 16 smaller than the
50+
current size, then proceed with the realloc() to shrink the array.
51+
*/
52+
53+
if (self->allocated >= newsize &&
54+
self->ob_size < newsize + 16 &&
55+
self->ob_item != NULL) {
56+
self->ob_size = newsize;
57+
return 0;
58+
}
59+
60+
/* This over-allocates proportional to the array size, making room
61+
* for additional growth. The over-allocation is mild, but is
62+
* enough to give linear-time amortized behavior over a long
63+
* sequence of appends() in the presence of a poorly-performing
64+
* system realloc().
65+
* The growth pattern is: 0, 4, 8, 16, 25, 34, 44, 54, 65, 77, ...
66+
* Note, the pattern starts out the same as for lists but then
67+
* grows at a smaller rate so that larger arrays only overallocate
68+
* by about 1/16th -- this is done because arrays are presumed to be more
69+
* memory critical.
70+
*/
71+
72+
_new_size = (newsize >> 4) + (self->ob_size < 8 ? 3 : 7) + newsize;
73+
items = self->ob_item;
74+
/* XXX The following multiplication and division do not optimize away
75+
like it does for lists since the size is not known at compile time */
76+
if (_new_size <= ((~(size_t)0) / self->ob_descr->itemsize))
77+
PyMem_RESIZE(items, char, (_new_size * self->ob_descr->itemsize));
78+
else
79+
items = NULL;
80+
if (items == NULL) {
81+
PyErr_NoMemory();
82+
return -1;
83+
}
84+
self->ob_item = items;
85+
self->ob_size = newsize;
86+
self->allocated = _new_size;
87+
return 0;
88+
}
89+
8790
/****************************************************************************
8891
Get and Set functions for each type.
8992
A Get function takes an arrayobject* and an integer index, returning the
@@ -438,6 +441,7 @@ newarrayobject(PyTypeObject *type, int size, struct arraydescr *descr)
438441
}
439442
}
440443
op->ob_descr = descr;
444+
op->allocated = size;
441445
return (PyObject *) op;
442446
}
443447

@@ -455,30 +459,29 @@ static int
455459
ins1(arrayobject *self, int where, PyObject *v)
456460
{
457461
char *items;
462+
int n = self->ob_size;
458463
if (v == NULL) {
459464
PyErr_BadInternalCall();
460465
return -1;
461466
}
462467
if ((*self->ob_descr->setitem)(self, -1, v) < 0)
463468
return -1;
464-
items = self->ob_item;
465-
NRESIZE(items, char, (self->ob_size+1) * self->ob_descr->itemsize);
466-
if (items == NULL) {
467-
PyErr_NoMemory();
469+
470+
if (array_resize(self, n+1) == -1)
468471
return -1;
469-
}
472+
items = self->ob_item;
470473
if (where < 0) {
471-
where += self->ob_size;
474+
where += n;
472475
if (where < 0)
473476
where = 0;
474477
}
475-
if (where > self->ob_size)
476-
where = self->ob_size;
477-
memmove(items + (where+1)*self->ob_descr->itemsize,
478-
items + where*self->ob_descr->itemsize,
479-
(self->ob_size-where)*self->ob_descr->itemsize);
480-
self->ob_item = items;
481-
self->ob_size++;
478+
if (where > n)
479+
where = n;
480+
/* appends don't need to call memmove() */
481+
if (where != n)
482+
memmove(items + (where+1)*self->ob_descr->itemsize,
483+
items + where*self->ob_descr->itemsize,
484+
(n-where)*self->ob_descr->itemsize);
482485
return (*self->ob_descr->setitem)(self, where, v);
483486
}
484487

@@ -728,6 +731,7 @@ array_ass_slice(arrayobject *a, int ilow, int ihigh, PyObject *v)
728731
PyMem_RESIZE(item, char, a->ob_size*a->ob_descr->itemsize);
729732
/* Can't fail */
730733
a->ob_item = item;
734+
a->allocated = a->ob_size;
731735
}
732736
else if (d > 0) { /* Insert d items */
733737
PyMem_RESIZE(item, char,
@@ -741,6 +745,7 @@ array_ass_slice(arrayobject *a, int ilow, int ihigh, PyObject *v)
741745
(a->ob_size-ihigh)*a->ob_descr->itemsize);
742746
a->ob_item = item;
743747
a->ob_size += d;
748+
a->allocated = a->ob_size;
744749
}
745750
if (n > 0)
746751
memcpy(item + ilow*a->ob_descr->itemsize, b->ob_item,
@@ -795,7 +800,8 @@ array_do_extend(arrayobject *self, PyObject *bb)
795800
}
796801
memcpy(self->ob_item + self->ob_size*self->ob_descr->itemsize,
797802
b->ob_item, b->ob_size*b->ob_descr->itemsize);
798-
self->ob_size = size;
803+
self->ob_size = size;
804+
self->allocated = size;
799805

800806
return 0;
801807
#undef b
@@ -825,6 +831,7 @@ array_inplace_repeat(arrayobject *self, int n)
825831
PyMem_FREE(items);
826832
self->ob_item = NULL;
827833
self->ob_size = 0;
834+
self->allocated = 0;
828835
}
829836
else {
830837
PyMem_Resize(items, char, n * size);
@@ -837,6 +844,7 @@ array_inplace_repeat(arrayobject *self, int n)
837844
}
838845
self->ob_item = items;
839846
self->ob_size *= n;
847+
self->allocated = self->ob_size;
840848
}
841849
}
842850
Py_INCREF(self);
@@ -1158,12 +1166,14 @@ array_fromfile(arrayobject *self, PyObject *args)
11581166
}
11591167
self->ob_item = item;
11601168
self->ob_size += n;
1169+
self->allocated = self->ob_size;
11611170
nread = fread(item + (self->ob_size - n) * itemsize,
11621171
itemsize, n, fp);
11631172
if (nread < (size_t)n) {
11641173
self->ob_size -= (n - nread);
11651174
PyMem_RESIZE(item, char, self->ob_size*itemsize);
11661175
self->ob_item = item;
1176+
self->allocated = self->ob_size;
11671177
PyErr_SetString(PyExc_EOFError,
11681178
"not enough items in file");
11691179
return NULL;
@@ -1230,6 +1240,7 @@ array_fromlist(arrayobject *self, PyObject *list)
12301240
}
12311241
self->ob_item = item;
12321242
self->ob_size += n;
1243+
self->allocated = self->ob_size;
12331244
for (i = 0; i < n; i++) {
12341245
PyObject *v = PyList_GetItem(list, i);
12351246
if ((*self->ob_descr->setitem)(self,
@@ -1238,6 +1249,7 @@ array_fromlist(arrayobject *self, PyObject *list)
12381249
PyMem_RESIZE(item, char,
12391250
self->ob_size * itemsize);
12401251
self->ob_item = item;
1252+
self->allocated = self->ob_size;
12411253
return NULL;
12421254
}
12431255
}
@@ -1300,6 +1312,7 @@ array_fromstring(arrayobject *self, PyObject *args)
13001312
}
13011313
self->ob_item = item;
13021314
self->ob_size += n;
1315+
self->allocated = self->ob_size;
13031316
memcpy(item + (self->ob_size - n) * itemsize,
13041317
str, itemsize*n);
13051318
}
@@ -1353,6 +1366,7 @@ array_fromunicode(arrayobject *self, PyObject *args)
13531366
}
13541367
self->ob_item = (char *) item;
13551368
self->ob_size += n;
1369+
self->allocated = self->ob_size;
13561370
memcpy(item + self->ob_size - n,
13571371
ustr, n * sizeof(Py_UNICODE));
13581372
}
@@ -1611,7 +1625,7 @@ array_ass_subscr(arrayobject* self, PyObject* item, PyObject* value)
16111625

16121626
self->ob_size -= slicelength;
16131627
self->ob_item = PyMem_REALLOC(self->ob_item, itemsize*self->ob_size);
1614-
1628+
self->allocated = self->ob_size;
16151629

16161630
return 0;
16171631
}
@@ -1811,6 +1825,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18111825
self->ob_item = item;
18121826
self->ob_size = n / sizeof(Py_UNICODE);
18131827
memcpy(item, PyUnicode_AS_DATA(initial), n);
1828+
self->allocated = self->ob_size;
18141829
}
18151830
#endif
18161831
}

0 commit comments

Comments
 (0)