Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 4d8d808

Browse files
committed
Fix-up reference counting for OBJECT arrays in ufuncloop.
1 parent a046954 commit 4d8d808

4 files changed

Lines changed: 137 additions & 6 deletions

File tree

THANKS.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ Chris Hanley for help with records.py, testing, and bug fixes.
1313
Travis Vaught and Joe Cooper for administration of scipy.org web site and SVN
1414
Eric Firing for bugfixes.
1515
Arnd Baecker for 64-bit testing
16-
David Cooke for code improvements
16+
David Cooke for many code improvements including the auto-generated C-API

scipy/base/include/scipy/ufuncobject.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ typedef struct {
9393
int needbuffer[MAX_ARGS];
9494
int leftover;
9595
int ninnerloops;
96-
intp lastdim;
96+
int lastdim;
9797

9898
/* Whether or not to swap */
9999
int swap[MAX_ARGS];

scipy/base/src/ufuncobject.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,8 @@ PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args,
14531453
Bool pyobject[MAX_ARGS];
14541454
int datasize[MAX_ARGS];
14551455
int i, j, k, stopcondition;
1456+
char *myptr1, *myptr2;
1457+
14561458

14571459
for (i=0; i<self->nargs; i++) {
14581460
copyswapn[i] = mps[i]->descr->copyswapn;
@@ -1461,7 +1463,7 @@ PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args,
14611463
(mps[i]->descr->type_num == PyArray_OBJECT));
14621464
laststrides[i] = iters[i]->strides[loop->lastdim];
14631465
if (steps[i] && laststrides[i] != mpselsize[i]) fastmemcpy[i] = 0;
1464-
else fastmemcpy[i] = !pyobject[i];
1466+
else fastmemcpy[i] = 1;
14651467
}
14661468
/* Do generic buffered looping here (works for any kind of
14671469
arrays -- some need buffers, some don't.
@@ -1540,7 +1542,6 @@ PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args,
15401542
memcpy(buffer[i], tptr[i],
15411543
copysizes[i]);
15421544
else {
1543-
char *myptr1, *myptr2;
15441545
myptr1 = buffer[i];
15451546
myptr2 = tptr[i];
15461547
for (j=0; j<bufsize; j++) {
@@ -1583,14 +1584,22 @@ PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args,
15831584
mpselsize[i]);
15841585
}
15851586
/* copy back to output arrays */
1587+
/* decref what's already there for object arrays */
1588+
if (pyobject[i]) {
1589+
myptr1 = tptr[i];
1590+
for (j=0; j<datasize[i]; j++) {
1591+
Py_XDECREF(*((PyObject **)myptr1));
1592+
myptr1 += laststrides[i];
1593+
}
1594+
}
15861595
if (fastmemcpy[i])
15871596
memcpy(tptr[i], buffer[i], copysizes[i]);
15881597
else {
1589-
char *myptr1, *myptr2;
15901598
myptr2 = buffer[i];
15911599
myptr1 = tptr[i];
15921600
for (j=0; j<bufsize; j++) {
1593-
memcpy(myptr1, myptr2, mpselsize[i]);
1601+
memcpy(myptr1, myptr2,
1602+
mpselsize[i]);
15941603
myptr1 += laststrides[i];
15951604
myptr2 += mpselsize[i];
15961605
}
@@ -1602,6 +1611,30 @@ PyUFunc_GenericFunction(PyUFuncObject *self, PyObject *args,
16021611
if (!needbuffer[i]) dptr[i] = tptr[i];
16031612
}
16041613
}
1614+
1615+
if (loop->obj) { /* DECREF castbuf for object arrays */
1616+
for (i=0; i<self->nargs; i++) {
1617+
if (pyobject[i]) {
1618+
if (steps[i] == 0) {
1619+
Py_XDECREF(*((PyObject **)castbuf[i]));
1620+
}
1621+
else {
1622+
int size = loop->bufsize;
1623+
PyObject **objptr = castbuf[i];
1624+
/* size is loop->bufsize unless there
1625+
was only one loop */
1626+
if (ninnerloops == 1) \
1627+
size = loop->leftover;
1628+
1629+
for (j=0; j<size; j++) {
1630+
Py_XDECREF(*objptr);
1631+
objptr += 1;
1632+
}
1633+
}
1634+
}
1635+
}
1636+
1637+
}
16051638

16061639
UFUNC_CHECK_ERROR(loop);
16071640

scipy/doc/ufuncs.txt

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
2+
BUFFERED General Ufunc explanation:
3+
4+
We need to optimize the section of ufunc code that handles mixed-type
5+
and misbehaved arrays. In particular, we need to fix it so that items
6+
are not copied into the buffer if they don't have to be.
7+
8+
Right now, all data is copied into the buffers (even scalars are copied
9+
multiple times into the buffers even if they are not going to be cast).
10+
11+
Some benchmarks show that this results in a significant slow-down
12+
(factor of 4) over similar numarray code.
13+
14+
The approach, therefore, is to loop over the largest dimension (just like
15+
the NO_BUFFER portion of the code). All arrays will either have N or
16+
1 in this last dimension (or there would be a mis-match error). The
17+
buffer size is B.
18+
19+
If N <= B (and only if needed), we copy the entire last-dimension into
20+
the buffer as fast as possible using the single-stride information.
21+
22+
We also only copy into output arrays if needed (otherwise the
23+
output arrays are used directly in the ufunc code).
24+
25+
Call the function using the appropriate strides information from all the input
26+
arrays. Only set the strides to the element-size for arrays that will be copied.
27+
28+
If N > B, then we have to do the above operation in a loop (with an extra loop
29+
at the end with a different buffer size).
30+
31+
Both of these cases are handled with the following code:
32+
33+
Compute N = quotient * B + remainder.
34+
quotient = N / B # integer math
35+
(store quotient + 1) as the number of innerloops
36+
remainder = N % B # integer remainder
37+
38+
On the inner-dimension we will have (quotient + 1) loops where
39+
the size of the inner function is B for all but the last when the niter size is
40+
remainder.
41+
42+
So, the code looks very similar to NOBUFFER_LOOP except the inner loop is
43+
replaced with...
44+
45+
for(k=0; k<quotient+1; k++) {
46+
if (k==quotient) make itersize remainder size
47+
copy only needed items to buffer.
48+
swap input buffers if needed
49+
cast input buffers if needed
50+
call function()
51+
cast outputs in buffers if needed
52+
swap outputs in buffers if needed
53+
copy only needed items back to output arrays.
54+
update all data-pointers by strides*niter
55+
}
56+
57+
58+
Reference counting for OBJECT arrays:
59+
60+
If there are object arrays involved then loop->obj gets set to 1. Then there are two cases:
61+
62+
1) The loop function is an object loop:
63+
64+
Inputs:
65+
- castbuf starts as NULL and then gets filled with new references.
66+
- function gets called and doesn't alter the reference count in castbuf
67+
- on the next iteration (next value of k), the casting function will
68+
DECREF what is present in castbuf already and place a new object.
69+
70+
- At the end of the inner loop (for loop over k), the final new-references
71+
in castbuf must be DECREF'd. If it's a scalar then a single DECREF suffices.
72+
Otherwise, "bufsize" DECREF's are needed (unless there was only one
73+
loop, then "remainder" DECREF's are needed).
74+
75+
Outputs:
76+
- castbuf contains a new reference as the result of the function call. This
77+
gets converted to the type of interest. This new reference in castbuf
78+
will be DECREF'd by later calls to the function. Thus, only after the
79+
inner most loop do we need to DECREF the remaining references in castbuf.
80+
81+
2) The loop function is of a different type:
82+
83+
Inputs:
84+
85+
- The PyObject input is copied over to buffer which receives a "borrowed"
86+
reference. This reference is then used but not altered by the cast
87+
call. Nothing needs to be done.
88+
89+
Outputs:
90+
91+
- The buffer[i] memory receives the PyObject input after the cast. This is
92+
a new reference which will be "stolen" as it is copied over into memory.
93+
The only problem is that what is presently in memory must be DECREF'd first.
94+
95+
96+
97+
98+

0 commit comments

Comments
 (0)