Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 2e3dfaf

Browse files
committed
Install C version of heapq.nsmallest().
1 parent c929766 commit 2e3dfaf

4 files changed

Lines changed: 182 additions & 17 deletions

File tree

Doc/lib/libheapq.tex

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,6 @@ \section{\module{heapq} ---
9797
\versionadded{2.4}
9898
\end{funcdesc}
9999

100-
Though the above functions appear symmetrical, they each have different
101-
speed and space requirements. In particular, \function{nsmallest()}
102-
operates on a full copy of the dataset. In contrast, \function{nlargest()}
103-
only requires storage space for \var{n} elements.
104-
105100
Both functions perform best for smaller values of \var{n}. For larger
106101
values, it is more efficient to use the \function{sorted()} function. Also,
107102
when \code{n==1}, it is more efficient to use the builtin \function{min()}

Lib/heapq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ def _siftup(heap, pos):
300300

301301
# If available, use C implementation
302302
try:
303-
from _heapq import heappush, heappop, heapify, heapreplace
303+
from _heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
304304
except ImportError:
305305
pass
306306

Lib/test/test_heapq.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import random
55
import unittest
66
from test import test_support
7+
import sys
78

89

910
def heapiter(heap):
@@ -91,16 +92,28 @@ def test_heapsort(self):
9192

9293
def test_nsmallest(self):
9394
data = [random.randrange(2000) for i in range(1000)]
94-
self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
95-
self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
95+
for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
96+
self.assertEqual(nsmallest(data, i), sorted(data)[:i])
9697

9798
def test_largest(self):
9899
data = [random.randrange(2000) for i in range(1000)]
99-
self.assertEqual(nlargest(data, 400), sorted(data, reverse=True)[:400])
100+
for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
101+
self.assertEqual(nlargest(data, i), sorted(data, reverse=True)[:i])
100102

101-
def test_main():
102-
test_support.run_unittest(TestHeap)
103+
def test_main(verbose=None):
104+
test_classes = [TestHeap]
105+
test_support.run_unittest(*test_classes)
106+
107+
# verify reference counting
108+
if verbose and hasattr(sys, "gettotalrefcount"):
109+
import gc
110+
counts = [None] * 5
111+
for i in xrange(len(counts)):
112+
test_support.run_unittest(*test_classes)
113+
gc.collect()
114+
counts[i] = sys.gettotalrefcount()
115+
print counts
103116

104117
if __name__ == "__main__":
105-
test_main()
118+
test_main(verbose=True)
106119

Modules/_heapqmodule.c

Lines changed: 162 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ PyDoc_STRVAR(heapify_doc,
219219
static PyObject *
220220
nlargest(PyObject *self, PyObject *args)
221221
{
222-
PyObject *heap=NULL, *elem, *rv, *iterable, *sol, *it, *oldelem;
222+
PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
223223
int i, n;
224224

225225
if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n))
@@ -246,10 +246,9 @@ nlargest(PyObject *self, PyObject *args)
246246
if (PyList_GET_SIZE(heap) == 0)
247247
goto sortit;
248248

249-
rv = heapify(self, heap);
250-
if (rv == NULL)
251-
goto fail;
252-
Py_DECREF(rv);
249+
for (i=n/2-1 ; i>=0 ; i--)
250+
if(_siftup((PyListObject *)heap, i) == -1)
251+
goto fail;
253252

254253
sol = PyList_GET_ITEM(heap, 0);
255254
while (1) {
@@ -290,6 +289,162 @@ PyDoc_STRVAR(nlargest_doc,
290289
\n\
291290
Equivalent to: sorted(iterable, reverse=True)[:n]\n");
292291

292+
static int
293+
_siftdownmax(PyListObject *heap, int startpos, int pos)
294+
{
295+
PyObject *newitem, *parent;
296+
int cmp, parentpos;
297+
298+
assert(PyList_Check(heap));
299+
if (pos >= PyList_GET_SIZE(heap)) {
300+
PyErr_SetString(PyExc_IndexError, "index out of range");
301+
return -1;
302+
}
303+
304+
newitem = PyList_GET_ITEM(heap, pos);
305+
Py_INCREF(newitem);
306+
/* Follow the path to the root, moving parents down until finding
307+
a place newitem fits. */
308+
while (pos > startpos){
309+
parentpos = (pos - 1) >> 1;
310+
parent = PyList_GET_ITEM(heap, parentpos);
311+
cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
312+
if (cmp == -1)
313+
return -1;
314+
if (cmp == 1)
315+
break;
316+
Py_INCREF(parent);
317+
Py_DECREF(PyList_GET_ITEM(heap, pos));
318+
PyList_SET_ITEM(heap, pos, parent);
319+
pos = parentpos;
320+
}
321+
Py_DECREF(PyList_GET_ITEM(heap, pos));
322+
PyList_SET_ITEM(heap, pos, newitem);
323+
return 0;
324+
}
325+
326+
static int
327+
_siftupmax(PyListObject *heap, int pos)
328+
{
329+
int startpos, endpos, childpos, rightpos;
330+
int cmp;
331+
PyObject *newitem, *tmp;
332+
333+
assert(PyList_Check(heap));
334+
endpos = PyList_GET_SIZE(heap);
335+
startpos = pos;
336+
if (pos >= endpos) {
337+
PyErr_SetString(PyExc_IndexError, "index out of range");
338+
return -1;
339+
}
340+
newitem = PyList_GET_ITEM(heap, pos);
341+
Py_INCREF(newitem);
342+
343+
/* Bubble up the smaller child until hitting a leaf. */
344+
childpos = 2*pos + 1; /* leftmost child position */
345+
while (childpos < endpos) {
346+
/* Set childpos to index of smaller child. */
347+
rightpos = childpos + 1;
348+
if (rightpos < endpos) {
349+
cmp = PyObject_RichCompareBool(
350+
PyList_GET_ITEM(heap, childpos),
351+
PyList_GET_ITEM(heap, rightpos),
352+
Py_LE);
353+
if (cmp == -1)
354+
return -1;
355+
if (cmp == 1)
356+
childpos = rightpos;
357+
}
358+
/* Move the smaller child up. */
359+
tmp = PyList_GET_ITEM(heap, childpos);
360+
Py_INCREF(tmp);
361+
Py_DECREF(PyList_GET_ITEM(heap, pos));
362+
PyList_SET_ITEM(heap, pos, tmp);
363+
pos = childpos;
364+
childpos = 2*pos + 1;
365+
}
366+
367+
/* The leaf at pos is empty now. Put newitem there, and and bubble
368+
it up to its final resting place (by sifting its parents down). */
369+
Py_DECREF(PyList_GET_ITEM(heap, pos));
370+
PyList_SET_ITEM(heap, pos, newitem);
371+
return _siftdownmax(heap, startpos, pos);
372+
}
373+
374+
/* nsmallest(iterable, n) -> list
 *
 * Collects up to n items from the iterable into a list, arranges them as
 * a max-heap with _siftupmax(), and then replaces the heap root with each
 * later item that does not satisfy "root <= item".  The list is sorted
 * before it is returned.
 *
 * Returns a new list on success, or NULL with an exception set when
 * argument parsing, iteration, allocation, a comparison or the final
 * sort fails.
 */
static PyObject *
nsmallest(PyObject *self, PyObject *args)
{
    PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
    int i, n, cmp;

    if (!PyArg_ParseTuple(args, "Oi:nsmallest", &iterable, &n))
        return NULL;

    it = PyObject_GetIter(iterable);
    if (it == NULL)
        return NULL;

    heap = PyList_New(0);
    if (heap == NULL)    /* was re-testing 'it', missing allocation failure */
        goto fail;

    /* Seed the heap with (at most) the first n items. */
    for (i=0 ; i<n ; i++ ){
        elem = PyIter_Next(it);
        if (elem == NULL) {
            /* NULL means either exhaustion or an error raised by
               the iterator; only the former may return a result. */
            if (PyErr_Occurred())
                goto fail;
            goto sortit;
        }
        if (PyList_Append(heap, elem) == -1) {
            Py_DECREF(elem);
            goto fail;
        }
        Py_DECREF(elem);
    }
    n = PyList_GET_SIZE(heap);
    if (n == 0)
        goto sortit;

    /* Turn the list into a max-heap, bottom-up from the last parent. */
    for (i=n/2-1 ; i>=0 ; i--)
        if (_siftupmax((PyListObject *)heap, i) == -1)
            goto fail;

    /* los: the heap root, i.e. the largest of the items kept so far. */
    los = PyList_GET_ITEM(heap, 0);
    while (1) {
        elem = PyIter_Next(it);
        if (elem == NULL) {
            if (PyErr_Occurred())
                goto fail;
            goto sortit;
        }
        cmp = PyObject_RichCompareBool(los, elem, Py_LE);
        if (cmp == -1) {
            /* Comparison raised: propagate instead of treating the
               error value as "true" and silently skipping elem. */
            Py_DECREF(elem);
            goto fail;
        }
        if (cmp == 1) {
            /* elem is not smaller than the current root; skip it. */
            Py_DECREF(elem);
            continue;
        }

        /* Replace the root with elem (the list takes over our
           reference) and restore the heap ordering. */
        oldelem = PyList_GET_ITEM(heap, 0);
        PyList_SET_ITEM(heap, 0, elem);
        Py_DECREF(oldelem);
        if (_siftupmax((PyListObject *)heap, 0) == -1)
            goto fail;
        los = PyList_GET_ITEM(heap, 0);
    }

sortit:
    Py_DECREF(it);
    if (PyList_Sort(heap) == -1) {
        /* 'it' has already been released, so do not jump to fail. */
        Py_DECREF(heap);
        return NULL;
    }
    return heap;

fail:
    Py_DECREF(it);
    Py_XDECREF(heap);
    return NULL;
}
442+
443+
PyDoc_STRVAR(nsmallest_doc,
444+
"Find the n smallest elements in a dataset.\n\
445+
\n\
446+
Equivalent to: sorted(iterable)[:n]\n");
447+
293448
static PyMethodDef heapq_methods[] = {
294449
{"heappush", (PyCFunction)heappush,
295450
METH_VARARGS, heappush_doc},
@@ -301,6 +456,8 @@ static PyMethodDef heapq_methods[] = {
301456
METH_O, heapify_doc},
302457
{"nlargest", (PyCFunction)nlargest,
303458
METH_VARARGS, nlargest_doc},
459+
{"nsmallest", (PyCFunction)nsmallest,
460+
METH_VARARGS, nsmallest_doc},
304461
{NULL, NULL} /* sentinel */
305462
};
306463

0 commit comments

Comments
 (0)