Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f6b2667

Browse files
committed
Issue #16098: Update heapq.nsmallest to use the same algorithm as nlargest.
This removes the dependency on bisect and brings the pure Python code in sync with the C code.
1 parent 31584e3 commit f6b2667

1 file changed

Lines changed: 59 additions & 25 deletions

File tree

Lib/heapq.py

Lines changed: 59 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -127,8 +127,7 @@
127127
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
128128
'nlargest', 'nsmallest', 'heappushpop']
129129

130-
from itertools import islice, repeat, count, tee, chain
131-
import bisect
130+
from itertools import islice, count, tee, chain
132131

133132
def heappush(heap, item):
134133
"""Push item onto heap, maintaining the heap invariant."""
@@ -180,6 +179,19 @@ def heapify(x):
180179
for i in reversed(range(n//2)):
181180
_siftup(x, i)
182181

182+
def _heappushpop_max(heap, item):
183+
"""Maxheap version of a heappush followed by a heappop."""
184+
if heap and item < heap[0]:
185+
item, heap[0] = heap[0], item
186+
_siftup_max(heap, 0)
187+
return item
188+
189+
def _heapify_max(x):
190+
"""Transform list into a maxheap, in-place, in O(len(x)) time."""
191+
n = len(x)
192+
for i in reversed(range(n//2)):
193+
_siftup_max(x, i)
194+
183195
def nlargest(n, iterable):
184196
"""Find the n largest elements in a dataset.
185197
@@ -205,30 +217,16 @@ def nsmallest(n, iterable):
205217
"""
206218
if n < 0:
207219
return []
208-
if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
209-
# For smaller values of n, the bisect method is faster than a minheap.
210-
# It is also memory efficient, consuming only n elements of space.
211-
it = iter(iterable)
212-
result = sorted(islice(it, 0, n))
213-
if not result:
214-
return result
215-
insort = bisect.insort
216-
pop = result.pop
217-
los = result[-1] # los --> Largest of the nsmallest
218-
for elem in it:
219-
if elem < los:
220-
insort(result, elem)
221-
pop()
222-
los = result[-1]
220+
it = iter(iterable)
221+
result = list(islice(it, n))
222+
if not result:
223223
return result
224-
# An alternative approach manifests the whole iterable in memory but
225-
# saves comparisons by heapifying all at once. Also, saves time
226-
# over bisect.insort() which has O(n) data movement time for every
227-
# insertion. Finding the n smallest of an m length iterable requires
228-
# O(m) + O(n log m) comparisons.
229-
h = list(iterable)
230-
heapify(h)
231-
return list(map(heappop, repeat(h, min(n, len(h)))))
224+
_heapify_max(result)
225+
_heappushpop = _heappushpop_max
226+
for elem in it:
227+
_heappushpop(result, elem)
228+
result.sort()
229+
return result
232230

233231
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
234232
# is the index of a leaf with a possibly out-of-order value. Restore the
@@ -306,6 +304,42 @@ def _siftup(heap, pos):
306304
heap[pos] = newitem
307305
_siftdown(heap, startpos, pos)
308306

307+
def _siftdown_max(heap, startpos, pos):
308+
'Maxheap variant of _siftdown'
309+
newitem = heap[pos]
310+
# Follow the path to the root, moving parents down until finding a place
311+
# newitem fits.
312+
while pos > startpos:
313+
parentpos = (pos - 1) >> 1
314+
parent = heap[parentpos]
315+
if parent < newitem:
316+
heap[pos] = parent
317+
pos = parentpos
318+
continue
319+
break
320+
heap[pos] = newitem
321+
322+
def _siftup_max(heap, pos):
323+
'Maxheap variant of _siftup'
324+
endpos = len(heap)
325+
startpos = pos
326+
newitem = heap[pos]
327+
# Bubble up the larger child until hitting a leaf.
328+
childpos = 2*pos + 1 # leftmost child position
329+
while childpos < endpos:
330+
# Set childpos to index of larger child.
331+
rightpos = childpos + 1
332+
if rightpos < endpos and not heap[rightpos] < heap[childpos]:
333+
childpos = rightpos
334+
# Move the larger child up.
335+
heap[pos] = heap[childpos]
336+
pos = childpos
337+
childpos = 2*pos + 1
338+
# The leaf at pos is empty now. Put newitem there, and bubble it up
339+
# to its final resting place (by sifting its parents down).
340+
heap[pos] = newitem
341+
_siftdown_max(heap, startpos, pos)
342+
309343
# If available, use C implementation
310344
try:
311345
from _heapq import *

0 commit comments

Comments
 (0)