Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b25aa36

Browse files
committed
Improve the memory performance and speed of heapq.nsmallest() by using
an alternate algorithm when the number of selected items is small relative to the full iterable.
1 parent 2e66940 commit b25aa36

2 files changed

Lines changed: 24 additions & 0 deletions

File tree

Lib/heapq.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
'nsmallest']
131131

132132
from itertools import islice, repeat
133+
import bisect
133134

134135
def heappush(heap, item):
135136
"""Push item onto heap, maintaining the heap invariant."""
@@ -196,6 +197,28 @@ def nsmallest(iterable, n):
196197
197198
Equivalent to: sorted(iterable)[:n]
198199
"""
200+
if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
201+
# For smaller values of n, the bisect method is faster than a minheap.
202+
# It is also memory efficient, consuming only n elements of space.
203+
it = iter(iterable)
204+
result = sorted(islice(it, 0, n))
205+
if not result:
206+
return result
207+
insort = bisect.insort
208+
pop = result.pop
209+
los = result[-1] # los --> Largest of the nsmallest
210+
for elem in it:
211+
if los <= elem:
212+
continue
213+
insort(result, elem)
214+
pop()
215+
los = result[-1]
216+
return result
217+
# An alternative approach materializes the whole iterable in memory but
218+
# saves comparisons by heapifying all at once. Also, saves time
219+
# over bisect.insort() which has O(n) data movement time for every
220+
# insertion. Finding the n smallest of an m length iterable requires
221+
# O(m) + O(n log m) comparisons.
199222
h = list(iterable)
200223
heapify(h)
201224
return map(heappop, repeat(h, min(n, len(h))))

Lib/test/test_heapq.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def test_heapsort(self):
9292
def test_nsmallest(self):
9393
data = [random.randrange(2000) for i in range(1000)]
9494
self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
95+
self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
9596

9697
def test_largest(self):
9798
data = [random.randrange(2000) for i in range(1000)]

0 commit comments

Comments
 (0)