File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 130130 'nsmallest' ]
131131
132132from itertools import islice , repeat
133+ import bisect
133134
134135def heappush (heap , item ):
135136 """Push item onto heap, maintaining the heap invariant."""
@@ -196,6 +197,28 @@ def nsmallest(iterable, n):
196197
197198 Equivalent to: sorted(iterable)[:n]
198199 """
200+ if hasattr (iterable , '__len__' ) and n * 10 <= len (iterable ):
201+ # When n is small relative to the input, the bisect method is faster than
202+ # a min-heap. It is also memory-efficient, consuming only n elements of space.
203+ it = iter (iterable )
204+ result = sorted (islice (it , 0 , n ))
205+ if not result :
206+ return result
207+ insort = bisect .insort
208+ pop = result .pop
209+ los = result [- 1 ] # los --> Largest of the nsmallest
210+ for elem in it :
211+ if los <= elem :
212+ continue
213+ insort (result , elem )
214+ pop ()
215+ los = result [- 1 ]
216+ return result
217+ # An alternative approach materializes the whole iterable in memory but
218+ # saves comparisons by heapifying all at once. It also saves time
219+ # over bisect.insort(), which has O(n) data movement time for every
220+ # insertion. Finding the n smallest of an m-length iterable requires
221+ # O(m) + O(n log m) comparisons.
199222 h = list (iterable )
200223 heapify (h )
201224 return map (heappop , repeat (h , min (n , len (h ))))
Original file line number Diff line number Diff line change @@ -92,6 +92,7 @@ def test_heapsort(self):
9292 def test_nsmallest (self ):
9393 data = [random .randrange (2000 ) for i in range (1000 )]
9494 self .assertEqual (nsmallest (data , 400 ), sorted (data )[:400 ])
95+ self .assertEqual (nsmallest (data , 50 ), sorted (data )[:50 ])
9596
9697 def test_largest (self ):
9798 data = [random .randrange (2000 ) for i in range (1000 )]
You can’t perform that action at this time.
0 commit comments