@@ -267,7 +267,7 @@ def shuffle(self, x, random=None, int=int):
267267 x [i ], x [j ] = x [j ], x [i ]
268268
269269 def sample (self , population , k ):
270- """Chooses k unique random elements from a population sequence.
270+ """Chooses k unique random elements from a population sequence or set.
271271
272272 Returns a new list containing elements from the population while
273273 leaving the original population unchanged. The resulting list is
@@ -284,15 +284,6 @@ def sample(self, population, k):
284284 large population: sample(range(10000000), 60)
285285 """
286286
287- # XXX Although the documentation says `population` is "a sequence",
288- # XXX attempts are made to cater to any iterable with a __len__
289- # XXX method. This has had mixed success. Examples from both
290- # XXX sides: sets work fine, and should become officially supported;
291- # XXX dicts are much harder, and have failed in various subtle
292- # XXX ways across attempts. Support for mapping types should probably
293- # XXX be dropped (and users should pass mapping.keys() or .values()
294- # XXX explicitly).
295-
296287 # Sampling without replacement entails tracking either potential
297288 # selections (the pool) in a list or previous selections in a set.
298289
@@ -303,37 +294,35 @@ def sample(self, population, k):
303294 # preferred since the list takes less space than the
304295 # set and it doesn't suffer from frequent reselections.
305296
297+ if isinstance (population , (set , frozenset )):
298+ population = tuple (population )
299+ if not hasattr (population , '__getitem__' ) or hasattr (population , 'keys' ):
300+ raise TypeError ("Population must be a sequence or set. For dicts, use dict.keys()." )
301+ random = self .random
306302 n = len (population )
307303 if not 0 <= k <= n :
308- raise ValueError ("sample larger than population" )
309- random = self .random
304+ raise ValueError ("Sample larger than population" )
310305 _int = int
311306 result = [None ] * k
312307 setsize = 21 # size of a small set minus size of an empty list
313308 if k > 5 :
314309 setsize += 4 ** _ceil (_log (k * 3 , 4 )) # table size for big sets
315- if n <= setsize or hasattr (population , "keys" ):
316- # An n-length list is smaller than a k-length set, or this is a
317- # mapping type so the other algorithm wouldn't work.
310+ if n <= setsize :
311+ # An n-length list is smaller than a k-length set
318312 pool = list (population )
319313 for i in range (k ): # invariant: non-selected at [0,n-i)
320314 j = _int (random () * (n - i ))
321315 result [i ] = pool [j ]
322316 pool [j ] = pool [n - i - 1 ] # move non-selected item into vacancy
323317 else :
324- try :
325- selected = set ()
326- selected_add = selected .add
327- for i in range (k ):
318+ selected = set ()
319+ selected_add = selected .add
320+ for i in range (k ):
321+ j = _int (random () * n )
322+ while j in selected :
328323 j = _int (random () * n )
329- while j in selected :
330- j = _int (random () * n )
331- selected_add (j )
332- result [i ] = population [j ]
333- except (TypeError , KeyError ): # handle (at least) sets
334- if isinstance (population , list ):
335- raise
336- return self .sample (tuple (population ), k )
324+ selected_add (j )
325+ result [i ] = population [j ]
337326 return result
338327
339328## -------------------- real-valued distributions -------------------
0 commit comments