Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c0b4034

Browse files
committed
Improved clarity and thoroughness of docstring.
Added design notes in comments. Used better variable names. Eliminated the unsavory "pool[-k:]" which was an aspiring bug (for k==0). Used if/else to show the two algorithms in parallel style. Added one more test assertion.
1 parent 674dae2 commit c0b4034

1 file changed

Lines changed: 41 additions & 20 deletions

File tree

Lib/random.py

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -377,39 +377,59 @@ def shuffle(self, x, random=None, int=int):
377377
def sample(self, population, k, random=None, int=int):
378378
"""Chooses k unique random elements from a population sequence.
379379
380-
Returns a new list containing elements from the population. The
381-
list itself is in random order so that all sub-slices are also
382-
random samples. The original sequence is left undisturbed.
380+
Returns a new list containing elements from the population while
381+
leaving the original population unchanged. The resulting list is
382+
in selection order so that all sub-slices will also be valid random
383+
samples. This allows raffle winners (the sample) to be partitioned
384+
into grand prize and second place winners (the subslices).
383385
384-
If the population has repeated elements, then each occurrence is
385-
a possible selection in the sample.
386+
Members of the population need not be hashable or unique. If the
387+
population contains repeats, then each occurrence is a possible
388+
selection in the sample.
386389
387-
If indices are needed for a large population, use xrange as an
388-
argument: sample(xrange(10000000), 60)
390+
To choose a sample in a range of integers, use xrange as an argument.
391+
This is especially fast and space efficient for sampling from a
392+
large population: sample(xrange(10000000), 60)
389393
390394
Optional arg random is a 0-argument function returning a random
391395
float in [0.0, 1.0); by default, the standard random.random.
392396
"""
393397

398+
# Sampling without replacement entails tracking either potential
399+
# selections (the pool) or previous selections.
400+
401+
# Pools are stored in lists which provide __getitem__ for selection
402+
# and provide a way to remove selections. But each list.remove()
403+
# rebuilds the entire list, so it is better to rearrange the list,
404+
# placing non-selected elements at the head of the list. Tracking
405+
# the selection pool is only space efficient with small populations.
406+
407+
# Previous selections are stored in dictionaries which provide
408+
# __contains__ for detecting repeat selections. Discarding repeats
409+
# is efficient unless most of the population has already been chosen.
410+
# So, tracking selections is useful when sample sizes are much
411+
# smaller than the total population.
412+
394413
n = len(population)
395414
if not 0 <= k <= n:
396415
raise ValueError, "sample larger than population"
397416
if random is None:
398417
random = self.random
418+
result = [None] * k
399419
if n < 6 * k: # if n len list takes less space than a k len dict
400-
pool = list(population)
401-
for i in xrange(n-1, n-k-1, -1):
402-
j = int(random() * (i+1))
403-
pool[i], pool[j] = pool[j], pool[i]
404-
return pool[-k:]
405-
inorder = [None] * k
406-
selections = {}
407-
for i in xrange(k):
408-
j = int(random() * n)
409-
while j in selections:
420+
pool = list(population) # track potential selections
421+
for i in xrange(k):
422+
j = int(random() * (n-i)) # non-selected at [0,n-i)
423+
result[i] = pool[j] # save selected element
424+
pool[j] = pool[n-i-1] # non-selected to head of list
425+
else:
426+
selected = {} # track previous selections
427+
for i in xrange(k):
410428
j = int(random() * n)
411-
selections[j] = inorder[i] = population[j]
412-
return inorder # return selections in the order they were picked
429+
while j in selected: # discard and replace repeats
430+
j = int(random() * n)
431+
result[i] = selected[j] = population[j]
432+
return result # return selections in the order they were picked
413433

414434
## -------------------- real-valued distributions -------------------
415435

@@ -756,6 +776,7 @@ def _test_sample(n):
756776
for k in xrange(n+1):
757777
s = sample(population, k)
758778
assert len(dict([(elem,True) for elem in s])) == len(s) == k
779+
assert None not in s
759780

760781
def _sample_generator(n, k):
761782
# Return a fixed element from the sample. Validates random ordering.
@@ -787,7 +808,7 @@ def _test(N=2000):
787808
_test_generator(N, 'weibullvariate(1.0, 1.0)')
788809
_test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4
789810
_test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
790-
_test_sample(1000)
811+
_test_sample(500)
791812

792813
# Test jumpahead.
793814
s = getstate()

0 commit comments

Comments
 (0)