Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9be6283

Browse files
committed
Tim's quicksort on May 25.
1 parent 16653cb commit 9be6283

1 file changed

Lines changed: 141 additions & 123 deletions

File tree

Objects/listobject.c

Lines changed: 141 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -625,46 +625,11 @@ docompare(x, y, compare)
625625
}
626626

627627
/* MINSIZE is the smallest array we care to partition; smaller arrays
628-
are sorted using a straight insertion sort (above). It must be at
629-
least 3 for the quicksort implementation to work. Assuming that
630-
comparisons are more expensive than everything else (and this is a
631-
good assumption for Python), it should be 10, which is the cutoff
632-
point: quicksort requires more comparisons than insertion sort for
633-
smaller arrays. */
634-
#define MINSIZE 10
635-
636-
/* Straight insertion sort. More efficient for sorting small arrays. */
637-
638-
static int
639-
insertionsort(array, size, compare)
640-
PyObject **array; /* Start of array to sort */
641-
int size; /* Number of elements to sort */
642-
PyObject *compare;/* Comparison function object, or NULL => default */
643-
{
644-
register PyObject **a = array;
645-
register PyObject **end = array+size;
646-
register PyObject **p;
647-
648-
for (p = a+1; p < end; p++) {
649-
register PyObject *key = *p;
650-
register PyObject **q = p;
651-
while (--q >= a) {
652-
register int k = docompare(key, *q, compare);
653-
/* if (p-q >= MINSIZE)
654-
fprintf(stderr, "OUCH! %d\n", p-q); */
655-
if (k == CMPERROR)
656-
return -1;
657-
if (k < 0) {
658-
*(q+1) = *q;
659-
*q = key; /* For consistency */
660-
}
661-
else
662-
break;
663-
}
664-
}
665-
666-
return 0;
667-
}
628+
are sorted using binary insertion. It must be at least 4 for the
629+
quicksort implementation to work. Binary insertion always requires
630+
fewer compares than quicksort, but does O(N**2) data movement. The
631+
more expensive compares are, the larger MINSIZE should be. */
632+
#define MINSIZE 49
668633

669634
/* STACKSIZE is the size of our work stack. A rough estimate is that
670635
this allows us to sort arrays of MINSIZE * 2**STACKSIZE, or large
@@ -673,20 +638,20 @@ insertionsort(array, size, compare)
673638
exactly in two.) */
674639
#define STACKSIZE 64
675640

676-
/* Quicksort algorithm. Return -1 if an exception occurred; in this
641+
/* quicksort algorithm. Return -1 if an exception occurred; in this
677642
case we leave the array partly sorted but otherwise in good health
678643
(i.e. no items have been removed or duplicated). */
679644

680645
static int
681646
quicksort(array, size, compare)
682-
PyObject **array; /* Start of array to sort */
683-
int size; /* Number of elements to sort */
647+
PyObject **array; /* Start of array to sort */
648+
int size; /* Number of elements to sort */
684649
PyObject *compare;/* Comparison function object, or NULL for default */
685650
{
686651
register PyObject *tmp, *pivot;
687652
register PyObject **l, **r, **p;
688-
register PyObject **lo, **hi;
689-
int top, k, n;
653+
PyObject **lo, **hi, **notp;
654+
int top, k, n, lisp, risp;
690655
PyObject **lostack[STACKSIZE];
691656
PyObject **histack[STACKSIZE];
692657

@@ -699,55 +664,66 @@ quicksort(array, size, compare)
699664
while (--top >= 0) {
700665
lo = lostack[top];
701666
hi = histack[top];
702-
703-
/* If it's a small one, use straight insertion sort */
704667
n = hi - lo;
705-
if (n < MINSIZE)
668+
669+
/* If it's a small one, use binary insertion sort */
670+
if (n < MINSIZE) {
671+
for (notp = lo+1; notp < hi; ++notp) {
672+
/* set l to where *notp belongs */
673+
l = lo;
674+
r = notp;
675+
pivot = *r;
676+
do {
677+
p = l + ((r - l) >> 1);
678+
k = docompare(pivot, *p, compare);
679+
if (k == CMPERROR)
680+
return -1;
681+
if (k < 0)
682+
r = p;
683+
else
684+
l = p + 1;
685+
} while (l < r);
686+
/* Pivot should go at l -- slide over to
687+
make room. Caution: using memmove
688+
is much slower under MSVC 5; we're
689+
not usually moving many slots. */
690+
for (p = notp; p > l; --p)
691+
*p = *(p-1);
692+
*l = pivot;
693+
}
706694
continue;
695+
}
707696

708-
/* Choose median of first, middle and last as pivot;
709-
these 3 are reverse-sorted in the process; the ends
710-
will be swapped on the first do-loop iteration.
711-
*/
712-
l = lo; /* First */
697+
/* Choose median of first, middle and last as pivot */
698+
l = lo; /* First */
713699
p = lo + (n>>1); /* Middle */
714-
r = hi - 1; /* Last */
700+
r = hi - 1; /* Last */
715701

716-
k = docompare(*l, *p, compare);
702+
k = docompare(*p, *l, compare);
717703
if (k == CMPERROR)
718704
return -1;
719705
if (k < 0)
720-
{ tmp = *l; *l = *p; *p = tmp; }
706+
{ tmp = *p; *p = *l; *l = tmp; }
721707

722-
k = docompare(*p, *r, compare);
708+
k = docompare(*r, *p, compare);
723709
if (k == CMPERROR)
724710
return -1;
725711
if (k < 0)
726-
{ tmp = *p; *p = *r; *r = tmp; }
712+
{ tmp = *r; *r = *p; *p = tmp; }
727713

728-
k = docompare(*l, *p, compare);
714+
k = docompare(*p, *l, compare);
729715
if (k == CMPERROR)
730716
return -1;
731717
if (k < 0)
732-
{ tmp = *l; *l = *p; *p = tmp; }
718+
{ tmp = *p; *p = *l; *l = tmp; }
733719

734720
pivot = *p;
721+
l++;
722+
r--;
735723

736724
/* Partition the array */
737-
do {
738-
tmp = *l; *l = *r; *r = tmp;
739-
if (l == p) {
740-
p = r;
741-
l++;
742-
}
743-
else if (r == p) {
744-
p = l;
745-
r--;
746-
}
747-
else {
748-
l++;
749-
r--;
750-
}
725+
for (;;) {
726+
lisp = risp = 1; /* presumed guilty */
751727

752728
/* Move left index to element >= pivot */
753729
while (l < p) {
@@ -756,8 +732,10 @@ quicksort(array, size, compare)
756732
return -1;
757733
if (k < 0)
758734
l++;
759-
else
735+
else {
736+
lisp = 0;
760737
break;
738+
}
761739
}
762740
/* Move right index to element <= pivot */
763741
while (r > p) {
@@ -766,79 +744,119 @@ quicksort(array, size, compare)
766744
return -1;
767745
if (k < 0)
768746
r--;
769-
else
747+
else {
748+
risp = 0;
770749
break;
750+
}
751+
}
752+
753+
if (lisp == risp) {
754+
/* assert l < p < r or l == p == r
755+
* This is the most common case, so we
756+
* strive to get back to the top of the
757+
* loop ASAP.
758+
*/
759+
tmp = *l; *l = *r; *r = tmp;
760+
l++; r--;
761+
if (l < r)
762+
continue;
763+
break;
771764
}
772765

773-
} while (l < r);
766+
/* One (exactly) of the pointers is at p */
767+
/* assert (p == l) ^ (p == r) */
768+
notp = lisp ? r : l;
769+
k = (r - l) >> 1;
770+
if (k) {
771+
*p = *notp;
772+
if (lisp) {
773+
p = r - k;
774+
l++;
775+
}
776+
else {
777+
p = l + k;
778+
r--;
779+
}
780+
/* assert l < p < r */
781+
*notp = *p;
782+
*p = pivot; /* for consistency */
783+
continue;
784+
}
774785

775-
/* lo < l == p == r < hi-1
776-
*p == pivot
786+
/* assert l+1 == r */
787+
*p = *notp;
788+
*notp = pivot;
789+
p = notp;
790+
break;
791+
} /* end of partitioning loop */
777792

793+
/* assert *p == pivot
778794
All in [lo,p) are <= pivot
779795
At p == pivot
780796
All in [p+1,hi) are >= pivot
781-
782-
Now extend as far as possible (around p) so that:
783-
All in [lo,r) are <= pivot
784-
All in [r,l) are == pivot
785-
All in [l,hi) are >= pivot
786-
This wastes two compares if no elements are == to the
787-
pivot, but can win big when there are duplicates.
788-
Mildly tricky: continue using only "<" -- we deduce
789-
equality indirectly.
790797
*/
791-
while (r > lo) {
792-
/* because r-1 < p, *(r-1) <= pivot is known */
793-
k = docompare(*(r-1), pivot, compare);
794-
if (k == CMPERROR)
795-
return -1;
796-
if (k < 0)
797-
break;
798-
/* <= and not < implies == */
799-
r--;
800-
}
801798

802-
l++;
803-
while (l < hi) {
804-
/* because l > p, pivot <= *l is known */
805-
k = docompare(pivot, *l, compare);
806-
if (k == CMPERROR)
807-
return -1;
808-
if (k < 0)
809-
break;
810-
/* <= and not < implies == */
799+
r = p;
800+
l = p + 1;
801+
/* Partitions are [lo,r) and [l,hi).
802+
* See whether *l == pivot; we know *l >= pivot, so
803+
* they're equal iff *l <= pivot too, or not pivot < *l.
804+
* This wastes a compare if it fails, but can win big
805+
* when there are runs of duplicates.
806+
*/
807+
k = docompare(pivot, *l, compare);
808+
if (k == CMPERROR)
809+
return -1;
810+
if (!(k < 0)) {
811+
/* Now extend as far as possible (around p) so that:
812+
All in [lo,r) are <= pivot
813+
All in [r,l) are == pivot
814+
All in [l,hi) are >= pivot
815+
Mildly tricky: continue using only "<" -- we
816+
deduce equality indirectly.
817+
*/
818+
while (r > lo) {
819+
/* because r-1 < p, *(r-1) <= pivot is known */
820+
k = docompare(*(r-1), pivot, compare);
821+
if (k == CMPERROR)
822+
return -1;
823+
if (k < 0)
824+
break;
825+
/* <= and not < implies == */
826+
r--;
827+
}
828+
811829
l++;
812-
}
830+
while (l < hi) {
831+
/* because l > p, pivot <= *l is known */
832+
k = docompare(pivot, *l, compare);
833+
if (k == CMPERROR)
834+
return -1;
835+
if (k < 0)
836+
break;
837+
/* <= and not < implies == */
838+
l++;
839+
}
840+
841+
} /* end of checking for duplicates */
813842

814843
/* Push biggest partition first */
815844
if (r - lo >= hi - l) {
816845
/* First one is bigger */
817-
lostack[top] = lo;
846+
lostack[top] = lo;
818847
histack[top++] = r;
819-
lostack[top] = l;
848+
lostack[top] = l;
820849
histack[top++] = hi;
821850
} else {
822851
/* Second one is bigger */
823-
lostack[top] = l;
852+
lostack[top] = l;
824853
histack[top++] = hi;
825-
lostack[top] = lo;
854+
lostack[top] = lo;
826855
histack[top++] = r;
827856
}
828857
/* Should assert top <= STACKSIZE */
829858
}
830859

831-
/*
832-
* Ouch - even if I screwed up the quicksort above, the
833-
* insertionsort below will cover up the problem - just a
834-
* performance hit would be noticeable.
835-
*/
836-
837-
/* insertionsort is pretty fast on the partially sorted list */
838-
839-
if (insertionsort(array, size, compare) < 0)
840-
return -1;
841-
842860
/* Success */
843861
return 0;
844862
}

0 commit comments

Comments
 (0)