实现快速排序，几乎可以工作但不能

implementation of quick sort, almost working but not

本文关键字：工作但不能快速排序实现更新时间：2023-10-16

尝试用中的这段代码实现快速排序算法

除了数组中的一个元素没有排序外，代码几乎可以工作了，我认为它一定与partition函数中使用的i,j有关，不确定。。

我执行它是否正确？

这些是相关功能：

int partition(int a[], int start, int end){
int i,j,pivot,temp;
pivot = a[end]; //select last elem as the pivot
i = start;   // point i and j to before/after start end of the arr
j= end-1;
while(true){
//reduce j until we reach an elem that is less than pivot
if(a[j] >= pivot){ j= j-1; }    
//increase i until we reach an elem that is more than pivot
if(a[i] <= pivot){ i = i+1; }  

if(i<j){  //swap elems so that it is partitioned
temp = a[j];
a[j] = a[i];
a[i] = temp;
}
else{ return j; } //return the boundary for this partition
}
}
void quick_sort(int arr[], int start, int end){
int boundary;
if(start<end){
boundary = partition(arr,start,end);
quick_sort(arr,start,boundary);
quick_sort(arr,boundary+1, end);
}
return;
}

初始调用是：quick_sort(a,0,n);，其中a是长度为n的数组

这是我试图遵循的算法：(p和r是开始和结束索引)

Partition(A,p,r)
x <- A[p]
i <- p-1
j <- r+1
while (True) {
repeat
j <- j-1
until (A[j] <= x)
repeat
i <- i+1
until (A[i] >= x)
if (i <j) 
exchange A[i] <- A[j]
else 
return(j)
}
}

我看了所有可能出错的情况，无法专注于这个问题。提前感谢您的帮助。

通过1

在您的(原始)分区代码中，您有：

int partition(int a[], int start, int end){
int i,j,pivot,temp;
pivot = a[end]; //select last elem as the pivot
i = start- 1;   // point i and j to before/after start end of the arr
j= end +1;
while(true){
//reduce j until we reach an elem that is less than pivot
if(a[j] >= pivot){ j= j-1; }    
//increase i until we reach an elem that is more than pivot
if(a[i] <= pivot){ i = i+1; }

您只应该访问a[start]到a[end]，但您从访问这些边界之外的循环的第一次迭代开始。这必然是问题的一部分。

修复不一定那么明显。去掉减法和加法是很诱人的，但这不一定是正确的。关键的事情之一是获得足够有用的诊断打印，以了解发生了什么

通过2

自Pass 1以来，已添加并修复伪代码。伪代码与维基百科上QuickSort下的代码非常相似。这是"Hoare Partitioning"主题的一个小变化——变化是在问题中使用repeat … until，而在维基百科上使用do … while。

algorithm quicksort(A, lo, hi) is
if lo < hi then
p := partition(A, lo, hi)
quicksort(A, lo, p)
quicksort(A, p + 1, hi)
algorithm partition(A, lo, hi) is
pivot := A[lo]
i := lo - 1
j := hi + 1
loop forever
do
i := i + 1
while A[i] < pivot
do
j := j - 1
while A[j] > pivot
if i >= j then
return j
swap A[i] with A[j]

您当前版本的分区代码是：

int partition(int a[], int start, int end){
int i,j,pivot,temp;
pivot = a[end]; //select last elem as the pivot
i = start;   // point i and j to before/after start end of the arr
j= end-1;
while(true){
//reduce j until we reach an elem that is less than pivot
if(a[j] >= pivot){ j= j-1; }    
//increase i until we reach an elem that is more than pivot
if(a[i] <= pivot){ i = i+1; }  
if(i<j){  //swap elems so that it is partitioned
temp = a[j];
a[j] = a[i];
a[i] = temp;
}
else{ return j; } //return the boundary for this partition
}
}

有许多关键区别：

您使用最后一个元素而不是第一个元素作为轴心
在递增i和递减j时，您根本没有循环

修复这些问题会产生这样的代码：

static inline void swap_ints(int *A, int i, int j)
{
int t = A[i];
A[i] = A[j];
A[j] = t;
}
static int partition(int a[], int start, int end)
{
int pivot = a[start];   // select first elem as the pivot
//printf("-->> P(%d,%d) - pivot %dn", start, end, pivot);
int i = start - 1;      // point i before start of the array
int j = end + 1;        // point j after end of the array
while (true)
{
do
{
j--;
//printf("---- j a[%d] = %dn", j, a[j]);
} while (a[j] > pivot);
do
{
i++;
//printf("---- i a[%d] = %dn", i, a[i]);
} while (a[i] < pivot);
if (i >= j)
break;
//printf("-<>- a[%d]=%d, a[%d]=%dn", j, a[j], i, a[i]);
swap_ints(a, i, j);
//printf("-><- a[%d]=%d, a[%d]=%dn", j, a[j], i, a[i]);
//dump_data("Post-swap", a, start, end);
}
//dump_data("Partition", a, start, end);
//printf("<<-- P(%d) = %dn", j, a[j]);
return j;
}

有很多被注释掉的打印功能，有时用来让我放心，事情正在按预期进行。

你的主要快速排序功能还可以，尽管它也有(大量)打印：

void quicksort(int arr[], int start, int end)
{
if (start < end)
{
dump_data("QS Pre-partition", arr, start, end);
int boundary = partition(arr, start, end);
printf("QS Partition: %d:%d:%dn", start, boundary, end);
dump_data("QS Pre-recursion L", arr, start, boundary);
quicksort(arr, start, boundary);
dump_data("QS Pre-recursion H", arr, boundary + 1, end);
quicksort(arr, boundary + 1, end);
dump_data("QS Post-Sort", arr, start, end);
}
}

dump_data()函数看起来像：

void dump_data(const char *tag, int *data, int start, int end)
{
printf("%s (%d):", tag, end - start + 1);
for (int i = start; i <= end; i++)
{
printf(" %d", data[i]);
}
putchar('n');
}

测试代码(main())看起来像：

int main(void)
{
int data[] =
{
/* random -n 20 0 9 | commalist -b '        ' -n 10 */
//4, 8, 0, 0, 3, 8, 3, 6, 5, 9,
//6, 8, 3, 6, 5, 5, 0, 8, 1, 1,
//3, 9, 4, 7, 2, 6, 9, 0, 6, 1,
//8, 0, 2, 1, 4, 0, 6, 5, 4, 2,
//7, 6, 2, 5, 4, 4, 6, 0, 8, 3,
//6, 1, 2, 7, 4, 3, 0, 0, 0, 4,
//4, 7, 8, 8, 4, 4, 4, 4, 9, 6,
//9, 0, 2, 7, 6, 5, 9, 2, 7, 7,
9, 7, 0, 9, 5, 4, 8, 7, 9, 9,
2, 9, 9, 7, 0, 3, 9, 6, 8, 5,
//5, 1, 4, 5, 5, 4, 0, 2, 6, 1,
//5, 8, 1, 0, 1, 9, 8, 4, 8, 0,
};
enum { NUM_DATA = sizeof(data) / sizeof(data[0]) };
int data_copy[NUM_DATA];
for (int end = 1; end < NUM_DATA; end++)
{
memcpy(data_copy, data, NUM_DATA * sizeof(data[0]));
dump_data("Unsorted", data_copy, 0, end);
quicksort(data_copy, 0, end);
dump_data("PostSort", data_copy, 0, end);
check_sorted(data_copy, 0, end);
}
return 0;
}

check_sorted()函数通常不会说什么(但在未排序的数据上使用时会说)：

void check_sorted(int *data, int lo, int hi)
{
for (int i = lo + 1; i < hi; i++)
{
if (data[i-1] > data[i])
printf("Inversion @ A[%d] = %d vs A[%d] = %dn", i-1, data[i-1], i, data[i]);
}
}

在main()程序中，存在范围为0..9的20个随机数的各种集合。

仔细重读维基百科页面的第N个时间显示：

分区：对数组进行重新排序，使所有值小于枢轴的元素位于枢轴之前，而所有值大于枢轴的元素则位于枢轴之后(相等的值可以任意选择)。

这个尾部注释的前一个版本担心数据透视值有时会出现在分区数据的下半部分，但注意到排序仍然有效。引用表明这是完全允许的——没有问题。有一些分区方案，比如fat-pistro方案，但事实并非如此。