python中的快速排序

摘要:
在工程实践中,通常需要将python代码转换为c++代码。为了获得相同的结果,需要确保算法的一致性。

       在工程实际中,经常需要将python代码转化成c++代码,为了获得一样的结果,需要保证算法的一致性。最近在目标检测的算法中,发现python默认排序算法为改进版的快速排序,描述如下:

* Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
* the code switches to the O(NlogN) worst case heapsort if not enough progress
* is made on the large side of the two quicksort partitions. This improves the
* worst case while still retaining the speed of quicksort for the common case.
* This is variant known as introsort.
*
*
* def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
*   # sort remainder with heapsort if we are not making enough progress
*   # we arbitrarily choose 2 * log(n) as the cutoff point
*   if recursion_limit < 0:
*       heapsort(lower, higher)
*       return
*
*   if lower < higher:
*      pivot_pos = partition(lower, higher)
*      # recurse into smaller first and leave larger on stack
*      # this limits the required stack space
*      if (pivot_pos - lower > higher - pivot_pos):
*          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*          quicksort(lower, pivot_pos, recursion_limit - 1)
*      else:
*          quicksort(lower, pivot_pos, recursion_limit - 1)
*          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*
*
* the below code implements this converted to an iteration and as an
* additional minor optimization skips the recursion depth checking on the
* smaller partition as it is always less than half of the remaining data and
* will thus terminate fast enough

从上面的内容可以看到,改进的算法结合了快速排序和堆排序,如果我们不按照这个算法去实现c++版的排序算法,很难保证结果一致性。

以下是c++代码,复现了python语言numpy库中的argsort的快速排序算法

#ifndef __myself__sort__h__h__
#define __myself__sort__h__h__

#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20

template<typename T>
void TYPE_SWAP(T* a, T* b)
{
    T t = *a;
    *a = *b;
    *b = t;
}

template<typename T>
bool TYPE_LT(T a, T b)
{
    return a<b;
}

int npy_get_msb(int n)
{
    int k;  
    for(k=0;n>1;n>>=1) 
        ++k;  
    return k; 
}

template<typename T>
int heap_sort(T *start, int n)
{
    T tmp, *a;
    int i,j,l;

    /* The array needs to be offset by one for heapsort indexing */
    a = start - 1;
    //先建立大顶堆
    for (l = n>>1; l > 0; --l) 
    {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) 
        {
            //因为假设根结点的序号是1,所以i结点左孩子和右孩子分别为2j和2i+1
            if (j < n && TYPE_LT(a[j], a[j+1])) //左右孩子的比较
            {
                    j += 1;//j为较大的记录的下标
            }
            if (TYPE_LT(tmp, a[j])) 
            {
                //将孩子结点上位,则以孩子结点的位置进行下一轮的筛选
                    a[i] = a[j];
                    i = j;
                    j += j;
            }
            else 
            {
                    break;
            }
        }
        a[i] = tmp;//插入最开始不和谐的元素
    }
    //进行排序
    for (; n > 1;) 
    {
        //最后一个元素和第一元素进行交换
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        //然后将剩下的无序元素继续调整为大顶堆
        for (i = 1, j = 2; j <= n;) 
        {
            if (j < n && TYPE_LT(a[j], a[j+1])) 
            {
                j++;
            }
            if (TYPE_LT(tmp, a[j])) 
            {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else 
            {
                break;
            }
        }
        a[i] = tmp;
    }    
    return 0;
}

template<typename T>
int quick_sort(T *start, int num)
{
    T vp;
    T *pl = start;
    T *pr = pl + num - 1;
    T *stack[PYA_QS_STACK];
    T **sptr = stack;
    T *pm, *pi, *pj, *pk;
    int depth[PYA_QS_STACK];
    int * psdepth = depth;
    int cdepth = npy_get_msb(num) * 2;

    for (;;) 
    {
        if (cdepth < 0) 
        {
            heap_sort(pl, pr - pl + 1);
            goto stack_pop;
        }
        while ((pr - pl) > SMALL_QUICKSORT) 
        {
            /* quicksort partition */
            pm = pl + ((pr - pl) >> 1);
            if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
            if (TYPE_LT(*pr, *pm)) TYPE_SWAP(pr, pm);
            if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
            vp = *pm;
            pi = pl;
            pj = pr - 1;
            TYPE_SWAP(pm, pj);
            for (;;) 
            {
                do ++pi; while (TYPE_LT(*pi, vp));
                do --pj; while (TYPE_LT(vp, *pj));
                if (pi >= pj) 
                {
                        break;
                }
                TYPE_SWAP(pi,pj);
            }
            pk = pr - 1;
            TYPE_SWAP(pi, pk);
            /* push largest partition on stack */
            if (pi - pl < pr - pi) 
            {
                *sptr++ = pi + 1;
                *sptr++ = pr;
                pr = pi - 1;
            }
            else 
            {
                *sptr++ = pl;
                *sptr++ = pi - 1;
                pl = pi + 1;
            }
            *psdepth++ = --cdepth;
        }

        /* insertion sort */
        for (pi = pl + 1; pi <= pr; ++pi) 
        {
            vp = *pi;
            pj = pi;
            pk = pi - 1;
            while (pj > pl && TYPE_LT(vp, *pk)) 
            {
                *pj-- = *pk--;
            }
            *pj = vp;
        }
stack_pop:
        if (sptr == stack) 
        {
            break;
        }
        pr = *(--sptr);
        pl = *(--sptr);
        cdepth = *(--psdepth);
    }
    return 0;
}
#endif

使用方法:

#include <iostream>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <map>
#include "mysort.h"
using namespace std;

typedef struct tagELEM
{
    double value;
    int index;
    bool operator < (const tagELEM& e)
    {
        return value<e.value;
    }
}ELEM, *PELEM;

int main(int argc, char **argv) 
{
    fstream fdata("data.txt", std::ios::in);
    if(!fdata.is_open())
        return -1;
    fstream fresult("result.txt", std::ios::in);
        if(!fresult.is_open())
        return -1;
    char data_buffer[255] ={0};
    char result_buffer[255] ={0};
    multimap<double, int> mapScore;
    vector<int> vecIdx;
    int idx = 0;
    
    ELEM elems[15280];
    while(!fresult.eof())
    {
        fdata.getline(data_buffer, 255);
        fresult.getline(result_buffer, 255);
        if(data_buffer[0]==0)
            continue;
        elems[idx].index = idx;
        elems[idx].value = atof(data_buffer);
        idx++;
         vecIdx.push_back(atoi(result_buffer));
    }
    
    quick_sort(elems, 15280);// 0, 15279);//, 15280);
    for(int i=0; i<15280; i++)
    {
        if(vecIdx[i] != elems[i].index)
        {
            std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	---------------------"<<elems[i].value<<std::endl;
        }
        else
        {
            std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	"<<elems[i].value<<std::endl;
        }
    }
    fdata.close();
    fresult.close();
    return 0;
}

data.txt为保存score的文件,result.tx保存了python排序结果

参考:

堆排序:http://www.cnblogs.com/mengdd/archive/2012/11/30/2796845.html

快速排序:http://blog.csdn.net/morewindows/article/details/6684558

python排序代码:https://github.com/numpy/numpy/tree/master/numpy/core/src/npysort

免责声明:文章转载自《python中的快速排序》仅用于学习参考。如对内容有疑问,请及时联系本站处理。

上篇Spring常用注解与J2EE注解总结kubernetes之健康状态检测下篇

宿迁高防,2C2G15M,22元/月;香港BGP,2C5G5M,25元/月 雨云优惠码:MjYwNzM=

相关文章

How to install Ta-Lib in Python

Technical indicators are calculated using historical price and volume data to predict the market direction. These indicators are added on charts using which you can set your entry...

修改pycharm中的flask项目名遇到的坑

曾修改过自己的项目名,并在settings中的解释器也更正过来了。然后执行pip list 报错: failed to create process. 解决方法如下: 到你的项目的venv目录下的Scripts文件夹下,有2个文件名pip3-script.py和pip-script.py,在第一行中更正你新项目名的解释器路径即可即可。 #!D:python...

python测试开发django-31.admin后台一对多操作ForeignKey

前言 平常的网页上有很多一对多的场景,比如填写银行卡信息的时候,会从银行列表下拉框选择对应的银行信息。一般会建两张表,一张表放银行的信息,一张表放银行卡信息。 每个银行可以对应多个银行卡,每个银行卡只能是一家银行的。那么银行名称和银行卡就是一对多的关系,反之,银行卡和银行名称就是多对一的关系 一对多表设计 准备两张表,银行信息(Bank)和卡号信息(Ca...

pybedtools:在Python中使用BEDTools

I have a Python, I have a BEDTools. Ah,pybedtools! 前言 pybedtools 是采用 Python 对BEDTools 软件的封装和扩展。为啥要用pybedtools ,而不是直接使用BEDTools 呢? 当然是我们想在Python 使用 BEDTools 啦() Why pybedtools? 作者也...

python源码为何都是pass

最近看Python代码  按照一个函数递进的看下去,最后发现,遇到很多源码什么逻辑都没写,仅仅以一个pass  结尾          但却能得到应该得到的结果,这点真的很奇怪,上网查找后 觉得下面的说法还挺符合我的想法,在这里粘贴出来,以后学习深入后继续增加或者修改相关表述: python是C语言实现的,尽管有很多标准库是由python代码实现,但是涉及...

pyspark 安装配置【linux &amp;amp;&amp;amp; win】

一、windows 安装 1、下载spark http://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz 2、配置spark 环境变量 set SPARK_HOME=d:spark2.3.1set PATH=%SPARK_HO...