如何设计基于两个指标的排序算法

How to design sort algorithm based on two indicators?

本文关键字:排序 算法 于两个      更新时间:2023-10-16

我有一个容器(数组或向量),其中有数百万个单词。我需要按以下顺序对它们进行排序:

主要排序顺序应该是单词中的字符数。次要排序顺序应该是字典序。我不能使用任何库,例如sort。我想从头开始创建算法。如果有人能给我推荐一些参考资料,我将不胜感激。

所以对单词进行排序:

This is a list of unsorted words

应给予:

a is of This list words unsorted

编辑:

我不允许使用任何STL,如sort

//Following is my final program
//It will be run with the following args: <inputfile> <outputfile> <timesfile> <ntests>
//timesfile is for storing the times and ntests is the number of tests
/*
Bernard Grey
10 Wednesday 10 Sep 2014
*/
#include <iostream>
#include <ctime>
#include <algorithm>
#include <fstream>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <vector>
using namespace std;
//One record type serves both containers used by main():
// - in the hash table, `val` is the word that owns the slot and `count` is how
//   many times that word has been seen;
// - in the per-length buckets only `val` is used; `count` is kept for future use.
struct node
{
    std::string val;
    int count;

    //Start every node as "empty word, seen zero times" so that a node which is
    //copied before `count` is assigned never carries an indeterminate value.
    node() : val(), count(0) {}
};
//Definition of the inner and outer vectors used as containers of words:
//StringVector is one list of word nodes; StringVector2D is a list of such lists
//(main() keeps one inner vector per word length, index 0 holding 1-character words).
typedef std::vector<node> StringVector;
typedef std::vector<StringVector> StringVector2D;


//Cited at http://stackoverflow.com/questions/8317508/hash-function-for-a-string :In the comment
//Multiplicative string hash: folds every character of `word` into an
//accumulator (hash = hash * 378551 + character) and reduces the result to a
//slot index in the range [0, 1000000).
//The word is taken by const reference so that hashing does not copy it.
//NOTE: the modulus must match the size of the table indexed with the result.
int HashTable (const std::string& word)
{
   const int seed = 378551;
   unsigned long hash = 0;
   for(std::string::size_type i = 0; i < word.length(); ++i)
   {
      hash = (hash * seed) + word[i];
   }
   return static_cast<int>(hash % 1000000);//Later assign it to number of words
}
//Cite at: http://stackoverflow.com/questions/25726530/how-to-find-an-struct-element-in-a-two-dimention-vector
//Unary predicate: remembers one word and reports whether a given node holds
//exactly that word.
struct find_word
{
    std::string val;

    //Store a copy of the word to look for.
    find_word(std::string wanted) : val(wanted) {}

    //True when the node's word is identical to the stored one.
    bool operator () ( const node& m ) const
    {
        return m.val.compare(val) == 0;
    }
};

//Exchanges the nodes stored at positions i and j of vec.
//(The swap facilities of the standard library could be used instead.)
void swap(StringVector& vec, int i, int j)
{
    const node held = vec[j];
    vec[j] = vec[i];
    vec[i] = held;
}
//To compare string alphabetically order
bool comp(node& i,node& p)
{
    int cmp;
    if(i.val.compare(p.val)<0)
    {
        return true;
    }
    return false;
}
//Forward declarations of the helpers used further down.
//get_max_words is defined after main(), so it has to be declared here;
//without the prototype the call inside main() does not compile.
void quickSort(StringVector& aVec, int left, int right);
int partition(StringVector& aVec, int left, int right);
void swap(StringVector& aVec, int left, int right);
void get_max_words(std::ifstream& fin, int& wordlen, int& numwords);

//Recursive quicksort over aVec[left..right] (both bounds inclusive), ordering
//the nodes by their `val` string.
//Nothing is done when `right` is not positive; each half produced by
//partition() is only visited when it holds more than one element.
void quickSort(StringVector& aVec, int left, int right)
{
    if (right <= 0) {
        return;
    }

    const int split = partition(aVec, left, right);

    //Lower part: [left, split-1]
    if (split - 1 > left) {
        quickSort(aVec, left, split - 1);
    }
    //Upper part: [split, right]
    if (right > split) {
        quickSort(aVec, split, right);
    }
}
//Partition step of quickSort. The word of the middle element is copied and
//used as pivot; the two cursors move towards each other and out-of-place
//pairs are exchanged until the cursors have crossed.
//Returns the final position of the left cursor, i.e. the first index of the
//upper part.
//NOTE(review): assumes left and right are valid indices of aVec - the callers
//in this file are expected to guarantee that.
int partition(StringVector& aVec, int left, int right)
{
    const string pivotWord = aVec[(left + right) / 2].val;

    while (left <= right) {
        //Skip everything on the left that already orders before the pivot
        for (; aVec[left].val.compare(pivotWord) < 0; ++left) {}
        //Skip everything on the right that already orders after the pivot
        for (; aVec[right].val.compare(pivotWord) > 0; --right) {}

        if (left > right) {
            break;
        }
        swap(aVec, left, right);
        ++left;
        --right;
    }
    return left;
}
//Welcome to Maaaain
int main(int argc, char* argv[])
{
    /*file reading and preprocessing*/
    if(argc != 5)
    {
        cerr << "usage: " << argv[0]  << " infile outfile timesfile ntests" << endl;
    }
    ifstream fin(argv[1]);
    if(fin.fail())
    {
        cerr << "Error: failed to open file " << argv[1]  << " for input" << endl;
        exit(EXIT_FAILURE);
    }
    int ntests = atoi(argv[4]);
    //Len of string and max num word
    int stringlen, numwords;
    get_max_words(fin, stringlen, numwords);
    //initial string
    string init[numwords];

    //Read the file and add it to first array
    for(int i=0; i<numwords; i++)
    {
        string tmp;
        fin >> tmp;
        int len = tmp.length();
        //There is one single ' in the example output file. so I do not want to delete that one :-)
        bool pp = true;
        //Remove punct from leading and tail
        if(len==1)
        {
            pp=false;
        }
        //Remove punc
        if( ispunct(tmp[0]) && pp)
        {
            tmp.erase(0,1);
        }
        //Remove punc
        if( ispunct(tmp[len-1]) && pp)
        {
            tmp.erase(len-1,1);
        }
        init[i] =tmp;
    }
    /*
    At this point, everything should be in the initial array
    The temporary array should be declared but not filled
    */
    clockid_t cpu;
    timespec start, end;
    long time[ntests];
    //2 Dimension vector this will called outer vector
    StringVector2D twoD;

    if(clock_getcpuclockid(0, &cpu) != 0)
    {
        cerr << "Error: could not get cpu clock" << endl;
        exit(EXIT_FAILURE);
    }
    int rep = 0;

    node tmp;
    tmp.count = 0;
    tmp.val = "";
    //Later I need to assign it to number of words * M ... Good for encryption... It is not a security subject
    vector<node> first(1000000,tmp);
    //This is called inner vector
    vector<string> templateVec;
    //Last search?
    bool last = false;
    //Initialize inner map as needed and put it inside the outer vector with no data
    for(int f=0;f<(stringlen);f++)
    {   
        StringVector myVec;
        twoD.push_back(myVec);
    }

    for(int i=0; i<ntests; i++)
    {   
        if(clock_gettime(cpu, &start) == -1)
        {
            cerr << "Error: could not get start time" << endl;
            exit(EXIT_FAILURE);
        }

        //Check if it is last iteration so do not delete data for printing purposeses
        if(i == ntests-1)
        {
            last = true;
        }
        /*copy from initial array to temporary array*/
        //Initialize inner vector with the values. In this point outer vector is filled with inner vector
        //&&&  inner vector is empty  myvec.empty() = true;
        //vector at index 0 is for words with one char... vector 1 is for words with two chars and so on...
        for(int j=0; j<numwords; j++)
        {
            int len = init[j].length()-1;
            if(len<0)continue;
            //Initilize a node to fill up the vector
            node currNode;
            currNode.val = init[j];
            //currNode.count = 0;           
            int hash  =  HashTable(init[j]);
            //Node already existed
            if(first[hash].count != 0){
                //Add to its value in hash table
                first[hash].count++;
            }
            else
            {
                //Activate word first time!
                first[hash].count =1;
                //I can even not use this because of the hash table but it may help in future improvment!!!
                first[hash].val = init[j];
                //Add the word to appropriate level in outer string! 1char == [0] ---  2char== [1] so on
                twoD[len].push_back(currNode);
            }   
        }
        //Sort Alphabetically order
        for(int f=0;f<(stringlen);f++)
        {
            //Eficcient sorting algorithm with no chance of segmentation dump ;)
            quickSort(twoD[f],0,twoD[f].size()-1);          
        }
        //Time finished
        if(clock_gettime(cpu, &end) == -1)
        {
            cerr << "Error: could not get end time" << endl;
            exit(EXIT_FAILURE);
        }
        //Delete items from vector if it is not last iteration --- This is not part of sorting algorithm so it is after clock
        if(!last)
        {
            for(int f=0;f<stringlen;f++)
            {
                twoD[f].clear();
            }
            twoD.clear();
            for(StringVector::iterator it3 = first.begin();it3!=first.end();it3++)
            {
                it3->val="";
                it3->count=0;
            }
            //Initialize inner map as needed and put it inside the outer vector 
            for(int f=0;f<(stringlen);f++)
            {
                StringVector myVec;
                twoD.push_back(myVec);
            }           
        }
        /*time per trial in nanoseconds*/
        time[i] = (end.tv_sec - start.tv_sec)*1000000000 + end.tv_nsec - start.tv_nsec;
    }

    /*output sorted temporary array*/
    int k=0;
    int y =0;
    int num=0;
    ofstream fout(argv[2]); 
    //Pointer for inner vector
    StringVector::iterator it2;
    for (StringVector2D::iterator outer = twoD.begin();  outer != twoD.end();  ++outer){
        y++;
        k=0;
        for (it2= outer->begin(); it2!=outer->end(); ++it2){
            //Get back data from hash table
            int hash  =  HashTable(it2->val);
            //Number of word in other field of the node
            int repWord = first[hash].count;
            //Print according to that
            for(int g=0; g < repWord ;g++){
                    num++;
                    //10 char in one line
                    if(num%10 == 0)
                    {
                        fout << it2->val;
                        fout<<endl;
                        k++;
                    }
                    else
                    {
                        fout<< it2->val << "  ";
                    }
                }
            }
        }
    //Sort times with STL for god sake....
    sort(time,time+ntests);
    //print times to the file///
    ofstream ftimes(argv[3]);
    for(int i=0; i<ntests; i++)
        ftimes << time[i] << endl;
}

//Helper function: scans the whole stream once and reports
//  numwords - the number of whitespace-separated words, and
//  wordlen  - the length (in characters) of the longest of them.
//Afterwards the stream state is cleared and the read position is moved back to
//the beginning, so that the caller can read the words themselves.
//Each character is converted to unsigned char before it is classified, because
//isspace is only defined for unsigned char values and EOF.
void get_max_words(std::ifstream& fin, int& wordlen, int& numwords)
{
    wordlen = 0;
    numwords = 0;

    char c = '\0';
    int count = 0;          //length of the word currently being read
    bool inWord = false;    //true while the cursor is inside a word
    while(fin.get(c))
    {
        if(std::isspace(static_cast<unsigned char>(c)))
        {
            inWord = false;
            count = 0;
            continue;
        }
        if(!inWord)
        {
            //first character of a new word
            inWord = true;
            ++numwords;
        }
        ++count;
        if(count > wordlen)
            wordlen = count;
    }
    fin.clear();
    fin.seekg(0, std::ios::beg);
}

您的sort例程主要需要一个比较器来进行排序:

// Two-key "less than" for words: the shorter word comes first; words of equal
// length are ordered lexicographically.
// The arguments are taken by const reference so that a comparison does not
// copy both strings.
bool lessThan(const std::string& a, const std::string& b) {
    if (a.length() != b.length())
         return a.length() < b.length();
    return a < b;
}

实际上有一种简单的方法可以在stl中实现这一点。有一种sort方法需要一个比较器:

// Signature of the sort overload that accepts a comparator: it orders the
// range [first, last) using comp to decide which of two elements comes first.
template <class RandomAccessIterator, class Compare>
  void sort (RandomAccessIterator first, RandomAccessIterator last, Compare comp);

所以你可以这样做:

// Orders two words by length; words of the same length are ordered
// lexicographically. Returns true when a has to come before b.
bool comparator(const std::string& a, const std::string& b) {
    const std::string::size_type lenA = a.length();
    const std::string::size_type lenB = b.length();
    return lenA == lenB ? a < b : lenA < lenB;
}
// Usage: sort the whole range of `words` with the two-key comparator.
// NOTE(review): `words` is not declared in this snippet - presumably a
// std::vector<std::string>; the call also has to live inside a function.
sort(words.begin(), words.end(), comparator);

这是关于基于多个键的排序。我建议你研究一些有效的排序算法,比如快速排序,然后改变比较器以适应多个键。

对于任何基于比较的排序算法,调整多关键字排序的最简单方法是将比较标准从单个值更改为多个值。

如果你甚至不被允许使用STL,也就是说,你不被允许使用<algorithm>中的sort,这里有一篇文章你可以从中开始:使用多个排序标准对数组进行排序(快速排序)

如果允许,只需编写一个支持多关键字比较的比较函数,并将其插入sort函数即可。您可以查看此C++参考以了解更多详细信息。

一个说明(这只是一个说明如何插入比较函数的说明):

// Two-key ordering used by Qsort: a shorter word comes first; words of the
// same length are ordered lexicographically.
bool comparator(const std::string& a, const std::string& b) {
    if (a.length() != b.length())
        return a.length() < b.length();
    return a < b;
}

// Quicksort of a[low..high] (both bounds inclusive) according to comparator().
//
// A copy of the middle element is used as pivot. The cursors run towards each
// other and exchange out-of-place pairs until they have crossed; afterwards
// everything in [low, right] orders no later than everything in [left, high],
// and both parts are sorted recursively.
// The scans need no explicit bounds check: the pivot (or an element that was
// swapped across) always stops each cursor inside the range.
//
// The previous version could stop with both cursors on the same element and
// then skip the lower part entirely, which left inputs such as
// {"b", "a", "c"} unsorted.
void Qsort(std::string a[], int low, int high)
{
    if (low >= high)
    {
        return;
    }
    const std::string key = a[low + (high - low) / 2];
    int left = low;
    int right = high;
    while (left <= right)
    {
        while (comparator(a[left], key)) left++;
        while (comparator(key, a[right])) right--;
        if (left <= right)
        {
            std::swap(a[left], a[right]);
            left++; right--;
        }
    }
    if (low < right) Qsort(a, low, right);
    if (left < high) Qsort(a, left, high);
}

这个问题想要的是一个设计,所以我将重点关注排序库的设计,而不是实现

您的排序算法可以接受一个自定义比较器对象,该对象实现成员运算符operator(),用于在两个元素之间进行比较。

您的比较器可以是比较器的链表,如果当前比较器比较结果相等(打成平手),则可以调用下一个比较器。不过,你必须确保最终总能返回true或false。或者实现一个稳定的排序。

因此,第一个比较器是字符数,第二个比较器是字典式的。。这样一来,如果您的需求明天发生变化,这个想法就足够通用了。然后可以重复使用。

这符合责任链模式。掌握要点后,您可以将比较器模板化。

例如:

// One link of a chain of comparators (chain of responsibility).
//
// operator() answers "does a order before b?". The link first asks its own
// criterion; only when that criterion sees a tie is the question handed to the
// next link. When the last link also sees a tie, the elements are equivalent
// and the answer is false.
//
// Derived classes supply the criterion through a_is_less / b_is_less.
// The next link is not owned by this object; it has to outlive it.
class Chain_Comparator
{
    Chain_Comparator* next;   // following link, or 0 when this is the last one
public:
    // Creates a link; pass the following link, or nothing for the end of the chain.
    explicit Chain_Comparator( Chain_Comparator* next_link = 0 ) : next( next_link ) {}

    // Objects are used through the base class, so destruction must be virtual.
    virtual ~Chain_Comparator() {}

    // True when a orders strictly before b according to the whole chain.
    bool operator()( void* a, void* b )
    {
        if( a_is_less(a, b) )
            return true;
        if( b_is_less(a, b) )
            return false;
        // Tie on this criterion: defer to the next link, if there is one
        return next ? (*next)( a, b ) : false;
    }

    // True when a orders strictly before b by this link's own criterion.
    virtual bool a_is_less( void* a, void* b) = 0;
    // True when b orders strictly before a by this link's own criterion.
    virtual bool b_is_less( void* a, void* b) = 0;
};
// Placeholder for one link of the comparator chain; its body is left to the reader.
// NOTE(review): presumably the "number of characters" criterion - confirm.
// It cannot be instantiated until it overrides a_is_less and b_is_less.
class Num_Comparator : public Chain_Comparator
{
    // Implements a_is_less etc.
};
// Placeholder for the link that compares lexicographically; its body is left
// to the reader. It cannot be instantiated until it overrides a_is_less and
// b_is_less.
class Lex_Comparator : public Chain_Comparator
{
  // Implement lex comparisons.
};
// Skeleton of a sort routine that takes its ordering from a comparator chain.
// The body is intentionally empty.
// NOTE(review): `a` is passed by value, so an implementation would sort a
// private copy that the caller never sees - a reference is probably intended.
// NOTE(review): Chain_Comparator::operator() takes void*, so the elements
// would have to be passed by address, not as c( a[i], a[j] ) - confirm.
void your_custom_sorting_method( vector<int > a, Chain_Comparator& c)
{
// Implementation goes here.
//  call the operator() for c with simply : c( a[i], a[j] )
}