从已排序的数组中删除重复项

Remove duplicates from a sorted array

本文关键字:删除 数组 排序      更新时间:2023-10-16

我已经写了几个小时的代码,现在正试图弄清楚如何在插入排序的过程中删除已排序数组中的重复项。我想在不重写整个程序的前提下做到这一点,但越往下写,越觉得似乎只能从头再来。在那之前,我想先看看能否在下面这段代码的基础上实现。

手头的问题是,在将排序数组放回文件之前,如何从中删除重复项?

#include <iostream>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <vector>
using std::cout;
using std::endl;
using namespace std;
//sort into an array using insertion sort
//gets info from file "numbers.txt"
//and prints the sorted numbers into "sorted_numbers.txt"
// Sorts the first `length` elements of `a` into ascending order, in place,
// using insertion sort.
//
// Parameters:
//   a      - array holding at least `length` ints; it is reordered in place.
//   length - number of elements to sort; for values below 2 the loop body
//            never runs and `a` is left untouched.
//
// Equal elements are all kept. `length` is passed by value, so this function
// has no way to tell the caller that the array got shorter; removing
// duplicates therefore has to happen in a separate pass after sorting.
void insertSort(int a[], int length)
{
   for (int i = 1; i < length; i++)
   {
       // a[0..i-1] is already sorted; `value` is the next element to place.
       const int value = a[i];
       int j = i - 1;
       // Move every element larger than `value` one slot to the right,
       // which opens a gap at the position where `value` belongs.
       for (; j >= 0 && a[j] > value; j--)
       {
           a[j + 1] = a[j];
       }
       // Every element must be inserted, including ones equal to their
       // neighbour: skipping an element leaves it stranded out of order.
       a[j + 1] = value;
   }
}
// Prints the first `length` elements of `arg` to standard output on a single
// line, each followed by one space, and finishes the line with a newline.
//
// Parameters:
//   arg    - array holding at least `length` ints.
//   length - number of elements to print; 0 or less prints only the newline.
void printarray(int arg[], int length)
{
    for (int n = 0; n < length; ++n)
        std::cout << arg[n] << ' ';
    // '\n' is the newline escape; the plain character 'n' would print the
    // letter n at the end of the line.
    std::cout << '\n';
}
// Writes the built-in sample data set, one integer per line, to `path`.
// Returns false when the file cannot be created or written.
static bool writeSampleNumbers(const char* path)
{
    static const int kSample[] = {1, 3, 7, 4, 2, 7, 6, 9, 5, 2};
    std::ofstream out(path);
    if (!out)
        return false;
    for (int value : kSample)
        out << value << '\n';
    return static_cast<bool>(out);
}

// Reads integers from "numbers.txt", sorts them with insertSort, removes
// duplicates and writes the result, one per line, to "sorted_numbers.txt".
//
// If "numbers.txt" does not exist, it is first created with a sample data set
// and then processed like any other input file.
//
// Returns 0 on success and 1 when a required file cannot be opened.
int main()
{
    const char* const inputPath = "numbers.txt";
    const char* const outputPath = "sorted_numbers.txt";

    std::ifstream in(inputPath);
    if (!in.is_open())
    {
        std::cout << "File could not be opened FILE NOT FOUND." << std::endl;
        // Create the input file with sample numbers so there is something
        // to sort, then read it back through the common path below.
        if (!writeSampleNumbers(inputPath))
        {
            std::cerr << "Could not create " << inputPath << std::endl;
            return 1;
        }
        in.clear();
        in.open(inputPath);
        if (!in.is_open())
        {
            std::cerr << "Could not reopen " << inputPath << std::endl;
            return 1;
        }
    }

    // A vector grows with the input, so the number of integers in the file
    // is not limited by a fixed-size buffer.
    std::vector<int> numbers;
    for (int x; in >> x; )
        numbers.push_back(x);
    in.close();

    const int n = static_cast<int>(numbers.size());
    printarray(numbers.data(), n);
    std::cout << "Read " << n << " integers from the file." << std::endl;

    insertSort(numbers.data(), n);
    std::cout << "Integers are sorted" << std::endl;

    // std::unique collapses runs of adjacent equal values, which is why it
    // runs after the sort; erase then drops the leftover tail.
    numbers.erase(std::unique(numbers.begin(), numbers.end()), numbers.end());
    const int kept = static_cast<int>(numbers.size());

    std::ofstream out(outputPath);
    if (!out)
    {
        std::cerr << "Could not open " << outputPath << std::endl;
        return 1;
    }
    for (int k = 0; k < kept; k++)
        out << numbers[k] << '\n';
    out.close();

    std::cout << kept << " integers stored to the file." << std::endl;
    printarray(numbers.data(), kept);
    return 0;
}

Insertion sort从左到右进行操作,并将子数组向右旋转(将最小值放在子数组的左侧),而删除重复项则将子数组向左移动。作为一种替代方案,我创建并修改了一种"删除"排序(反向插入排序),它从右向左操作,并将子数组向左旋转(将最大值放在子数组的右侧),然后将子数组左移以删除重复项。

// "Delete sort": a mirrored insertion sort that walks from right to left and
// drops duplicates as it goes.
//
// Parameters:
//   a      - array holding at least `length` ints; reordered in place.
//   length - in: number of elements to process;
//            out: number of distinct elements, stored in ascending order in
//            a[0..length-1].
void deletesort(int a[], int &length)
{
    if (length < 2)
        return;

    for (int start = length - 2; start >= 0; --start) {
        // a[start+1..length-1] is already sorted; `pivot` is merged into it.
        const int pivot = a[start];
        int pos = start + 1;

        // Slide every element smaller than the pivot one slot to the left.
        while (pos < length && pivot > a[pos]) {
            a[pos - 1] = a[pos];
            ++pos;
        }

        if (pos < length && pivot == a[pos]) {
            // The pivot is already present: shift the rest of the tail left
            // as well, which overwrites the gap, and shrink the array by one.
            while (pos < length) {
                a[pos - 1] = a[pos];
                ++pos;
            }
            --length;
        }

        // Normal case: drop the pivot into the gap. Duplicate case: `pos` is
        // the old length, so this write lands just past the new logical end.
        a[pos - 1] = pivot;
    }
}

这里有一个不删除重复项的删除排序:

// "Delete sort" without duplicate removal: a mirrored insertion sort that
// walks from right to left and leaves a[0..length-1] in ascending order.
//
// Parameters:
//   a      - array holding at least `length` ints; reordered in place.
//   length - number of elements to sort; fewer than 2 is a no-op.
void deletesort(int a[], int length)
{
    if (length < 2)
        return;

    for (int start = length - 2; start >= 0; --start) {
        // a[start+1..length-1] is already sorted; place `pivot` into it.
        const int pivot = a[start];
        int hole = start;

        // Pull smaller elements one slot left, moving the hole to the right.
        while (hole + 1 < length && a[hole + 1] < pivot) {
            a[hole] = a[hole + 1];
            ++hole;
        }
        a[hole] = pivot;
    }
}

使用algorithm库和std::vector

#include <algorithm>
#include <vector>
#include <fstream>
// Reads whitespace-separated integers from "numbers.txt", sorts them, removes
// duplicates and writes the result, one integer per line, to
// "sorted_numbers.txt".
int main()
{
  std::vector<int> v;
  {
    // Scoped so the input stream is closed as soon as reading is done.
    std::ifstream is("numbers.txt");
    for (int i; is >> i; )
      v.push_back(i);
  }
  std::sort(v.begin(), v.end());
  // std::unique only collapses adjacent equal elements, hence the sort
  // first; erase then trims the leftover tail.
  v.erase(std::unique(v.begin(), v.end()), v.end());
  std::ofstream of("sorted_numbers.txt");
  // '\n' (newline escape) separates the values so the file can be read back.
  for (auto i : v)
    of << i << '\n';
  // Or for non-c++11
  // for (std::vector<int>::const_iterator i = v.begin(); i != v.end(); ++i)
  //   of << *i << '\n';
}

或者为了简单起见,甚至是std::set(向量版本可能更快,但可能对您的用例没有太大影响)。

#include <algorithm>
#include <set>
#include <fstream>
// Reads whitespace-separated integers from "numbers.txt" into a std::set,
// which keeps them sorted and unique, and writes them, one integer per line,
// to "sorted_numbers.txt".
int main()
{
  std::set<int> s;
  {
    // Scoped so the input stream is closed as soon as reading is done.
    std::ifstream is("numbers.txt");
    for (int i; is >> i; )
      s.insert(i);  // inserting a value that is already present is a no-op
  }
  std::ofstream of("sorted_numbers.txt");
  // '\n' (newline escape) separates the values so the file can be read back.
  for (auto i : s)
    of << i << '\n';
  // Or for non-c++11
  // for (std::set<int>::const_iterator i = s.begin(); i != s.end(); ++i)
  //   of << *i << '\n';
}