CUDA Thrust 设备端字符串比较（strcmp）

CUDA Thrust Device strcmp

本文关键字：装置 CUDA 更新时间：2023-10-16

我已经编写了下面的代码，使用 Thrust 的排序方法对 char * 数组进行排序。但由于某些原因，每当代码尝试比较字符串中的字符时，程序就会崩溃。

// Sort the CharArr keys using CharArrayCmp; the values in deviceArrayToSortRow are reordered to match the keys.
thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());

比较器如下:

/**
 * Less-than comparator for CharArr keys, usable from host and device code.
 *
 * Orders strings lexicographically by character value; when one string is a
 * prefix of the other, the shorter one sorts first. The relation is a strict
 * weak ordering, which the Thrust sorting algorithms require of a comparator.
 */
struct CharArrayCmp{
    /** Returns true when o1 must be ordered before o2. */
    __host__ __device__
    bool operator()(const CharArr & o1, const CharArr & o2) const {
        return compare(o1.value,o1.length,o2.value,o2.length);
    }

    /**
     * Compares two character ranges given as pointer + length.
     *
     * @param src      first string (need not be NUL-terminated)
     * @param lenSrc   number of characters readable through src
     * @param dst      second string (need not be NUL-terminated)
     * @param lenDest  number of characters readable through dst
     * @return true when src sorts strictly before dst
     */
    __host__ __device__
    bool compare (const char * src, int lenSrc, const char * dst, int lenDest) const
    {
        // Only the common prefix can be compared character by character.
        const int end = (lenSrc > lenDest) ? lenDest : lenSrc;
        for(int i = 0; i < end; i++){
            // The first differing character decides the order in BOTH
            // directions; continuing past it would let later characters
            // override the result and break the ordering.
            if(src[i] != dst[i]){
                return src[i] < dst[i];
            }
        }
        // Common prefix is identical: the shorter string is the smaller one,
        // and equal lengths mean the strings are equal (not less-than).
        return lenSrc < lenDest;
    }
};

当代码在设备上运行到下面这一行时，出现错误:

if(src[i] != dst[i])
    thrust::system::cuda::detail::detail::launch_closure_by_value<thrust::system::cuda::detail::
detail::stable_sort_by_count_detail::stable_sort_by_count_closure<256u, 
thrust::detail::normal_iterator<thrust::pointer<unsigned int, 
thrust::system::cuda::detail::tag, thrust::use_default, thrust::use_default> >, 
thrust::detail::normal_iterator<thrust::pointer<unsigned int, 
thrust::system::cuda::detail::tag, thrust::use_default, thrust::use_default> >, 
    thrust::system::cuda::detail::temporary_indirect_ordering<thrust::system::cuda::detail::tag,
 thrust::detail::normal_iterator<thrust::device_ptr<CharArr> >, CharArrayCmp>::compare, 
thrust::system::cuda::detail::detail::statically_blocked_thread_array<256u> > > [0] [device 
0 (GK104)]  (Signal) - CUDA_EXCEPTION_10:Device Illegal Address 
CUDA Thread (193,0,0) Block (22,0,0)    

All Kernel Threads (144 Blocks of 256 Threads)

我对CUDA很陌生，所以我不确定我做错了什么，但感觉这应该是相当直接的。

这是一个字符结构体:

// Plain pointer + length record describing one character string.
struct CharArr{
    char * value;   // first character of the string
    int length;     // number of characters in the string
};

最后，这里是调用 sort_by_key 的代码。我已经确认传入的参数是正确的：arrayToSort 是 char * 类型的数组，arrayToSortRow 是 long long 类型的数组，size 是这两个数组的大小。

void sortCharArrayStable(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
        thrust::host_vector<CharArr> hostToSort(size);
        thrust::host_vector<long long> hostToSortRow(size);
        for(int i =0; i < size; i++){
            CharArr sortRow;
            if(arrayToSort[i] == 0x0){
                sortRow.length = 0;
                sortRow.value = "";
                std::cout<<"Had an error on row "<< arrayToSortRow[i]<<" when making column array for sortCharArrayStable"<<std::endl;
            }else{
                sortRow.length = strlen(arrayToSort[i]);
                sortRow.value = arrayToSort[i];
            }

            hostToSort[i] = sortRow;
            hostToSortRow[i] = arrayToSortRow[i];
        }
        thrust::device_vector<CharArr> deviceArrayToSort = hostToSort;// (arrayToSort,arrayToSort + size);
        thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;


thrust::stable_sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
        //copy the contents back into our original array to sort now sorted
        hostToSort = deviceArrayToSort;
        for(int i =0; i < size; i++){
            arrayToSort[i] = hostToSort[i].value;
        }
        arrayToSortRow);

thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);

}

下面是可编译示例中出现的问题的完整示例:

#include <cstring>
#include <iostream>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/sort.h>

// Plain pointer + length record describing one character string.
struct CharArr{
    char * value;   // first character of the string
    int length;     // number of characters in the string
};

/**
 * Less-than comparator for CharArr keys, usable from host and device code.
 *
 * Orders strings lexicographically by character value; when one string is a
 * prefix of the other, the shorter one sorts first. The relation is a strict
 * weak ordering, which the Thrust sorting algorithms require of a comparator.
 */
struct CharArrayCmp{
    /** Returns true when o1 must be ordered before o2. */
    __host__ __device__
    bool operator()(const CharArr & o1, const CharArr & o2) const {
        return compare(o1.value,o1.length,o2.value,o2.length);
    }

    /**
     * Compares two character ranges given as pointer + length.
     *
     * @param src      first string (need not be NUL-terminated)
     * @param lenSrc   number of characters readable through src
     * @param dst      second string (need not be NUL-terminated)
     * @param lenDest  number of characters readable through dst
     * @return true when src sorts strictly before dst
     */
    __host__ __device__
    bool compare (const char * src, int lenSrc, const char * dst, int lenDest) const
    {
        // Only the common prefix can be compared character by character.
        const int end = (lenSrc > lenDest) ? lenDest : lenSrc;
        for(int i = 0; i < end; i++){
            // The first differing character decides the order in BOTH
            // directions; continuing past it would let later characters
            // override the result and break the ordering.
            if(src[i] != dst[i]){
                return src[i] < dst[i];
            }
        }
        // Common prefix is identical: the shorter string is the smaller one,
        // and equal lengths mean the strings are equal (not less-than).
        return lenSrc < lenDest;
    }
};

void sortCharArray(char ** arrayToSort, long long * arrayToSortRow,long long size){
    std::cout <<"about to start LongIndex" <<std::endl;
            thrust::host_vector<CharArr> hostToSort(size);
            thrust::host_vector<long long> hostToSortRow(size);
            for(int i =0; i < size; i++){
                CharArr sortRow;
                sortRow.value = arrayToSort[i];
                sortRow.length = strlen(arrayToSort[i]);
                hostToSort[i] = sortRow;
                hostToSortRow[i] = arrayToSortRow[i];
            }
            thrust::device_vector<CharArr> deviceArrayToSort = hostToSort;// (arrayToSort,arrayToSort + size);
            thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
           // thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
            thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
            //copy the contents back into our original array to sort now sorted
            hostToSort = deviceArrayToSort;
            for(int i =0; i < size; i++){
                arrayToSort[i] = hostToSort[i].value;
            }
            thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);

}
/**
 * Demo driver: builds ten sample strings with row ids 0..9, sorts them with
 * sortCharArray and prints each row id next to its string.
 */
int main()
{
    const int count = 10;

    // Writable character storage. String literals are const in C++ and must
    // not be bound to char *, so the sample words live in a mutable array
    // (7 = longest word, 6 characters, plus the terminating NUL).
    char words[count][7] = {
        "zyxw", "abcd", "defg", "werd", "aasd",
        "zwedew", "asde", "rurt", "ntddwe", "erbfde"
    };

    // Automatic arrays instead of new[]: nothing to leak.
    char * charArr[count];
    long long rows[count];
    for(int i = 0; i < count; i++){
        charArr[i] = words[i];
        rows[i] = i;
    }

    sortCharArray(charArr,rows,count);

    for(int i = 0; i < count; i++){
        std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
    }
    return 0;
}

这行不通:

    // Each CharArr in hostToSort holds a host pointer in `value`; this copy transfers that pointer value unchanged.
    thrust::device_vector<CharArr> deviceArrayToSort = hostToSort;// (arrayToSort,arrayToSort + size);
    thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;
    // CharArrayCmp dereferences those host pointers in device code, which is what fails.
    thrust::stable_sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());

hostToSort 中的每个 CharArr 对象都包含一个指向主机内存位置的指针。当该对象被复制到设备向量时，这个指针的数值不会改变，仍然是主机地址。如果你尝试在设备代码中解引用这个（伪）指针，它就会失败，正如你所看到的。

您将需要遍历deviceArrayToSort设备向量，并且对于其中的每个CharArr对象，您将需要调整其value指针以指向设备内存中的有效位置，这可能是要排序的每个字符串的起始地址。