CUDA 中线程索引的行主或列主访问
row-major or column-major access of thread index in cuda?
我很困惑图像是按行主顺序还是列主顺序存储在设备的全局内存中。我在两个订单中访问图像时获得了图像的两个不同输出。
按行主顺序访问时-
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;
int m = numCols * y + x;
if (x >= numCols || y >= numRows)
return;
//marking column boundaries
if (x <= 2){
d_Image[m].x = 255;
d_Image[m].y = 0;
d_Image[m].z = 0;
}
else if (x >= numCols-2){
d_Image[m].x = 0;
d_Image[m].y = 0;
d_Image[m].z = 255;
}
else{
d_Image[m].x = d_sample[m].x;
d_Image[m].y = d_sample[m].y;
d_Image[m].z = d_sample[m].z;
}
d_Image[m].w = d_sample[m].w;
使用行主输出
按列主顺序访问时-
int m = x * numRows + y;
使用列专业
输出尺寸-
const dim3 blockSize(16,16);
const dim3 gridSize(numCols/16+1, numRows/16+1, 1);
blur << < gridSize, blockSize >> >(d_Image, d_sample, numRows, numCols);
我正在使用opencv加载和保存图像。
在第一个输出中,红色和蓝色点散布在整个图像中。在第二个输出(col-major)中,当我尝试标记列时,边界行被标记。我太困惑了。编辑
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols);
cv::Mat sample;
cv::Mat Image;
size_t numRows() { return sample.rows; }
size_t numCols() { return sample.cols; }
__global__ void blur(const uchar4 *d_sample, uchar4* d_Image, size_t numRows, size_t numCols){
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;
int m = y*numCols + x;
if (x >= numCols || y >= numRows)
return;
if (x <= 2){
d_Image[m].x = 255;
d_Image[m].y = 0;
d_Image[m].z = 0;
}
else if (x >= (numCols-2)){
d_Image[m].x = 0;
d_Image[m].y = 0;
d_Image[m].z = 255;
}
else{
d_Image[m].x = d_sample[m].x;
d_Image[m].y = d_sample[m].y;
d_Image[m].z = d_sample[m].z;
}
d_Image[m].w = d_sample[m].w;
}
int main(){
uchar4 *h_sample, *d_sample, *d_Image, *h_Image;
int filter[9];
sample = cv::imread("sample.jpg", CV_LOAD_IMAGE_COLOR);
if (sample.empty()){
std::cout << "error in loading image.";
system("pause");
}
cv::cvtColor(sample,sample,CV_BGR2RGBA);
Image.create(numRows(), numCols(), CV_8UC4);
if (!sample.isContinuous() || !Image.isContinuous()) {
std::cerr << "Images aren't continuous!! Exiting." << std::endl;
system("pause");
exit(1);
}
cv::cvtColor(Image,Image,CV_BGR2RGBA);
h_sample = (uchar4*)sample.data;
h_Image = (uchar4*)Image.data;
size_t numPixels = numRows() * numCols();
//allocate mmeory on device
checkCudaErrors(cudaMalloc((void**)&d_sample, sizeof(uchar4) * numPixels));
checkCudaErrors(cudaMalloc((void**)&d_Image, sizeof(uchar4) * numPixels));
checkCudaErrors(cudaMemset(d_sample, 0, sizeof(uchar4) * numPixels));
checkCudaErrors(cudaMemset(d_Image, 0, sizeof(uchar4) * numPixels));
//copy to device
checkCudaErrors(cudaMemcpy(d_sample, h_sample, sizeof(uchar4) * numPixels, cudaMemcpyHostToDevice));
helper(d_sample, d_Image, numCols(), numRows());
//copy back to host
checkCudaErrors(cudaMemcpy(h_Image, d_Image, sizeof(uchar4) * numPixels, cudaMemcpyDeviceToHost));
cv::cvtColor(Image,Image,CV_RGBA2BGR);
cv::namedWindow("Image", CV_WINDOW_AUTOSIZE);
cv::imshow("Image", Image);
cv::waitKey(0);
cv::imwrite("sample.jpg", Image);
return 0;
}
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){
const dim3 blockSize(16,16);
const dim3 gridSize(numCols/16+1, numRows/16+1, 1);
blur << < gridSize, blockSize >> >(d_sample, d_Image, numRows, numCols);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
}
void helper(uchar4* d_sample, uchar4* d_Image, size_t numRows, size_t numCols){
你打电话
helper(d_sample, d_Image, numCols(), numRows());
我想当你打电话给助手时,你可能已经切换了列和行......
相关文章:
- 最佳做法是从另一个线程访问 qml 中的Q_PROPERTY
- 线程消息传递或更好:在"大师班"中访问其他班级的成员
- C++:在多个线程中访问同一数组/向量的不同单元格是否会产生数据竞赛?
- 线程时访问静态映射时出现隔离错误
- 是否需要 mutex() 来安全地同时访问具有 2 个线程的数组的不同元素?
- 提供对不同类型的数据(建议、代码审查)的线程安全访问的类
- 从子线程访问指针
- 对C++中的队列进行多线程访问
- 对全局变量的多线程访问:我应该使用互斥锁吗?
- cuda:多个线程访问同一个全局变量
- 如何进行线程安全shared_ptr修改和访问?
- 如果迭代器的迭代器永远不会无效,则是STD :: MAP访问线程安全
- 如何在不将类数据成员作为参数传递的情况下访问线程中的类数据成员
- 在 C++ 中包含和访问线程全局变量
- QVector预先分配了访问线程安全性
- 使对unsigned char的访问线程安全(原子)
- 在 OpenMP 中访问线程的专用内存
- 访问线程中的vector.front()会导致运行时错误
- 访问线程(MFC)中的主对话框变量
- 从主循环windows访问线程变量