CUDA:如何将 char** 从内核复制到主机
Cuda how to copy char** from kernel to host
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>
using namespace std;
// Copies string pointers from `desc` to the same slots of `merge_char`.
//
// Only the pointer values are copied; the characters they point to are never
// read or written by this kernel.
//
// Parameters:
//   desc       - array of char* entries, indexed by the global x thread index.
//   merge_char - output array; slot i receives desc[i].
//
// Indexing uses the x dimension only, and threads whose global x index is
// 10000 or more do nothing. Both arrays must therefore be valid for every
// index below 10000 that the launch configuration can produce.
__global__ void kern_2D(char **desc, char** merge_char) {
    // Global thread index along x.
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx < 10000)
    {
        merge_char[idx] = desc[idx];
        //printf("From key = %s\n", merge_char[idx]);
    }
}
// Program from the question: reads up to max_lines_desc lines from desc.txt,
// copies each line into its own cudaMalloc'd buffer, stores those buffer
// addresses in the d_desc table, launches kern_2D to copy the table into
// merge_char, and then tries to print every string from the host.
//
// NOTE(review): this listing is the failing example that the answer further
// down the page diagnoses; the code is left exactly as posted and the
// problems are only annotated.
// NOTE(review): several literals below end in a bare "n" ('n', "...!n",
// "%sn"). They look like newline escapes whose backslash was lost - confirm
// against the original post. As written they denote the letter n.
int main() {
cudaError_t err = cudaSuccess;
size_t max_line_len = 255;
char line[255];
size_t line_len;
size_t max_lines_desc = 10000;
//---------------------------------------------------------------------------------//
// d_desc: table of max_lines_desc string pointers allocated with cudaMalloc;
// it is the kernel's input. The return code is not checked.
char **d_desc;
cudaMalloc(&d_desc, max_lines_desc * sizeof(char *));
// m_desc: host-side table of pointers to the host text of each line.
char **m_desc = NULL;
m_desc = (char**)malloc(max_lines_desc * sizeof(char**));
// d_temp_desc: host-side table that records the address returned by
// cudaMalloc for each line's buffer.
char **d_temp_desc = NULL;
d_temp_desc = (char **)malloc(max_lines_desc * sizeof(char **));
FILE *f_desc = fopen("desc.txt", "r");
if (!f_desc)
{
// NOTE(review): the failure is reported but execution continues, so the
// fgets call below is reached with a null FILE*.
fprintf(stderr, "Error opening file!n");
}
// Number of lines uploaded so far.
int idesc = 0;
do
{
if (!fgets(line, max_line_len, f_desc))
{
if (ferror(f_desc) && !feof(f_desc))
{
fprintf(stderr, "Error reading from file!n");
// NOTE(review): the file is closed here and closed again after the loop.
fclose(f_desc);
}
break;
}
line_len = strlen(line);
// NOTE(review): as written this tests the last character against the
// letter n and overwrites it with a space; presumably the intent was to
// replace a trailing newline with a '\0' terminator - confirm.
if ((line_len > 0) && (line[line_len - 1] == 'n'))
{
line[line_len - 1] = ' ';
--line_len;
}
// Every m_desc entry is set to the address of the same `line` buffer.
m_desc[idesc] = line;
// Allocate sizeof(line) bytes for this line, upload the whole `line` buffer
// into it, then store the new buffer's address in slot idesc of d_desc.
cudaMalloc(&(d_temp_desc[idesc]), sizeof(line) * sizeof(char));
cudaMemcpy(d_temp_desc[idesc], m_desc[idesc], sizeof(line) * sizeof(char), cudaMemcpyHostToDevice);
cudaMemcpy(d_desc + idesc, &(d_temp_desc[idesc]), sizeof(char *), cudaMemcpyHostToDevice);
++idesc;
} while (idesc < max_lines_desc);
fclose(f_desc);
//---------------------------------------------------------------------------------//
// merge_char: output pointer table for the kernel, allocated with
// cudaMallocManaged.
char **merge_char;
cudaMallocManaged(&merge_char, max_lines_desc * sizeof(char *));
// NOTE(review): one block of 1000 threads is launched, so kern_2D only
// writes merge_char[0..999]; the remaining slots are never assigned.
kern_2D << < 1, 1000 >> > (d_desc , merge_char);
err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %s after launching addKernel!n", cudaGetErrorString(err));
}
//---------------------------------------------------------------------------------//
// NOTE(review): h_dev is obtained from cudaMalloc, which allocates device
// memory, yet it is used as the destination of a cudaMemcpyDeviceToHost copy
// and then indexed on the host. This is the defect the answer addresses: the
// destination has to be host memory.
char** h_dev;
cudaMalloc((void**)(&h_dev), max_lines_desc * sizeof(char*));
err = cudaMemcpy(h_dev, merge_char, max_lines_desc * sizeof(char*), cudaMemcpyDeviceToHost);
if (err == cudaSuccess) printf("2: Okay n");
// NOTE(review): the table entries are the addresses returned by cudaMalloc
// for each line, so even with a host-side table each string needs its own
// copy back to host memory before printf can read it. The loop also runs
// over all max_lines_desc slots rather than the idesc lines actually read.
for (int i = 0; i < max_lines_desc; i++)
{
printf("%sn", h_dev[i]);
}
return 0;
}
// nvcc -arch=sm_30 -o kernel kernel.cu
// cuda-memcheck ./kernel
我很抱歉我的错误。我已经更新了我的代码。它完成了。
desc.txt 文件共有 10000 行,每行内容如下所示。从设备复制到主机之后,我检查了返回状态,但结果仍然不对:我无法打印 char** h_dev 的内容。
motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
我不得不说,我真的不明白你在这里的意图是什么,因为你的内核唯一做的事情就是复制指针。如果这就是你打算做的全部,那么到处使用双重指针只会让自己更加困难;仅仅管理索引要简单得多。
但是针对您的问题,据我所知,您的"复制回主机"确实不正确。您实际上是把数据从主机深拷贝到了设备,因此在另一个方向上也需要进行深拷贝(两阶段拷贝)。
为此,在复制回主机时,我们不应该对目标缓冲区使用 cudaMalloc。cudaMalloc 分配的是设备内存;如果要把数据复制到主机,复制目标必须是主机内存。因此,我们需要一组 cudaMemcpy 操作,以主机缓冲区为目标,将数据深拷贝回主机。
以下代码表示我可以对您展示的内容进行最简单的修改以完成此操作,它似乎适用于我的简单测试用例:
$ cat desc.txt
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
$ cat t301.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>
using namespace std;
// Pointer-forwarding kernel: every thread whose global x index is below 10000
// copies one char* entry from `desc` into the same slot of `merge_char`.
// The string bytes themselves are never dereferenced here.
//
//   desc       - input table of char* entries.
//   merge_char - output table; slot i is set to desc[i].
__global__ void kern_2D(char **desc, char** merge_char) {
    const int slot = blockIdx.x * blockDim.x + threadIdx.x;
    // Computed but never read; nvcc reports it as declared but never referenced.
    int idy = blockIdx.y * blockDim.y + threadIdx.y;
    if (slot >= 10000)
    {
        return;
    }
    char *entry = desc[slot];
    merge_char[slot] = entry;
    //printf("From key = %sn", merge_char[slot]);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Reads up to 10000 lines from desc.txt, deep-copies them to the device,
// runs kern_2D to forward the per-line device pointers into a managed table,
// then deep-copies every line back to the host and prints it.
//
// Returns 0 on success and 1 if the file cannot be opened or any allocation
// or CUDA call fails.
int main() {
    // Must match the bound hard-coded in kern_2D.
    const size_t max_lines_desc = 10000;
    const size_t table_bytes = max_lines_desc * sizeof(char *);
    char line[255];

    FILE *f_desc = fopen("desc.txt", "r");
    if (!f_desc)
    {
        fprintf(stderr, "Error opening file!\n");
        return 1;  // nothing to read; do not hand a null FILE* to fgets
    }

    //---------------------------------------------------------------------------------//
    char **d_desc = NULL;      // device table of per-line device buffers (kernel input)
    char **merge_char = NULL;  // managed table written by the kernel
    // Host-side record of each per-line device buffer, used for upload and cleanup.
    char **d_temp_desc = (char **)malloc(table_bytes);
    // Host-side destination for the pointer table copied back after the kernel.
    char **h_dev = (char **)malloc(table_bytes);
    size_t idesc = 0;          // number of lines successfully uploaded

    bool ok = (d_temp_desc != NULL) && (h_dev != NULL);
    if (!ok)
        fprintf(stderr, "Out of host memory!\n");

    ok = ok && cuda_ok(cudaMalloc(&d_desc, table_bytes), "cudaMalloc(d_desc)");
    // Zero the table so slots beyond the last line read hold null pointers
    // instead of uninitialized values when the kernel copies them.
    ok = ok && cuda_ok(cudaMemset(d_desc, 0, table_bytes), "cudaMemset(d_desc)");
    ok = ok && cuda_ok(cudaMallocManaged(&merge_char, table_bytes), "cudaMallocManaged(merge_char)");

    while (ok && idesc < max_lines_desc)
    {
        if (!fgets(line, (int)sizeof(line), f_desc))
        {
            if (ferror(f_desc) && !feof(f_desc))
                fprintf(stderr, "Error reading from file!\n");
            break;  // end of file or read error: continue with the lines read so far
        }
        // Replace the trailing newline, if any, with a terminator.
        size_t line_len = strlen(line);
        if ((line_len > 0) && (line[line_len - 1] == '\n'))
            line[line_len - 1] = '\0';

        // First stage of the deep copy: one device buffer per line.
        char *d_line = NULL;
        if (!cuda_ok(cudaMalloc(&d_line, sizeof(line)), "cudaMalloc(line)"))
        {
            ok = false;
            break;
        }
        d_temp_desc[idesc++] = d_line;  // recorded first so cleanup always frees it
        if (!cuda_ok(cudaMemcpy(d_line, line, sizeof(line), cudaMemcpyHostToDevice), "cudaMemcpy(line)"))
        {
            ok = false;
            break;
        }
    }
    fclose(f_desc);  // closed exactly once, on every path

    // Second stage: upload the whole pointer table in one transfer.
    if (ok && idesc > 0)
        ok = cuda_ok(cudaMemcpy(d_desc, d_temp_desc, idesc * sizeof(char *), cudaMemcpyHostToDevice),
                     "cudaMemcpy(d_desc)");

    //---------------------------------------------------------------------------------//
    if (ok)
    {
        // Enough blocks to cover every slot; a single block cannot hold 10000 threads.
        const unsigned int threads = 256;
        const unsigned int blocks = (unsigned int)((max_lines_desc + threads - 1) / threads);
        kern_2D<<<blocks, threads>>>(d_desc, merge_char);
        ok = cuda_ok(cudaGetLastError(), "kern_2D launch");
        ok = ok && cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize after kern_2D");
    }

    //---------------------------------------------------------------------------------//
    // Deep copy back, stage one: the pointer table goes into host memory (h_dev).
    // Its entries are still device addresses and must not be dereferenced here.
    if (ok && idesc > 0)
        ok = cuda_ok(cudaMemcpy(h_dev, merge_char, idesc * sizeof(char *), cudaMemcpyDeviceToHost),
                     "cudaMemcpy(h_dev)");
    if (ok)
        printf("2: Okay \n");

    // Stage two: copy each string into a host buffer before printing it.
    for (size_t i = 0; ok && i < idesc; i++)
    {
        ok = cuda_ok(cudaMemcpy(line, h_dev[i], sizeof(line), cudaMemcpyDeviceToHost), "cudaMemcpy(string)");
        if (ok)
            printf("%s\n", line);
    }

    //---------------------------------------------------------------------------------//
    // Release everything that was allocated; cudaFree and free accept null pointers.
    for (size_t i = 0; i < idesc; i++)
        cudaFree(d_temp_desc[i]);
    cudaFree(merge_char);
    cudaFree(d_desc);
    free(h_dev);
    free(d_temp_desc);
    return ok ? 0 : 1;
}
$ nvcc -o t301 t301.cu
t301.cu(15): warning: variable "idy" was declared but never referenced
$ cuda-memcheck ./t301
========= CUDA-MEMCHECK
2: Okay
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
========= ERROR SUMMARY: 0 errors
$
相关文章:
- Cuda C++:设备上的Malloc类,并用来自主机的数据填充它
- EvtExportLog API正在将远程计算机的事件日志保存到远程PC本身。如何将其保存到主机
- 如何在内核C++中使用1920x1080x16M图形或类似的16M颜色?(VGA)
- CUDA内核和数学函数的显式命名空间
- 码头化的C++应用程序是否向后兼容早期的内核版本
- C++内核出现Jupyter笔记本错误
- 当我尝试加载内核模块时,如何修复C++中的这个 malloc() 错误?
- 内存围栏是否涉及内核
- 如何停止 CLR 主机?
- 将 2D thrust::device_vector 复矩阵传递给 CUDA 内核函数
- 如何强制 Thrift 仅接受来自本地主机的连接
- OpenCL 内核参数中的字符***?
- 具有可分离内核的 2D 模糊卷积
- 如何在Windows内核中获取文件大小
- CUDA:如何将 char** 从内核复制到主机
- 两个单精度浮点向量的点积在 CUDA 内核中产生的结果与在主机上的结果不同
- 如何从主机代码中断或取消 CUDA 内核
- 从全局内核调用主机函数
- CUDA在使用函数指针时启动主机函数作为内核
- 是否有任何内置的CUDA函数允许CUDA内核向主机代码报告错误?