从 CUDA C++ 的函数中返回 dim3 变量

Return dim3 variable from function in CUDA C++

本文关键字：返回 DIM3 变量函数 cuda 更新时间：2023-10-16

我想从函数中返回一个 dim3 对象。具体代码如下：

dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
    int gridX = (int)ceil((float)width / block_size);
    int gridY = (int)ceil((float)height / block_size);
    return dim3(gridX, gridY);

但是编译时出现了如下错误：

error: expected expression before ‘dim3’
 return dim3(gridX, gridY);
        ^

所以我把代码改成了这样：

dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
    int gridX = (int)ceil((float)width / block_size);
    int gridY = (int)ceil((float)height / block_size);
    dim3 gridXY(gridX, gridY);
    return gridXY;

但现在编译器报错：

error: incompatible types when returning type ‘dim3 (*)() 
{aka struct dim3 (*)()}’ but ‘dim3 {aka struct dim3}’ was expected
 return gridXY;
        ^

你能帮我吗？这个错误是什么意思？我该如何解决此问题，才能用 NVCC 正确编译？谢谢！

这个 .c 文件（utils.c）的完整代码是：

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#define WARP_SIZE 16
#define DEBUG false
/*
 * Allocates a device buffer for src_size floats and copies src into it.
 *
 * src      - host buffer to copy from
 * src_size - number of floats to copy
 *
 * Returns the device pointer on success. On failure (negative size, failed
 * allocation or failed copy) an error is printed to stderr, any device buffer
 * allocated here is freed, and NULL is returned.
 */
float *_copyHostDevice(float *src, int src_size) {
  float *src_d = NULL;
  size_t bytes;
  cudaError_t err;

  if (src_size < 0) {
    fprintf(stderr, "_copyHostDevice: negative element count %d\n", src_size);
    return NULL;
  }
  bytes = sizeof(float) * (size_t)src_size;

  err = cudaMalloc((void**)&src_d, bytes);
  if (err != cudaSuccess) {
    fprintf(stderr, "_copyHostDevice: cudaMalloc failed: %s\n",
            cudaGetErrorString(err));
    return NULL;
  }

  err = cudaMemcpy(src_d, src, bytes, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    fprintf(stderr, "_copyHostDevice: cudaMemcpy failed: %s\n",
            cudaGetErrorString(err));
    /* Do not leak the device buffer when the copy fails. */
    cudaFree(src_d);
    return NULL;
  }
  return src_d;
}
/*
 * Copies src_size floats from the device buffer src to host memory.
 *
 * src      - device buffer to copy from
 * src_size - number of floats to copy
 * dst      - host destination; when NULL a buffer is malloc'ed here and the
 *            caller becomes responsible for free()ing it
 *
 * Returns the host buffer that received the data. On failure (negative size,
 * failed allocation or failed copy) an error is printed to stderr, a buffer
 * allocated here is freed, and NULL is returned.
 */
float *_copyDeviceHost(float *src, int src_size, float *dst) {
   float *target = dst;
   size_t bytes;
   cudaError_t err;

   if (src_size < 0) {
      fprintf(stderr, "_copyDeviceHost: negative element count %d\n", src_size);
      return NULL;
   }
   bytes = sizeof(float) * (size_t)src_size;

   if (target == NULL) {
      target = (float*)malloc(bytes);
      /* malloc(0) may legitimately return NULL, so only treat it as an
         error when bytes were actually requested. */
      if (target == NULL && bytes != 0) {
         fprintf(stderr, "_copyDeviceHost: out of host memory (%lu bytes)\n",
                 (unsigned long)bytes);
         return NULL;
      }
   }

   err = cudaMemcpy(target, src, bytes, cudaMemcpyDeviceToHost);
   if (err != cudaSuccess) {
      fprintf(stderr, "_copyDeviceHost: cudaMemcpy failed: %s\n",
              cudaGetErrorString(err));
      /* Only release memory this function allocated itself. */
      if (dst == NULL) {
         free(target);
      }
      return NULL;
   }
   return target;
}
/* Pair of integer coordinates. */
typedef struct {
   int x, y;   /* x and y components */
} GlobalDim;
/*
 * Combines the given block size, block index and thread index into one
 * (x, y) pair: each component is blockDim * blockIdx + threadIdx for that
 * axis, computed in unsigned arithmetic and then stored as int.
 */
__device__ GlobalDim getGlobalDim(dim3 blockDim, dim3 blockIdx, dim3    threadIdx) {
    const unsigned int gx = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int gy = blockDim.y * blockIdx.y + threadIdx.y;

    GlobalDim result;
    result.x = (int)gx;
    result.y = (int)gy;
    return result;
}
/*
 * Computes a 2D grid size for a width x height problem, using the same
 * block_size for both axes: each grid dimension is
 * ceil(extent / block_size), evaluated in float and truncated to int.
 *
 * NOTE: `dim3 gridXY(gridX, gridY);` below is C++ constructor syntax. As the
 * build log quoted in this post shows, when this file is compiled as C
 * (utils.c) that line is parsed as a function declaration, which produces the
 * "parameter names (without types)" warning and the
 * "incompatible types when returning" error.
 */
dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
   int gridX = (int)ceil((float)width / block_size);
   int gridY = (int)ceil((float)height / block_size);
   dim3 gridXY(gridX, gridY);
   return gridXY;
}

/*
 * Suspends the calling thread for n whole seconds.
 *
 * n - number of seconds to sleep; zero or negative values return immediately.
 *
 * Uses sleep() rather than usleep(n * 1000000): the multiplication overflows
 * int for large n, a negative n wraps to a huge unsigned delay, and POSIX
 * allows usleep() to reject arguments of 1,000,000 or more.
 */
void _sleep(int n) {
   if (n <= 0) {
      return;
   }
   sleep((unsigned int)n);
}
/*
 * Prints a matrix stored contiguously row by row to stdout.
 *
 * m      - matrix elements; element (i, j) is read from m[i * width + j]
 * width  - number of columns
 * height - number of rows
 *
 * Each element is printed with "%f " and every row ends with a newline.
 */
void drawMatrix(float *m, int width, int height) {
   for (int i=0; i < height; i++) {
      for (int j=0; j < width; j++) {
          printf("%f ", m[i * width + j]);
      }
      /* The listing had printf("n"): the backslash of the escape was lost. */
      printf("\n");
   }
}

编译命令和结果如下：

$ nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
  utils.c: In function ‘getGridBasedOnBlockSize’:
  utils.c:48:5: warning: parameter names (without types) 
  in function    declaration
   dim3 gridXY(gridX, gridY);
   ^
  utils.c:49:12: error: incompatible types when returning  
   type ‘dim3 (*)() {aka struct dim3 (*)()}’ but ‘dim3 {aka struct dim3}’    was expected
 return gridXY;
        ^

而对于 return dim3(...) 这种写法，它显示：

nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
  utils.c: In function ‘getGridBasedOnBlockSize’:
  utils.c:48:12: error: expected expression before ‘dim3’
    return dim3(gridX, gridY);
        ^

edit1：@zindarod

使用

dim3 gridXY;
gridXY.x = gridX;
gridXY.y = gridY;
return gridXY;

而不是

dim3 gridXY(gridX, gridY);
    return gridXY;

按照您的建议修改之后，不幸的是又引发了下面的链接错误：

$ nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_copyHostDevice':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x16): multiple definition of `_copyHostDevice'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x16): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_copyDeviceHost':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x8c): multiple definition of `_copyDeviceHost'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x8c): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `getGlobalDim':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0xed): multiple definition of `getGlobalDim'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0xed): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `getGridBasedOnBlockSize':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x120): multiple definition of `getGridBasedOnBlockSize'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x120): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_sleep':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x1de): multiple definition of `_sleep'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1de): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `drawMatrix':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x1fc): multiple definition of `drawMatrix'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1fc): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `setWeightsForLayers(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x27b): multiple definition of `setWeightsForLayers(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x27b): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `updateWeightsCUDA(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x885): multiple definition of `updateWeightsCUDA(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x12b2): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `update_layer(float*, float*, int, int, float*)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x3fa): multiple definition of `update_layer(float*, float*, int, int, float*)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x3fa): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `mapStepCUDA(float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x9ad): multiple definition of `mapStepCUDA(float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x13da): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `reduceStepCUDA(float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0xa9d): multiple definition of `reduceStepCUDA(float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x14ca): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z17updateWeightsCUDAPfS_S_S_ii(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x77a): multiple definition of `__device_stub__Z17updateWeightsCUDAPfS_S_S_ii(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x11a7): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z11mapStepCUDAPfS_S_ii(float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x8cd): multiple definition of `__device_stub__Z11mapStepCUDAPfS_S_ii(float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x12fa): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z14reduceStepCUDAPfS_ii(float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x9e8): multiple definition of `__device_stub__Z14reduceStepCUDAPfS_ii(float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1415): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_copyHostDevice':
utils.c:(.text+0x0): multiple definition of `_copyHostDevice'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x16): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_copyDeviceHost':
utils.c:(.text+0x76): multiple definition of `_copyDeviceHost'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x8c): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `getGlobalDim':
utils.c:(.text+0xd7): multiple definition of `getGlobalDim'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0xed): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `getGridBasedOnBlockSize':
utils.c:(.text+0x11d): multiple definition of `getGridBasedOnBlockSize'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x120): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_sleep':
utils.c:(.text+0x1a5): multiple definition of `_sleep'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1de): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `drawMatrix':
utils.c:(.text+0x1c3): multiple definition of `drawMatrix'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1fc): first defined here
collect2: error: ld returned 1 exit status

替换此行：

dim3 gridXY(gridX, gridY);

替换为：

dim3 gridXY;
/* When compiled as C, dim3 is a plain struct with no constructor, so every
   member has to be assigned by hand. z must be set to 1 explicitly;
   otherwise it is left uninitialised. */
gridXY.x = gridX;
gridXY.y = gridY;
gridXY.z = 1;