如何将Class对象传递给CUDA内核
How to pass Class object to CUDA kernel
我正试图将Class
obj(类对象)从主机传递给设备端的CUDA内核,但遇到了类似Bus error: 10
的错误。请问应该如何传递包含Array(数组)
的Class对象?这是我的代码。
-
matrixCU.h
#ifndef __MATRIXCUDA_H__ #define __MATRIXCUDA_H__ #include <iostream> class Matrix{ private: std::size_t height,width,sizeArray; double *array; public: __device__ __host__ Matrix(); __device__ __host__ Matrix(std::size_t); __device__ __host__ Matrix(std::size_t,std::size_t); __device__ __host__ Matrix(const Matrix &); __device__ __host__ Matrix &operator=(const Matrix &mat); __device__ __host__ ~Matrix(); __device__ __host__ void assignValue(std::size_t,std::size_t, double); __device__ __host__ void assignValue(std::size_t , double ); __device__ __host__ void displayArray(); }; #endif
-
matrixCU.cu
#include <iostream> #include"matrixCU.h" Matrix::Matrix(){ height = 1; width = 1; sizeArray = height*width; array= new double[sizeArray]; if (!array) { //cout << "Memory allocation failed"<<endl; printf("Memory allocation failed"); } } Matrix::Matrix(size_t h){ height = h; width = 1; sizeArray = height*width; array = new double[sizeArray]; if (!array) { //cout << "Memory allocation failed"<<endl; printf("Memory allocation failed"); } } Matrix::Matrix(size_t h,size_t w){ height = h; width = w; sizeArray = height*width; array= new double[sizeArray]; if (!array) { //cout << "Memory allocation failed"<<endl; printf("Memory allocation failed"); } } Matrix::Matrix(const Matrix &mat){ height = mat.height; width = mat.width; sizeArray = mat.sizeArray; array = new double[sizeArray]; for(size_t i = 0;i<sizeArray;++i){ array[i] = mat.array[i]; } //copy(mat.array,mat.array+mat.sizeArray,array); } Matrix &Matrix::operator=(const Matrix &mat){ height = mat.height; width = mat.width; sizeArray = mat.sizeArray; array = new double[sizeArray]; for(size_t i = 0;i<sizeArray;++i){ array[i] = mat.array[i]; } //copy(mat.array,mat.array+mat.sizeArray,array); return *this; } Matrix::~Matrix(){ delete [] array; } void Matrix::assignValue(size_t i,size_t j, double value){ size_t l = i*width + j; array[l] = value; } void Matrix::assignValue(size_t l, double value){ array[l] = value; } void Matrix::displayArray(){ size_t i,j,l; for(i=0;i<height;++i){ for(j=0;j<width;++j){ l =i*width + j; //cout<<array[l]<<"t"; printf("%ft",array[l]); } //cout<<endl; printf("n"); } }
这是主代码
-
main.cu
#include<iostream> #include"matrixCU.h" #include<curand_kernel.h> const int N=1000; __global__ void initialize(Matrix *R){ int i= blockIdx.x*blockDim.x + threadIdx.x; if(i<N){ curandState state; curand_init(clock64(), i, 0, &state); R->assignValue(i,curand_uniform(&state)); } } int main(){ Matrix R(N); Matrix *pR; pR=&R; cudaMallocManaged(&pR,N*sizeof(Matrix)); initialize<<<4,256>>>(pR); cudaDeviceSynchronize(); pR->displayArray(); return 0; }
我编译并运行此代码nvcc -dc main.cu matrixCU.cu
和nvcc main.o matrixCU.cu
然后./a.out
我得到了类似Bus error: 10
的错误。请帮忙,我是CUDA的新手。
您的代码存在各种问题。我不确定我是否会提到所有这些,所以研究一下我的文件和你的文件之间的区别。
-
如果要从主机代码中分配在主机和设备上都可用的数据,则必须使用
cudaMallocManaged
。您不能使用主机端的new
运算符(除非您为特定的类重载了它;而对于像double
这样的基本类型,您没有重载它,也无法重载)。 -
同样,您必须使用
cudaFree
释放这些分配。 -
您在主机端对矩阵R的处理不正确。您先让
pR
指向R
的地址——这行不通,因为R不是托管(managed)分配——随后又通过调用cudaMallocManaged
覆盖了这个指针。这种写法本身就是错误的。 -
既然这些类方法调用了
cudaMallocManaged
,它们就不能再标记为__device__
:该API只能在主机代码中使用。
以下是我可以做的最小数量的更改,包括上面的项目,以使您的代码正常工作:
$ cat matrixCU.h
#ifndef MATRIXCUDA_H
#define MATRIXCUDA_H
// The guard name avoids leading/double underscores, which are reserved for
// the implementation.
#include <cstddef>
#include <iostream>

// Matrix of doubles whose height*width elements are kept in one flat buffer;
// element (i, j) is stored at index i*width + j.
//
// The constructors, the assignment operator and the destructor are host-only
// because their definitions call the CUDA runtime allocation API, which can
// only be used from host code. The element setters and the printer can be
// called from both host and device code.
class Matrix{
private:
    std::size_t height;     // number of rows
    std::size_t width;      // number of columns
    std::size_t sizeArray;  // total number of elements (height*width)
    double *array;          // flat element buffer holding sizeArray doubles
public:
    // Builds a 1x1 matrix.
    __host__ Matrix();
    // Builds a matrix with h rows and a single column.
    __host__ Matrix(std::size_t h);
    // Builds a matrix with h rows and w columns.
    __host__ Matrix(std::size_t h, std::size_t w);
    // Deep copy: allocates a separate buffer and copies every element of mat.
    __host__ Matrix(const Matrix &mat);
    // Deep-copy assignment from mat; returns *this.
    __host__ Matrix &operator=(const Matrix &mat);
    // Releases the element buffer.
    __host__ ~Matrix();
    // Stores value in the element at row i, column j.
    __device__ __host__ void assignValue(std::size_t i, std::size_t j, double value);
    // Stores value in the element at flat index l.
    __device__ __host__ void assignValue(std::size_t l, double value);
    // Prints every element with printf, row by row.
    __device__ __host__ void displayArray();
};
#endif // MATRIXCUDA_H
$ cat matrixCU.cu
#include <iostream>
#include <cstdio>
#include"matrixCU.h"
// Default constructor: builds a 1x1 matrix whose single element lives in
// CUDA managed memory. The element value is not initialised.
//
// If the allocation fails, a message is printed and the object is left empty
// (array == NULL, all sizes 0) instead of holding an indeterminate pointer,
// so that destroying or printing it afterwards stays well defined.
Matrix::Matrix(){
    height = 1;
    width = 1;
    sizeArray = height*width;
    array = NULL;
    cudaError_t err = cudaMallocManaged(&array,sizeArray*sizeof(array[0]));
    if (err != cudaSuccess)
    {
        printf("Memory allocation failed");
        // Never keep a pointer the runtime did not hand out.
        array = NULL;
        height = 0;
        width = 0;
        sizeArray = 0;
    }
}
// Builds a matrix with h rows and one column in CUDA managed memory.
// Element values are not initialised.
//
// h == 0 yields an empty matrix without touching the allocator, because a
// zero-byte managed allocation is rejected by the runtime.
// If the allocation fails, a message is printed and the object is left empty
// (array == NULL, all sizes 0) so that later destruction or printing is safe.
Matrix::Matrix(size_t h){
    height = h;
    width = 1;
    sizeArray = height*width;
    array = NULL;
    if (sizeArray == 0)
    {
        return;
    }
    cudaError_t err = cudaMallocManaged(&array,sizeArray*sizeof(array[0]));
    if (err != cudaSuccess)
    {
        printf("Memory allocation failed");
        // Never keep a pointer the runtime did not hand out.
        array = NULL;
        height = 0;
        width = 0;
        sizeArray = 0;
    }
}
// Builds a matrix with h rows and w columns in CUDA managed memory.
// Element values are not initialised.
//
// When h or w is 0 the matrix is empty and no allocation is attempted,
// because a zero-byte managed allocation is rejected by the runtime.
// If the allocation fails, a message is printed and the object is left empty
// (array == NULL, all sizes 0) so that later destruction or printing is safe.
Matrix::Matrix(size_t h,size_t w){
    height = h;
    width = w;
    sizeArray = height*width;
    array = NULL;
    if (sizeArray == 0)
    {
        return;
    }
    cudaError_t err = cudaMallocManaged(&array,sizeArray*sizeof(array[0]));
    if (err != cudaSuccess)
    {
        printf("Memory allocation failed");
        // Never keep a pointer the runtime did not hand out.
        array = NULL;
        height = 0;
        width = 0;
        sizeArray = 0;
    }
}
// Copy constructor: allocates a separate managed buffer of the same size as
// mat and copies every element on the host.
//
// An empty source produces an empty copy without calling the allocator.
// If the allocation fails, a message is printed, the copy loop is skipped
// and the new object is left empty (array == NULL, all sizes 0); the
// elements are never written through an invalid pointer.
Matrix::Matrix(const Matrix &mat){
    height = mat.height;
    width = mat.width;
    sizeArray = mat.sizeArray;
    array = NULL;
    if (sizeArray == 0)
    {
        return;
    }
    cudaError_t err = cudaMallocManaged(&array,sizeArray*sizeof(array[0]));
    if (err != cudaSuccess)
    {
        printf("Memory allocation failed");
        array = NULL;
        height = 0;
        width = 0;
        sizeArray = 0;
        return;
    }
    for(size_t i = 0;i<sizeArray;++i){
        array[i] = mat.array[i];
    }
}
// Copy assignment: makes *this a deep copy of mat and returns *this.
//
// Precondition: *this is a fully constructed Matrix. To initialise raw
// storage, use a constructor (e.g. placement new) rather than assignment,
// because the current buffer pointer is released here.
//
// The new buffer is allocated and filled before the old one is released, so:
//   * self-assignment is a no-op,
//   * the previous buffer is freed instead of leaked,
//   * if the allocation fails, a message is printed and *this is unchanged.
Matrix &Matrix::operator=(const Matrix &mat){
    if (this == &mat)
    {
        return *this;
    }

    double *fresh = NULL;
    if (mat.sizeArray != 0)
    {
        cudaError_t err = cudaMallocManaged(&fresh,mat.sizeArray*sizeof(fresh[0]));
        if (err != cudaSuccess)
        {
            printf("Memory allocation failed");
            return *this;
        }
        for(size_t i = 0;i<mat.sizeArray;++i){
            fresh[i] = mat.array[i];
        }
    }

    // Release the buffer being replaced; cudaFree accepts a null pointer.
    cudaError_t freeErr = cudaFree(array);
    if (freeErr != cudaSuccess)
    {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(freeErr));
    }

    array = fresh;
    height = mat.height;
    width = mat.width;
    sizeArray = mat.sizeArray;
    return *this;
}
// Destructor: returns the element buffer to the CUDA runtime.
// A failing cudaFree is reported on stderr instead of being dropped silently;
// nothing else can be done about it from a destructor.
Matrix::~Matrix(){
    cudaError_t err = cudaFree(array);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(err));
    }
}
// Stores value in the element at row i, column j (flat index i*width + j).
// Requests outside the matrix (i >= height or j >= width) are ignored, so a
// bad index can neither write past the buffer nor land in a different row.
void Matrix::assignValue(size_t i,size_t j, double value){
    if (i >= height || j >= width)
    {
        return;
    }
    array[i*width + j] = value;
}
// Stores value in the element at flat index l.
// An index outside the buffer (l >= sizeArray) is ignored rather than
// written through, which would corrupt unrelated memory.
void Matrix::assignValue(size_t l, double value){
    if (l < sizeArray)
    {
        array[l] = value;
    }
}
// Prints the matrix with printf: the elements of a row are separated by a
// tab and every row ends with a newline.
// The format strings carry real escape sequences ("\t", "\n"); without the
// backslashes the literal letters 't' and 'n' would be printed and all
// values would run together on one line.
void Matrix::displayArray(){
    for (size_t row = 0; row < height; ++row) {
        for (size_t col = 0; col < width; ++col) {
            printf("%f\t", array[row*width + col]);
        }
        printf("\n");
    }
}
$ cat main.cu
#include<cstdio>
#include<cstdlib>
#include<iostream>
#include<new>
#include"matrixCU.h"
#include<curand_kernel.h>
const int N=1000;
// Kernel: stores one uniformly distributed random value in each of the first
// N elements of *R.
//
// Launch layout: 1D grid of 1D blocks. Any grid size works, because each
// thread walks the index range with a stride of the total thread count;
// threads whose first index is already >= N do nothing.
// R is dereferenced on the device, so it must point to memory the device can
// access, and the matrix must hold at least N elements.
// Each participating thread seeds its own generator from clock64() and uses
// its first index as the sequence number, so values differ between runs.
__global__ void initialize(Matrix *R){
    const int stride = gridDim.x*blockDim.x;
    const int first = blockIdx.x*blockDim.x + threadIdx.x;
    if (first >= N) {
        return;
    }
    curandState state;
    curand_init(clock64(), first, 0, &state);
    for (int i = first; i < N; i += stride) {
        R->assignValue(i,curand_uniform(&state));
    }
}
// Stops the program with a diagnostic when a CUDA runtime call did not
// succeed; what names the failing step.
static void checkCuda(cudaError_t status, const char *what){
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Builds an N-element Matrix in managed memory, fills it on the device with
// the initialize kernel, prints it on the host and releases everything.
int main(){
    // The Matrix object itself must live in managed memory so the kernel can
    // dereference the pointer it receives.
    void *storage = NULL;
    checkCuda(cudaMallocManaged(&storage,sizeof(Matrix)), "cudaMallocManaged");

    // Construct the object in the raw storage. Assigning to *storage instead
    // would run operator= on an object whose members were never initialised.
    Matrix *pR = new (storage) Matrix(N);

    const int threads = 256;
    const int blocks = (N + threads - 1)/threads;  // ceil(N / threads)
    initialize<<<blocks,threads>>>(pR);
    checkCuda(cudaGetLastError(), "initialize launch");        // bad launch configuration
    checkCuda(cudaDeviceSynchronize(), "initialize execution"); // faults raised inside the kernel

    pR->displayArray();

    // Placement-new objects are destroyed explicitly; the storage is freed
    // separately with the allocator it came from.
    pR->~Matrix();
    checkCuda(cudaFree(storage), "cudaFree");
    return 0;
}
$ nvcc -rdc=true -o test matrixCU.cu main.cu
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
0.071004
0.881221
0.136314
0.365471
0.869905
0.786048
0.072587
0.977478
0.090027
0.784501
0.956906
0.886720
0.716731
0.990675
0.855763
0.572306
0.926382
0.088547
0.690107
0.693889
0.603796
0.380563
0.535087
0.162159
0.130997
0.791521
0.033966
0.186036
0.717896
0.737800
0.401380
0.298967
0.437803
0.783576
0.644548
0.180492
0.048909
0.058355
0.890834
0.822247
0.957458
0.464835
0.260732
0.765984
0.095627
0.696827
0.967914
0.648809
0.475668
0.724241
0.329649
0.651475
0.783935
0.061218
0.193391
0.731417
0.001546
0.057500
0.549647
0.923320
0.480090
0.780765
0.562150
0.415062
0.993346
0.619210
0.573613
0.788595
0.089867
0.126398
0.393461
0.031120
0.562209
0.860722
0.029709
0.020258
0.032154
0.959066
0.539097
0.051973
0.597740
0.309279
0.064356
0.165475
0.936288
0.936569
0.940147
0.809314
0.385906
0.737780
0.245182
0.617622
0.804020
0.075155
0.014515
0.142879
0.705339
0.163538
0.318771
0.065821
0.501200
0.488487
0.554761
0.026746
0.731280
0.591759
0.402278
0.027762
0.646197
0.483466
0.636325
0.958660
0.284055
0.688907
0.863734
0.177655
0.372774
0.309040
0.890085
0.892424
0.559866
0.890482
0.746828
0.664398
0.082499
0.519862
0.822739
0.930987
0.375726
0.761936
0.609439
0.508076
0.297762
0.831960
0.277791
0.810657
0.605123
0.454552
0.239150
0.983239
0.912615
0.724977
0.280365
0.738776
0.299132
0.011427
0.816369
0.101436
0.486195
0.969667
0.641677
0.891698
0.558882
0.872550
0.105379
0.161112
0.530537
0.366833
0.097867
0.121119
0.827264
0.533857
0.874313
0.405111
0.982588
0.191437
0.950727
0.036380
0.284657
0.344919
0.727332
0.086354
0.403660
0.156581
0.113123
0.909607
0.032325
0.993558
0.780771
0.189485
0.337643
0.368653
0.975085
0.607430
0.637252
0.102953
0.431206
0.505485
0.581307
0.674293
0.916469
0.334928
0.000439
0.364342
0.311674
0.824597
0.364305
0.624897
0.094010
0.244369
0.295886
0.686752
0.237741
0.418019
0.429948
0.808117
0.641755
0.874406
0.780955
0.843963
0.280321
0.644448
0.973191
0.656018
0.413099
0.666328
0.098870
0.488157
0.813085
0.310812
0.603307
0.935658
0.681021
0.730611
0.271459
0.618244
0.546704
0.420874
0.847286
0.837329
0.571176
0.162310
0.969490
0.814732
0.620176
0.285194
0.565309
0.827454
0.381201
0.354889
0.120400
0.559674
0.784813
0.784853
0.204673
0.026896
0.942334
0.191417
0.599667
0.623399
0.171693
0.664806
0.550268
0.182918
0.546951
0.515124
0.310977
0.626322
0.755771
0.814052
0.577685
0.921988
0.809203
0.880358
0.628649
0.826777
0.466531
0.522288
0.650234
0.642578
0.692523
0.925580
0.417366
0.744375
0.212205
0.100794
0.682757
0.349675
0.448193
0.763235
0.446690
0.626758
0.479309
0.966294
0.095116
0.247121
0.423500
0.694378
0.684999
0.436070
0.985427
0.124690
0.830675
0.523437
0.290435
0.477991
0.607785
0.583163
0.887029
0.594623
0.282790
0.981283
0.345953
0.132277
0.052429
0.419485
0.495071
0.716816
0.019688
0.046086
0.556042
0.175318
0.858782
0.587863
0.229087
0.248072
0.151296
0.930247
0.583634
0.200742
0.602250
0.243866
0.937832
0.964248
0.541182
0.566934
0.528167
0.455638
0.337338
0.915541
0.014845
0.812097
0.387962
0.814644
0.561018
0.446411
0.196474
0.529297
0.027990
0.143012
0.486798
0.501173
0.103471
0.492401
0.165220
0.262854
0.306416
0.891005
0.148543
0.488292
0.560061
0.139010
0.741020
0.592255
0.877501
0.094123
0.853003
0.953739
0.468713
0.038470
0.055537
0.785538
0.032234
0.653423
0.006316
0.742476
0.262429
0.538420
0.948191
0.422741
0.947780
0.118863
0.251957
0.491484
0.657505
0.467903
0.379952
0.670963
0.635206
0.218026
0.246661
0.019329
0.353579
0.738275
0.115951
0.245934
0.362565
0.339475
0.195376
0.287473
0.610410
0.711066
0.295927
0.602231
0.085744
0.783727
0.211958
0.558423
0.581048
0.840549
0.919790
0.908709
0.710892
0.142209
0.259798
0.358526
0.982587
0.729312
0.355643
0.972439
0.066963
0.766495
0.455677
0.335058
0.105249
0.347533
0.556705
0.642309
0.449140
0.237073
0.923077
0.855320
0.121969
0.686332
0.543143
0.281068
0.167924
0.207383
0.808131
0.665351
0.441563
0.628264
0.834018
0.730585
0.119630
0.214248
0.118301
0.998303
0.345904
0.907547
0.575551
0.498900
0.031708
0.056363
0.971347
0.048892
0.904331
0.728991
0.860126
0.929356
0.775998
0.082585
0.848793
0.491587
0.465580
0.839088
0.489764
0.559898
0.988726
0.166667
0.478742
0.314910
0.556671
0.903586
0.789367
0.084805
0.525357
0.010694
0.801187
0.924251
0.115619
0.222583
0.564765
0.777636
0.083258
0.432392
0.325102
0.051917
0.771337
0.244661
0.410936
0.067163
0.058883
0.206597
0.926144
0.910277
0.931427
0.318363
0.952857
0.262882
0.031549
0.853218
0.864200
0.740018
0.421445
0.778487
0.607826
0.383871
0.733917
0.054857
0.123307
0.403572
0.667892
0.978576
0.770972
0.148966
0.093849
0.434403
0.745678
0.680830
0.365104
0.548875
0.857006
0.027067
0.505060
0.684357
0.653154
0.978205
0.554004
0.933031
0.835429
0.261711
0.707497
0.453605
0.771771
0.133137
0.409224
0.719492
0.595178
0.835815
0.043639
0.688801
0.328865
0.211282
0.864362
0.353246
0.633998
0.189601
0.860122
0.732243
0.930049
0.330278
0.211297
0.553998
0.410360
0.145061
0.594688
0.753200
0.426088
0.311669
0.513507
0.599102
0.540097
0.907585
0.361541
0.925146
0.344398
0.091586
0.931670
0.868378
0.584575
0.161473
0.078835
0.599470
0.453735
0.649924
0.752617
0.717426
0.724703
0.007661
0.466130
0.082873
0.823317
0.803449
0.681720
0.247576
0.370960
0.685922
0.968702
0.972480
0.145600
0.215099
0.576238
0.529124
0.122174
0.873409
0.096452
0.688353
0.461451
0.220745
0.596305
0.654279
0.814172
0.485580
0.691178
0.284079
0.755656
0.640800
0.235559
0.526888
0.881339
0.464136
0.515479
0.701673
0.766042
0.748992
0.637216
0.680738
0.264244
0.513140
0.612362
0.219023
0.181888
0.942823
0.979810
0.007846
0.051845
0.153540
0.658704
0.163448
0.018833
0.150050
0.482700
0.766370
0.380305
0.718186
0.185659
0.397264
0.637206
0.896022
0.755381
0.171956
0.208321
0.472355
0.043897
0.557638
0.543795
0.281032
0.896003
0.226191
0.288041
0.203736
0.247533
0.891655
0.103003
0.533880
0.513406
0.023316
0.865172
0.909065
0.820183
0.889483
0.902325
0.808454
0.172596
0.595968
0.959205
0.763288
0.811832
0.094167
0.655713
0.570013
0.456017
0.335148
0.709506
0.197503
0.469100
0.036439
0.867685
0.866681
0.089328
0.758056
0.919974
0.540013
0.737209
0.245158
0.480957
0.119510
0.680165
0.473584
0.746352
0.812241
0.164265
0.645677
0.408948
0.646678
0.996313
0.266382
0.786109
0.102574
0.081793
0.042725
0.901320
0.903022
0.370827
0.116377
0.148513
0.641947
0.988185
0.534740
0.290124
0.488126
0.688406
0.635819
0.041061
0.488130
0.903068
0.805637
0.379594
0.060992
0.070235
0.245431
0.885446
0.459276
0.405332
0.170523
0.221524
0.351225
0.610599
0.098554
0.494537
0.714730
0.292618
0.753696
0.677983
0.279188
0.041242
0.177235
0.445418
0.486639
0.883625
0.439943
0.906762
0.611522
0.702600
0.192586
0.701352
0.902904
0.831730
0.171905
0.611947
0.043654
0.358982
0.896191
0.371733
0.387501
0.867840
0.474686
0.285156
0.948178
0.265145
0.383775
0.880716
0.178402
0.876509
0.651860
0.479228
0.395485
0.812722
0.714489
0.974786
0.843317
0.873842
0.649662
0.901662
0.018448
0.909130
0.557107
0.973801
0.833762
0.667875
0.445815
0.125707
0.032208
0.381092
0.854889
0.432165
0.842306
0.102883
0.488534
0.317390
0.106481
0.252802
0.537869
0.821866
0.402344
0.580133
0.548850
0.950169
0.729780
0.283909
0.878742
0.896967
0.542452
0.706111
0.201831
0.796350
0.573107
0.547780
0.693936
0.381239
0.896723
0.259790
0.462630
0.097504
0.392813
0.434247
0.338579
0.519444
0.936378
0.550385
0.080931
0.668805
0.794754
0.145421
0.657209
0.816712
0.183759
0.417871
0.377352
0.561842
0.531488
0.468831
0.398729
0.367924
0.105588
0.435630
0.272374
0.600957
0.175964
0.424843
0.474066
0.880879
0.860876
0.388000
0.075704
0.065552
0.394633
0.602822
0.926586
0.807954
0.539659
0.688340
0.677482
0.798851
0.787824
0.616043
0.187996
0.094416
0.093962
0.569587
0.165626
0.534022
0.328510
0.410343
0.932529
0.362764
0.567724
0.045575
0.035658
0.320007
0.510330
0.979675
0.550351
0.566846
0.636248
0.082698
0.037850
0.000394
0.604477
0.791728
0.061347
0.273769
0.112541
0.736046
0.388991
0.697102
0.048412
0.235050
0.178021
0.346823
0.033713
0.540851
0.009417
0.843037
0.493029
0.252058
0.233311
0.493194
0.110216
0.308569
0.877967
0.889423
0.185219
0.119037
0.066453
0.323729
0.736569
0.131080
0.276096
0.911724
0.174000
0.769194
0.016669
0.907405
0.544761
0.904297
0.050780
0.149629
0.979373
0.636944
0.742445
0.699408
0.468510
0.031924
0.504938
0.020986
0.024615
0.847139
0.182187
0.479723
0.315336
0.539866
0.289621
0.539520
0.798866
0.619036
0.565818
0.205962
0.947796
0.572376
0.128618
0.444346
0.429170
0.009642
0.543231
0.239307
0.684082
0.075309
0.903709
0.912572
0.125115
0.525312
0.973700
0.143969
0.368056
0.949429
0.228075
0.483487
0.660288
0.590635
0.968287
0.425279
0.540247
0.240592
0.728381
0.552722
0.470203
0.252317
0.225089
0.743435
0.918008
0.095599
0.423085
0.181413
0.532619
0.713325
0.283655
0.533130
0.667046
0.721132
0.019322
0.715479
0.456797
0.301829
0.829011
0.041646
0.005163
0.731412
0.121360
0.279593
0.905235
0.109898
0.697796
0.209563
0.330866
0.657715
0.427852
0.716525
0.588886
0.998937
0.376172
0.518517
0.027857
0.224398
0.532648
0.259146
0.898033
0.950872
0.935855
========= ERROR SUMMARY: 0 errors
$
相关文章:
- CUDA内核和数学函数的显式命名空间
- 将 2D thrust::device_vector 复数矩阵传递给 CUDA 内核函数
- 如何将矢量的数据传递给 CUDA 内核?
- 无法在 cuda 内核中使用我的模板类
- CUDA内核printf()在终端中不产生输出,在探查器中工作
- 编译为 cuda 内核调用提供了"expression must have integral or unscoped enum type"
- 使用模板模式优化 CUDA 内核
- 带有大结构变量的 CUDA 内核函数给出了错误的结果
- CUDA 内核在第二次运行时运行得更快 - 为什么?
- 在CUDA内核中传递一个常数整数
- 如何将函数作为CUDA内核参数传递
- 验证调用 cuda 内核的次数
- cuda 内核调用/传递参数中的编译错误
- 预期;在 CUDA 内核上
- CUDA 内核"Only a single pack parameter is allowed"解决方法?
- 降低CUDA内核运行时:内核中矩阵的动态内存分配
- 如何在多个 GPU 上同时执行 cufftXt 和 CUDA 内核
- 执行 CUDA 内核时黑屏C++输出正确的结果
- CUDA 内核中的竞争条件
- 优化三角矩阵计算的 CUDA 内核执行