在Windows上用Qt Creator编译Cuda代码
Compiling Cuda code in Qt Creator on Windows
我几天来一直在尝试在32位Windows 7系统上运行一个Qt项目文件,我希望/需要在其中包含Cuda代码。这些事情的结合要么是如此简单,以至于没有人愿意在网上举一个例子,要么是如此困难,以至于似乎没有人成功。不管怎样,我发现的唯一有用的论坛线程是Linux或Mac上的相同问题,或者Windows上的Visual Studio。然而,所有这些都会产生各种不同的错误,无论是由于链接或冲突库,还是由于文件名中的空格或Windows版本的Cuda SDK中不存在的文件夹。有没有人有一个清晰的.pro
文件可以提供?
我的目标是用Qt风格的普通C++代码编译一个简单的程序,使用Qt 4.8库,这些库引用了.cu文件中的几个Cuda模块。某种形式:
TestCUDA
TestCUDA.pro
main.cpp
test.cu
所以我终于组装了一个.pro
文件,该文件适用于我的系统,可能适用于所有Windows系统。以下是一个简单的测试程序,可能会达到目的。下面是一个小项目文件加上测试程序,它至少在我的系统上运行。
文件系统如下所示:
TestCUDA
TestCUDA.pro
main.cpp
vectorAddition.cu
项目文件显示:
# qmake project for a Qt console program that links CUDA code compiled by nvcc.
# The .cu files are not handled by qmake itself; an extra compiler (see the
# bottom of this file) runs nvcc on every entry of CUDA_SOURCES.
TARGET = TestCUDA

# Define output directories
DESTDIR = release
OBJECTS_DIR = release/obj
CUDA_OBJECTS_DIR = release/cuda

# Source files (paths are relative to this .pro file; main.cpp and
# vectorAddition.cu live next to it)
SOURCES += main.cpp

# This makes the .cu files appear in your project
OTHER_FILES += vectorAddition.cu

# CUDA settings <-- may change depending on your system
CUDA_SOURCES += vectorAddition.cu
CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.2/C" # Path to cuda SDK install
CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v4.2" # Path to cuda toolkit install
SYSTEM_NAME = Win32 # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 32 # '32' or '64', depending on your system
CUDA_ARCH = sm_11 # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
NVCC_OPTIONS = --use_fast_math

# include paths (each continued line must end with a backslash, otherwise
# qmake treats the following line as a separate, invalid statement)
INCLUDEPATH += $$CUDA_DIR/include \
               $$CUDA_SDK/common/inc/ \
               $$CUDA_SDK/../shared/inc/

# library directories
QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME \
                $$CUDA_SDK/common/lib/$$SYSTEM_NAME \
                $$CUDA_SDK/../shared/lib/$$SYSTEM_NAME

# Add the necessary libraries
LIBS += -lcuda -lcudart

# The following library conflicts with something in Cuda
QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
QMAKE_LFLAGS_DEBUG = /NODEFAULTLIB:msvcrtd.lib

# The following makes sure all path names (which often include spaces) are put between quotation marks.
# join(list, glue, before, after) yields: -I"path1" -I"path2" ...
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')

# Configuration of the Cuda compiler: one extra compiler per build mode.
#   .input   names the variable listing the files to compile
#   .output  is the object file produced for each input file
#   .commands is the nvcc command line run for each input file
CONFIG(debug, debug|release) {
    # Debug mode
    cuda_d.input = CUDA_SOURCES
    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
    cuda_d.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda_d.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda_d
} else {
    # Release mode
    cuda.input = CUDA_SOURCES
    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
    cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda
}
注意QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
:我花了很长时间才弄清楚,但这个库似乎与Cuda中的其他内容相冲突,这会产生奇怪的链接警告和错误。如果有人对此有解释,并且可能有一个更漂亮的方法来解决这个问题,我很想听听。
此外,由于Windows文件路径通常包含空格(默认情况下NVIDIA的SDK也包含空格),因此有必要人为地在包含路径周围添加引号。再说一遍,如果有人知道解决这个问题的更优雅的方法,我很想知道。
main.cpp
文件如下所示:
#include <cuda.h>
#include <builtin_types.h>
#include <drvapi_error_string.h>
#include <QtCore/QCoreApplication>
#include <QDebug>
// Forward declare the function in the .cu file
void vectorAddition(const float* a, const float* b, float* c, int n);
// Prints the first n elements of `a` through qDebug() as "(x0, x1, ..., xN-1)".
//
// a: pointer to at least n floats (not dereferenced when n == 0).
// n: number of elements to print; n == 0 prints "()".
void printArray(const float* a, const unsigned int n) {
    QString s = "(";
    for (unsigned int ii = 0; ii < n; ++ii) {
        // Separator goes before every element except the first, so there is
        // no `n - 1` bound that would wrap around for an empty array.
        if (ii > 0)
            s.append(", ");
        s.append(QString::number(a[ii]));
    }
    s.append(")");
    qDebug() << s;
}
int main(int argc, char* argv [])
{
QCoreApplication(argc, argv);
int deviceCount = 0;
int cudaDevice = 0;
char cudaDeviceName [100];
unsigned int N = 50;
float *a, *b, *c;
cuInit(0);
cuDeviceGetCount(&deviceCount);
cuDeviceGet(&cudaDevice, 0);
cuDeviceGetName(cudaDeviceName, 100, cudaDevice);
qDebug() << "Number of devices: " << deviceCount;
qDebug() << "Device name:" << cudaDeviceName;
a = new float [N]; b = new float [N]; c = new float [N];
for (unsigned int ii = 0; ii < N; ++ii) {
a[ii] = qrand();
b[ii] = qrand();
}
// This is the function call in which the kernel is called
vectorAddition(a, b, c, N);
qDebug() << "input a:"; printArray(a, N);
qDebug() << "input b:"; printArray(b, N);
qDebug() << "output c:"; printArray(c, N);
if (a) delete a;
if (b) delete b;
if (c) delete c;
}
Cuda文件vectorAddition.cu
描述了一个简单的矢量加法,如下所示:
#include <cstdio>
#include <cuda.h>
#include <builtin_types.h>
// Element-wise addition kernel: the thread with flat index i stores
// a[i] + b[i] into c[i]. Threads whose index is >= n do nothing, so the
// launch may cover more threads than there are elements.
// The index is computed from the x dimension of the block/thread ids only.
extern "C"
__global__ void vectorAdditionCUDA(const float* a, const float* b, float* c, int n)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= n)
        return;
    c[idx] = a[idx] + b[idx];
}
// Host wrapper: computes c[i] = a[i] + b[i] for i in [0, n) on the GPU.
//
// a, b: host input arrays holding at least n floats each.
// c:    host output array with room for n floats; left untouched if a CUDA
//       call fails before the result is copied back.
// n:    number of elements; n <= 0 is a no-op.
//
// The first failing CUDA call is reported on stderr and the remaining steps
// are skipped; device buffers are always released.
void vectorAddition(const float* a, const float* b, float* c, int n) {
    if (n <= 0)
        return; // nothing to do, and a zero-block launch would be rejected

    float *a_cuda = 0, *b_cuda = 0, *c_cuda = 0;
    const size_t nBytes = sizeof(float) * static_cast<size_t>(n);
    const int threadsPerBlock = 256;
    // Ceiling division written so that it cannot overflow for large n (n > 0 here).
    const int blocksPerGrid = (n - 1) / threadsPerBlock + 1;

    // allocate and copy memory into the device
    cudaError_t status = cudaMalloc((void **)& a_cuda, nBytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)& b_cuda, nBytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)& c_cuda, nBytes);
    if (status == cudaSuccess)
        status = cudaMemcpy(a_cuda, a, nBytes, cudaMemcpyHostToDevice);
    if (status == cudaSuccess)
        status = cudaMemcpy(b_cuda, b, nBytes, cudaMemcpyHostToDevice);

    if (status == cudaSuccess) {
        vectorAdditionCUDA<<<blocksPerGrid, threadsPerBlock>>>(a_cuda, b_cuda, c_cuda, n);
        // A kernel launch returns nothing; launch errors are fetched explicitly.
        status = cudaGetLastError();
    }

    // load the answer back into the host
    if (status == cudaSuccess)
        status = cudaMemcpy(c, c_cuda, nBytes, cudaMemcpyDeviceToHost);

    if (status != cudaSuccess)
        fprintf(stderr, "vectorAddition: CUDA error: %s\n", cudaGetErrorString(status));

    // Pointers that were never allocated are still 0 here.
    cudaFree(a_cuda);
    cudaFree(b_cuda);
    cudaFree(c_cuda);
}
如果你能做到这一点,那么我认为更复杂的例子是不言而喻的。
编辑(24-1-2013):我添加了QMAKE_LFLAGS_DEBUG = /NODEFAULTLIB:msvcrtd.lib
,以及带有额外-D_DEBUG标志的CONFIG(debug, debug|release)
分支,
这样它也可以在调试模式下编译。
使用msvc 2010,我发现链接器不接受-l参数,但nvcc需要它。因此,我在.pro文件中做了一个简单的更改:
# Add the necessary libraries
# Bare library names; the two joins below format them once for nvcc and once
# for the Visual C++ linker.
CUDA_LIBS = cuda cudart
# The following makes sure all path names (which often include spaces) are put between quotation marks
# Result: -I"path1" -I"path2" ...
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
# LIBRARIES IN FORMAT NEEDED BY NVCC
# Result: -lcuda -lcudart
NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
# LIBRARIES IN FORMAT NEEDED BY VISUAL C++ LINKER
# Result: cuda.lib cudart.lib
LIBS += $$join(CUDA_LIBS,'.lib ', '', '.lib')
和nvcc命令(发布版本):
# Release-mode nvcc command line; libraries are passed as $$NVCC_LIBS (the -l form) rather than $$LIBS
cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
插入了$$NVCC_LIBS而不是$$LIBS。整个.pro文件,对我有效:
# qmake project for a Qt console application (core module only, no GUI) whose
# CUDA sources are compiled by nvcc through a qmake extra compiler.
QT += core
QT -= gui
TARGET = TestCUDA
CONFIG += console
CONFIG -= app_bundle
TEMPLATE = app
# Define output directories
# NOTE(review): these are set unconditionally, so debug builds also land in release/ - confirm this is intended
DESTDIR = release
OBJECTS_DIR = release/obj
CUDA_OBJECTS_DIR = release/cuda
# Source files
SOURCES += main.cpp
# This makes the .cu files appear in your project
OTHER_FILES += vectorAddition.cu
# CUDA settings <-- may change depending on your system
# CUDA_SOURCES is the input list of the nvcc extra compiler defined at the bottom
CUDA_SOURCES += vectorAddition.cu
#CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.2/C" # Path to cuda SDK install
CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v5.0" # Path to cuda toolkit install
SYSTEM_NAME = win32 # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 32 # '32' or '64', depending on your system
CUDA_ARCH = sm_11 # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
NVCC_OPTIONS = --use_fast_math
# include paths
INCLUDEPATH += $$CUDA_DIR/include
#$$CUDA_SDK/common/inc/
#$$CUDA_SDK/../shared/inc/
# library directories
QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME
#$$CUDA_SDK/common/lib/$$SYSTEM_NAME
#$$CUDA_SDK/../shared/lib/$$SYSTEM_NAME
# The following library conflicts with something in Cuda
# Note: '=' replaces any linker flags previously stored in these variables
QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
QMAKE_LFLAGS_DEBUG = /NODEFAULTLIB:msvcrtd.lib
# Add the necessary libraries
# Bare names; formatted separately for nvcc and for the Visual C++ linker below
CUDA_LIBS = cuda cudart
# The following makes sure all path names (which often include spaces) are put between quotation marks
# Result: -I"path1" -I"path2" ...
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
# Libraries in the form nvcc expects: -lcuda -lcudart
NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
# Libraries in the form the Visual C++ linker expects: cuda.lib cudart.lib
LIBS += $$join(CUDA_LIBS,'.lib ', '', '.lib')
# Configuration of the Cuda compiler
# One extra compiler is registered per build mode; both read CUDA_SOURCES and
# write <base name>_cuda.o into CUDA_OBJECTS_DIR. The debug variant differs
# only by the additional -D_DEBUG define.
CONFIG(debug, debug|release) {
# Debug mode
cuda_d.input = CUDA_SOURCES
cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda_d.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda_d.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda_d
}
else {
# Release mode
cuda.input = CUDA_SOURCES
cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda
}
我还添加了一些必要的声明,即QT += core,以便应用程序工作,还删除了SDK部分,我觉得这在这种情况下没有用。
我尝试了这种组合。由于我的项目中存在大量依赖项,无法使其工作。我的最终解决方案是在Windows上将应用程序分解为两个独立的部分:
- CUDA应用程序在VC中开发并在Windows中作为服务/DLL运行
- 在QT中开发的GUI界面,并使用DLL执行CUDA相关任务
希望它能节省其他的一些时间
- 编译时未启用intel oneApi CUDA支持
- 为什么即使使用-cudart-static进行编译,库用户仍然需要链接到cuda运行时
- 使用clang编译带有数学函数的CUDA代码
- 通过Python Distutils(用于Python C扩展)使用可重定位的设备代码编译CUDA代码
- 如何在Visual Studio中为CUDA项目启用单独的编译
- 使用 msvc 15 在 Qt5.13 中编译 CUDA 代码
- 如何使用CMake将C++编译为CUDA
- 在Windows中编译CUDA的意义
- 编译CUDA的自定义TensorFlow OP
- 在Windows上使用clang编译CUDA时的重新定义
- 错误:在编译 cuda 的开源库 cusp 程序时需要标识符
- 在Windows上用Qt Creator编译Cuda代码
- 使用CMAKE编译CUDA C++-指定了多个编译阶段
- 为什么在 Ubuntu 下编译 CUDA 示例时找不到 libcudart.so.4
- 无法在OS X下编译CUDA驱动程序API示例
- 使用其他c++编译器编译CUDA代码
- 无法编译CUDA C源.提供了简单版本
- 使用 VS 2012 编译 CUDA 5 时出现错误"nvcc cannot find a supported cl version"
- 编译CUDA文件、C文件和CUDA头文件
- md5sum值在每次编译cuda源代码时都会改变