How do I keep kernel code in a separate .cu file, apart from the main .cpp?

How can I separate the CUDA kernel code from the other C++ code in a project? I want to collect all the kernel definitions in one file and, as with other .cpp files, call them where necessary. I tried writing all the kernels in kernels.cu and calling them by including the kernels.cu file, but that gives the following link errors.

/usr/bin/ld: error: ./vector_summation.o: multiple definition of 'perform_summation_method1(int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
/usr/bin/ld: error: ./vector_summation.o: multiple definition of '__device_stub__Z25perform_summation_method1Pii(int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
/usr/bin/ld: error: ./vector_summation.o: multiple definition of '__device_stub__Z25perform_summation_method2PiS_i(int*, int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
/usr/bin/ld: error: ./vector_summation.o: multiple definition of 'perform_summation_method2(int*, int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
/usr/bin/ld: error: ./vector_summation.o: multiple definition of '__device_stub__Z25perform_summation_method3PiS_i(int*, int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
/usr/bin/ld: error: ./vector_summation.o: multiple definition of 'perform_summation_method3(int*, int*, int)'
/usr/bin/ld: ./kernels.o: previous definition here
+5
source share
3 answers

You do this essentially the same way as with regular .cpp files/modules. In C++, you usually don't include one .cpp file in another when you want to access functions from the other file. Instead, you include header files, which usually contain only the function prototypes.

Here is one example:

test.h:

void my_cuda_func();

main.cpp:

#include <stdio.h>
#include "test.h"

// Host entry point built with g++: all GPU work is delegated to the wrapper
// declared in test.h, so this file contains no CUDA syntax.
int main()
{
  my_cuda_func();

  return 0;
}

test.cu:

#include <stdio.h>
#include "test.h"


// Device kernel: every thread that runs it prints "Hello!" and a newline
// through device-side printf.
__global__ void my_kernel()
{
  printf("Hello!\n");
}

// Host-side wrapper that lets plain C++ code (compiled without nvcc) run the
// kernel: launches my_kernel with one block of one thread and waits for it to
// finish. CUDA failures are reported on stderr, because the void signature
// leaves no way to hand them back to the caller.
void my_cuda_func(){
  my_kernel<<<1,1>>>();

  // A kernel launch returns nothing; launch failures (for example, no device
  // code built for the GPU's architecture) only show up via cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "my_kernel launch failed: %s\n", cudaGetErrorString(err));
    return;
  }

  // Block until the kernel is done so its printf output is flushed; errors
  // raised while the kernel was running are returned here.
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    fprintf(stderr, "my_kernel execution failed: %s\n", cudaGetErrorString(err));
  }
}

Build with:

g++ -c main.cpp
nvcc -arch=sm_20 -c test.cu
g++ -L/usr/local/cuda/lib64 -lcudart -o test main.o test.o
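Of course, this is only one way to do it. As with C++ in general, there are other ways to organize the files. First, note that nvcc is used alongside g++ here (nvcc compiles the .cu file). Second, code that uses GPU constructs directly (kernel launches, for example) has to be in a file compiled by nvcc, which is why main.cpp only calls an ordinary wrapper function.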

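If, on the other hand, you want to launch the kernel directly from main, declare the kernel in the header and compile main as a .cu file too, for example: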

test.h:

__global__ void my_kernel();

main.cu:

#include <stdio.h>
#include "test.h"

// Entry point compiled by nvcc: launches my_kernel (declared in test.h) with
// one block of one thread and waits for it to finish.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main(){
  my_kernel<<<1,1>>>();

  // A kernel launch returns nothing; launch failures are only visible through
  // cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "my_kernel launch failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  // Block until the kernel is done so its printf output is flushed; errors
  // raised while the kernel was running are returned here.
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    fprintf(stderr, "my_kernel execution failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  return 0;
}

test.cu:

#include <stdio.h>
#include "test.h"


// Device kernel: every thread that runs it prints "Hello!" and a newline
// through device-side printf.
__global__ void my_kernel()
{
  printf("Hello!\n");
}

Build with:

nvcc -arch=sm_20 -c main.cu
nvcc -arch=sm_20 -c test.cu
nvcc -arch=sm_20 -o test main.o test.o
+13

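I put the CUDA declarations in *.cuh header files. These hold the prototypes of the host functions that wrap initialization, cleanup, kernel launches, etc. For example: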

// Include guard. The macro name avoids double underscores, because
// identifiers containing "__" are reserved for the C++ implementation.
#ifndef CUDAHEADER_CUH
#define CUDAHEADER_CUH

/**
 * Initialize cuda stuff.
 * host_data: pointer to the host-side Data handed to the CUDA side
 * (how it is used is up to the implementation file).
 */
void cudaInit(Data * host_data);

/** Cleanup, frees resources used by the device. */
void cudaFinalize();

#endif // CUDAHEADER_CUH

Then, in the corresponding .cu file, the CUDA implementation:

#include "cudaHeader.cuh"

//some global variables like:
Data * device_data;

//some kernels and device functions:
// Kernel body elided in this sketch. The parameter type is spelled `Data`
// (capital D) so that it names the same type as cudaInit's argument and the
// global device_data pointer; C++ identifiers are case-sensitive.
__global__ void someKernel(Data * device_data) {
    ...
}

// Host-side initialization entry point declared in cudaHeader.cuh.
// NOTE: the two "some ..." lines are placeholders for the real allocation and
// copy calls in this sketch; they are not compilable as written.
// host_data: host-side data; presumably the source of the copy to the device
// (the sketch does not show this — confirm in the real code).
void cudaInit(Data * host_data) {
    some cudaMalloc()
    some cudaMemcpy()
    // Launch someKernel on the global device_data pointer. gridRes and
    // blockRes are not defined in this sketch and must be provided elsewhere.
    someKernel<<< gridRes, blockRes >>>(device_data);
}


// Releases the device allocation held in the global device_data pointer.
// The pointer is cleared afterwards so that calling cudaFinalize() a second
// time does not free the same allocation again: cudaFree() on a null pointer
// performs no operation.
void cudaFinalize() {
    cudaFree(device_data);
    device_data = 0;
}

...

+1

First example: I think you should build (the last line) as follows:

g++ -L/usr/local/cuda/lib64 -o test main.o test.o -lcudart

i.e. put the library last on the link line, after the object files that use it. (The order can matter to the linker.)

-1
source

All Articles