I'm having trouble understanding how to send a 2D array to Cuda. I have a program that parses a large file with 30 data points on each line. I read about 10 rows at a time, and then create a matrix for each row and elements (so in my example of 10 rows with 30 data points it will be int list[10][30];My goal is to send this array to my core and each block processes the row (I got this to work perfectly in normal C, but Cuda was a bit more complicated).
Here is what I have been doing so far, but no luck (note: sizeofbucket = rows and sizeOfBucketsHoldings = elements in a row ... I know that I have to win the reward for the names of the odd variables):
int list[sizeOfBuckets][sizeOfBucketsHoldings];
#define sizeOfBuckets 10
#define sizeOfBucketsHoldings 30
int *dev_current_list[sizeOfBuckets][sizeOfBucketsHoldings];
size_t pitch;
cudaMallocPitch((int**)&dev_current_list, (size_t *)&pitch, sizeOfBucketsHoldings * sizeof(int), sizeOfBuckets);
cudaMemcpy2D( dev_current_list, pitch, list, sizeOfBuckets * sizeof(int), sizeOfBuckets * sizeof(int), sizeOfBucketsHoldings * sizeof(int),cudaMemcpyHostToDevice );
process_list<<<count,1>>> (sizeOfBuckets, sizeOfBucketsHoldings, dev_current_list, pitch);
cudaFree( dev_current_list );
__global__ void process_list(int sizeOfBuckets, int sizeOfBucketsHoldings, int *current_list, int pitch) {
int tid = blockIdx.x;
for (int r = 0; r < sizeOfBuckets; ++r) {
int* row = (int*)((char*)current_list + r * pitch);
for (int c = 0; c < sizeOfBucketsHoldings; ++c) {
int element = row[c];
}
}
The error I get is:
main.cu(266): error: argument of type "int *(*)[30]" is incompatible with parameter of type "int *"
1 error detected in the compilation of "/tmp/tmpxft_00003f32_00000000-4_main.cpp1.ii".
266 - process_list<<<count,1>>> (count, countListItem, dev_current_list, pitch); , , int *, ? C- int current_list[num_of_rows][num_items_in_row], , Cuda.
. , (sizeOfBuckets), (sizeOfBucketHoldings). , , cudamalloc cudaMemcpy, , MallocPitch 2dcopy ( cuda by example), , ( CUDA_C, . 22, ). ? , ?
:
, ( ).
:
__global__ void process_list(int sizeOfBuckets, int sizeOfBucketsHoldings, int *current_list, size_t pitch, int *total) {
int tid = blockIdx.x;
for (int c = 0; c < sizeOfBucketsHoldings; ++c) {
total[tid] = total + current_list[tid][c];
}
}
, :
int *dev_total;
cudaMalloc( (void**)&dev_total, sizeOfBuckets * sizeof(int) );