CUDA Array / Surface Memory

After calling the function test, I print the array dtr1. I expect to receive 100for all items, but I do not receive it. Why is this?

#include "ImageUtil2D.h"
#define W 10
#define H 10
#define MAX 100000
#define No_THREADS 10
surface<void,2> surfD;

__global__ void test()
{
for(int i=0;i<W;i++)
    for(int j=0;j<H;j++)
    {
        float a=100;
        surf2Dwrite(a, surfD, i,j, cudaBoundaryModeTrap);
    }
}

int main()
{
int *image = new int[W*H];
float *dtr = new float[W*H];
ImageUtil2D::InitImg(image, dtr, W, H);
const size_t sizef = size_t(W*H)*sizeof(float);

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray* cuArrD;
cudaMallocArray(&cuArrD, &channelDesc, W*H, 0, cudaArraySurfaceLoadStore);
//cudaMemcpyToArray(cuArrD, 0, 0, dtr, sizef, cudaMemcpyHostToDevice);
cudaBindSurfaceToArray(surfD, cuArrD);

test<<<1, 1>>>();

float *dtr1=new float[W*H];
cudaMemcpyFromArray(&dtr1, cuArrD, 0, 0, sizef, cudaMemcpyDeviceToHost );
ImageUtil2D::Print(dtr1);
return 0;
}
+3
source share
2 answers

CUDA C Programming Guide C 3.2. Section: 3.2.4.2.2 Surface Linkage

Unlike texture memory, surface memory uses byte addressing. This means that the x-coordinate used to access the texture element through texture functions must be multiplied by the byte size of the element to access the same element through the surface function.

Try the following:

surf2Dwrite(a, surfD, i * 4, j, cudaBoundaryModeTrap);

I hope for this help.

: / , ;)

+4

, pQB

cudaMemcpyFromArray(&dtr1, cuArrD, 0, 0, sizef, cudaMemcpyDeviceToHost );

,

cudaMemcpyFromArray(dtr1, cuArrD, 0, 0, sizef, cudaMemcpyDeviceToHost );
+1