CUDA Single Instruction Multiple Data (SIMD). , (SM). SM 32: warp
.
, GPU , SM. SM, , . , , , SM. , SIMD . , A B SM , A .
CUDA , , . , , , . , , SM.
,
"GPGPU" CUDA/OpenCL
/**
 * Worker kernel in which each thread repeatedly claims an input index and
 * processes it until the claimed index reaches `count`.
 *
 * Parameters:
 *   ahead - counter passed to read_and_increment() to obtain the next input index
 *   bhead - counter passed to read_and_increment() to obtain the next output index
 *   count - upper bound on input indices; a thread leaves the loop as soon as
 *           the index it claimed is >= count
 *   a     - input array, indexed by the claimed input index
 *   b     - output array, indexed by the claimed output index
 *
 * NOTE(review): read_and_increment(), load_locally(),
 * do_work_with_locally_loaded_data() and write_result() are not defined in
 * this view. read_and_increment() presumably performs an atomic
 * fetch-and-add on the counter (e.g. atomicAdd(ptr, 1)) so that no two
 * threads claim the same index -- confirm against its definition.
 * NOTE(review): the output index is not compared against any bound here;
 * verify that `b` has room for one result per processed input.
 */
__global__ void persistent(int* ahead, int* bhead, int count, float* a, float* b)
{
    int in_index;

    // Keep claiming work until the input counter has run past the last item.
    while ((in_index = read_and_increment(ahead)) < count)
    {
        load_locally(a[in_index]);
        do_work_with_locally_loaded_data();

        // Claim an output slot only once the result is ready to be written.
        int out_index = read_and_increment(bhead);
        write_result(b[out_index]);
    }
}
// Launch `persistent` with numBlocks blocks of blockSize threads each.
// The execution configuration must be wrapped in triple chevrons (<<< >>>).
persistent<<<numBlocks, blockSize>>>(ahead_addr, bhead_addr, total_count, A, B);