How to create random permutations with CUDA

What parallel algorithms can I use to generate random permutations from a given set? Suggestions or links to documents suitable for CUDA are especially useful.

The serial version of this will be a Fisher-Yates shuffle.

Example:

Let S = {1, 2, ..., 7} be the set of initial indices. The goal is to generate n random permutations in parallel. Each of n permutations contains each of the source indices exactly once, for example, {7, 6, ..., 1}.

+5
source share
4 answers

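The Fisher-Yates shuffle can be parallelized. For example, 4 concurrent workers need only 3 iterations to shuffle a vector of 8 elements. In the first iteration they swap 0 ↔ 1, 2 ↔ 3, 4 ↔ 5, 6 ↔ 7; in the second, 0 ↔ 2, 1 ↔ 3, 4 ↔ 6, 5 ↔ 7; and in the last, 0 ↔ 4, 1 ↔ 5, 2 ↔ 6, 3 ↔ 7.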

ParallelFisherYates

This can be implemented as CUDA __device__ code (modelled on the standard min/max reduction):

// Block-level random shuffle of 2 * BLOCK_SIZE indices held in shared memory.
// Each thread performs one conditional swap per round; the swap distance
// (shift) doubles every round: 1, 2, 4, ..., BLOCK_SIZE.
// NOTE(review): assumes blockDim.x == BLOCK_SIZE and that BLOCK_SIZE is a
// power of two -- confirm at the launch site. curand_state and swap() are
// defined outside this fragment.
const int id  = threadIdx.x;
__shared__ int perm_shared[2 * BLOCK_SIZE];
// Start from the identity permutation: thread id fills slots 2*id and 2*id+1.
perm_shared[2 * id]     = 2 * id;
perm_shared[2 * id + 1] = 2 * id + 1;
// Every slot must be written before any thread begins swapping.
__syncthreads();

unsigned int shift = 1;
// pos always has the bit selected by shift cleared, so pos + shift is pos
// with exactly that bit set.
unsigned int pos = id * 2;  
while(shift <= BLOCK_SIZE)
{
    // The lowest bit of a fresh random draw decides whether this thread
    // exchanges the entries at pos and pos + shift.
    if (curand(&curand_state) & 1) swap(perm_shared, pos, pos + shift);
    shift = shift << 1;
    // Move the bit at the new shift position down by one place and clear it:
    // this keeps pos distinct per thread while freeing the bit that the next
    // round's "+ shift" will set.
    pos = (pos & ~shift) | ((pos & shift) >> 1);
    // The loop condition depends only on shift, which is the same in every
    // thread, so all threads reach this barrier together. It separates this
    // round's swaps from the next round's reads.
    __syncthreads();
}

The curand initialization code is omitted here, and swap(int *p, int i, int j) exchanges the values p[i] and p[j].

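Note that the code above makes the following assumptions:

  • The length of the permutation is 2 * BLOCK_SIZE, where BLOCK_SIZE is a power of 2.
  • 2 * BLOCK_SIZE integers fit into the __shared__ memory of the CUDA device.
  • BLOCK_SIZE is a valid CUDA block size (usually between 32 and 512).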

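To generate more than one permutation, use a separate CUDA block for each. If the goal is only to permute 7 elements (as in the original question), it will probably be faster to do it in a single thread.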

+11

If the length of s (call it s_L) is large, the following approach may be suitable. Use the Thrust library:

http://thrust.github.com.

Create a vector val of size s_L x n that contains n copies of s.

Create a vector val_keys that assigns one of n unique keys, each repeated s_L times, to every element of val, for example:

  val = {1,2,...,7,1,2,...,7,....,1,2,...7}
  val_keys = {0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,....,n-1,n-1,n-1}

Now generate a vector U of s_L x n uniformly distributed random numbers:

  U  = {0.24, 0.1, .... , 0.83} 

Then create a zip iterator over val, val_keys and U:

http://codeyarns.com/2011/04/04/thrust-zip_iterator/

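Sort val and val_keys by U, then sort them by val_keys using thrust::stable_sort_by_key(), which guarantees that if val[i] and val[j] both belong to key[k] and val[i] precedes val[j] after the random sort, then val[i] still precedes val[j] in the final result. If everything goes to plan, val_keys looks exactly as it did before, while val reflects the shuffling.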

+1

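For large sets, applying a sort primitive to a vector of random keys may be efficient enough. First, set up some vectors: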

// Number of elements to shuffle; every index 0..N-1 fits in a uint16_t.
const int N = 65535;
// d_cards holds the values being shuffled; d_keys receives one random sort
// key per element before each shuffle.
thrust::device_vector<uint16_t> d_cards(N);
thrust::device_vector<uint16_t> d_keys(N);
// Fill d_cards with the ascending sequence 0, 1, ..., N-1.
thrust::sequence(d_cards.begin(), d_cards.end());

Then, each time you want to shuffle d_cards, call this pair of functions:

// Generate one pseudo-random key per element. The seed is drawn on the host;
// the product is taken in uint32_t so that it wraps on overflow instead of
// triggering signed-integer overflow (undefined behaviour).
thrust::tabulate(d_keys.begin(), d_keys.end(),
                 PRNFunc(static_cast<uint32_t>(rand()) * static_cast<uint32_t>(rand())));
// Sorting by the random keys applies the same reordering to d_cards.
thrust::sort_by_key(d_keys.begin(), d_keys.end(), d_cards.begin());
// d_cards now freshly shuffled

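The random keys are generated by a functor that uses a seed (evaluated in host code and copied to the device at launch time) and a key number (which tabulate passes in for each element):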

// Functor for thrust::tabulate: maps an element index to a pseudo-random
// 32-bit value that depends only on the stored seed and that index.
struct PRNFunc
{
  // Seed shared by every invocation of this functor instance.
  uint32_t seed;

  // Stores the seed to use for all subsequent calls.
  PRNFunc(uint32_t s) : seed(s) {}

  // Returns the engine output obtained after seeding with `seed` and
  // discarding the first `kn` values, so different indices read different
  // positions of the same sequence.
  __device__ __host__ uint32_t operator()(uint32_t kn) const
  {
    thrust::minstd_rand randEng(seed);
    randEng.discard(kn);
    return randEng();
  }
};

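I have found that performance could be improved (by probably 30%) if I could work out how to cache the allocations that thrust::sort_by_key makes internally.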

.

0

All Articles