Merge pull request #351 from emankov/cuRAND
[HIPIFY][tests] add poisson-api-example test
This commit is contained in:
@@ -101,6 +101,8 @@
|
||||
| struct | `curandStateMRG32k3a_t` | `hiprandStateMRG32k3a_t` |
|
||||
| struct | `curandStatePhilox4_32_10_t` | `hiprandStatePhilox4_32_10_t` |
|
||||
| struct | `curandStateXORWOW_t` | `hiprandStateXORWOW_t` |
|
||||
| struct | `curandState_t` | `hiprandState_t` |
|
||||
| struct | `curandState` | `hiprandState_t` |
|
||||
|
||||
## **2. Host API Functions**
|
||||
|
||||
|
||||
@@ -362,6 +362,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_TYPE_NAME_MAP{
|
||||
{"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", CONV_TYPE, API_RAND}},
|
||||
{"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", CONV_TYPE, API_RAND}},
|
||||
{"curandStateXORWOW_t", {"hiprandStateXORWOW_t", CONV_TYPE, API_RAND}},
|
||||
{"curandState_t", {"hiprandState_t", CONV_TYPE, API_RAND}},
|
||||
{"curandState", {"hiprandState_t", CONV_TYPE, API_RAND}},
|
||||
};
|
||||
|
||||
/// Maps cuda header names to hip header names.
|
||||
@@ -2872,7 +2874,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
|
||||
{"curand_uniform2_double", {"hiprand_uniform2_double", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_uniform4", {"hiprand_uniform4", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_uniform4_double", {"hiprand_uniform4_double", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_discrete", {"hiprand_discrete", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_discrete", {"hiprand_discrete", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_discrete4", {"hiprand_discrete4", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_poisson", {"hiprand_poisson", CONV_DEVICE_FUNC, API_RAND}},
|
||||
{"curand_poisson4", {"hiprand_poisson4", CONV_DEVICE_FUNC, API_RAND}},
|
||||
|
||||
@@ -0,0 +1,417 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
|
||||
// Taken from: http://docs.nvidia.com/cuda/curand/device-api-overview.html#poisson-api-example
|
||||
/*
|
||||
* This program uses CURAND library for Poisson distribution
|
||||
* to simulate queues in store for 16 hours. It shows the
|
||||
* difference of using 3 different APIs:
|
||||
* - HOST API -arrival of customers is described by Poisson(4)
|
||||
* - SIMPLE DEVICE API -arrival of customers is described by
|
||||
* Poisson(4*(sin(x/100)+1)), where x is number of minutes
|
||||
* from store opening time.
|
||||
* - ROBUST DEVICE API -arrival of customers is described by:
|
||||
* - Poisson(2) for first 3 hours.
|
||||
* - Poisson(1) for second 3 hours.
|
||||
* - Poisson(3) after 6 hours.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda.h>
|
||||
// CHECK: #include <hiprand_kernel.h>
|
||||
#include <curand_kernel.h>
|
||||
// CHECK: #include <hiprand.h>
|
||||
#include <curand.h>
|
||||
|
||||
// CHECK: #define CUDA_CALL(x) do { if((x) != hipSuccess) {
|
||||
#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \
|
||||
printf("Error at %s:%d\n",__FILE__,__LINE__); \
|
||||
return EXIT_FAILURE;}} while(0)
|
||||
// CHECK: #define CURAND_CALL(x) do { if((x)!=HIPRAND_STATUS_SUCCESS) {
|
||||
#define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
|
||||
printf("Error at %s:%d\n",__FILE__,__LINE__);\
|
||||
return EXIT_FAILURE;}} while(0)
|
||||
|
||||
|
||||
#define HOURS 16
|
||||
#define OPENING_HOUR 7
|
||||
#define CLOSING_HOUR (OPENING_HOUR + HOURS)
|
||||
|
||||
#define access_2D(type, ptr, row, column, pitch)\
|
||||
*((type*)((char*)ptr + (row) * pitch) + column)
|
||||
|
||||
enum API_TYPE {
|
||||
HOST_API = 0,
|
||||
SIMPLE_DEVICE_API = 1,
|
||||
ROBUST_DEVICE_API = 2,
|
||||
};
|
||||
|
||||
/* global variables */
|
||||
API_TYPE api;
|
||||
int report_break;
|
||||
int cashiers_load_h[HOURS];
|
||||
__constant__ int cashiers_load[HOURS];
|
||||
// CHECK: __global__ void setup_kernel(hiprandState_t *state)
|
||||
__global__ void setup_kernel(curandState *state)
|
||||
{
|
||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
/* Each thread gets same seed, a different sequence
|
||||
number, no offset */
|
||||
// CHECK: hiprand_init(1234, id, 0, &state[id]);
|
||||
curand_init(1234, id, 0, &state[id]);
|
||||
}
|
||||
|
||||
__inline__ __device__
|
||||
void update_queue(int id, int min, unsigned int new_customers,
|
||||
unsigned int &queue_length,
|
||||
unsigned int *queue_lengths, size_t pitch)
|
||||
{
|
||||
int balance;
|
||||
balance = new_customers - 2 * cashiers_load[(min-1)/60];
|
||||
if (balance + (int)queue_length <= 0){
|
||||
queue_length = 0;
|
||||
}else{
|
||||
queue_length += balance;
|
||||
}
|
||||
/* Store results */
|
||||
access_2D(unsigned int, queue_lengths, min-1, id, pitch)
|
||||
= queue_length;
|
||||
}
|
||||
|
||||
// CHECK: __global__ void simple_device_API_kernel(hiprandState_t *state,
|
||||
__global__ void simple_device_API_kernel(curandState *state,
|
||||
unsigned int *queue_lengths, size_t pitch)
|
||||
{
|
||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
unsigned int new_customers;
|
||||
unsigned int queue_length = 0;
|
||||
/* Copy state to local memory for efficiency */
|
||||
// CHECK: hiprandState_t localState = state[id];
|
||||
curandState localState = state[id];
|
||||
/* Simulate queue in time */
|
||||
for(int min = 1; min <= 60 * HOURS; min++) {
|
||||
/* Draw number of new customers depending on API */
|
||||
// CHECK: new_customers = hiprand_poisson(&localState,
|
||||
new_customers = curand_poisson(&localState,
|
||||
4*(sin((float)min/100.0)+1));
|
||||
/* Update queue */
|
||||
update_queue(id, min, new_customers, queue_length,
|
||||
queue_lengths, pitch);
|
||||
}
|
||||
/* Copy state back to global memory */
|
||||
state[id] = localState;
|
||||
}
|
||||
|
||||
|
||||
__global__ void host_API_kernel(unsigned int *poisson_numbers,
|
||||
unsigned int *queue_lengths, size_t pitch)
|
||||
{
|
||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
unsigned int new_customers;
|
||||
unsigned int queue_length = 0;
|
||||
/* Simulate queue in time */
|
||||
for(int min = 1; min <= 60 * HOURS; min++) {
|
||||
/* Get random number from global memory */
|
||||
new_customers = poisson_numbers
|
||||
[blockDim.x * gridDim.x * (min -1) + id];
|
||||
/* Update queue */
|
||||
update_queue(id, min, new_customers, queue_length,
|
||||
queue_lengths, pitch);
|
||||
}
|
||||
}
|
||||
// CHECK: __global__ void robust_device_API_kernel(hiprandState_t *state,
|
||||
// CHECK: hiprandDiscreteDistribution_t poisson_1,
|
||||
// CHECK: hiprandDiscreteDistribution_t poisson_2,
|
||||
// CHECK: hiprandDiscreteDistribution_t poisson_3,
|
||||
__global__ void robust_device_API_kernel(curandState *state,
|
||||
curandDiscreteDistribution_t poisson_1,
|
||||
curandDiscreteDistribution_t poisson_2,
|
||||
curandDiscreteDistribution_t poisson_3,
|
||||
unsigned int *queue_lengths, size_t pitch)
|
||||
{
|
||||
int id = threadIdx.x + blockIdx.x * 64;
|
||||
unsigned int new_customers;
|
||||
unsigned int queue_length = 0;
|
||||
/* Copy state to local memory for efficiency */
|
||||
// CHECK: hiprandState_t localState = state[id];
|
||||
curandState localState = state[id];
|
||||
/* Simulate queue in time */
|
||||
/* first 3 hours */
|
||||
for(int min = 1; min <= 60 * 3; min++) {
|
||||
/* draw number of new customers depending on API */
|
||||
new_customers =
|
||||
// CHECK: hiprand_discrete(&localState, poisson_2);
|
||||
curand_discrete(&localState, poisson_2);
|
||||
/* Update queue */
|
||||
update_queue(id, min, new_customers, queue_length,
|
||||
queue_lengths, pitch);
|
||||
}
|
||||
/* second 3 hours */
|
||||
for(int min = 60 * 3 + 1; min <= 60 * 6; min++) {
|
||||
/* draw number of new customers depending on API */
|
||||
new_customers =
|
||||
// CHECK: hiprand_discrete(&localState, poisson_1);
|
||||
curand_discrete(&localState, poisson_1);
|
||||
/* Update queue */
|
||||
update_queue(id, min, new_customers, queue_length,
|
||||
queue_lengths, pitch);
|
||||
}
|
||||
/* after 6 hours */
|
||||
for(int min = 60 * 6 + 1; min <= 60 * HOURS; min++) {
|
||||
/* draw number of new customers depending on API */
|
||||
new_customers =
|
||||
// CHECK: hiprand_discrete(&localState, poisson_3);
|
||||
curand_discrete(&localState, poisson_3);
|
||||
/* Update queue */
|
||||
update_queue(id, min, new_customers, queue_length,
|
||||
queue_lengths, pitch);
|
||||
}
|
||||
/* Copy state back to global memory */
|
||||
state[id] = localState;
|
||||
}
|
||||
|
||||
/* Set time intervals between reports */
|
||||
void report_settings()
|
||||
{
|
||||
do{
|
||||
printf("Set time intervals between queue reports");
|
||||
printf("(in minutes > 0)\n");
|
||||
if (scanf("%d", &report_break) == 0) continue;
|
||||
}while(report_break <= 0);
|
||||
}
|
||||
|
||||
|
||||
/* Set number of cashiers each hour */
|
||||
void add_cachiers(int *cashiers_load)
|
||||
{
|
||||
int i, min, max, begin, end;
|
||||
printf("Cashier serves 2 customers per minute...\n");
|
||||
for (i = 0; i < HOURS; i++){
|
||||
cashiers_load_h[i] = 0;
|
||||
}
|
||||
while (true){
|
||||
printf("Adding cashier...\n");
|
||||
min = OPENING_HOUR;
|
||||
max = CLOSING_HOUR-1;
|
||||
do{
|
||||
printf("Set hour that cahier comes (%d-%d)",
|
||||
min, max);
|
||||
printf(" [type 0 to finish adding cashiers]\n");
|
||||
if (scanf("%d", &begin) == 0) continue;
|
||||
}while (begin > max || (begin < min && begin != 0));
|
||||
if (begin == 0) break;
|
||||
min = begin+1;
|
||||
max = CLOSING_HOUR;
|
||||
do{
|
||||
printf("Set hour that cahier leaves (%d-%d)",
|
||||
min, max);
|
||||
printf(" [type 0 to finish adding cashiers]\n");
|
||||
if (scanf("%d", &end) == 0) continue;
|
||||
}while (end > max || (end < min && end != 0));
|
||||
if (end == 0) break;
|
||||
for (i = begin - OPENING_HOUR;
|
||||
i < end - OPENING_HOUR; i++){
|
||||
cashiers_load_h[i]++;
|
||||
}
|
||||
}
|
||||
for (i = OPENING_HOUR; i < CLOSING_HOUR; i++){
|
||||
printf("\n%2d:00 - %2d:00 %d cashier",
|
||||
i, i+1, cashiers_load_h[i-OPENING_HOUR]);
|
||||
if (cashiers_load[i-OPENING_HOUR] != 1) printf("s");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/* Set API type */
|
||||
API_TYPE set_API_type()
|
||||
{
|
||||
printf("Choose API type:\n");
|
||||
int choose;
|
||||
do{
|
||||
printf("type 1 for HOST API\n");
|
||||
printf("type 2 for SIMPLE DEVICE API\n");
|
||||
printf("type 3 for ROBUST DEVICE API\n");
|
||||
if (scanf("%d", &choose) == 0) continue;
|
||||
}while( choose < 1 || choose > 3);
|
||||
switch(choose){
|
||||
case 1: return HOST_API;
|
||||
case 2: return SIMPLE_DEVICE_API;
|
||||
case 3: return ROBUST_DEVICE_API;
|
||||
default:
|
||||
fprintf(stderr, "wrong API\n");
|
||||
return HOST_API;
|
||||
}
|
||||
}
|
||||
|
||||
void settings()
|
||||
{
|
||||
add_cachiers(cashiers_load);
|
||||
// CHECK: hipMemcpyToSymbol("cashiers_load", cashiers_load_h,
|
||||
// CHECK: HOURS * sizeof(int), 0, hipMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol("cashiers_load", cashiers_load_h,
|
||||
HOURS * sizeof(int), 0, cudaMemcpyHostToDevice);
|
||||
report_settings();
|
||||
api = set_API_type();
|
||||
}
|
||||
|
||||
void print_statistics(unsigned int *hostResults, size_t pitch)
|
||||
{
|
||||
int min, i, hour, minute;
|
||||
unsigned int sum;
|
||||
for(min = report_break; min <= 60 * HOURS;
|
||||
min += report_break) {
|
||||
sum = 0;
|
||||
for(i = 0; i < 64 * 64; i++) {
|
||||
sum += access_2D(unsigned int, hostResults,
|
||||
min-1, i, pitch);
|
||||
}
|
||||
hour = OPENING_HOUR + min/60;
|
||||
minute = min%60;
|
||||
printf("%2d:%02d # of waiting customers = %10.4g |",
|
||||
hour, minute, (float)sum/(64.0 * 64.0));
|
||||
printf(" # of cashiers = %d | ",
|
||||
cashiers_load_h[(min-1)/60]);
|
||||
printf("# of new customers/min ~= ");
|
||||
switch (api){
|
||||
case HOST_API:
|
||||
printf("%2.2f\n", 4.0);
|
||||
break;
|
||||
case SIMPLE_DEVICE_API:
|
||||
printf("%2.2f\n",
|
||||
4*(sin((float)min/100.0)+1));
|
||||
break;
|
||||
case ROBUST_DEVICE_API:
|
||||
if (min <= 3 * 60){
|
||||
printf("%2.2f\n", 2.0);
|
||||
}else{
|
||||
if (min <= 6 * 60){
|
||||
printf("%2.2f\n", 1.0);
|
||||
}else{
|
||||
printf("%2.2f\n", 3.0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Wrong API\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int n;
|
||||
size_t pitch;
|
||||
// CHECK: hiprandState_t *devStates;
|
||||
curandState *devStates;
|
||||
unsigned int *devResults, *hostResults;
|
||||
unsigned int *poisson_numbers_d;
|
||||
// CHECK: hiprandDiscreteDistribution_t poisson_1, poisson_2;
|
||||
// CHECK: hiprandDiscreteDistribution_t poisson_3;
|
||||
// CHECK: hiprandGenerator_t gen;
|
||||
curandDiscreteDistribution_t poisson_1, poisson_2;
|
||||
curandDiscreteDistribution_t poisson_3;
|
||||
curandGenerator_t gen;
|
||||
|
||||
/* Setting cashiers, report and API */
|
||||
settings();
|
||||
|
||||
/* Allocate space for results on device */
|
||||
// CHECK: CUDA_CALL(hipMallocPitch((void **)&devResults, &pitch,
|
||||
CUDA_CALL(cudaMallocPitch((void **)&devResults, &pitch,
|
||||
64 * 64 * sizeof(unsigned int), 60 * HOURS));
|
||||
|
||||
/* Allocate space for results on host */
|
||||
hostResults = (unsigned int *)calloc(pitch * 60 * HOURS,
|
||||
sizeof(unsigned int));
|
||||
|
||||
/* Allocate space for prng states on device */
|
||||
// CHECK: CUDA_CALL(hipMalloc((void **)&devStates, 64 * 64 *
|
||||
// CHECK: sizeof(hiprandState_t)));
|
||||
CUDA_CALL(cudaMalloc((void **)&devStates, 64 * 64 *
|
||||
sizeof(curandState)));
|
||||
|
||||
/* Setup prng states */
|
||||
if (api != HOST_API){
|
||||
// CHECK: hipLaunchKernelGGL(setup_kernel, dim3(64), dim3(64), 0, 0, devStates);
|
||||
setup_kernel<<<64, 64>>>(devStates);
|
||||
}
|
||||
/* Simulate queue */
|
||||
switch (api){
|
||||
case HOST_API:
|
||||
/* Create pseudo-random number generator */
|
||||
// CHECK: CURAND_CALL(hiprandCreateGenerator(&gen,
|
||||
// CHECK: HIPRAND_RNG_PSEUDO_DEFAULT));
|
||||
CURAND_CALL(curandCreateGenerator(&gen,
|
||||
CURAND_RNG_PSEUDO_DEFAULT));
|
||||
/* Set seed */
|
||||
// CHECK: CURAND_CALL(hiprandSetPseudoRandomGeneratorSeed(
|
||||
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(
|
||||
gen, 1234ULL));
|
||||
/* compute n */
|
||||
n = 64 * 64 * HOURS * 60;
|
||||
/* Allocate n unsigned ints on device */
|
||||
// CHECK: CUDA_CALL(hipMalloc((void **)&poisson_numbers_d,
|
||||
CUDA_CALL(cudaMalloc((void **)&poisson_numbers_d,
|
||||
n * sizeof(unsigned int)));
|
||||
/* Generate n unsigned ints on device */
|
||||
// CHECK: CURAND_CALL(hiprandGeneratePoisson(gen,
|
||||
CURAND_CALL(curandGeneratePoisson(gen,
|
||||
poisson_numbers_d, n, 4.0));
|
||||
// CHECK: hipLaunchKernelGGL(host_API_kernel, dim3(64), dim3(64), 0, 0, poisson_numbers_d,
|
||||
host_API_kernel<<<64, 64>>>(poisson_numbers_d,
|
||||
devResults, pitch);
|
||||
/* Cleanup */
|
||||
// CHECK: CURAND_CALL(hiprandDestroyGenerator(gen));
|
||||
CURAND_CALL(curandDestroyGenerator(gen));
|
||||
break;
|
||||
case SIMPLE_DEVICE_API:
|
||||
// CHECK: hipLaunchKernelGGL(simple_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates,
|
||||
simple_device_API_kernel<<<64, 64>>>(devStates,
|
||||
devResults, pitch);
|
||||
break;
|
||||
case ROBUST_DEVICE_API:
|
||||
/* Create histograms for Poisson(1) */
|
||||
// CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(1.0,
|
||||
CURAND_CALL(curandCreatePoissonDistribution(1.0,
|
||||
&poisson_1));
|
||||
/* Create histograms for Poisson(2) */
|
||||
// CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(2.0,
|
||||
CURAND_CALL(curandCreatePoissonDistribution(2.0,
|
||||
&poisson_2));
|
||||
/* Create histograms for Poisson(3) */
|
||||
// CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(3.0,
|
||||
CURAND_CALL(curandCreatePoissonDistribution(3.0,
|
||||
&poisson_3));
|
||||
// CHECK: hipLaunchKernelGGL(robust_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates,
|
||||
robust_device_API_kernel<<<64, 64>>>(devStates,
|
||||
poisson_1, poisson_2, poisson_3,
|
||||
devResults, pitch);
|
||||
/* Cleanup */
|
||||
// CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_1));
|
||||
// CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_2));
|
||||
// CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_3));
|
||||
CURAND_CALL(curandDestroyDistribution(poisson_1));
|
||||
CURAND_CALL(curandDestroyDistribution(poisson_2));
|
||||
CURAND_CALL(curandDestroyDistribution(poisson_3));
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Wrong API\n");
|
||||
}
|
||||
/* Copy device memory to host */
|
||||
// CHECK: CUDA_CALL(hipMemcpy2D(hostResults, pitch, devResults,
|
||||
// CHECK: 60 * HOURS, hipMemcpyDeviceToHost));
|
||||
CUDA_CALL(cudaMemcpy2D(hostResults, pitch, devResults,
|
||||
pitch, 64 * 64 * sizeof(unsigned int),
|
||||
60 * HOURS, cudaMemcpyDeviceToHost));
|
||||
/* Show result */
|
||||
print_statistics(hostResults, pitch);
|
||||
/* Cleanup */
|
||||
// CHECK: CUDA_CALL(hipFree(devStates));
|
||||
// CHECK: CUDA_CALL(hipFree(devResults));
|
||||
CUDA_CALL(cudaFree(devStates));
|
||||
CUDA_CALL(cudaFree(devResults));
|
||||
free(hostResults);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Fai riferimento in un nuovo problema
Block a user