436 строки
11 KiB
C++
436 строки
11 KiB
C++
/*
|
|
* Copyright (c) 2017 Intel Corporation. All rights reserved.
|
|
* This software is available to you under the BSD license below:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials
|
|
* provided with the distribution.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
/*
|
|
*for back to back operation testing: independent buffers for each operation
|
|
*as well as alternating PE waiting
|
|
* */
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include <rocshmem/rocshmem.hpp>
|
|
|
|
using namespace rocshmem;
|
|
|
|
typedef enum { NUM_WRITE = 8, NUM_READ = 5, NUM_SYNC = 3 } max_ops;
|
|
|
|
long *target;
|
|
long *source;
|
|
long *sync_pes;
|
|
|
|
int verbose;
|
|
int debug;
|
|
|
|
static inline void wait_until(long *wait_var, int iterations, int pe) {
|
|
if (debug) printf("PE %d waiting...%ld\n", pe, *wait_var);
|
|
|
|
rocshmem_long_wait_until(wait_var, ROCSHMEM_CMP_EQ, iterations);
|
|
|
|
if (debug) printf("PE %d wait_until passed\n", pe);
|
|
}
|
|
|
|
static inline void pre_op_check(const char *op, int check_var, int iterations,
|
|
int pe) {
|
|
if (verbose) printf("SHMEM %s, performing %d iterations\n", op, iterations);
|
|
|
|
if (debug) printf("BEFORE operation PE %d target = %d\n", pe, check_var);
|
|
}
|
|
|
|
static inline void post_op_check(const char *op, int check_var, int iterations,
|
|
int pe) {
|
|
if (check_var != iterations) {
|
|
fprintf(stderr, "%s ERR: PE %d source = %d != %d\n", op, pe, check_var,
|
|
iterations);
|
|
rocshmem_global_exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
static inline void putfence(int me, int iterations, int T) {
|
|
int i;
|
|
|
|
if (me == 1) pre_op_check(__func__, target[T], iterations, 1);
|
|
|
|
if (me == 0) {
|
|
for (i = 1; i < iterations; i++) {
|
|
rocshmem_long_p(&target[T], i, 1);
|
|
rocshmem_fence();
|
|
}
|
|
|
|
rocshmem_long_p(&target[T], i, 1);
|
|
|
|
} else
|
|
wait_until(&target[T], iterations, 1);
|
|
|
|
if (verbose)
|
|
if (me == 0) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
|
|
static inline void gettest(int me, int iterations, int T, int S, int P) {
|
|
int i;
|
|
|
|
if (me == 1) {
|
|
pre_op_check(__func__, target[T], iterations, 1);
|
|
|
|
rocshmem_long_p(&source[S], iterations, 0);
|
|
rocshmem_fence();
|
|
|
|
for (i = 0; i < iterations; i++)
|
|
target[T] = rocshmem_long_g(&source[S], 0);
|
|
|
|
rocshmem_long_p(&sync_pes[P], iterations, 0);
|
|
|
|
post_op_check("get", target[T], iterations, 1);
|
|
|
|
} else
|
|
wait_until(&sync_pes[P], iterations, 0);
|
|
|
|
if (verbose) {
|
|
if (me == 0) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
static inline void atomic_inc(int me, int iterations, int T) {
|
|
int i;
|
|
|
|
if (me == 1) pre_op_check(__func__, target[T], iterations, 1);
|
|
|
|
target[T] = 0;
|
|
rocshmem_barrier_all();
|
|
|
|
if (me == 0) {
|
|
for (i = 0; i < iterations; i++) {
|
|
rocshmem_int64_atomic_inc((int64_t *)&target[T], 1);
|
|
rocshmem_fence();
|
|
}
|
|
rocshmem_int64_atomic_inc((int64_t *)&target[T], 1);
|
|
|
|
if (debug) printf("PE 0 done with operation\n");
|
|
|
|
} else
|
|
wait_until(&target[T], (iterations + 1), 1);
|
|
|
|
if (verbose) {
|
|
if (me == 1) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
static inline void atomic_add(int me, int iterations, int T) {
|
|
int i;
|
|
|
|
if (me == 0) pre_op_check(__func__, target[T], iterations, 0);
|
|
|
|
target[T] = 0;
|
|
rocshmem_barrier_all();
|
|
|
|
if (me == 1) {
|
|
for (i = 0; i < iterations; i++) {
|
|
rocshmem_int64_atomic_add((int64_t *)&target[T], 1, 0);
|
|
rocshmem_fence();
|
|
}
|
|
rocshmem_int64_atomic_add((int64_t *)&target[T], 1, 0);
|
|
|
|
if (debug) printf("PE 1 done with operation\n");
|
|
|
|
} else
|
|
wait_until(&target[T], (iterations + 1), 0);
|
|
|
|
if (verbose) {
|
|
if (me == 1) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
/*
|
|
static inline void swaptest(int me, int iterations, int T, int S, int P)
|
|
{
|
|
|
|
int i;
|
|
const int tswap = 5, sswap = 2;
|
|
target[T] = tswap;
|
|
source[S] = sswap;
|
|
|
|
rocshmem_barrier_all(); /* Ensure target/source initialization completed */
|
|
/*
|
|
|
|
if (me == 0)
|
|
pre_op_check(__func__, source[S], iterations, 0);
|
|
|
|
if (me == 0) {
|
|
for (i = 0; i < iterations; i++)
|
|
source[S] = rocshmem_long_atomic_swap(&target[T], source[S], 1);
|
|
|
|
rocshmem_long_p(&sync_pes[P], i, 1);
|
|
|
|
if (debug)
|
|
printf("AFTER flag PE 0 value of source is %d"
|
|
" = 5?\n", source[S]);
|
|
|
|
if (((iterations % 2 == 1) && (source[S] != tswap)) ||
|
|
((iterations % 2 == 0) &&
|
|
(source[S] != sswap))) {
|
|
fprintf(stderr, "swap ERR: PE 0 source = %d\n",
|
|
source[S]);
|
|
rocshmem_global_exit(EXIT_FAILURE);
|
|
}
|
|
|
|
} else {
|
|
wait_until(&sync_pes[P], iterations, 1);
|
|
|
|
if (((iterations % 2 == 1) && (target[T] != sswap)) ||
|
|
((iterations % 2 == 0) &&
|
|
(target[T] != tswap))) {
|
|
fprintf(stderr, "swap ERR: PE 0 target = %d \n",
|
|
target[T]);
|
|
rocshmem_global_exit(EXIT_FAILURE);
|
|
}
|
|
|
|
}
|
|
|
|
if (verbose) {
|
|
if (me == 0)
|
|
printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
*/
|
|
|
|
static inline void cswaptest(int me, int iterations, int T, int S, int P) {
|
|
int i;
|
|
source[S] = -100;
|
|
|
|
target[T] = 0;
|
|
rocshmem_barrier_all();
|
|
|
|
if (me == 1) {
|
|
pre_op_check(__func__, source[S], iterations, 1);
|
|
|
|
for (i = 0; i < iterations; i++)
|
|
source[S] = rocshmem_int64_atomic_compare_swap((int64_t *)&(target[T]),
|
|
i, (i + 1), 0);
|
|
|
|
rocshmem_long_p(&sync_pes[P], i, 0);
|
|
|
|
post_op_check("compare_swap", source[S], (iterations - 1), 1);
|
|
|
|
} else {
|
|
wait_until(&sync_pes[P], iterations, 0);
|
|
|
|
if (target[T] != iterations) {
|
|
fprintf(stderr, "compare_swap ERR: PE 1 target = %ld != %d\n", target[T],
|
|
iterations);
|
|
rocshmem_global_exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (verbose) {
|
|
if (me == 1) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
static inline void fetchatomic_add(int me, int iterations, int T, int S) {
|
|
int i;
|
|
|
|
if (me == 1) pre_op_check(__func__, target[T], iterations, 1);
|
|
|
|
target[T] = 0;
|
|
rocshmem_barrier_all();
|
|
|
|
if (me == 0) {
|
|
if (debug) {
|
|
printf(
|
|
"BEFORE flag PE 0 value of source is"
|
|
" %ld = 0?\n",
|
|
source[S]);
|
|
}
|
|
|
|
for (i = 0; i < iterations; i++) {
|
|
source[S] = rocshmem_int64_atomic_fetch_add((int64_t *)&target[T], 1, 1);
|
|
rocshmem_fence();
|
|
}
|
|
source[S] = rocshmem_int64_atomic_fetch_add((int64_t *)&target[T], 1, 1);
|
|
|
|
post_op_check("fetch_add", source[S], iterations, 0);
|
|
|
|
} else
|
|
wait_until(&target[T], (iterations + 1), 1);
|
|
|
|
if (verbose) {
|
|
if (me == 0) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
static inline void fetchatomic_inc(int me, int iterations, int T, int S) {
|
|
int i;
|
|
|
|
if (me == 0) pre_op_check(__func__, target[T], iterations, 0);
|
|
|
|
target[T] = 0;
|
|
rocshmem_barrier_all();
|
|
|
|
if (me == 1) {
|
|
if (debug) {
|
|
printf("BEFORE flag PE 1 value of source is %ld\n", source[S]);
|
|
}
|
|
|
|
for (i = 0; i < iterations; i++) {
|
|
source[S] = rocshmem_int64_atomic_fetch_inc((int64_t *)&target[T], 0);
|
|
rocshmem_fence();
|
|
}
|
|
|
|
post_op_check("fetch_inc", source[S], (iterations - 1), 1);
|
|
} else
|
|
wait_until(&target[T], iterations, 0);
|
|
|
|
if (verbose) {
|
|
if (me == 1) printf("SHMEM %s finished\n", __func__);
|
|
}
|
|
}
|
|
|
|
int main(int argc, char **argv) {
|
|
int me, nproc;
|
|
int c, all_ops = 1;
|
|
int T = 0, S = 0, P = 0;
|
|
const int DEFAULT_ITR = 7;
|
|
int iterations = DEFAULT_ITR;
|
|
|
|
rocshmem_init();
|
|
|
|
me = rocshmem_my_pe();
|
|
nproc = rocshmem_n_pes();
|
|
|
|
target = (long *)rocshmem_malloc(NUM_WRITE * sizeof(long));
|
|
source = (long *)rocshmem_malloc(NUM_READ * sizeof(long));
|
|
sync_pes = (long *)rocshmem_malloc(NUM_SYNC * sizeof(long));
|
|
|
|
memset(target, -1, NUM_WRITE * sizeof(int));
|
|
memset(source, -1, NUM_READ * sizeof(int));
|
|
memset(sync_pes, -1, NUM_SYNC * sizeof(int));
|
|
|
|
rocshmem_barrier_all();
|
|
|
|
if (nproc != 2) {
|
|
if (me == 0) {
|
|
fprintf(stderr,
|
|
"This is a micro test and is only "
|
|
"intended to run on exactly two processes you"
|
|
" are using %d\n",
|
|
nproc);
|
|
}
|
|
rocshmem_finalize();
|
|
return 0;
|
|
}
|
|
|
|
while ((c = getopt(argc, argv, "i:vdpgaAscfFh")) != -1) {
|
|
switch (c) {
|
|
case 'i':
|
|
iterations = atoi(optarg);
|
|
assert(iterations > 0);
|
|
all_ops += 2;
|
|
break;
|
|
case 'v':
|
|
verbose = 1;
|
|
all_ops++;
|
|
break;
|
|
case 'd':
|
|
debug = 1;
|
|
break;
|
|
case 'p':
|
|
putfence(me, iterations, T++);
|
|
break;
|
|
case 'g':
|
|
gettest(me, iterations, T++, S++, P++);
|
|
break;
|
|
case 'a':
|
|
atomic_add(me, iterations, T++);
|
|
break;
|
|
case 'A':
|
|
atomic_inc(me, iterations, T++);
|
|
break;
|
|
/*
|
|
case 's':
|
|
swaptest(me, iterations, T++, S++, P++);
|
|
break;
|
|
*/
|
|
case 'c':
|
|
cswaptest(me, iterations, T++, S++, P++);
|
|
break;
|
|
case 'f':
|
|
fetchatomic_add(me, iterations, T++, S++);
|
|
break;
|
|
case 'F':
|
|
fetchatomic_inc(me, iterations, T++, S++);
|
|
break;
|
|
case 'h':
|
|
default:
|
|
if (me == 0) {
|
|
fprintf(stderr,
|
|
"input options:\n 1) single"
|
|
" argument option will run all tests by default"
|
|
"and additionally request: -v (verbose) | "
|
|
"-i <number of interations>\n");
|
|
fprintf(stderr,
|
|
" 2) two argument options "
|
|
"choose any combination of the following "
|
|
"to run individual tests: -i <iterations>, -v"
|
|
", -d, -p, -g, -a, -A, -s, -c, -f, -F, -h\n");
|
|
}
|
|
rocshmem_finalize();
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (argc == all_ops || argc == 1) {
|
|
putfence(me, iterations, T++);
|
|
gettest(me, iterations, T++, S++, P++);
|
|
atomic_add(me, iterations, T++);
|
|
atomic_inc(me, iterations, T++);
|
|
// swaptest(me, iterations, T++, S++, P++);
|
|
cswaptest(me, iterations, T++, S++, P++);
|
|
fetchatomic_add(me, iterations, T++, S++);
|
|
fetchatomic_inc(me, iterations, T++, S++);
|
|
}
|
|
|
|
if (verbose) {
|
|
if (me == 1)
|
|
printf("PE 1: PASS: %8d iterations\n", iterations);
|
|
else
|
|
printf("PE 0 Successful exit\n");
|
|
}
|
|
|
|
rocshmem_free(target);
|
|
rocshmem_free(source);
|
|
rocshmem_free(sync_pes);
|
|
|
|
rocshmem_finalize();
|
|
|
|
return 0;
|
|
}
|