/* * Copyright 2011 Sandia Corporation. Under the terms of Contract * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government * retains certain rights in this software. * * Copyright (c) 2017 Intel Corporation. All rights reserved. * This software is available to you under the BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * to_all - exercise SHMEM max,min,or,prod,sum,or,xor_to_all() reduction calls. * Each reduction is invoked for all data types: * short, int, long, float, double, long double, long long. * Point being numerous SHMEM atomics and synchronizations in flight. * From OpenSHMEM_specification_v1.0-final doc: * The pWrk and pSync arrays on all PEs in the active set must not be * in use from a prior call to a collective OpenSHMEM routine. * * frank @ SystemFabric Works identified an interesting overflow issue in the * prod_to_all test. In the presence of slightly larger PE counts (>=14), * overflow is encountered in short, int and float, double and long double. * The short and int both wrap correctly and are both uniformly * wrong...uniformly being the salient point. float, double and long double all * suffer from floating point rounding errors, hence the FP test results are * ignored (assumed to pass)when FP rounding is encountered. FP*_prod_to_all() * calls are still made so as not to upset the pSync ordering. * * usage: to_all {-amopsSv|h} * where: * -a do not run and_to_all * -m do not run min_to_all, max_to_all() always run. * -o do not run or_to_all * -p do not run prod_to_all * -s do not run sum_to_all * -x do not run xor_to_all * -S Serialize *_to_all() calls with barriers. * -v verbose(additional -v, more verbose) * -h this text. */ #include #include #include #include #include #include using namespace rocshmem; #define Rprintf \ if (rocshmem_my_pe() == 0) printf #define Rfprintf \ if (rocshmem_my_pe() == 0) fprintf #define Vprintf \ if (Verbose > 1) printf int sum_to_all(int me, int npes); int and_to_all(int me, int npes); int min_to_all(int me, int npes); int max_to_all(int me, int npes); int prod_to_all(int me, int npes); int or_to_all(int me, int npes); int xor_to_all(int me, int npes); int Verbose; int Serialize; int Min, And, Sum, Prod, Or, Xor; int Passed; long *pSync; long *pSync1; #define N 128 #define MAX(a, b) ((a) > (b)) ? (a) : (b) #define WRK_SIZE MAX(N / 2 + 1, ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE) short *src0, *dst0, *pWrk0; int *src1, *dst1, *pWrk1; long *src2, *dst2, *pWrk2; float *src3, *dst3, *pWrk3; double *src4, *dst4, *pWrk4; long double *src5, *dst5, *pWrk5; long long *src6, *dst6, *pWrk6; short expected_result0; int expected_result1; long expected_result2; float expected_result3; double expected_result4; long double expected_result5; long long expected_result6; int ok[7]; int max_to_all(int me, int npes) { int i, j, pass = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; } rocshmem_barrier_all(); rocshmem_ctx_short_max_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_max_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_max_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_float_max_to_all(ROCSHMEM_CTX_DEFAULT, dst3, src3, N, 0, 0, npes, pWrk3, pSync1); rocshmem_ctx_double_max_to_all(ROCSHMEM_CTX_DEFAULT, dst4, src4, N, 0, 0, npes, pWrk4, pSync); // rocshmem_ctx_longdouble_max_to_all(ROCSHMEM_CTX_DEFAULT, dst5, src5, N, // 0, 0, npes, pWrk5, pSync1); rocshmem_ctx_longlong_max_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync); if (me == 0) { for (i = 0, j = -1; i < N; i++, j++) { if (dst0[i] != npes + j) ok[0] = 1; if (dst1[i] != npes + j) ok[1] = 1; if (dst2[i] != npes + j) ok[2] = 1; if (dst3[i] != npes + j) ok[3] = 1; if (dst4[i] != npes + j) ok[4] = 1; if (dst5[i] != npes + j) ok[5] = 1; if (dst6[i] != npes + j) ok[6] = 1; } if (ok[0] == 1) { printf("Reduction operation rocshmem_short_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_short_max_to_all: Passed\n"); pass++; } if (ok[1] == 1) { printf("Reduction operation rocshmem_int_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_int_max_to_all: Passed\n"); pass++; } if (ok[2] == 1) { printf("Reduction operation rocshmem_long_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_long_max_to_all: Passed\n"); pass++; } if (ok[3] == 1) { printf("Reduction operation rocshmem_float_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_float_max_to_all: Passed\n"); pass++; } if (ok[4] == 1) { printf("Reduction operation rocshmem_double_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_double_max_to_all: Passed\n"); pass++; } /* if(ok[5]==1){ printf("Reduction operation rocshmem_longdouble_max_to_all: Failed\n"); } else{ Vprintf("Reduction operation rocshmem_longdouble_max_to_all: Passed\n"); pass++; } */ pass++; if (ok[6] == 1) { printf("Reduction operation rocshmem_longlong_max_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_longlong_max_to_all: Passed\n"); pass++; } Vprintf("\n"); } if (Serialize) rocshmem_barrier_all(); return (pass == 7 ? 1 : 0); } int min_to_all(int me, int npes) { int i, pass = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } rocshmem_barrier_all(); rocshmem_ctx_short_min_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_min_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_min_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_float_min_to_all(ROCSHMEM_CTX_DEFAULT, dst3, src3, N, 0, 0, npes, pWrk3, pSync1); rocshmem_ctx_double_min_to_all(ROCSHMEM_CTX_DEFAULT, dst4, src4, N, 0, 0, npes, pWrk4, pSync); // rocshmem_ctx_longdouble_min_to_all(ROCSHMEM_CTX_DEFAULT, dst5, src5, N, // 0, 0, npes, pWrk5, pSync1); rocshmem_ctx_longlong_min_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync); if (me == 0) { for (i = 0; i < N; i++) { if (dst0[i] != i) ok[0] = 1; if (dst1[i] != i) ok[1] = 1; if (dst2[i] != i) ok[2] = 1; if (dst3[i] != i) ok[3] = 1; if (dst4[i] != i) ok[4] = 1; if (dst5[i] != i) ok[5] = 1; if (dst6[i] != i) ok[6] = 1; } if (ok[0] == 1) { printf("Reduction operation rocshmem_short_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_short_min_to_all: Passed\n"); pass++; } if (ok[1] == 1) { printf("Reduction operation rocshmem_int_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_int_min_to_all: Passed\n"); pass++; } if (ok[2] == 1) { printf("Reduction operation rocshmem_long_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_long_min_to_all: Passed\n"); pass++; } if (ok[3] == 1) { printf("Reduction operation rocshmem_float_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_float_min_to_all: Passed\n"); pass++; } if (ok[4] == 1) { printf("Reduction operation rocshmem_double_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_double_min_to_all: Passed\n"); pass++; } /* if(ok[5]==1){ printf("Reduction operation rocshmem_longdouble_min_to_all: Failed\n"); } else{ Vprintf("Reduction operation rocshmem_longdouble_min_to_all: Passed\n"); pass++; } */ pass++; if (ok[6] == 1) { printf("Reduction operation rocshmem_longlong_min_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_longlong_min_to_all: Passed\n"); pass++; } Vprintf("\n"); } if (Serialize) rocshmem_barrier_all(); return (pass == 7 ? 1 : 0); } int sum_to_all(int me, int npes) { int i, pass = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } rocshmem_barrier_all(); rocshmem_ctx_short_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_float_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst3, src3, N, 0, 0, npes, pWrk3, pSync1); rocshmem_ctx_double_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst4, src4, N, 0, 0, npes, pWrk4, pSync); // rocshmem_ctx_longdouble_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst5, src5, N, // 0, 0, npes, pWrk5, pSync1); rocshmem_ctx_longlong_sum_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync); if (me == 0) { for (i = 0; i < N; i++) { if (dst0[i] != (short)(npes * (npes - 1) / 2)) ok[0] = 1; if (dst1[i] != (int)(npes * (npes - 1) / 2)) ok[1] = 1; if (dst2[i] != (long)(npes * (npes - 1) / 2)) ok[2] = 1; if (dst3[i] != (float)(npes * (npes - 1) / 2)) ok[3] = 1; if (dst4[i] != (double)(npes * (npes - 1) / 2)) ok[4] = 1; if (dst5[i] != (long double)(npes * (npes - 1) / 2)) ok[5] = 1; if (dst6[i] != (long long)(npes * (npes - 1) / 2)) ok[6] = 1; } if (ok[0] == 1) { printf("Reduction operation rocshmem_short_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_short_sum_to_all: Passed\n"); pass++; } if (ok[1] == 1) { printf("Reduction operation rocshmem_int_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_int_sum_to_all: Passed\n"); pass++; } if (ok[2] == 1) { printf("Reduction operation rocshmem_long_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_long_sum_to_all: Passed\n"); pass++; } if (ok[3] == 1) { printf("Reduction operation rocshmem_float_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_float_sum_to_all: Passed\n"); pass++; } if (ok[4] == 1) { printf("Reduction operation rocshmem_double_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_double_sum_to_all: Passed\n"); pass++; } /* if(ok[5]==1){ printf("Reduction operation rocshmem_longdouble_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation rocshmem_longdouble_sum_to_all: Passed\n"); pass++; } */ pass++; if (ok[6] == 1) { printf("Reduction operation rocshmem_longlong_sum_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_longlong_sum_to_all: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); } if (Serialize) rocshmem_barrier_all(); return (pass == 7 ? 1 : 0); } int and_to_all(int me, int num_pes) { int i, pass = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src6[i] = me; dst0[i] = dst1[i] = dst2[i] = dst6[i] = -9; } rocshmem_barrier_all(); rocshmem_ctx_short_and_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, num_pes, pWrk0, pSync); rocshmem_ctx_int_and_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, num_pes, pWrk1, pSync1); rocshmem_ctx_long_and_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, num_pes, pWrk2, pSync); rocshmem_ctx_longlong_and_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, num_pes, pWrk6, pSync1); if (me == 0) { for (i = 0; i < N; i++) { if (dst0[i] != 0) ok[0] = 1; if (dst1[i] != 0) ok[1] = 1; if (dst2[i] != 0) ok[2] = 1; if (dst6[i] != 0) ok[3] = 1; } if (ok[0] == 1) { printf("Reduction operation rocshmem_short_and_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_short_and_to_all: Passed\n"); pass++; } if (ok[1] == 1) { printf("Reduction operation rocshmem_int_and_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_int_and_to_all: Passed\n"); pass++; } if (ok[2] == 1) { printf("Reduction operation rocshmem_long_and_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_long_and_to_all: Passed\n"); pass++; } if (ok[3] == 1) { printf("Reduction operation rocshmem_longlong_and_to_all: Failed\n"); } else { Vprintf("Reduction operation rocshmem_longlong_and_to_all: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); } if (Serialize) rocshmem_barrier_all(); return (pass == 4 ? 1 : 0); } int prod_to_all(int me, int npes) { int i, pass = 0; int float_rounding_err = 0; int double_rounding_err = 0; int ldouble_rounding_err = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + 1; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } expected_result0 = expected_result1 = expected_result2 = expected_result6 = 1; expected_result3 = expected_result4 = expected_result5 = 1.0; for (i = 1; i <= npes; i++) { expected_result0 *= i; expected_result1 *= i; expected_result2 *= i; expected_result3 *= (float)i; expected_result4 *= (double)i; if ((double)expected_result3 != expected_result4) { if (!float_rounding_err && Verbose > 2 && me == 0) printf("float_err @ npes %d\n", i); float_rounding_err = 1; } expected_result5 *= (long double)i; if ((long double)expected_result4 != expected_result5) { if (!double_rounding_err && Verbose > 2 && me == 0) printf("double_err @ npes %d\n", i); ldouble_rounding_err = double_rounding_err = 1; } expected_result6 *= i; } rocshmem_barrier_all(); rocshmem_ctx_short_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_float_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst3, src3, N, 0, 0, npes, pWrk3, pSync1); rocshmem_ctx_double_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst4, src4, N, 0, 0, npes, pWrk4, pSync); // rocshmem_ctx_longdouble_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst5, src5, N, // 0, 0, npes, pWrk5, pSync1); rocshmem_ctx_longlong_prod_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync); if (me == 0) { for (i = 0; i < N; i++) { if (dst0[i] != expected_result0) ok[0] = 1; if (dst1[i] != expected_result1) ok[1] = 1; if (dst2[i] != expected_result2) ok[2] = 1; /* check for overflow */ if (!float_rounding_err && dst3[i] != expected_result3) { ok[3] = 1; printf("dst3[%d]: %f, expected val: %f\n", i, dst3[i], expected_result3); } if (!double_rounding_err && dst4[i] != expected_result4) { ok[4] = 1; printf("dst4[%d]: %f, expected val: %f\n", i, dst4[i], expected_result4); } /* if(!ldouble_rounding_err && dst5[i] != expected_result5) {ok[5] = 1; printf("dst5[%d]: %Lf, expected val: %Lf T4 %f\n",i, dst5[i], expected_result5,dst4[i]); } */ if (dst6[i] != expected_result6) ok[6] = 1; } if (ok[0] == 1) printf("Reduction operation rocshmem_short_prod_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_short_prod_to_all: Passed\n"); pass++; } if (ok[1] == 1) printf("Reduction operation rocshmem_int_prod_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_int_prod_to_all: Passed\n"); pass++; } if (ok[2] == 1) printf("Reduction operation rocshmem_long_prod_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_long_prod_to_all: Passed\n"); pass++; } if (ok[3] == 1) printf("Reduction operation rocshmem_float_prod_to_all: Failed\n"); else { if (float_rounding_err) { Vprintf( "Reduction operation rocshmem_float_prod_to_all: skipped due to " "float rounding error\n"); } else { Vprintf("Reduction operation rocshmem_float_prod_to_all: Passed\n"); } pass++; } if (ok[4] == 1) printf("Reduction operation rocshmem_double_prod_to_all: Failed\n"); else { if (double_rounding_err) { Vprintf( "Reduction operation rocshmem_double_prod_to_all: skipped due to " "double rounding error\n"); } else { Vprintf("Reduction operation rocshmem_double_prod_to_all: Passed\n"); } pass++; } /* if(ok[5]==1) printf("Reduction operation rocshmem_longdouble_prod_to_all: Failed\n"); else { if (double_rounding_err) { Vprintf("Reduction operation rocshmem_longdouble_prod_to_all: skipped due to long double rounding error\n"); } else { Vprintf("Reduction operation rocshmem_longdouble_prod_to_all: Passed\n"); } pass++; } */ pass++; if (ok[6] == 1) printf("Reduction operation rocshmem_longlong_prod_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_longlong_prod_to_all: Passed\n"); pass++; } Vprintf("\n"); } if (Serialize) rocshmem_barrier_all(); return (pass == 7 ? 1 : 0); } int or_to_all(int me, int npes) { int i, pass = 0; memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src6[i] = (me + 1) % 4; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } rocshmem_barrier_all(); rocshmem_ctx_short_or_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_or_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_or_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_longlong_or_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if (me == 0) { for (i = 0; i < N; i++) { int expected = (npes == 1) ? 1 : 3; if (dst0[i] != expected) ok[0] = 1; if (dst1[i] != expected) ok[1] = 1; if (dst2[i] != expected) ok[2] = 1; if (dst6[i] != expected) ok[6] = 1; } if (ok[0] == 1) printf("Reduction operation rocshmem_short_or_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_short_or_to_all: Passed\n"); pass++; } if (ok[1] == 1) printf("Reduction operation rocshmem_int_or_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_int_or_to_all: Passed\n"); pass++; } if (ok[2] == 1) printf("Reduction operation rocshmem_long_or_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_long_or_to_all: Passed\n"); pass++; } if (ok[6] == 1) printf("Reduction operation rocshmem_longlong_or_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_longlong_or_to_all: Passed\n"); pass++; } Vprintf("\n"); } if (Serialize) rocshmem_barrier_all(); return (pass == 4 ? 1 : 0); } int xor_to_all(int me, int npes) { int i, pass = 0; int expected_result = ((int)(npes / 2) % 2); memset(ok, 0, sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src6[i] = me % 2; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } rocshmem_barrier_all(); rocshmem_ctx_short_xor_to_all(ROCSHMEM_CTX_DEFAULT, dst0, src0, N, 0, 0, npes, pWrk0, pSync); rocshmem_ctx_int_xor_to_all(ROCSHMEM_CTX_DEFAULT, dst1, src1, N, 0, 0, npes, pWrk1, pSync1); rocshmem_ctx_long_xor_to_all(ROCSHMEM_CTX_DEFAULT, dst2, src2, N, 0, 0, npes, pWrk2, pSync); rocshmem_ctx_longlong_xor_to_all(ROCSHMEM_CTX_DEFAULT, dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if (me == 0) { for (i = 0; i < N; i++) { if (dst0[i] != expected_result) ok[0] = 1; if (dst1[i] != expected_result) ok[1] = 1; if (dst2[i] != expected_result) ok[2] = 1; if (dst6[i] != expected_result) ok[6] = 1; } if (ok[0] == 1) printf("Reduction operation rocshmem_short_xor_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_short_xor_to_all: Passed\n"); pass++; } if (ok[1] == 1) printf("Reduction operation rocshmem_int_xor_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_int_xor_to_all: Passed\n"); pass++; } if (ok[2] == 1) printf("Reduction operation rocshmem_long_xor_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_long_xor_to_all: Passed\n"); pass++; } if (ok[6] == 1) printf("Reduction operation rocshmem_longlong_xor_to_all: Failed\n"); else { Vprintf("Reduction operation rocshmem_longlong_xor_to_all: Passed\n"); pass++; } Vprintf("\n"); } if (Serialize) rocshmem_barrier_all(); return (pass == 4 ? 1 : 0); } int main(int argc, char *argv[]) { int c, i, mype, num_pes, tests, passed; char *pgm; rocshmem_init(); mype = rocshmem_my_pe(); num_pes = rocshmem_n_pes(); if ((pgm = strrchr(argv[0], '/'))) { pgm++; } else { pgm = argv[0]; } while ((c = getopt(argc, argv, "ampsSoxhv")) != -1) { switch (c) { case 'a': And++; // do not run and_to_all break; case 'm': Min++; // do not run min_to_all break; case 'o': Or++; // do not run or_to_all break; case 'p': Prod++; // do not run prod_to_all break; case 's': Sum++; // do not run sum_to_all break; case 'x': Xor++; // do not run xor_to_all break; case 'S': Serialize++; break; case 'v': Verbose++; break; case 'h': default: Rfprintf(stderr, "usage: %s {-v(verbose)|h(help)}\n", pgm); rocshmem_finalize(); return 1; } } tests = passed = 0; pSync = (long *)rocshmem_malloc(ROCSHMEM_BCAST_SYNC_SIZE * sizeof(long)); pSync1 = (long *)rocshmem_malloc(ROCSHMEM_BCAST_SYNC_SIZE * sizeof(long)); if (!pSync || !pSync1) { fprintf(stderr, "ERR: cannot allocate one of the pSync arrays\n"); } for (i = 0; i < ROCSHMEM_REDUCE_SYNC_SIZE; i++) { pSync[i] = ROCSHMEM_SYNC_VALUE; pSync1[i] = ROCSHMEM_SYNC_VALUE; } pWrk0 = (short *)rocshmem_malloc(WRK_SIZE * sizeof(short)); pWrk1 = (int *)rocshmem_malloc(WRK_SIZE * sizeof(int)); pWrk2 = (long *)rocshmem_malloc(WRK_SIZE * sizeof(long)); pWrk3 = (float *)rocshmem_malloc(WRK_SIZE * sizeof(float)); pWrk4 = (double *)rocshmem_malloc(WRK_SIZE * sizeof(double)); pWrk5 = (long double *)rocshmem_malloc(WRK_SIZE * sizeof(long double)); pWrk6 = (long long *)rocshmem_malloc(WRK_SIZE * sizeof(long long)); if (!pWrk0 || !pWrk1 || !pWrk2 || !pWrk3 || !pWrk4 || !pWrk5 || !pWrk6) { fprintf(stderr, "ERR: cannot allocate one of the pWrk arrays\n"); } src0 = (short *)rocshmem_malloc(N * sizeof(short)); src1 = (int *)rocshmem_malloc(N * sizeof(int)); src2 = (long *)rocshmem_malloc(N * sizeof(long)); src3 = (float *)rocshmem_malloc(N * sizeof(float)); src4 = (double *)rocshmem_malloc(N * sizeof(double)); src5 = (long double *)rocshmem_malloc(N * sizeof(long double)); src6 = (long long *)rocshmem_malloc(N * sizeof(long long)); if (!src0 || !src1 || !src2 || !src3 || !src4 || !src5 || !src6) { fprintf(stderr, "ERR: cannot allocate one of the src arrays\n"); } dst0 = (short *)rocshmem_malloc(N * sizeof(short)); dst1 = (int *)rocshmem_malloc(N * sizeof(int)); dst2 = (long *)rocshmem_malloc(N * sizeof(long)); dst3 = (float *)rocshmem_malloc(N * sizeof(float)); dst4 = (double *)rocshmem_malloc(N * sizeof(double)); dst5 = (long double *)rocshmem_malloc(N * sizeof(long double)); dst6 = (long long *)rocshmem_malloc(N * sizeof(long long)); if (!dst0 || !dst1 || !dst2 || !dst3 || !dst4 || !dst5 || !dst6) { fprintf(stderr, "ERR: cannot allocate one of the dst arrays\n"); } rocshmem_barrier_all(); passed += max_to_all(mype, num_pes); tests++; if (!Min) { passed += min_to_all(mype, num_pes); tests++; } if (!Sum) { passed += sum_to_all(mype, num_pes); tests++; } if (!And) { passed += and_to_all(mype, num_pes); tests++; } if (!Prod) { passed += prod_to_all(mype, num_pes); tests++; } if (!Or) { passed += or_to_all(mype, num_pes); tests++; } if (!Xor) { passed += xor_to_all(mype, num_pes); tests++; } c = 0; if (mype == 0) { if ((Verbose || tests != passed)) fprintf(stderr, "to_all[%d] %d of %d tests passed\n", mype, passed, tests); c = (tests == passed ? 0 : 1); } rocshmem_free(pSync); rocshmem_free(pSync1); rocshmem_free(pWrk0); rocshmem_free(pWrk1); rocshmem_free(pWrk2); rocshmem_free(pWrk3); rocshmem_free(pWrk4); rocshmem_free(pWrk5); rocshmem_free(pWrk6); rocshmem_free(src0); rocshmem_free(src1); rocshmem_free(src2); rocshmem_free(src3); rocshmem_free(src4); rocshmem_free(src5); rocshmem_free(src6); rocshmem_free(dst0); rocshmem_free(dst1); rocshmem_free(dst2); rocshmem_free(dst3); rocshmem_free(dst4); rocshmem_free(dst5); rocshmem_free(dst6); rocshmem_finalize(); return c; }