diff --git a/projects/rocshmem/scripts/functional_tests/driver.sh b/projects/rocshmem/scripts/functional_tests/driver.sh index b1f4597b80..a61496bf21 100755 --- a/projects/rocshmem/scripts/functional_tests/driver.sh +++ b/projects/rocshmem/scripts/functional_tests/driver.sh @@ -116,6 +116,12 @@ declare -A TEST_NUMBERS=( ["putmem_on_stream"]="80" ["putmem_signal_on_stream"]="81" ["signal_wait_until_on_stream"]="82" + ["flood_put"]="83" + ["flood_putnbi"]="84" + ["flood_p"]="85" + ["flood_get"]="86" + ["flood_getnbi"]="87" + ["flood_g"]="88" ) ExecTest() { @@ -474,6 +480,16 @@ TestOther() { ExecTest "pingall" 2 8 1 ExecTest "pingall" 2 32 1 + ExecTest "flood_put" 2 64 1024 + ExecTest "flood_get" 2 64 1024 + + ExecTest "flood_put" 8 64 1024 + ExecTest "flood_putnbi" 8 64 1024 + ExecTest "flood_p" 8 64 1024 + ExecTest "flood_get" 8 64 1024 + ExecTest "flood_getnbi" 8 64 1024 + ExecTest "flood_g" 8 64 1024 + # This test requires more contexts than workgroups export ROCSHMEM_MAX_NUM_CONTEXTS=1024 ExecTest "teamctxinfra" 2 1 1 @@ -654,9 +670,19 @@ TestGDA() { ExecTest "pingpong" 2 8 1 ExecTest "pingpong" 2 32 1 + ExecTest "flood_put" 2 64 1024 + ExecTest "flood_get" 2 64 1024 + + ExecTest "flood_put" 8 64 1024 + ExecTest "flood_putnbi" 8 64 1024 + ExecTest "flood_p" 8 64 1024 + ExecTest "flood_get" 8 64 1024 + ExecTest "flood_getnbi" 8 64 1024 +# ExecTest "flood_g" 8 64 1024 # _g not implemented + # This test requires more contexts than workgroups export ROCSHMEM_MAX_NUM_CONTEXTS=1024 - ExecTest "teamctxinfra" 2 1 1 + ExecTest "teamctxinfra" 2 1 1 ExecTest "teamctxsingleinfra" 2 1 1 ExecTest "teamctxblockinfra" 4 1 1 ExecTest "teamctxblockinfra" 5 1 1 diff --git a/projects/rocshmem/tests/functional_tests/CMakeLists.txt b/projects/rocshmem/tests/functional_tests/CMakeLists.txt index a474e02234..2de5ac9dbe 100644 --- a/projects/rocshmem/tests/functional_tests/CMakeLists.txt +++ b/projects/rocshmem/tests/functional_tests/CMakeLists.txt @@ -66,6 +66,7 @@ target_sources( workgroup_primitives.cpp empty_tester.cpp wavefront_primitives.cpp + flood_tester.cpp ) ############################################################################### diff --git a/projects/rocshmem/tests/functional_tests/flood_tester.cpp b/projects/rocshmem/tests/functional_tests/flood_tester.cpp new file mode 100644 index 0000000000..ea2e1e7e82 --- /dev/null +++ b/projects/rocshmem/tests/functional_tests/flood_tester.cpp @@ -0,0 +1,217 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#include "flood_tester.hpp" + +#include + +using namespace rocshmem; + +/****************************************************************************** + * DEVICE TEST KERNEL + *****************************************************************************/ +__global__ void FloodTest(int loop, int skip, long long int *start_time, + long long int *end_time, uint64_t *r_buf, uint64_t *s_buf, + TestType type, ShmemContextType ctx_type, int wf_size) { + __shared__ rocshmem_ctx_t ctx; + + /** + * Shared array to capture the start time for each wavefront + * Max threads per block = 1024, wavefront size = 64 or 32 depending + * on the GPUs. Using 32 since its safer for the dimensioning of the array, + * the last 16 elements will not be used on GPUs with a wf size of 64. + * Maximum array size required = 1024/32 = 32 + */ + __shared__ long long int wf_start_time[32]; + + rocshmem_wg_ctx_create(ctx_type, &ctx); + + int num_pe {rocshmem_ctx_n_pes(ctx)}; + int num_wg {get_grid_num_blocks()}; + int num_th {get_flat_block_size()}; + int my_pe {rocshmem_ctx_my_pe(ctx)}; + int wg_id {get_flat_grid_id()}; + int t_id {get_flat_block_id()}; + int wf_id {t_id / wf_size}; + + auto t_offset {wg_id * num_th + t_id}; + auto tgt_offset {my_pe * num_wg * num_th + t_offset}; + auto dst_offset {0}; + + for (int i = 0; i < loop + skip; i++) { + if (i == skip) { + // Capture the start time of each wavefront to identify the earliest one + wf_start_time[wf_id] = wall_clock64(); + } + + for (int j{0}; j < num_pe; j++) { + // shuffle ordering so that threads in the wave put to a + // different pe 'simultaneously' + auto pe = (t_id + j) % num_pe; + switch (type) { + case FloodPutTestType: + rocshmem_ctx_putmem(ctx, &r_buf[tgt_offset], &s_buf[t_offset], sizeof(uint64_t), pe); + break; + case FloodPutNBITestType: + rocshmem_ctx_putmem_nbi(ctx, &r_buf[tgt_offset], &s_buf[t_offset], sizeof(uint64_t), pe); + break; + case FloodPTestType: + rocshmem_ctx_ulong_p(ctx, &r_buf[tgt_offset], s_buf[t_offset], pe); + break; + case FloodGetTestType: + dst_offset = pe * num_wg * num_th + t_offset; + rocshmem_ctx_getmem(ctx, &r_buf[dst_offset], &s_buf[t_offset], sizeof(uint64_t), pe); + break; + case FloodGetNBITestType: + dst_offset = pe * num_wg * num_th + t_offset; + rocshmem_ctx_getmem_nbi(ctx, &r_buf[dst_offset], &s_buf[t_offset], sizeof(uint64_t), pe); + break; + case FloodGTestType: + dst_offset = pe * num_wg * num_th + t_offset; + r_buf[dst_offset] = rocshmem_ctx_ulong_g(ctx, &s_buf[t_offset], pe); + break; + default: + break; + } + __syncthreads(); + if (is_thread_zero_in_block()) { + rocshmem_ctx_quiet(ctx); + } + } + } + + __syncthreads(); + if (is_thread_zero_in_wave()) { + end_time[wg_id] = wall_clock64(); + } + // Find the earliest start time + int num_wfs = (get_flat_block_size() - 1 ) / wf_size + 1; + for (int i = num_wfs / 2; i > 0; i >>= 1 ) { + if(t_id < i) { + wf_start_time[t_id] = min(wf_start_time[t_id], wf_start_time[t_id + i]); + } + } + __syncthreads(); + if (t_id == 0) { + start_time[wg_id] = wf_start_time[0]; + } + + rocshmem_wg_ctx_destroy(&ctx); +} + +static __global__ void verify_results_kernel(uint64_t *dest, size_t buf_size, + bool *verification_error) { + int num_pe {rocshmem_n_pes()}; + int num_wg {get_grid_num_blocks()}; + int num_th {get_flat_block_size()}; + int my_pe {rocshmem_my_pe()}; + int wg_id {get_flat_grid_id()}; + int t_id {get_flat_block_id()}; + + auto t_offset {wg_id * num_th + t_id}; + + for (int pe{0}; pe < num_pe; pe++) { + auto dst_offset {pe * num_wg * num_th + t_offset}; + auto value = dest[dst_offset]; + auto v_th = value & 0x0fff; + auto v_wg = (value>>12) & 0xffff'ffff; + auto v_pe = (value>>44); + + if (v_th != t_id || v_wg != wg_id || v_pe != pe) { + *verification_error = true; + } + } +} + +/****************************************************************************** + * HOST TESTER CLASS METHODS + *****************************************************************************/ +FloodTester::FloodTester(TesterArguments args) : Tester(args) { + int num_pes {rocshmem_n_pes()}; + int my_pe {rocshmem_my_pe()}; + s_buf = (uint64_t*)rocshmem_malloc(sizeof(uint64_t) * args.num_wgs * args.wg_size); + for(int wg = 0; wg < args.num_wgs; wg++) for(int th = 0; th < args.wg_size; th++) { + s_buf[wg * args.wg_size + th] = (((uint64_t)my_pe)<<44) + (wg<<12) + th; // set value for verification + } + r_buf = (uint64_t*)rocshmem_malloc(sizeof(uint64_t) * args.num_wgs * args.wg_size * num_pes); +} + +FloodTester::~FloodTester() { + rocshmem_free(s_buf); + rocshmem_free(r_buf); +} + +void FloodTester::resetBuffers(size_t size) { + int num_pes {rocshmem_n_pes()}; + memset(r_buf, 0, sizeof(uint64_t) * args.num_wgs * args.wg_size * num_pes); +} + +void FloodTester::launchKernel(dim3 gridSize, dim3 blockSize, int loop, + size_t size) { + size_t shared_bytes = 0; + int num_pes {rocshmem_n_pes()}; + + hipLaunchKernelGGL(FloodTest, gridSize, blockSize, shared_bytes, stream, + loop, args.skip, start_time, end_time, r_buf, s_buf, + _type, _shmem_context, wf_size); + + + num_msgs = (loop + args.skip) * gridSize.x * blockSize.x * num_pes; + num_timed_msgs = loop * gridSize.x * blockSize.x * num_pes; +} + +void FloodTester::verifyResults(size_t size) { + int num_pes {rocshmem_n_pes()}; + int my_pe {rocshmem_my_pe()}; + + if (num_pes > 1<<20 || args.num_wgs > 1<<31 || args.wg_size > 1<<12) { + // can't check + return; + } + assert(size == sizeof(uint64_t)); + + hipLaunchKernelGGL(verify_results_kernel, args.num_wgs, args.wg_size, 0, stream, + r_buf, sizeof(uint64_t), verification_error); + CHECK_HIP(hipStreamSynchronize(stream)); + + if (*verification_error) { + for(auto pe = 0; pe < num_pes; pe++) + for(auto wg = 0; wg < args.num_wgs; wg++) + for(auto th = 0; th < args.wg_size; th++) { + auto t_offset {wg * args.wg_size + th}; + auto dst_offset {pe * args.num_wgs * args.wg_size + t_offset}; + auto value = r_buf[dst_offset]; + auto v_th = value & 0x0fff; + auto v_wg = (value>>12) & 0xffff'ffff; + auto v_pe = (value>>44); + if (v_th != th || v_wg != wg || v_pe != pe) { + std::cerr << "Data validation error at idx " << dst_offset << std::endl; + std::cerr << " Got " << v_pe << ":" << v_wg << ":" << v_th + << ", Expected " << pe << ":" << wg << ":" << th << std::endl; + + *verification_error = false; + } + } + } +} diff --git a/projects/rocshmem/tests/functional_tests/flood_tester.hpp b/projects/rocshmem/tests/functional_tests/flood_tester.hpp new file mode 100644 index 0000000000..7f40fea9b9 --- /dev/null +++ b/projects/rocshmem/tests/functional_tests/flood_tester.hpp @@ -0,0 +1,56 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef _FLOOD_TESTER_HPP_ +#define _FLOOD_TESTER_HPP_ + +#include "tester.hpp" + +/****************************************************************************** + * DEVICE TEST KERNEL + *****************************************************************************/ +__global__ void FloodTest(int loop, int skip, long long int *start_time, + long long int *end_time, uint64_t *r_buf); + +/****************************************************************************** + * HOST TESTER CLASS + *****************************************************************************/ +class FloodTester : public Tester { + public: + explicit FloodTester(TesterArguments args); + virtual ~FloodTester(); + + protected: + virtual void resetBuffers(size_t size) override; + + virtual void launchKernel(dim3 gridSize, dim3 blockSize, int loop, + size_t size) override; + + virtual void verifyResults(size_t size) override; + + uint64_t *r_buf; + uint64_t *s_buf; +}; + +#endif diff --git a/projects/rocshmem/tests/functional_tests/tester.cpp b/projects/rocshmem/tests/functional_tests/tester.cpp index bf6ce6c23c..591e4fd328 100644 --- a/projects/rocshmem/tests/functional_tests/tester.cpp +++ b/projects/rocshmem/tests/functional_tests/tester.cpp @@ -62,6 +62,7 @@ #include "team_reduction_tester.hpp" #include "wavefront_primitives.hpp" #include "workgroup_primitives.hpp" +#include "flood_tester.hpp" #include "backend_bc.hpp" extern Backend* backend; @@ -530,6 +531,30 @@ std::vector Tester::create(TesterArguments args) { if (rank == 0) std::cout << "Wave Signal Fetch ###" << std::endl; testers.push_back(new SignalingOperationsTester(args)); return testers; + case FloodPutTestType: + if (rank == 0) std::cout << "Flood Put (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; + case FloodPutNBITestType: + if (rank == 0) std::cout << "Flood Non-Blocking Put (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; + case FloodPTestType: + if (rank == 0) std::cout << "Flood P (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; + case FloodGetTestType: + if (rank == 0) std::cout << "Flood Get (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; + case FloodGetNBITestType: + if (rank == 0) std::cout << "Flood Non-Blocking Get (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; + case FloodGTestType: + if (rank == 0) std::cout << "Flood G (multidirectional) ###" << std::endl; + testers.push_back(new FloodTester(args)); + return testers; default: if (rank == 0) std::cout << "Empty Test ###" << std::endl; return testers; @@ -644,6 +669,12 @@ bool Tester::peLaunchesKernel() { case PutmemOnStreamTestType: case PutmemSignalOnStreamTestType: case SignalWaitUntilOnStreamTestType: + case FloodPutTestType: + case FloodPutNBITestType: + case FloodPTestType: + case FloodGetTestType: + case FloodGetNBITestType: + case FloodGTestType: is_launcher = true; break; default: diff --git a/projects/rocshmem/tests/functional_tests/tester.hpp b/projects/rocshmem/tests/functional_tests/tester.hpp index a11d5ccba9..fda015f427 100644 --- a/projects/rocshmem/tests/functional_tests/tester.hpp +++ b/projects/rocshmem/tests/functional_tests/tester.hpp @@ -120,6 +120,12 @@ enum TestType { PutmemOnStreamTestType = 80, PutmemSignalOnStreamTestType = 81, SignalWaitUntilOnStreamTestType = 82, + FloodPutTestType = 83, + FloodPutNBITestType = 84, + FloodPTestType = 85, + FloodGetTestType = 86, + FloodGetNBITestType = 87, + FloodGTestType = 88, }; enum OpType { PutType = 0, GetType = 1 }; diff --git a/projects/rocshmem/tests/functional_tests/tester_arguments.cpp b/projects/rocshmem/tests/functional_tests/tester_arguments.cpp index f99cd9fc92..8db7cc2dbe 100644 --- a/projects/rocshmem/tests/functional_tests/tester_arguments.cpp +++ b/projects/rocshmem/tests/functional_tests/tester_arguments.cpp @@ -149,6 +149,15 @@ TesterArguments::TesterArguments(int argc, char *argv[]) { case GTestType: min_msg_size = 1; max_msg_size = 1; + break; + case FloodPutTestType: + case FloodPutNBITestType: + case FloodPTestType: + case FloodGetTestType: + case FloodGetNBITestType: + case FloodGTestType: + min_msg_size = max_msg_size = 8; + break; default: break; } @@ -209,6 +218,12 @@ void TesterArguments::get_arguments() { case PutmemOnStreamTestType: case PutmemSignalOnStreamTestType: case SignalWaitUntilOnStreamTestType: + case FloodPutTestType: + case FloodPutNBITestType: + case FloodPTestType: + case FloodGetTestType: + case FloodGetNBITestType: + case FloodGTestType: requires_two_pes = false; break; default: