diff --git a/scripts/build_configs/ro_net b/scripts/build_configs/ro_net index 17809fa0a9..95ea2950a8 100755 --- a/scripts/build_configs/ro_net +++ b/scripts/build_configs/ro_net @@ -21,7 +21,7 @@ cmake \ -DUSE_IPC=ON \ -DUSE_THREADS=ON \ -DUSE_WF_COAL=OFF \ - -DUSE_COHERENT_HEAP=ON \ + -DUSE_COHERENT_HEAP=OFF \ $src_path cmake --build . --parallel 8 cmake --install . diff --git a/scripts/build_configs/ro_net_debug b/scripts/build_configs/ro_net_debug index 67c3f2d0a5..c2b8afdc00 100755 --- a/scripts/build_configs/ro_net_debug +++ b/scripts/build_configs/ro_net_debug @@ -21,7 +21,7 @@ cmake \ -DUSE_IPC=OFF \ -DUSE_THREADS=ON \ -DUSE_WF_COAL=OFF \ - -DUSE_COHERENT_HEAP=ON \ + -DUSE_COHERENT_HEAP=OFF \ $src_path cmake --build . --parallel 8 cmake --install . diff --git a/src/atomic.hpp b/src/atomic.hpp new file mode 100644 index 0000000000..f0828e4145 --- /dev/null +++ b/src/atomic.hpp @@ -0,0 +1,141 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef LIBRARY_SRC_ATOMIC_HPP +#define LIBRARY_SRC_ATOMIC_HPP + +#include + +namespace rocshmem { +namespace detail { +namespace atomic { + +typedef enum rocshmem_memory_scope { + memory_scope_thread = __HIP_MEMORY_SCOPE_SINGLETHREAD, + memory_scope_wavefront = __HIP_MEMORY_SCOPE_WAVEFRONT, + memory_scope_workgroup = __HIP_MEMORY_SCOPE_WORKGROUP, + memory_scope_agent = __HIP_MEMORY_SCOPE_AGENT, + memory_scope_system = __HIP_MEMORY_SCOPE_SYSTEM, +} rocshmem_memory_scope; + +typedef enum rocshmem_memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} rocshmem_memory_order; + +struct rocshmem_memory_orders { + rocshmem_memory_order load {memory_order_acquire}; + rocshmem_memory_order store {memory_order_release}; + rocshmem_memory_order atomic {memory_order_acq_rel}; + rocshmem_memory_order weak_cas_success {memory_order_acq_rel}; + rocshmem_memory_order weak_cas_failure {memory_order_acq_rel}; + rocshmem_memory_order strong_cas_success {memory_order_acq_rel}; + rocshmem_memory_order strong_cas_failure {memory_order_acq_rel}; +}; + +template +__host__ __device__ +T load(const T* address, rocshmem_memory_orders o) { + return __hip_atomic_load(address, o.load, s); +} + +template +__host__ __device__ +void store(T* address, const T value, rocshmem_memory_orders o) { + return __hip_atomic_store(address, value, o.store, s); +} + +template +__host__ __device__ +bool compare_exchange_weak(T& expected, T desired, rocshmem_memory_orders o) { + return __hip_atomic_compare_exchange_weak(expected, desired, o.weak_cas_success, o.weak_cas_failure, s); +} + +template +__host__ __device__ +bool compare_exchange_strong(T& expected, T desired, rocshmem_memory_orders o) { + return __hip_atomic_compare_exchange_strong(expected, desired, o.strong_cas_success, o.strong_cas_failure, s); +} + +template +__host__ __device__ +T fetch_add(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_add(obj, arg, o.atomic, s); +} + +template +__host__ __device__ +T fetch_sub(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_sub(obj, arg, o.atomic, s); +} + +template +__host__ __device__ +T fetch_and(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_and(obj, arg, o.atomic, s); +} + +template +__host__ __device__ +T fetch_or(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_or(obj, arg, o, s); +} + +template +__host__ __device__ +T fetch_xor(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_xor(obj, arg, o.atomic, s); +} + +template +__host__ __device__ +T fetch_max(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_max(obj, arg, o.atomic, s); +} + +template +__host__ __device__ +T fetch_min(T* obj, U arg, rocshmem_memory_orders o) { + return __hip_atomic_fetch_min(obj, arg, o.atomic, s); +} + +template +__device__ +void threadfence() { + if constexpr (s == memory_scope_system) { + __threadfence_system(); + } else if constexpr (s == memory_scope_agent) { + __threadfence(); + } else if constexpr (s == memory_scope_workgroup) { + __threadfence_block(); + } +} + +} // namespace atomic +} // namespace detail +} // namespace rocshmem + +#endif // LIBRARY_SRC_ATOMIC_HPP_ diff --git a/src/ipc_policy.hpp b/src/ipc_policy.hpp index c0190198ca..1b84c52613 100644 --- a/src/ipc_policy.hpp +++ b/src/ipc_policy.hpp @@ -64,38 +64,38 @@ class IpcOnImpl { __device__ void ipcCopy_wave(void *dst, void *src, size_t size); - __device__ void ipcFence() { __threadfence(); } + __device__ void ipcFence() { __threadfence_system(); } template __device__ T ipcAMOFetchAdd(T *val, T value) { - return __hip_atomic_fetch_add(val, value, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); + return __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); } template __device__ T ipcAMOFetchCas(T *val, T cond, T value) { - __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_RELAXED, - __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); return cond; } template __device__ void ipcAMOAdd(T *val, T value) { - __hip_atomic_fetch_add(val, value, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_fetch_add(val, value, __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); } template __device__ void ipcAMOCas(T *val, T cond, T value) { - __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_RELAXED, - __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_compare_exchange_strong(val, &cond, value, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST, + __HIP_MEMORY_SCOPE_SYSTEM); } template __device__ void ipcAMOSet(T *val, T value) { - __hip_atomic_store(val, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(val, value, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM); } __device__ void zero_byte_read(int pe) { diff --git a/src/memory/notifier.hpp b/src/memory/notifier.hpp index 0d8cd92f2f..946a9c291e 100644 --- a/src/memory/notifier.hpp +++ b/src/memory/notifier.hpp @@ -20,59 +20,96 @@ * IN THE SOFTWARE. *****************************************************************************/ -/** - * @file notifier.hpp - * - * @brief Contains the notification memory space for threads to communicate - * results with one another. - * - * Assume one thread does work on behalf of other threads (as a leader) and - * that work needs to be communicated to the other threads as a result. - * To expose the result, the threads need to share a memory space where the - * result can be written. The leader thread writes the result out to this - * memory space and all threads synchronize on it. - * - * This class allows the leader thread to notify other threads of the update. - */ - #ifndef LIBRARY_SRC_MEMORY_NOTIFIER_HPP_ #define LIBRARY_SRC_MEMORY_NOTIFIER_HPP_ #include "../device_proxy.hpp" #include "../util.hpp" +#include "../atomic.hpp" namespace rocshmem { +template class Notifier { - public: - __device__ uint64_t read() { return value_; } - __device__ void write(uint64_t val) { - if (is_thread_zero_in_block()) { - value_ = val; - } - publish(); + public: + __device__ uint64_t load() { + return detail::atomic::load(&value_, orders_); } - __device__ void done() { __syncthreads(); } + __device__ void store(uint64_t val) { + detail::atomic::store(&value_, val, orders_); + } - private: - __device__ void publish() { - if (is_thread_zero_in_block()) { - __threadfence(); + __device__ void fence() { + detail::atomic::threadfence(); + } + + __device__ void sync() { + if constexpr (scope == detail::atomic::memory_scope_thread || + scope == detail::atomic::memory_scope_wavefront) { + return; + } + if constexpr (scope == detail::atomic::memory_scope_workgroup) { + __syncthreads(); + return; + } + if constexpr (scope == detail::atomic::memory_scope_system) { + assert(false); + return; + } + + uint32_t done {signal_ + 1}; + __syncthreads(); + + uint32_t retval {0}; + bool executor {!threadIdx.x && !threadIdx.y && !threadIdx.z}; + if (executor) { + retval = detail::atomic::fetch_add(&count_, 1, orders_); + fence(); + } + __syncthreads(); + + if (retval == ((gridDim.x * gridDim.y * gridDim.z) - 1)) { + if (executor) { + detail::atomic::store(&count_, 0, orders_); + fence(); + detail::atomic::fetch_add(&signal_, 1, orders_); + } + } + + if (executor) { + while (detail::atomic::load(&signal_, orders_) != done) { + ; + } } __syncthreads(); } + private: + detail::atomic::rocshmem_memory_orders orders_{}; + uint64_t value_{}; + + uint32_t signal_ {}; + + uint32_t count_ {}; }; -template +template class NotifierProxy { - using ProxyT = DeviceProxy; + using ProxyT = DeviceProxy>; public: - __host__ __device__ Notifier* get() { return proxy_.get(); } + NotifierProxy() { + new (proxy_.get()) Notifier(); + } + + ~NotifierProxy() { + proxy_.get()->~Notifier(); + } + + __host__ __device__ Notifier* get() { return proxy_.get(); } private: ProxyT proxy_{}; diff --git a/src/memory/slab_heap.cpp b/src/memory/slab_heap.cpp index 89067b78da..faf3a84b1b 100644 --- a/src/memory/slab_heap.cpp +++ b/src/memory/slab_heap.cpp @@ -75,9 +75,12 @@ __device__ void SlabHeap::malloc(void** ptr, size_t size) { * Notify other threads in block about the allocation result. */ auto notifier{notifier_.get()}; - notifier->write(ptr_deref_u64); - uint64_t notification_u64{notifier->read()}; - notifier->done(); + if (!threadIdx.x) { + notifier->store(ptr_deref_u64); + notifier->fence(); + } + __syncthreads(); + uint64_t notification_u64{notifier->load()}; /* * Write to the ptr parameter (to return it back up the call stack). diff --git a/src/memory/slab_heap.hpp b/src/memory/slab_heap.hpp index a3655b27c1..171332bee3 100644 --- a/src/memory/slab_heap.hpp +++ b/src/memory/slab_heap.hpp @@ -48,7 +48,7 @@ class SlabHeap { /** * @brief Helper type for notifier */ - using NOTIFIER_PROXY_T = NotifierProxy; + using NOTIFIER_PROXY_T = NotifierProxy; /** * @brief Helper type for notifier diff --git a/src/util.hpp b/src/util.hpp index c209750d49..c02f891dc4 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -93,6 +93,7 @@ __device__ __forceinline__ bool is_thread_zero_in_block() { __device__ __forceinline__ bool is_block_zero_in_grid() { return hipBlockIdx_x == 0 && hipBlockIdx_y == 0 && hipBlockIdx_z == 0; } + /* * Returns the number of threads in the caller's flattened thread block. */ @@ -100,6 +101,13 @@ __device__ __forceinline__ int get_flat_block_size() { return hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; } +/* + * Returns the number of threads in the caller's flattened grid. + */ +__device__ __forceinline__ int get_flat_grid_size() { + return get_flat_block_size() * hipGridDim_x * hipGridDim_y * hipGridDim_z; +} + /* * Returns the flattened thread index of the calling thread within its * thread block. diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 349bb2c2bf..b47da1c154 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -90,6 +90,7 @@ target_sources( free_list_gtest.cpp #context_ipc_gtest.cpp ipc_impl_simple_coarse_gtest.cpp + ipc_impl_simple_fine_gtest.cpp ) ############################################################################### diff --git a/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp b/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp index dbf72923b5..37833f9cfe 100644 --- a/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp +++ b/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp @@ -33,6 +33,7 @@ TEST_F(IPCImplSimpleCoarseTestFixture, MPI_num_pes) { } TEST_F(IPCImplSimpleCoarseTestFixture, IPC_bases) { + ASSERT_NE(ipc_impl_.ipc_bases, nullptr); for(int i{0}; i < mpi_.num_pes(); i++) { ASSERT_NE(ipc_impl_.ipc_bases[i], nullptr); } diff --git a/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp b/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp index 02dfd8c55a..083d73f418 100644 --- a/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp +++ b/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp @@ -218,8 +218,6 @@ class IPCImplSimpleCoarseTestFixture : public ::testing::Test { protected: std::vector golden_; - std::vector output_; - HEAP_T heap_mem_ {}; MPI_T mpi_ {heap_mem_.get_ptr(), heap_mem_.get_size()}; diff --git a/tests/unit_tests/ipc_impl_simple_fine_gtest.cpp b/tests/unit_tests/ipc_impl_simple_fine_gtest.cpp new file mode 100644 index 0000000000..71cb85dc9e --- /dev/null +++ b/tests/unit_tests/ipc_impl_simple_fine_gtest.cpp @@ -0,0 +1,1037 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#include "ipc_impl_simple_fine_gtest.hpp" + +using namespace rocshmem; + +TEST_F(IPCImplSimpleFineTestFixture, ptr_check) { + ASSERT_NE(heap_mem_.get_ptr(), nullptr); +} + +TEST_F(IPCImplSimpleFineTestFixture, MPI_num_pes) { + ASSERT_EQ(mpi_.num_pes(), 2); +} + +TEST_F(IPCImplSimpleFineTestFixture, IPC_bases) { + ASSERT_NE(ipc_impl_.ipc_bases, nullptr); + for(int i{0}; i < mpi_.num_pes(); i++) { + ASSERT_NE(ipc_impl_.ipc_bases[i], nullptr); + } +} + +TEST_F(IPCImplSimpleFineTestFixture, golden_1048576_int) { + iota_golden(1048576); + validate_golden(1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_1x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write_wg(grid, block, 32); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_1024x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + write_wg(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_1024x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + read_wg(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_128x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {128,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_256x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {256,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_512x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {512,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_768x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {768,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wg_1x1x1_1024x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + write_wg(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_128x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {128,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_256x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {256,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_512x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {512,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_768x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {768,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wg_1x1x1_1024x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + read_wg(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_64x1x1_1_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 1); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_64x1x1_1_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 1); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_64x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_64x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_3x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {3,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_5x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {5,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_6x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {6,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_7x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {7,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_9x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {9,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_10x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {10,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_11x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {11,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_12x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {12,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_13x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {13,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_14x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {14,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_15x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {15,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_17x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {17,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_18x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {18,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_19x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {19,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_20x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {20,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_21x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {21,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_22x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {22,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_23x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {23,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_24x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {24,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_25x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {25,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_26x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {26,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_27x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {27,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_28x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {28,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_29x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {29,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_30x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {30,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_31x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {31,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_33x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {33,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_34x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {34,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_35x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {35,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_36x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {36,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_37x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {37,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_38x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {38,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_39x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {39,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_40x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {40,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_41x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {41,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_42x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {42,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_43x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {43,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_44x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {44,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_45x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {45,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_46x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {46,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_47x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {47,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_48x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {48,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_49x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {49,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_50x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {50,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_51x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {51,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_52x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {52,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_53x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {53,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_54x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {54,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_55x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {55,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_56x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {56,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_57x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {57,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_58x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {58,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_59x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {59,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_60x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {60,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_61x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {61,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_62x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {62,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_63x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {63,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, write_wave_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_3x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {3,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_5x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {5,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_6x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {6,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_7x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {7,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_9x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {9,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_10x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {10,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_11x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {11,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_12x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {12,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_13x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {13,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_14x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {14,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_15x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {15,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_17x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {17,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_18x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {18,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_19x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {19,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_20x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {20,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_21x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {21,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_22x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {22,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_23x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {23,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_24x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {24,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_25x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {25,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_26x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {26,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_27x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {27,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_28x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {28,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_29x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {29,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_30x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {30,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_31x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {31,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_33x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {33,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_34x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {34,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_35x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {35,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_36x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {36,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_37x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {37,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_38x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {38,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_39x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {39,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_40x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {40,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_41x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {41,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_42x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {42,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_43x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {43,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_44x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {44,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_45x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {45,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_46x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {46,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_47x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {47,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_48x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {48,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_49x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {49,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_50x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {50,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_51x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {51,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_52x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {52,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_53x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {53,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_54x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {54,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_55x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {55,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_56x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {56,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_57x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {57,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_58x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {58,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_59x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {59,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_60x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {60,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_61x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {61,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_62x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {62,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_63x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {63,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleFineTestFixture, read_wave_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, write_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleFineTestFixture, read_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read(grid, block, 1048576); +} + + diff --git a/tests/unit_tests/ipc_impl_simple_fine_gtest.hpp b/tests/unit_tests/ipc_impl_simple_fine_gtest.hpp new file mode 100644 index 0000000000..ba4c20e288 --- /dev/null +++ b/tests/unit_tests/ipc_impl_simple_fine_gtest.hpp @@ -0,0 +1,344 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef ROCSHMEM_IPC_IMPL_SIMPLE_FINE_GTEST_HPP +#define ROCSHMEM_IPC_IMPL_SIMPLE_FINE_GTEST_HPP + +#include "gtest/gtest.h" + +#include +#include + +#include "../src/atomic.hpp" +#include "../src/ipc_policy.hpp" +#include "../src/memory/notifier.hpp" +#include "../src/memory/symmetric_heap.hpp" +#include "../src/util.hpp" + +namespace rocshmem { + +enum TestType { + READ = 0, + WRITE = 1 +}; + +const uint32_t SIGNAL_OFFSET {67108864}; + +__device__ +void +validator(bool *error, int *golden, int *dest, size_t bytes) { + size_t elements {bytes / sizeof(int)}; + for (int i {get_flat_id()}; i < elements; i += get_flat_grid_size()) { + if (golden[i] != dest[i]) { + printf("golden[%d] %d != dest[%d] %d\n", i, golden[i], i, dest[i]); + *error = true; + } + } +} + +template +__global__ +void +kernel_put_with_signal_validator(bool *error, int *golden, int *dest, size_t bytes, NotifierT *notifier) { + detail::atomic::rocshmem_memory_orders orders{}; + if (!get_flat_id()) { + while (detail::atomic::load(dest + SIGNAL_OFFSET, orders) == 0) { + ; + } + } + notifier->sync(); + validator(error, golden, dest, bytes); +} + +template +__global__ +void +kernel_simple_fine_copy(IpcImpl *ipc_impl, bool *error, int *golden, int *src, int *dest, size_t bytes, TestType test, NotifierT *notifier) { + if (!get_flat_id()) { + ipc_impl->ipcCopy(dest, src, bytes); + ipc_impl->ipcFence(); + if (test == WRITE) { + ipc_impl->ipcAMOFetchAdd(dest + SIGNAL_OFFSET, 1); + } + } + if (test == READ) { + notifier->sync(); + validator(error, golden, dest, bytes); + } +} + +template +__global__ +void +kernel_simple_fine_copy_wg(IpcImpl *ipc_impl, bool *error, int *golden, int *src, int *dest, size_t bytes, TestType test, NotifierT *notifier) { + if (!blockIdx.x) { + ipc_impl->ipcCopy_wg(dest, src, bytes); + ipc_impl->ipcFence(); + if (test == WRITE) { + if (!threadIdx.x) { + ipc_impl->ipcAMOFetchAdd(dest + SIGNAL_OFFSET, 1); + } + } + } + if (test == READ) { + notifier->sync(); + validator(error, golden, dest, bytes); + } +} + +template +__global__ +void +kernel_simple_fine_copy_wave(IpcImpl *ipc_impl, bool *error, int *golden, int *src, int *dest, size_t bytes, TestType test, NotifierT *notifier) { + if (!blockIdx.x && threadIdx.x < 64) { + ipc_impl->ipcCopy_wave(dest, src, bytes); + ipc_impl->ipcFence(); + if (test == WRITE) { + if (!threadIdx.x) { + ipc_impl->ipcAMOFetchAdd(dest + SIGNAL_OFFSET, 1); + } + } + } + __syncthreads(); + if (test == READ) { + notifier->sync(); + validator(error, golden, dest, bytes); + } +} + +class IPCImplSimpleFineTestFixture : public ::testing::Test { + using HEAP_T = HeapMemory; + using MPI_T = RemoteHeapInfo; + using NotifierT = Notifier; + using NotifierProxyT = NotifierProxy; + using FN_T1 = void (*)(IpcImpl*, bool*, int*, int*, int*, size_t, TestType, NotifierT*); + using FN_T2 = void (*)(bool*, int*, int*, size_t, NotifierT*); + + public: + IPCImplSimpleFineTestFixture() { + ipc_impl_.ipcHostInit(mpi_.my_pe(), mpi_.get_heap_bases() , MPI_COMM_WORLD); + + assert(ipc_impl_dptr_ == nullptr); + hip_allocator_.allocate((void**)&ipc_impl_dptr_, sizeof(IpcImpl)); + CHECK_HIP(hipMemcpy(ipc_impl_dptr_, &ipc_impl_, sizeof(IpcImpl), hipMemcpyHostToDevice)); + + assert(error_dptr_ == nullptr); + hip_allocator_.allocate((void**)&error_dptr_, sizeof(bool)); + *error_dptr_ = false; + } + + ~IPCImplSimpleFineTestFixture() { + if (ipc_impl_dptr_) { + hip_allocator_.deallocate(ipc_impl_dptr_); + } + if (error_dptr_) { + hip_allocator_.deallocate(error_dptr_); + } + if (golden_dptr_) { + hip_allocator_.deallocate(golden_dptr_); + } + + ipc_impl_.ipcHostStop(); + } + + void launch(FN_T1 f, const dim3 grid, const dim3 block, int* src, int* dest, size_t bytes, TestType test) { + f<<>>(ipc_impl_dptr_, error_dptr_, golden_dptr_, src, dest, bytes, test, notifier_.get()); + CHECK_HIP(hipStreamSynchronize(nullptr)); + } + + void launch(FN_T2 f, const dim3 grid, const dim3 block, int* dest, size_t bytes) { + f<<>>(error_dptr_, golden_dptr_, dest, bytes, notifier_.get()); + CHECK_HIP(hipStreamSynchronize(nullptr)); + } + + void write(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(WRITE); + initialize_src_buffer(WRITE); + copy(WRITE, grid, block); + check_device_validation_errors(WRITE); + } + + void write_wg(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(WRITE); + initialize_src_buffer(WRITE); + copy_wg(WRITE, grid, block); + check_device_validation_errors(WRITE); + } + + void write_wave(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(WRITE); + initialize_src_buffer(WRITE); + copy_wave(WRITE, grid, block); + check_device_validation_errors(WRITE); + } + + void read(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(READ); + initialize_src_buffer(READ); + copy(READ, grid, block); + check_device_validation_errors(READ); + } + + void read_wg(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(READ); + initialize_src_buffer(READ); + copy_wg(READ, grid, block); + check_device_validation_errors(READ); + } + + void read_wave(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_signal(READ); + initialize_src_buffer(READ); + copy_wave(READ, grid, block); + check_device_validation_errors(READ); + } + + void iota_golden(size_t elems) { + golden_.resize(elems); + std::iota(golden_.begin(), golden_.end(), 0); + + assert(golden_dptr_ == nullptr); + size_t golden_dptr_bytes {golden_.size() * sizeof(int)}; + hip_allocator_.allocate((void**)&golden_dptr_, golden_dptr_bytes); + CHECK_HIP(hipMemcpy(golden_dptr_, golden_.data(), golden_dptr_bytes, hipMemcpyHostToDevice)); + } + + void validate_golden(size_t elems) { + ASSERT_EQ(golden_.size(), elems); + for (int i{0}; i < golden_.size(); i++) { + ASSERT_EQ(golden_[i], i); + } + } + + void initialize_signal(TestType test) { + bool is_write_test = test; + if (is_write_test && mpi_.my_pe() == 0) { + int *dest = reinterpret_cast(ipc_impl_.ipc_bases[1]); + *(dest + SIGNAL_OFFSET) = 0; + } + } + + void initialize_src_buffer(TestType test) { + if (!pe_initializes_src_buffer(test)) { + return; + } + size_t bytes = golden_.size() * sizeof(int); + auto dev_src = reinterpret_cast(ipc_impl_.ipc_bases[mpi_.my_pe()]); + CHECK_HIP(hipMemcpy(dev_src, golden_.data(), bytes, hipMemcpyHostToDevice)); + } + + bool pe_initializes_src_buffer(TestType test) { + bool is_write_test = test; + bool is_read_test = !test; + return (is_write_test && mpi_.my_pe() == 0) || + (is_read_test && mpi_.my_pe() == 1); + } + + void execute(TestType test, FN_T1 fn, const dim3 grid, const dim3 block) { + size_t bytes = golden_.size() * sizeof(int); + if (mpi_.my_pe()) { + mpi_.barrier(); + if (test == WRITE) { + int *dest = reinterpret_cast(ipc_impl_.ipc_bases[1]); + FN_T2 val_fn = kernel_put_with_signal_validator; + launch(val_fn, grid, block, dest, bytes); + } + mpi_.barrier(); + return; + } + int *src{nullptr}; + int *dest{nullptr}; + if (test == WRITE) { + src = reinterpret_cast(ipc_impl_.ipc_bases[0]); + dest = reinterpret_cast(ipc_impl_.ipc_bases[1]); + } else { + src = reinterpret_cast(ipc_impl_.ipc_bases[1]); + dest = reinterpret_cast(ipc_impl_.ipc_bases[0]); + } + mpi_.barrier(); + launch(fn, grid, block, src, dest, bytes, test); + mpi_.barrier(); + } + + void copy(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_fine_copy, grid, block); + } + + void copy_wg(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_fine_copy_wg, grid, block); + } + + void copy_wave(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_fine_copy_wave, grid, block); + } + + void check_device_validation_errors(TestType test) { + if (!pe_validates_dest_buffer(test)) { + return; + } + ASSERT_EQ(*error_dptr_, false); + } + + void validate_dest_buffer(TestType test) { + if (!pe_validates_dest_buffer(test)) { + return; + } + + auto dev_dest = reinterpret_cast(ipc_impl_.ipc_bases[mpi_.my_pe()]); + for (int i{0}; i < golden_.size(); i++) { + ASSERT_EQ(golden_[i], dev_dest[i]); + } + } + + bool pe_validates_dest_buffer(TestType test) { + return !pe_initializes_src_buffer(test); + } + + protected: + HIPDefaultFinegrainedAllocator hip_allocator_ {}; + + NotifierProxyT notifier_ {}; + + HEAP_T heap_mem_ {}; + + MPI_T mpi_ {heap_mem_.get_ptr(), heap_mem_.get_size()}; + + std::vector golden_; + + int *golden_dptr_ {nullptr}; + + IpcImpl ipc_impl_ {}; + + IpcImpl *ipc_impl_dptr_ {nullptr}; + + bool *error_dptr_ {nullptr}; +}; + +} // namespace rocshmem + +#endif // ROCSHMEM_IPC_IMPL_SIMPLE_FINE_GTEST_HPP diff --git a/tests/unit_tests/notifier_gtest.cpp b/tests/unit_tests/notifier_gtest.cpp index 9f79c62397..d3b699e3d9 100644 --- a/tests/unit_tests/notifier_gtest.cpp +++ b/tests/unit_tests/notifier_gtest.cpp @@ -28,30 +28,98 @@ using namespace rocshmem; ******************************* Fixture Tests ******************************* *****************************************************************************/ -TEST_F(NotifierTestFixture, run_all_threads_once_1_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_1_1) { run_all_threads_once(1, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_2_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_2_1) { run_all_threads_once(2, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_64_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_64_1) { run_all_threads_once(64, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_128_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_128_1) { run_all_threads_once(128, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_256_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_256_1) { run_all_threads_once(256, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_512_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_512_1) { run_all_threads_once(512, 1); } -TEST_F(NotifierTestFixture, run_all_threads_once_1024_1) { +TEST_F(NotifierBlockTestFixture, run_all_threads_once_1024_1) { run_all_threads_once(1024, 1); } + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_1) { + run_all_threads_once(1, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_2_1) { + run_all_threads_once(2, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_64_1) { + run_all_threads_once(64, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_128_1) { + run_all_threads_once(128, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_256_1) { + run_all_threads_once(256, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_512_1) { + run_all_threads_once(512, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_1) { + run_all_threads_once(1024, 1); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_2) { + run_all_threads_once(1, 2); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_2) { + run_all_threads_once(1024, 2); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_4) { + run_all_threads_once(1, 4); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_4) { + run_all_threads_once(1024, 4); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_8) { + run_all_threads_once(1, 8); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_8) { + run_all_threads_once(1024, 8); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_32) { + run_all_threads_once(1, 32); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_32) { + run_all_threads_once(1024, 32); +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1_38) { + run_all_threads_once(1, 38); // MI300 CPX +} + +TEST_F(NotifierAgentTestFixture, run_all_threads_once_1024_38) { + run_all_threads_once(1024, 38); // MI300 CPX +} diff --git a/tests/unit_tests/notifier_gtest.hpp b/tests/unit_tests/notifier_gtest.hpp index ecb2e7a619..e130159b2e 100644 --- a/tests/unit_tests/notifier_gtest.hpp +++ b/tests/unit_tests/notifier_gtest.hpp @@ -44,66 +44,45 @@ static const uint64_t NOTIFIER_OFFSET {0x100B00}; inline __device__ void write_to_memory(uint8_t* raw_memory) { - auto thread_idx {get_flat_block_id()}; + auto thread_idx {get_flat_id()}; raw_memory[thread_idx] = THREAD_VALUE; __threadfence(); } +template __global__ void all_threads_once(uint8_t* raw_memory, - Notifier* notifier) { - notifier->write(NOTIFIER_OFFSET); - uint64_t offset_u64 {notifier->read()}; - notifier->done(); - + NotifierT * notifier) { + if (!get_flat_id()) { + notifier->store(NOTIFIER_OFFSET); + notifier->fence(); + } + notifier->sync(); + uint64_t offset_u64 {notifier->load()}; uint64_t raw_memory_u64 {reinterpret_cast(raw_memory)}; uint64_t address_u64 {raw_memory_u64 + offset_u64}; uint8_t* address {reinterpret_cast(address_u64)}; write_to_memory(address); - __syncthreads(); } -class NotifierTestFixture : public ::testing::Test { - using NotifierProxyT = NotifierProxy; - +class NotifierBase : public ::testing::Test { public: - NotifierTestFixture() { + NotifierBase() { assert(raw_memory_ == nullptr); hip_allocator_.allocate((void**)&raw_memory_, GIBIBYTE_); assert(raw_memory_); } - ~NotifierTestFixture() { + ~NotifierBase() { if (raw_memory_) { hip_allocator_.deallocate(raw_memory_); } } void - run_all_threads_once(uint32_t x_block_dim, - uint32_t x_grid_dim) { - const dim3 hip_blocksize(x_block_dim, 1, 1); - const dim3 hip_gridsize(x_grid_dim, 1, 1); - - hipLaunchKernelGGL(all_threads_once, - hip_gridsize, - hip_blocksize, - 0, - nullptr, - raw_memory_, - notifier_.get()); - - hipError_t return_code = hipStreamSynchronize(nullptr); - if (return_code != hipSuccess) { - printf("Failed in stream synchronize\n"); - assert(return_code == hipSuccess); - } - - size_t number_threads {x_block_dim * x_grid_dim}; - + verify(size_t number_threads) { uint8_t* offset_addr {compute_offset_addr()}; - for (size_t i {0}; i < number_threads; i++) { ASSERT_EQ(offset_addr[i], THREAD_VALUE); } @@ -136,12 +115,51 @@ class NotifierTestFixture : public ::testing::Test { */ uint8_t *raw_memory_ {nullptr}; +}; + +class NotifierBlockTestFixture : public NotifierBase { + using NotifierT = Notifier; + using NotifierProxyT = NotifierProxy; + + public: + void + run_all_threads_once(uint32_t x_block_dim, + uint32_t x_grid_dim) { + new (notifier_.get()) NotifierT(); + const dim3 block(x_block_dim, 1, 1); + const dim3 grid(x_grid_dim, 1, 1); + all_threads_once<<>>(raw_memory_, notifier_.get()); + CHECK_HIP(hipStreamSynchronize(nullptr)); + verify(x_block_dim * x_grid_dim); + } + /** * @brief Used to broadcast base offset for writing. */ NotifierProxyT notifier_ {}; }; +class NotifierAgentTestFixture : public NotifierBase { + using NotifierT = Notifier; + using NotifierProxyT = NotifierProxy; + + public: + void + run_all_threads_once(uint32_t x_block_dim, + uint32_t x_grid_dim) { + new (notifier_.get()) NotifierT(); + const dim3 block(x_block_dim, 1, 1); + const dim3 grid(x_grid_dim, 1, 1); + all_threads_once<<>>(raw_memory_, notifier_.get()); + CHECK_HIP(hipStreamSynchronize(nullptr)); + verify(x_block_dim * x_grid_dim); + } + + /** + * @brief Used to broadcast base offset for writing. + */ + NotifierProxyT notifier_ {}; +}; } // namespace rocshmem