diff --git a/scripts/build_configs/ro_net b/scripts/build_configs/ro_net index 30c4ad27bc..17809fa0a9 100755 --- a/scripts/build_configs/ro_net +++ b/scripts/build_configs/ro_net @@ -18,7 +18,7 @@ cmake \ -DPROFILE=OFF \ -DUSE_GPU_IB=OFF \ -DUSE_DC=OFF \ - -DUSE_IPC=OFF \ + -DUSE_IPC=ON \ -DUSE_THREADS=ON \ -DUSE_WF_COAL=OFF \ -DUSE_COHERENT_HEAP=ON \ diff --git a/src/ipc/ipc_policy.cpp b/src/ipc/ipc_policy.cpp index e4555ac8f2..356aa81ad8 100644 --- a/src/ipc/ipc_policy.cpp +++ b/src/ipc/ipc_policy.cpp @@ -50,7 +50,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, /* * Figure out how this process' rank among local processes. */ - int shm_rank; MPI_Comm_rank(shmcomm, &shm_rank); /* @@ -92,7 +91,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, void **ipc_base_uncast = reinterpret_cast(&ipc_base[i]); CHECK_HIP(hipIpcOpenMemHandle(ipc_base_uncast, vec_ipc_handle[i], hipIpcMemLazyEnablePeerAccess)); - // TODO(bpotter): add some error checking here if happens to fail } else { ipc_base[i] = base_heap; } @@ -110,6 +108,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, free(vec_ipc_handle); } +__host__ void IpcOnImpl::ipcHostStop() { + for (size_t i = 0; i < shm_size; i++) { + if (i != shm_rank) { + CHECK_HIP(hipIpcCloseMemHandle(ipc_bases[i])); + } + } + CHECK_HIP(hipFree(ipc_bases)); +} + __device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) { memcpy(dst, src, size); } diff --git a/src/ipc/ipc_policy.hpp b/src/ipc/ipc_policy.hpp index d608dc8d54..9609aa42cb 100644 --- a/src/ipc/ipc_policy.hpp +++ b/src/ipc/ipc_policy.hpp @@ -42,6 +42,8 @@ class IpcOnImpl { using HEAP_BASES_T = std::vector>; public: + int shm_rank{0}; + uint32_t shm_size{0}; char **ipc_bases{nullptr}; @@ -49,6 +51,8 @@ class IpcOnImpl { __host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, MPI_Comm thread_comm); + __host__ void ipcHostStop(); + __device__ bool isIpcAvailable(int my_pe, int target_pe) { return my_pe / shm_size == target_pe / shm_size; } @@ -115,6 +119,8 @@ class IpcOffImpl { __host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, MPI_Comm thread_comm) {} + __host__ void ipcHostStop() {} + __device__ bool isIpcAvailable(int my_pe, int target_pe) { return false; } __device__ void ipcGpuInit(Backend *roc_shmem_handle, Context *ctx, diff --git a/src/util.hpp b/src/util.hpp index 0d4dad806a..67fdac6786 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -302,19 +302,34 @@ __device__ __forceinline__ void memcpy_wg(void* dst, void* src, size_t size) { } __device__ __forceinline__ void memcpy_wave(void* dst, void* src, size_t size) { - uint8_t* dst_bytes{static_cast(dst)}; - uint8_t* src_bytes{static_cast(src)}; + int wave_tid = get_flat_block_id() % WF_SIZE; + int wave_size{wave_SZ()}; int cpy_size{}; - int thread_id{get_flat_block_id()}; + uint8_t* dst_bytes{nullptr}; + uint8_t* dst_def{nullptr}; + uint8_t* src_bytes{nullptr}; + uint8_t* src_def{nullptr}; + + dst_def = reinterpret_cast(dst); + src_def = reinterpret_cast(src); + dst_bytes = dst_def; + src_bytes = src_def; + for (int j{8}; j > 1; j >>= 1) { cpy_size = size / j; - for (int i{thread_id}; i < cpy_size; i += WF_SIZE) { - store_asm(src_bytes, dst_bytes, j); + for (int i{wave_tid}; i < cpy_size; i += wave_size) { + dst_bytes = dst_def; + src_bytes = src_def; + src_bytes += i * j; dst_bytes += i * j; - size -= cpy_size * j; + + store_asm(src_bytes, dst_bytes, j); } + size -= cpy_size * j; + dst_def += cpy_size * j; + src_def += cpy_size * j; } if (size == 1) { diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 1f71119fd6..3f1cf89a17 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -71,6 +71,7 @@ target_sources( PRIVATE shmem_gtest.cpp heap_memory_gtest.cpp + hipmalloc_gtest.cpp bin_gtest.cpp binner_gtest.cpp #bitwise_gtest.cpp # Test is disabled becasue of compilation errors @@ -88,6 +89,7 @@ target_sources( #forward_list_gtest.cpp free_list_gtest.cpp context_ipc_gtest.cpp + ipc_impl_simple_coarse_gtest.cpp ) ############################################################################### diff --git a/tests/unit_tests/hipmalloc_gtest.cpp b/tests/unit_tests/hipmalloc_gtest.cpp new file mode 100644 index 0000000000..598952c0e6 --- /dev/null +++ b/tests/unit_tests/hipmalloc_gtest.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#include "hipmalloc_gtest.hpp" + +using namespace rocshmem; + +TEST_F(HipMallocTestFixture, normal_1GBx256) { + void* ptr{nullptr}; + size_t gb {1073741824}; + for (int i{0}; i < 256; i++) { + hip_allocator_.allocate(&ptr, gb); + hip_allocator_.deallocate(ptr); + } +} + +TEST_F(HipMallocTestFixture, fine_1GBx256) { + void* ptr{nullptr}; + size_t gb {1073741824}; + for (int i{0}; i < 256; i++) { + hip_allocator_fg_.allocate(&ptr, gb); + hip_allocator_fg_.deallocate(ptr); + } +} diff --git a/tests/unit_tests/hipmalloc_gtest.hpp b/tests/unit_tests/hipmalloc_gtest.hpp new file mode 100644 index 0000000000..d6d341a57b --- /dev/null +++ b/tests/unit_tests/hipmalloc_gtest.hpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef ROCSHMEM_HIPMALLOC_GTEST_HPP +#define ROCSHMEM_HIPMALLOC_GTEST_HPP + +#include "gtest/gtest.h" + +#include "../src/memory/symmetric_heap.hpp" +#include "../src/util.hpp" + +namespace rocshmem { + +class HipMallocTestFixture : public ::testing::Test { + public: + HIPAllocator hip_allocator_ {}; + HIPAllocatorFinegrained hip_allocator_fg_ {}; +}; + +} // namespace rocshmem + +#endif // ROCSHMEM_HIPMALLOC_GTEST_HPP diff --git a/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp b/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp new file mode 100644 index 0000000000..dbf72923b5 --- /dev/null +++ b/tests/unit_tests/ipc_impl_simple_coarse_gtest.cpp @@ -0,0 +1,1029 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#include "ipc_impl_simple_coarse_gtest.hpp" + +using namespace rocshmem; + +TEST_F(IPCImplSimpleCoarseTestFixture, ptr_check) { + ASSERT_NE(heap_mem_.get_ptr(), nullptr); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, MPI_num_pes) { + ASSERT_EQ(mpi_.num_pes(), 2); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, IPC_bases) { + for(int i{0}; i < mpi_.num_pes(); i++) { + ASSERT_NE(ipc_impl_.ipc_bases[i], nullptr); + } +} + +TEST_F(IPCImplSimpleCoarseTestFixture, golden_1048576_int) { + iota_golden(1048576); + validate_golden(1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_1024x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + write_wg(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_1024x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + read_wg(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_128x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {128,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_256x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {256,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_512x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {512,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_768x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {768,1,1}; + write_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wg_1x1x1_1024x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + write_wg(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_128x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {128,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_256x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {256,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_512x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {512,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_768x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {768,1,1}; + read_wg(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wg_1x1x1_1024x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1024,1,1}; + read_wg(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_64x1x1_1_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 1); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_64x1x1_1_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 1); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_64x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_64x1x1_32_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 32); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_3x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {3,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_5x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {5,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_6x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {6,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_7x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {7,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_9x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {9,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_10x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {10,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_11x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {11,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_12x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {12,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_13x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {13,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_14x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {14,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_15x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {15,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_17x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {17,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_18x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {18,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_19x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {19,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_20x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {20,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_21x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {21,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_22x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {22,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_23x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {23,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_24x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {24,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_25x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {25,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_26x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {26,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_27x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {27,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_28x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {28,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_29x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {29,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_30x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {30,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_31x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {31,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_33x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {33,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_34x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {34,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_35x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {35,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_36x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {36,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_37x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {37,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_38x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {38,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_39x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {39,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_40x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {40,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_41x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {41,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_42x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {42,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_43x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {43,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_44x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {44,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_45x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {45,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_46x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {46,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_47x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {47,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_48x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {48,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_49x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {49,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_50x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {50,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_51x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {51,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_52x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {52,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_53x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {53,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_54x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {54,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_55x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {55,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_56x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {56,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_57x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {57,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_58x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {58,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_59x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {59,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_60x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {60,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_61x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {61,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_62x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {62,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_63x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {63,1,1}; + write_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, write_wave_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + write_wave(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_2x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {2,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_3x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {3,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_4x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {4,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_5x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {5,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_6x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {6,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_7x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {7,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_8x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {8,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_9x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {9,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_10x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {10,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_11x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {11,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_12x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {12,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_13x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {13,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_14x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {14,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_15x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {15,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_16x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {16,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_17x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {17,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_18x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {18,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_19x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {19,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_20x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {20,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_21x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {21,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_22x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {22,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_23x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {23,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_24x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {24,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_25x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {25,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_26x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {26,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_27x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {27,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_28x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {28,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_29x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {29,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_30x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {30,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_31x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {31,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_32x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {32,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_33x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {33,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_34x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {34,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_35x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {35,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_36x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {36,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_37x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {37,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_38x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {38,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_39x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {39,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_40x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {40,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_41x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {41,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_42x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {42,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_43x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {43,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_44x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {44,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_45x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {45,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_46x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {46,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_47x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {47,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_48x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {48,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_49x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {49,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_50x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {50,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_51x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {51,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_52x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {52,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_53x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {53,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_54x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {54,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_55x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {55,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_56x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {56,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_57x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {57,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_58x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {58,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_59x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {59,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_60x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {60,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_61x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {61,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_62x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {62,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_63x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {63,1,1}; + read_wave(grid, block, 1048576); +} + +TEST_F(IPCImplSimpleCoarseTestFixture, read_wave_1x1x1_64x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {64,1,1}; + read_wave(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, write_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + write(grid, block, 1048576); +} + +//============================================================================= + +TEST_F(IPCImplSimpleCoarseTestFixture, read_1x1x1_1x1x1_1048576_int) { + dim3 grid {1,1,1}; + dim3 block {1,1,1}; + read(grid, block, 1048576); +} + diff --git a/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp b/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp new file mode 100644 index 0000000000..f31f406416 --- /dev/null +++ b/tests/unit_tests/ipc_impl_simple_coarse_gtest.hpp @@ -0,0 +1,236 @@ +/****************************************************************************** + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP +#define ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP + +#include "gtest/gtest.h" + +#include + +#include +#include "../src/memory/symmetric_heap.hpp" +#include "../src/ipc/ipc_policy.hpp" + +namespace rocshmem { + +__global__ +void +kernel_simple_coarse_copy(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) { + if (!threadIdx.x) { + ipc_impl->ipcCopy(dest, src, bytes); + ipc_impl->ipcFence(); + } + __syncthreads(); +} + +__global__ +void +kernel_simple_coarse_copy_wg(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) { + ipc_impl->ipcCopy_wg(dest, src, bytes); + ipc_impl->ipcFence(); + __syncthreads(); +} + +__global__ +void +kernel_simple_coarse_copy_wave(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) { + ipc_impl->ipcCopy_wave(dest, src, bytes); + ipc_impl->ipcFence(); + __syncthreads(); +} + +class IPCImplSimpleCoarseTestFixture : public ::testing::Test { + + using HEAP_T = HeapMemory; + + using MPI_T = RemoteHeapInfo; + + using FN_T = void (*)(IpcImpl*, int*, int*, size_t); + + public: + IPCImplSimpleCoarseTestFixture() { + ipc_impl_.ipcHostInit(mpi_.my_pe(), mpi_.get_heap_bases() , MPI_COMM_WORLD); + + assert(ipc_impl_dptr_ == nullptr); + hip_allocator_.allocate((void**)&ipc_impl_dptr_, sizeof(IpcImpl)); + + CHECK_HIP(hipMemcpy(ipc_impl_dptr_, &ipc_impl_, + sizeof(IpcImpl), hipMemcpyHostToDevice)); + } + + ~IPCImplSimpleCoarseTestFixture() { + if (ipc_impl_dptr_) { + hip_allocator_.deallocate(ipc_impl_dptr_); + } + + ipc_impl_.ipcHostStop(); + } + + void launch(FN_T f, const dim3 grid, const dim3 block, int* src, int* dest, size_t bytes) { + f<<>>(ipc_impl_dptr_, src, dest, bytes); + CHECK_HIP(hipStreamSynchronize(nullptr)); + } + + enum TestType { + READ = 0, + WRITE = 1 + }; + + void write(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(WRITE); + copy(WRITE, grid, block); + validate_dest_buffer(WRITE); + } + + void write_wg(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(WRITE); + copy_wg(WRITE, grid, block); + validate_dest_buffer(WRITE); + } + + void write_wave(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(WRITE); + copy_wave(WRITE, grid, block); + validate_dest_buffer(WRITE); + } + + void read(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(READ); + copy(READ, grid, block); + validate_dest_buffer(READ); + } + + void read_wg(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(READ); + copy_wg(READ, grid, block); + validate_dest_buffer(READ); + } + + void read_wave(const dim3 grid, const dim3 block, size_t elems) { + iota_golden(elems); + initialize_src_buffer(READ); + copy_wave(READ, grid, block); + validate_dest_buffer(READ); + } + + void iota_golden(size_t elems) { + golden_.resize(elems); + std::iota(golden_.begin(), golden_.end(), 0); + } + + void validate_golden(size_t elems) { + ASSERT_EQ(golden_.size(), elems); + for (int i{0}; i < golden_.size(); i++) { + ASSERT_EQ(golden_[i], i); + } + } + + void initialize_src_buffer(TestType test) { + if (!pe_initializes_src_buffer(test)) { + return; + } + size_t bytes = golden_.size() * sizeof(int); + auto dev_src = reinterpret_cast(ipc_impl_.ipc_bases[mpi_.my_pe()]); + CHECK_HIP(hipMemcpy(dev_src, golden_.data(), bytes, hipMemcpyHostToDevice)); + CHECK_HIP(hipStreamSynchronize(nullptr)); + } + + bool pe_initializes_src_buffer(TestType test) { + bool is_write_test = test; + bool is_read_test = !test; + return (is_write_test && mpi_.my_pe() == 0) || + (is_read_test && mpi_.my_pe() == 1); + } + + void execute(TestType test, FN_T fn, const dim3 grid, const dim3 block) { + if (mpi_.my_pe()) { + mpi_.barrier(); + mpi_.barrier(); + return; + } + int *src{nullptr}; + int *dest{nullptr}; + if (test == WRITE) { + src = reinterpret_cast(ipc_impl_.ipc_bases[0]); + dest = reinterpret_cast(ipc_impl_.ipc_bases[1]); + } else { + src = reinterpret_cast(ipc_impl_.ipc_bases[1]); + dest = reinterpret_cast(ipc_impl_.ipc_bases[0]); + } + size_t bytes = golden_.size() * sizeof(int); + mpi_.barrier(); + launch(fn, grid, block, src, dest, bytes); + mpi_.barrier(); + } + + void copy(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_coarse_copy, grid, block); + } + + void copy_wg(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_coarse_copy_wg, grid, block); + } + + void copy_wave(TestType test, dim3 grid, dim3 block) { + execute(test, kernel_simple_coarse_copy_wave, grid, block); + } + + void validate_dest_buffer(TestType test) { + if (!pe_validates_dest_buffer(test)) { + return; + } + + auto dev_dest = reinterpret_cast(ipc_impl_.ipc_bases[mpi_.my_pe()]); + for (int i{0}; i < golden_.size(); i++) { + ASSERT_EQ(golden_[i], dev_dest[i]); + } + } + + bool pe_validates_dest_buffer(TestType test) { + return !pe_initializes_src_buffer(test); + } + + protected: + std::vector golden_; + + std::vector output_; + + HEAP_T heap_mem_ {}; + + MPI_T mpi_ {heap_mem_.get_ptr(), heap_mem_.get_size()}; + + IpcImpl ipc_impl_ {}; + + IpcImpl *ipc_impl_dptr_ {nullptr}; + + HIPAllocator hip_allocator_ {}; +}; + +} // namespace rocshmem + +#endif // ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP