Merge pull request #16 from BKP/ipc_bringup_coarse_unit_08-07-24

Create unit tests for simple buffer transfers using coarse-grained memory
Этот коммит содержится в:
Avinash Kethineedi
2024-08-07 15:05:30 -05:00
коммит произвёл GitHub
родитель d21d5aa431 58c5a98b5d
Коммит 733cb09e2a
9 изменённых файлов: 1388 добавлений и 9 удалений
+1 -1
Просмотреть файл
@@ -18,7 +18,7 @@ cmake \
-DPROFILE=OFF \
-DUSE_GPU_IB=OFF \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_IPC=ON \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
-DUSE_COHERENT_HEAP=ON \
+9 -2
Просмотреть файл
@@ -50,7 +50,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
/*
* Figure out how this process' rank among local processes.
*/
int shm_rank;
MPI_Comm_rank(shmcomm, &shm_rank);
/*
@@ -92,7 +91,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
void **ipc_base_uncast = reinterpret_cast<void **>(&ipc_base[i]);
CHECK_HIP(hipIpcOpenMemHandle(ipc_base_uncast, vec_ipc_handle[i],
hipIpcMemLazyEnablePeerAccess));
// TODO(bpotter): add some error checking here if happens to fail
} else {
ipc_base[i] = base_heap;
}
@@ -110,6 +108,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
free(vec_ipc_handle);
}
__host__ void IpcOnImpl::ipcHostStop() {
for (size_t i = 0; i < shm_size; i++) {
if (i != shm_rank) {
CHECK_HIP(hipIpcCloseMemHandle(ipc_bases[i]));
}
}
CHECK_HIP(hipFree(ipc_bases));
}
__device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) {
memcpy(dst, src, size);
}
+6
Просмотреть файл
@@ -42,6 +42,8 @@ class IpcOnImpl {
using HEAP_BASES_T = std::vector<char *, StdAllocatorHIP<char *>>;
public:
int shm_rank{0};
uint32_t shm_size{0};
char **ipc_bases{nullptr};
@@ -49,6 +51,8 @@ class IpcOnImpl {
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
MPI_Comm thread_comm);
__host__ void ipcHostStop();
__device__ bool isIpcAvailable(int my_pe, int target_pe) {
return my_pe / shm_size == target_pe / shm_size;
}
@@ -115,6 +119,8 @@ class IpcOffImpl {
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
MPI_Comm thread_comm) {}
__host__ void ipcHostStop() {}
__device__ bool isIpcAvailable(int my_pe, int target_pe) { return false; }
__device__ void ipcGpuInit(Backend *roc_shmem_handle, Context *ctx,
+21 -6
Просмотреть файл
@@ -302,19 +302,34 @@ __device__ __forceinline__ void memcpy_wg(void* dst, void* src, size_t size) {
}
__device__ __forceinline__ void memcpy_wave(void* dst, void* src, size_t size) {
uint8_t* dst_bytes{static_cast<uint8_t*>(dst)};
uint8_t* src_bytes{static_cast<uint8_t*>(src)};
int wave_tid = get_flat_block_id() % WF_SIZE;
int wave_size{wave_SZ()};
int cpy_size{};
int thread_id{get_flat_block_id()};
uint8_t* dst_bytes{nullptr};
uint8_t* dst_def{nullptr};
uint8_t* src_bytes{nullptr};
uint8_t* src_def{nullptr};
dst_def = reinterpret_cast<uint8_t*>(dst);
src_def = reinterpret_cast<uint8_t*>(src);
dst_bytes = dst_def;
src_bytes = src_def;
for (int j{8}; j > 1; j >>= 1) {
cpy_size = size / j;
for (int i{thread_id}; i < cpy_size; i += WF_SIZE) {
store_asm(src_bytes, dst_bytes, j);
for (int i{wave_tid}; i < cpy_size; i += wave_size) {
dst_bytes = dst_def;
src_bytes = src_def;
src_bytes += i * j;
dst_bytes += i * j;
size -= cpy_size * j;
store_asm(src_bytes, dst_bytes, j);
}
size -= cpy_size * j;
dst_def += cpy_size * j;
src_def += cpy_size * j;
}
if (size == 1) {
+2
Просмотреть файл
@@ -71,6 +71,7 @@ target_sources(
PRIVATE
shmem_gtest.cpp
heap_memory_gtest.cpp
hipmalloc_gtest.cpp
bin_gtest.cpp
binner_gtest.cpp
#bitwise_gtest.cpp # Test is disabled becasue of compilation errors
@@ -88,6 +89,7 @@ target_sources(
#forward_list_gtest.cpp
free_list_gtest.cpp
context_ipc_gtest.cpp
ipc_impl_simple_coarse_gtest.cpp
)
###############################################################################
+43
Просмотреть файл
@@ -0,0 +1,43 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "hipmalloc_gtest.hpp"
using namespace rocshmem;
TEST_F(HipMallocTestFixture, normal_1GBx256) {
void* ptr{nullptr};
size_t gb {1073741824};
for (int i{0}; i < 256; i++) {
hip_allocator_.allocate(&ptr, gb);
hip_allocator_.deallocate(ptr);
}
}
TEST_F(HipMallocTestFixture, fine_1GBx256) {
void* ptr{nullptr};
size_t gb {1073741824};
for (int i{0}; i < 256; i++) {
hip_allocator_fg_.allocate(&ptr, gb);
hip_allocator_fg_.deallocate(ptr);
}
}
+41
Просмотреть файл
@@ -0,0 +1,41 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef ROCSHMEM_HIPMALLOC_GTEST_HPP
#define ROCSHMEM_HIPMALLOC_GTEST_HPP
#include "gtest/gtest.h"
#include "../src/memory/symmetric_heap.hpp"
#include "../src/util.hpp"
namespace rocshmem {
class HipMallocTestFixture : public ::testing::Test {
public:
HIPAllocator hip_allocator_ {};
HIPAllocatorFinegrained hip_allocator_fg_ {};
};
} // namespace rocshmem
#endif // ROCSHMEM_HIPMALLOC_GTEST_HPP
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+236
Просмотреть файл
@@ -0,0 +1,236 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP
#define ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP
#include "gtest/gtest.h"
#include <numeric>
#include <mpi.h>
#include "../src/memory/symmetric_heap.hpp"
#include "../src/ipc/ipc_policy.hpp"
namespace rocshmem {
__global__
void
kernel_simple_coarse_copy(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
if (!threadIdx.x) {
ipc_impl->ipcCopy(dest, src, bytes);
ipc_impl->ipcFence();
}
__syncthreads();
}
__global__
void
kernel_simple_coarse_copy_wg(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
ipc_impl->ipcCopy_wg(dest, src, bytes);
ipc_impl->ipcFence();
__syncthreads();
}
__global__
void
kernel_simple_coarse_copy_wave(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
ipc_impl->ipcCopy_wave(dest, src, bytes);
ipc_impl->ipcFence();
__syncthreads();
}
class IPCImplSimpleCoarseTestFixture : public ::testing::Test {
using HEAP_T = HeapMemory<HIPAllocator>;
using MPI_T = RemoteHeapInfo<CommunicatorMPI>;
using FN_T = void (*)(IpcImpl*, int*, int*, size_t);
public:
IPCImplSimpleCoarseTestFixture() {
ipc_impl_.ipcHostInit(mpi_.my_pe(), mpi_.get_heap_bases() , MPI_COMM_WORLD);
assert(ipc_impl_dptr_ == nullptr);
hip_allocator_.allocate((void**)&ipc_impl_dptr_, sizeof(IpcImpl));
CHECK_HIP(hipMemcpy(ipc_impl_dptr_, &ipc_impl_,
sizeof(IpcImpl), hipMemcpyHostToDevice));
}
~IPCImplSimpleCoarseTestFixture() {
if (ipc_impl_dptr_) {
hip_allocator_.deallocate(ipc_impl_dptr_);
}
ipc_impl_.ipcHostStop();
}
void launch(FN_T f, const dim3 grid, const dim3 block, int* src, int* dest, size_t bytes) {
f<<<grid, block>>>(ipc_impl_dptr_, src, dest, bytes);
CHECK_HIP(hipStreamSynchronize(nullptr));
}
enum TestType {
READ = 0,
WRITE = 1
};
void write(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(WRITE);
copy(WRITE, grid, block);
validate_dest_buffer(WRITE);
}
void write_wg(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(WRITE);
copy_wg(WRITE, grid, block);
validate_dest_buffer(WRITE);
}
void write_wave(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(WRITE);
copy_wave(WRITE, grid, block);
validate_dest_buffer(WRITE);
}
void read(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(READ);
copy(READ, grid, block);
validate_dest_buffer(READ);
}
void read_wg(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(READ);
copy_wg(READ, grid, block);
validate_dest_buffer(READ);
}
void read_wave(const dim3 grid, const dim3 block, size_t elems) {
iota_golden(elems);
initialize_src_buffer(READ);
copy_wave(READ, grid, block);
validate_dest_buffer(READ);
}
void iota_golden(size_t elems) {
golden_.resize(elems);
std::iota(golden_.begin(), golden_.end(), 0);
}
void validate_golden(size_t elems) {
ASSERT_EQ(golden_.size(), elems);
for (int i{0}; i < golden_.size(); i++) {
ASSERT_EQ(golden_[i], i);
}
}
void initialize_src_buffer(TestType test) {
if (!pe_initializes_src_buffer(test)) {
return;
}
size_t bytes = golden_.size() * sizeof(int);
auto dev_src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[mpi_.my_pe()]);
CHECK_HIP(hipMemcpy(dev_src, golden_.data(), bytes, hipMemcpyHostToDevice));
CHECK_HIP(hipStreamSynchronize(nullptr));
}
bool pe_initializes_src_buffer(TestType test) {
bool is_write_test = test;
bool is_read_test = !test;
return (is_write_test && mpi_.my_pe() == 0) ||
(is_read_test && mpi_.my_pe() == 1);
}
void execute(TestType test, FN_T fn, const dim3 grid, const dim3 block) {
if (mpi_.my_pe()) {
mpi_.barrier();
mpi_.barrier();
return;
}
int *src{nullptr};
int *dest{nullptr};
if (test == WRITE) {
src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[0]);
dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[1]);
} else {
src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[1]);
dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[0]);
}
size_t bytes = golden_.size() * sizeof(int);
mpi_.barrier();
launch(fn, grid, block, src, dest, bytes);
mpi_.barrier();
}
void copy(TestType test, dim3 grid, dim3 block) {
execute(test, kernel_simple_coarse_copy, grid, block);
}
void copy_wg(TestType test, dim3 grid, dim3 block) {
execute(test, kernel_simple_coarse_copy_wg, grid, block);
}
void copy_wave(TestType test, dim3 grid, dim3 block) {
execute(test, kernel_simple_coarse_copy_wave, grid, block);
}
void validate_dest_buffer(TestType test) {
if (!pe_validates_dest_buffer(test)) {
return;
}
auto dev_dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[mpi_.my_pe()]);
for (int i{0}; i < golden_.size(); i++) {
ASSERT_EQ(golden_[i], dev_dest[i]);
}
}
bool pe_validates_dest_buffer(TestType test) {
return !pe_initializes_src_buffer(test);
}
protected:
std::vector<int> golden_;
std::vector<int> output_;
HEAP_T heap_mem_ {};
MPI_T mpi_ {heap_mem_.get_ptr(), heap_mem_.get_size()};
IpcImpl ipc_impl_ {};
IpcImpl *ipc_impl_dptr_ {nullptr};
HIPAllocator hip_allocator_ {};
};
} // namespace rocshmem
#endif // ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP