Merge pull request #16 from BKP/ipc_bringup_coarse_unit_08-07-24
Create unit tests for simple buffer transfers using coarse-grained memory
Этот коммит содержится в:
@@ -18,7 +18,7 @@ cmake \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=OFF \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_COHERENT_HEAP=ON \
|
||||
|
||||
@@ -50,7 +50,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
/*
|
||||
* Figure out how this process' rank among local processes.
|
||||
*/
|
||||
int shm_rank;
|
||||
MPI_Comm_rank(shmcomm, &shm_rank);
|
||||
|
||||
/*
|
||||
@@ -92,7 +91,6 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
void **ipc_base_uncast = reinterpret_cast<void **>(&ipc_base[i]);
|
||||
CHECK_HIP(hipIpcOpenMemHandle(ipc_base_uncast, vec_ipc_handle[i],
|
||||
hipIpcMemLazyEnablePeerAccess));
|
||||
// TODO(bpotter): add some error checking here if happens to fail
|
||||
} else {
|
||||
ipc_base[i] = base_heap;
|
||||
}
|
||||
@@ -110,6 +108,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
free(vec_ipc_handle);
|
||||
}
|
||||
|
||||
__host__ void IpcOnImpl::ipcHostStop() {
|
||||
for (size_t i = 0; i < shm_size; i++) {
|
||||
if (i != shm_rank) {
|
||||
CHECK_HIP(hipIpcCloseMemHandle(ipc_bases[i]));
|
||||
}
|
||||
}
|
||||
CHECK_HIP(hipFree(ipc_bases));
|
||||
}
|
||||
|
||||
__device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) {
|
||||
memcpy(dst, src, size);
|
||||
}
|
||||
|
||||
@@ -42,6 +42,8 @@ class IpcOnImpl {
|
||||
using HEAP_BASES_T = std::vector<char *, StdAllocatorHIP<char *>>;
|
||||
|
||||
public:
|
||||
int shm_rank{0};
|
||||
|
||||
uint32_t shm_size{0};
|
||||
|
||||
char **ipc_bases{nullptr};
|
||||
@@ -49,6 +51,8 @@ class IpcOnImpl {
|
||||
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
MPI_Comm thread_comm);
|
||||
|
||||
__host__ void ipcHostStop();
|
||||
|
||||
__device__ bool isIpcAvailable(int my_pe, int target_pe) {
|
||||
return my_pe / shm_size == target_pe / shm_size;
|
||||
}
|
||||
@@ -115,6 +119,8 @@ class IpcOffImpl {
|
||||
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
MPI_Comm thread_comm) {}
|
||||
|
||||
__host__ void ipcHostStop() {}
|
||||
|
||||
__device__ bool isIpcAvailable(int my_pe, int target_pe) { return false; }
|
||||
|
||||
__device__ void ipcGpuInit(Backend *roc_shmem_handle, Context *ctx,
|
||||
|
||||
+21
-6
@@ -302,19 +302,34 @@ __device__ __forceinline__ void memcpy_wg(void* dst, void* src, size_t size) {
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void memcpy_wave(void* dst, void* src, size_t size) {
|
||||
uint8_t* dst_bytes{static_cast<uint8_t*>(dst)};
|
||||
uint8_t* src_bytes{static_cast<uint8_t*>(src)};
|
||||
int wave_tid = get_flat_block_id() % WF_SIZE;
|
||||
int wave_size{wave_SZ()};
|
||||
|
||||
int cpy_size{};
|
||||
int thread_id{get_flat_block_id()};
|
||||
uint8_t* dst_bytes{nullptr};
|
||||
uint8_t* dst_def{nullptr};
|
||||
uint8_t* src_bytes{nullptr};
|
||||
uint8_t* src_def{nullptr};
|
||||
|
||||
dst_def = reinterpret_cast<uint8_t*>(dst);
|
||||
src_def = reinterpret_cast<uint8_t*>(src);
|
||||
dst_bytes = dst_def;
|
||||
src_bytes = src_def;
|
||||
|
||||
for (int j{8}; j > 1; j >>= 1) {
|
||||
cpy_size = size / j;
|
||||
for (int i{thread_id}; i < cpy_size; i += WF_SIZE) {
|
||||
store_asm(src_bytes, dst_bytes, j);
|
||||
for (int i{wave_tid}; i < cpy_size; i += wave_size) {
|
||||
dst_bytes = dst_def;
|
||||
src_bytes = src_def;
|
||||
|
||||
src_bytes += i * j;
|
||||
dst_bytes += i * j;
|
||||
size -= cpy_size * j;
|
||||
|
||||
store_asm(src_bytes, dst_bytes, j);
|
||||
}
|
||||
size -= cpy_size * j;
|
||||
dst_def += cpy_size * j;
|
||||
src_def += cpy_size * j;
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
|
||||
@@ -71,6 +71,7 @@ target_sources(
|
||||
PRIVATE
|
||||
shmem_gtest.cpp
|
||||
heap_memory_gtest.cpp
|
||||
hipmalloc_gtest.cpp
|
||||
bin_gtest.cpp
|
||||
binner_gtest.cpp
|
||||
#bitwise_gtest.cpp # Test is disabled becasue of compilation errors
|
||||
@@ -88,6 +89,7 @@ target_sources(
|
||||
#forward_list_gtest.cpp
|
||||
free_list_gtest.cpp
|
||||
context_ipc_gtest.cpp
|
||||
ipc_impl_simple_coarse_gtest.cpp
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "hipmalloc_gtest.hpp"
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
TEST_F(HipMallocTestFixture, normal_1GBx256) {
|
||||
void* ptr{nullptr};
|
||||
size_t gb {1073741824};
|
||||
for (int i{0}; i < 256; i++) {
|
||||
hip_allocator_.allocate(&ptr, gb);
|
||||
hip_allocator_.deallocate(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(HipMallocTestFixture, fine_1GBx256) {
|
||||
void* ptr{nullptr};
|
||||
size_t gb {1073741824};
|
||||
for (int i{0}; i < 256; i++) {
|
||||
hip_allocator_fg_.allocate(&ptr, gb);
|
||||
hip_allocator_fg_.deallocate(ptr);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef ROCSHMEM_HIPMALLOC_GTEST_HPP
|
||||
#define ROCSHMEM_HIPMALLOC_GTEST_HPP
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../src/memory/symmetric_heap.hpp"
|
||||
#include "../src/util.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
class HipMallocTestFixture : public ::testing::Test {
|
||||
public:
|
||||
HIPAllocator hip_allocator_ {};
|
||||
HIPAllocatorFinegrained hip_allocator_fg_ {};
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // ROCSHMEM_HIPMALLOC_GTEST_HPP
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -0,0 +1,236 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP
|
||||
#define ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <mpi.h>
|
||||
#include "../src/memory/symmetric_heap.hpp"
|
||||
#include "../src/ipc/ipc_policy.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
__global__
|
||||
void
|
||||
kernel_simple_coarse_copy(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
|
||||
if (!threadIdx.x) {
|
||||
ipc_impl->ipcCopy(dest, src, bytes);
|
||||
ipc_impl->ipcFence();
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__global__
|
||||
void
|
||||
kernel_simple_coarse_copy_wg(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
|
||||
ipc_impl->ipcCopy_wg(dest, src, bytes);
|
||||
ipc_impl->ipcFence();
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__global__
|
||||
void
|
||||
kernel_simple_coarse_copy_wave(IpcImpl *ipc_impl, int *src, int *dest, size_t bytes) {
|
||||
ipc_impl->ipcCopy_wave(dest, src, bytes);
|
||||
ipc_impl->ipcFence();
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
class IPCImplSimpleCoarseTestFixture : public ::testing::Test {
|
||||
|
||||
using HEAP_T = HeapMemory<HIPAllocator>;
|
||||
|
||||
using MPI_T = RemoteHeapInfo<CommunicatorMPI>;
|
||||
|
||||
using FN_T = void (*)(IpcImpl*, int*, int*, size_t);
|
||||
|
||||
public:
|
||||
IPCImplSimpleCoarseTestFixture() {
|
||||
ipc_impl_.ipcHostInit(mpi_.my_pe(), mpi_.get_heap_bases() , MPI_COMM_WORLD);
|
||||
|
||||
assert(ipc_impl_dptr_ == nullptr);
|
||||
hip_allocator_.allocate((void**)&ipc_impl_dptr_, sizeof(IpcImpl));
|
||||
|
||||
CHECK_HIP(hipMemcpy(ipc_impl_dptr_, &ipc_impl_,
|
||||
sizeof(IpcImpl), hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
~IPCImplSimpleCoarseTestFixture() {
|
||||
if (ipc_impl_dptr_) {
|
||||
hip_allocator_.deallocate(ipc_impl_dptr_);
|
||||
}
|
||||
|
||||
ipc_impl_.ipcHostStop();
|
||||
}
|
||||
|
||||
void launch(FN_T f, const dim3 grid, const dim3 block, int* src, int* dest, size_t bytes) {
|
||||
f<<<grid, block>>>(ipc_impl_dptr_, src, dest, bytes);
|
||||
CHECK_HIP(hipStreamSynchronize(nullptr));
|
||||
}
|
||||
|
||||
enum TestType {
|
||||
READ = 0,
|
||||
WRITE = 1
|
||||
};
|
||||
|
||||
void write(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(WRITE);
|
||||
copy(WRITE, grid, block);
|
||||
validate_dest_buffer(WRITE);
|
||||
}
|
||||
|
||||
void write_wg(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(WRITE);
|
||||
copy_wg(WRITE, grid, block);
|
||||
validate_dest_buffer(WRITE);
|
||||
}
|
||||
|
||||
void write_wave(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(WRITE);
|
||||
copy_wave(WRITE, grid, block);
|
||||
validate_dest_buffer(WRITE);
|
||||
}
|
||||
|
||||
void read(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(READ);
|
||||
copy(READ, grid, block);
|
||||
validate_dest_buffer(READ);
|
||||
}
|
||||
|
||||
void read_wg(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(READ);
|
||||
copy_wg(READ, grid, block);
|
||||
validate_dest_buffer(READ);
|
||||
}
|
||||
|
||||
void read_wave(const dim3 grid, const dim3 block, size_t elems) {
|
||||
iota_golden(elems);
|
||||
initialize_src_buffer(READ);
|
||||
copy_wave(READ, grid, block);
|
||||
validate_dest_buffer(READ);
|
||||
}
|
||||
|
||||
void iota_golden(size_t elems) {
|
||||
golden_.resize(elems);
|
||||
std::iota(golden_.begin(), golden_.end(), 0);
|
||||
}
|
||||
|
||||
void validate_golden(size_t elems) {
|
||||
ASSERT_EQ(golden_.size(), elems);
|
||||
for (int i{0}; i < golden_.size(); i++) {
|
||||
ASSERT_EQ(golden_[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
void initialize_src_buffer(TestType test) {
|
||||
if (!pe_initializes_src_buffer(test)) {
|
||||
return;
|
||||
}
|
||||
size_t bytes = golden_.size() * sizeof(int);
|
||||
auto dev_src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[mpi_.my_pe()]);
|
||||
CHECK_HIP(hipMemcpy(dev_src, golden_.data(), bytes, hipMemcpyHostToDevice));
|
||||
CHECK_HIP(hipStreamSynchronize(nullptr));
|
||||
}
|
||||
|
||||
bool pe_initializes_src_buffer(TestType test) {
|
||||
bool is_write_test = test;
|
||||
bool is_read_test = !test;
|
||||
return (is_write_test && mpi_.my_pe() == 0) ||
|
||||
(is_read_test && mpi_.my_pe() == 1);
|
||||
}
|
||||
|
||||
void execute(TestType test, FN_T fn, const dim3 grid, const dim3 block) {
|
||||
if (mpi_.my_pe()) {
|
||||
mpi_.barrier();
|
||||
mpi_.barrier();
|
||||
return;
|
||||
}
|
||||
int *src{nullptr};
|
||||
int *dest{nullptr};
|
||||
if (test == WRITE) {
|
||||
src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[0]);
|
||||
dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[1]);
|
||||
} else {
|
||||
src = reinterpret_cast<int*>(ipc_impl_.ipc_bases[1]);
|
||||
dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[0]);
|
||||
}
|
||||
size_t bytes = golden_.size() * sizeof(int);
|
||||
mpi_.barrier();
|
||||
launch(fn, grid, block, src, dest, bytes);
|
||||
mpi_.barrier();
|
||||
}
|
||||
|
||||
void copy(TestType test, dim3 grid, dim3 block) {
|
||||
execute(test, kernel_simple_coarse_copy, grid, block);
|
||||
}
|
||||
|
||||
void copy_wg(TestType test, dim3 grid, dim3 block) {
|
||||
execute(test, kernel_simple_coarse_copy_wg, grid, block);
|
||||
}
|
||||
|
||||
void copy_wave(TestType test, dim3 grid, dim3 block) {
|
||||
execute(test, kernel_simple_coarse_copy_wave, grid, block);
|
||||
}
|
||||
|
||||
void validate_dest_buffer(TestType test) {
|
||||
if (!pe_validates_dest_buffer(test)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto dev_dest = reinterpret_cast<int*>(ipc_impl_.ipc_bases[mpi_.my_pe()]);
|
||||
for (int i{0}; i < golden_.size(); i++) {
|
||||
ASSERT_EQ(golden_[i], dev_dest[i]);
|
||||
}
|
||||
}
|
||||
|
||||
bool pe_validates_dest_buffer(TestType test) {
|
||||
return !pe_initializes_src_buffer(test);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<int> golden_;
|
||||
|
||||
std::vector<int> output_;
|
||||
|
||||
HEAP_T heap_mem_ {};
|
||||
|
||||
MPI_T mpi_ {heap_mem_.get_ptr(), heap_mem_.get_size()};
|
||||
|
||||
IpcImpl ipc_impl_ {};
|
||||
|
||||
IpcImpl *ipc_impl_dptr_ {nullptr};
|
||||
|
||||
HIPAllocator hip_allocator_ {};
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // ROCSHMEM_IPC_IMPL_SIMPLE_COARSE_GTEST_HPP
|
||||
Ссылка в новой задаче
Block a user