From 72286016c6d137447828c9acda2ec721363a1f82 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Sat, 24 Jan 2026 13:31:10 -0600 Subject: [PATCH] add additional runtime checks and gfx1201 fix (#2806) * add additional runtime checks and gfx1201 fix This commit contains three fixes: - increase the max. number of files at the beginning of the run to the max. allowed by the system - check for large BAR support. WE don not abort if its not available, but print a warning. - for gfx1201, do not use uncached memory at the moment. * Change get_arch_name to return const char* Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix C++ new syntax not sure how it compiled before Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * use snprintf instead of strncpy Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * destructor cleanip Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * add const keyword --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- projects/rocshmem/src/bootstrap/bootstrap.cpp | 14 ----------- projects/rocshmem/src/ipc/backend_ipc.cpp | 25 +++++++++++++++---- projects/rocshmem/src/ipc/backend_ipc.hpp | 2 +- projects/rocshmem/src/rocshmem.cpp | 16 ++++++++++++ projects/rocshmem/src/util.cpp | 10 ++++++++ projects/rocshmem/src/util.hpp | 6 +++++ 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/projects/rocshmem/src/bootstrap/bootstrap.cpp b/projects/rocshmem/src/bootstrap/bootstrap.cpp index f20d444413..40b3985199 100644 --- a/projects/rocshmem/src/bootstrap/bootstrap.cpp +++ b/projects/rocshmem/src/bootstrap/bootstrap.cpp @@ -37,19 +37,6 @@ namespace rocshmem { -static void setFilesLimit() { - rlimit filesLimit; - if (getrlimit(RLIMIT_NOFILE, &filesLimit) != 0) { - DPRINTF("getrlimit failed\n"); - return; - } - filesLimit.rlim_cur = filesLimit.rlim_max; - if (setrlimit(RLIMIT_NOFILE, &filesLimit) != 0) { - DPRINTF("setrlimit failed\n"); - return; - } -} - /* Socket Interface Selection type */ enum bootstrapInterface_t { findSubnetIf = -1, dontCareIf = -2 }; @@ -391,7 +378,6 @@ void TcpBootstrap::Impl::bootstrapRoot() { std::memset(rankAddresses.data(), 0, sizeof(SocketAddress) * nRanks_); std::memset(rankAddressesRoot.data(), 0, sizeof(SocketAddress) * nRanks_); - setFilesLimit(); DPRINTF("BEGIN bootstrapRoot\n"); /* Receive addresses from all ranks */ diff --git a/projects/rocshmem/src/ipc/backend_ipc.cpp b/projects/rocshmem/src/ipc/backend_ipc.cpp index 1e732c8d68..6eb46810f7 100644 --- a/projects/rocshmem/src/ipc/backend_ipc.cpp +++ b/projects/rocshmem/src/ipc/backend_ipc.cpp @@ -112,6 +112,13 @@ IPCBackend::IPCBackend(TcpBootstrap *bootstrap): Backend(bootstrap) { void IPCBackend::init() { ROCSHMEM_HOST_CTX_DEFAULT.ctx_opaque = default_host_ctx.get(); + const char *arch_name = get_arch_name(hip_dev_id); + if (strncmp(arch_name, "gfx1201", strlen("gfx1201")) == 0) { + fine_grained_allocator_ = new HIPAllocatorFinegrained(); + } else { + fine_grained_allocator_ = new HIPDefaultFinegrainedAllocator(); + } + setup_team_world(); setup_wrk_sync_buffers(); @@ -141,6 +148,14 @@ IPCBackend::~IPCBackend() { CHECK_HIP(hipFree(team_world)); CHECK_HIP(hipFree(ctx_array)); + if (fine_grained_allocator_) { + const char *arch_name = get_arch_name(hip_dev_id); + if (strncmp(arch_name, "gfx1201", strlen("gfx1201")) == 0) { + delete static_cast(fine_grained_allocator_); + } else { + delete static_cast(fine_grained_allocator_); + } + } } void IPCBackend::setup_ctxs() { @@ -368,8 +383,8 @@ void IPCBackend::setup_wrk_sync_buffers() { * Allocate a buffer of size wrk_sync_pool_size_, using fine-grained * memory allocator */ - fine_grained_allocator_.allocate((void**)&wrk_sync_pool_, - wrk_sync_pool_size_); + fine_grained_allocator_->allocate((void**)&wrk_sync_pool_, + wrk_sync_pool_size_); assert(wrk_sync_pool_); wrk_sync_pool_top_ = wrk_sync_pool_; @@ -400,7 +415,7 @@ void IPCBackend::setup_wrk_sync_buffers() { * Allocate device-side fine grained memory to hold IPC addresses of * work/sync buffers */ - fine_grained_allocator_.allocate( + fine_grained_allocator_->allocate( reinterpret_cast(&wrk_sync_pool_bases_), num_pes * sizeof(char*)); assert(wrk_sync_pool_bases_); @@ -427,8 +442,8 @@ void IPCBackend::cleanup_wrk_sync_buffer() { CHECK_HIP(hipIpcCloseMemHandle(wrk_sync_pool_bases_[i])); } } - fine_grained_allocator_.deallocate(wrk_sync_pool_bases_); - fine_grained_allocator_.deallocate(wrk_sync_pool_); + fine_grained_allocator_->deallocate(wrk_sync_pool_bases_); + fine_grained_allocator_->deallocate(wrk_sync_pool_); } void IPCBackend::setup_fence_buffer() { diff --git a/projects/rocshmem/src/ipc/backend_ipc.hpp b/projects/rocshmem/src/ipc/backend_ipc.hpp index 14617f984f..32ce351961 100644 --- a/projects/rocshmem/src/ipc/backend_ipc.hpp +++ b/projects/rocshmem/src/ipc/backend_ipc.hpp @@ -258,7 +258,7 @@ class IPCBackend : public Backend { /** * Fine grained memory allocator for buffers used in collectives Routines */ - HIPDefaultFinegrainedAllocator fine_grained_allocator_ {}; + MemoryAllocator *fine_grained_allocator_{nullptr}; /** * @brief Collective routines work/sync buffer size diff --git a/projects/rocshmem/src/rocshmem.cpp b/projects/rocshmem/src/rocshmem.cpp index fb22aee7fb..90109f4af8 100644 --- a/projects/rocshmem/src/rocshmem.cpp +++ b/projects/rocshmem/src/rocshmem.cpp @@ -60,6 +60,8 @@ #include #include #include +#include +#include namespace rocshmem { @@ -127,6 +129,18 @@ static BackendType select_backend_type() { return BackendType::IPC_BACKEND; } #endif +static void setFilesLimit() { + rlimit filesLimit; + if (getrlimit(RLIMIT_NOFILE, &filesLimit) != 0) { + DPRINTF("getrlimit failed\n"); + return; + } + filesLimit.rlim_cur = filesLimit.rlim_max; + if (setrlimit(RLIMIT_NOFILE, &filesLimit) != 0) { + DPRINTF("setrlimit failed\n"); + return; + } +} [[maybe_unused]] __host__ void inline library_init(MPI_Comm comm) { assert(!backend); @@ -138,6 +152,7 @@ static BackendType select_backend_type() { abort(); } + setFilesLimit(); rocm_init(); int ret; @@ -255,6 +270,7 @@ static BackendType select_backend_type() { abort(); } + setFilesLimit(); rocm_init(); #if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC) diff --git a/projects/rocshmem/src/util.cpp b/projects/rocshmem/src/util.cpp index d3c7de0d1d..298f889a6a 100644 --- a/projects/rocshmem/src/util.cpp +++ b/projects/rocshmem/src/util.cpp @@ -52,11 +52,21 @@ static void device_properties_init(void) { device_prop_t prop; hipDeviceProp_t hipprop; + int has_large_bar = 0; for (int i=0; i device_properties; @@ -171,6 +172,11 @@ static int get_wf_size(int device_id) { return device_properties[device_id].warpSize; } +static const char* get_arch_name(int device_id) { + assert(device_properties.size() > device_id); + return device_properties[device_id].gcnArchName; +} + /* Device-side internal functions */ __device__ __forceinline__ uint32_t lowerID() { return __ffsll(__ballot(1)) - 1;