From fb7d89c7e65d440aa348b9349563ae48bbff02ac Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Mon, 8 Sep 2025 10:55:06 -0400 Subject: [PATCH] Cleanup/remove a2a buff (#232) * Do not allocate a large unused buffer for alltoall * Remove ATA_MAX_WRKDATA_SIZE from include/rocshmem_common.hpp [ROCm/rocshmem commit: 30ba79bcad7a7a5930b6d2b96ad6cce425dacb61] --- projects/rocshmem/include/rocshmem/rocshmem_common.hpp | 1 - projects/rocshmem/src/gda/backend_gda.cpp | 8 +------- projects/rocshmem/src/gda/gda_team.cpp | 1 - projects/rocshmem/src/ipc/backend_ipc.cpp | 8 +------- projects/rocshmem/src/ipc/ipc_team.cpp | 1 - projects/rocshmem/src/reverse_offload/ro_net_team.hpp | 2 -- 6 files changed, 2 insertions(+), 19 deletions(-) diff --git a/projects/rocshmem/include/rocshmem/rocshmem_common.hpp b/projects/rocshmem/include/rocshmem/rocshmem_common.hpp index bdbe640d85..288aa98292 100644 --- a/projects/rocshmem/include/rocshmem/rocshmem_common.hpp +++ b/projects/rocshmem/include/rocshmem/rocshmem_common.hpp @@ -88,7 +88,6 @@ typedef struct { } rocshmem_team_config_t; constexpr size_t ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE = 1024; -constexpr size_t ROCSHMEM_ATA_MAX_WRKDATA_SIZE = (4 * 1024 * 1024); constexpr size_t ROCSHMEM_BARRIER_SYNC_SIZE = 256; constexpr size_t ROCSHMEM_REDUCE_SYNC_SIZE = 256; // Internally calls sync function, which matches barrier implementation diff --git a/projects/rocshmem/src/gda/backend_gda.cpp b/projects/rocshmem/src/gda/backend_gda.cpp index a7c555a527..fde76b2ecd 100644 --- a/projects/rocshmem/src/gda/backend_gda.cpp +++ b/projects/rocshmem/src/gda/backend_gda.cpp @@ -399,8 +399,7 @@ void GDABackend::setup_wrk_sync_buffer() { * Accommodate largest possible data type for pWrk */ wrk_sync_pool_size_ += sizeof(double) * max_num_teams * - (ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE + - ROCSHMEM_ATA_MAX_WRKDATA_SIZE); + ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE; /** * Size of fence array @@ -479,11 +478,6 @@ void GDABackend::setup_teams() { wrk_sync_pool_top_ += sizeof(double) * ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE * max_num_teams; - - pAta_pool = reinterpret_cast(wrk_sync_pool_top_); - wrk_sync_pool_top_ += sizeof(double) * ROCSHMEM_ATA_MAX_WRKDATA_SIZE - * max_num_teams; - /** * Initialize the sync arrays in the pool with default values. */ diff --git a/projects/rocshmem/src/gda/gda_team.cpp b/projects/rocshmem/src/gda/gda_team.cpp index d419ada7a7..9ca6066450 100644 --- a/projects/rocshmem/src/gda/gda_team.cpp +++ b/projects/rocshmem/src/gda/gda_team.cpp @@ -46,7 +46,6 @@ GDATeam::GDATeam(Backend *backend, TeamInfo *team_info_parent, alltoall_pSync = &(b->alltoall_pSync_pool[pool_index * ROCSHMEM_ALLTOALL_SYNC_SIZE]); pWrk = reinterpret_cast(b->pWrk_pool) + ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE * sizeof(double) * pool_index; - pAta = reinterpret_cast(b->pAta_pool) + ROCSHMEM_ATA_MAX_WRKDATA_SIZE * sizeof(double) * pool_index; } GDATeam::~GDATeam() {} diff --git a/projects/rocshmem/src/ipc/backend_ipc.cpp b/projects/rocshmem/src/ipc/backend_ipc.cpp index 895bf924d1..2d4dfbdf58 100644 --- a/projects/rocshmem/src/ipc/backend_ipc.cpp +++ b/projects/rocshmem/src/ipc/backend_ipc.cpp @@ -356,8 +356,7 @@ void IPCBackend::setup_wrk_sync_buffers() { * Accommodate largest possible data type for pWrk */ wrk_sync_pool_size_ += sizeof(double) * max_num_teams * - (ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE + - ROCSHMEM_ATA_MAX_WRKDATA_SIZE); + ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE; /** * Size of fence array @@ -494,11 +493,6 @@ void IPCBackend::teams_init() { wrk_sync_pool_top_ += sizeof(double) * ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE * max_num_teams; - - pAta_pool = reinterpret_cast(wrk_sync_pool_top_); - wrk_sync_pool_top_ += sizeof(double) * ROCSHMEM_ATA_MAX_WRKDATA_SIZE - * max_num_teams; - /** * Initialize the sync arrays in the pool with default values. */ diff --git a/projects/rocshmem/src/ipc/ipc_team.cpp b/projects/rocshmem/src/ipc/ipc_team.cpp index 757bbe059c..1316063531 100644 --- a/projects/rocshmem/src/ipc/ipc_team.cpp +++ b/projects/rocshmem/src/ipc/ipc_team.cpp @@ -46,7 +46,6 @@ IPCTeam::IPCTeam(Backend *backend, TeamInfo *team_info_parent, alltoall_pSync = &(b->alltoall_pSync_pool[pool_index * ROCSHMEM_ALLTOALL_SYNC_SIZE]); pWrk = reinterpret_cast(b->pWrk_pool) + ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE * sizeof(double) * pool_index; - pAta = reinterpret_cast(b->pAta_pool) + ROCSHMEM_ATA_MAX_WRKDATA_SIZE * sizeof(double) * pool_index; } IPCTeam::~IPCTeam() {} diff --git a/projects/rocshmem/src/reverse_offload/ro_net_team.hpp b/projects/rocshmem/src/reverse_offload/ro_net_team.hpp index 8af113b701..53643b2bd9 100644 --- a/projects/rocshmem/src/reverse_offload/ro_net_team.hpp +++ b/projects/rocshmem/src/reverse_offload/ro_net_team.hpp @@ -27,8 +27,6 @@ #include "team.hpp" -#define MAX_ATA_BUFF_SIZE (1024 * 1024 * 128) - namespace rocshmem { class Backend;