From 12561783de3537f4e1dd0d359c2266f91ceb47f1 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Wed, 26 Mar 2025 21:09:26 -0500 Subject: [PATCH] Performance tuning for inter-node communication (#66) This PR addresses two issues: - reduces the number of contexts supported by the host-interface by default to 1; we are not using those at the moment, and hence we now create fewer MPI_Win objects at startup - introduces a micro-sleep in the RO progress engine in case there are no pending requests. This leads to significant performance improvements observed for inter-node communication with Thor2 NICs. --- src/host/host.hpp | 2 +- src/reverse_offload/mpi_transport.cpp | 5 +++++ src/util.cpp | 6 ++++++ src/util.hpp | 1 + 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/host/host.hpp b/src/host/host.hpp index dbdcec9297..83cc1ceb59 100644 --- a/src/host/host.hpp +++ b/src/host/host.hpp @@ -341,7 +341,7 @@ class HostInterface { /** * @brief Max number of contexts for the application */ - int max_num_ctxs_{40}; + int max_num_ctxs_{1}; /** * @brief Pool of HostContexWindowInfos diff --git a/src/reverse_offload/mpi_transport.cpp b/src/reverse_offload/mpi_transport.cpp index 3b393f0a31..a0e672a88b 100644 --- a/src/reverse_offload/mpi_transport.cpp +++ b/src/reverse_offload/mpi_transport.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "../host/host.hpp" #include "backend_ro.hpp" @@ -591,6 +592,10 @@ void MPITransport::progress() { const int tag{1000}; int flag{0}; MPI_Status status{}; + + // Slowing the progress engine down a bit avoids hammering the memory subsystem. 
+ // This leads to significant performance benefits + usleep (rocshmem_env_config.ro_progress_delay); NET_CHECK(MPI_Iprobe(MPI_ANY_SOURCE, tag, ro_net_comm_world, &flag, &status)); } else { DPRINTF("Testing all outstanding requests (%zu)\n", requests.size()); diff --git a/src/util.cpp b/src/util.cpp index e2af50ea64..337f33dc04 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -155,6 +155,12 @@ void rocshmem_env_config_init(void) { if (NULL != env_value) { rocshmem_env_config.ro_disable_ipc = atoi(env_value); } + + env_value = getenv("ROCSHMEM_RO_PROGRESS_DELAY"); + if (nullptr != env_value) { + rocshmem_env_config.ro_progress_delay = atoi(env_value); + } + } } // namespace rocshmem diff --git a/src/util.hpp b/src/util.hpp index 3f47c55d89..fcf3e9e496 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -271,6 +271,7 @@ uint64_t wallClk_freq_mhz(); struct rocshmem_env_config_t { int ro_disable_ipc = 0; + int ro_progress_delay = 3; }; extern struct rocshmem_env_config_t rocshmem_env_config;