Performance tuning for inter-node communication (#66)

This PR addresses two issues:
 - reduces the default number of contexts supported by the host interface
   to 1. We are not using those at the moment, and hence we now create
   fewer MPI_Win objects at startup
 - introduces a micro-sleep in the RO progress engine in case there are no
   pending requests. This leads to significant performance improvements
   observed for inter-node communication with Thor2 NICs.
This commit is contained in:
Edgar Gabriel
2025-03-26 21:09:26 -05:00
committed by GitHub
parent 607c6bd044
commit 12561783de
4 changed files with 13 additions and 1 deletions
+1 -1
View File
@@ -341,7 +341,7 @@ class HostInterface {
/**
* @brief Max number of contexts for the application
*/
int max_num_ctxs_{40};
int max_num_ctxs_{1};
/**
* @brief Pool of HostContexWindowInfos
+5
View File
@@ -25,6 +25,7 @@
#include <functional>
#include <utility>
#include <vector>
#include <unistd.h>
#include "../host/host.hpp"
#include "backend_ro.hpp"
@@ -591,6 +592,10 @@ void MPITransport::progress() {
const int tag{1000};
int flag{0};
MPI_Status status{};
// Slowing the progress engine down a bit avoids hammering the memory subsystem.
// This leads to significant performance benefits.
usleep (rocshmem_env_config.ro_progress_delay);
NET_CHECK(MPI_Iprobe(MPI_ANY_SOURCE, tag, ro_net_comm_world, &flag, &status));
} else {
DPRINTF("Testing all outstanding requests (%zu)\n", requests.size());
+6
View File
@@ -155,6 +155,12 @@ void rocshmem_env_config_init(void) {
if (NULL != env_value) {
rocshmem_env_config.ro_disable_ipc = atoi(env_value);
}
env_value = getenv("ROCSHMEM_RO_PROGRESS_DELAY");
if (nullptr != env_value) {
rocshmem_env_config.ro_progress_delay = atoi(env_value);
}
}
} // namespace rocshmem
+1
View File
@@ -271,6 +271,7 @@ uint64_t wallClk_freq_mhz();
struct rocshmem_env_config_t {
  // Defaults to 0; rocshmem_env_config_init() replaces it with the atoi()
  // result of an environment value when one is present.
  int ro_disable_ipc{0};
  // Delay passed to usleep() (microseconds) by the RO progress engine before
  // it probes for incoming messages. Defaults to 3; replaced by the atoi()
  // result of ROCSHMEM_RO_PROGRESS_DELAY when that variable is set.
  int ro_progress_delay{3};
};
extern struct rocshmem_env_config_t rocshmem_env_config;