diff --git a/src/host/host.hpp b/src/host/host.hpp index dbdcec9297..83cc1ceb59 100644 --- a/src/host/host.hpp +++ b/src/host/host.hpp @@ -341,7 +341,7 @@ class HostInterface { /** * @brief Max number of contexts for the application */ - int max_num_ctxs_{40}; + int max_num_ctxs_{1}; /** * @brief Pool of HostContexWindowInfos diff --git a/src/reverse_offload/mpi_transport.cpp b/src/reverse_offload/mpi_transport.cpp index 3b393f0a31..a0e672a88b 100644 --- a/src/reverse_offload/mpi_transport.cpp +++ b/src/reverse_offload/mpi_transport.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "../host/host.hpp" #include "backend_ro.hpp" @@ -591,6 +592,10 @@ void MPITransport::progress() { const int tag{1000}; int flag{0}; MPI_Status status{}; + + // Slow the progress engine down a bit to avoid hammering the memory subsystem. + // This leads to significant performance benefits. The delay is in microseconds. + usleep (rocshmem_env_config.ro_progress_delay); NET_CHECK(MPI_Iprobe(MPI_ANY_SOURCE, tag, ro_net_comm_world, &flag, &status)); } else { DPRINTF("Testing all outstanding requests (%zu)\n", requests.size()); diff --git a/src/util.cpp b/src/util.cpp index e2af50ea64..337f33dc04 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -155,6 +155,12 @@ void rocshmem_env_config_init(void) { if (NULL != env_value) { rocshmem_env_config.ro_disable_ipc = atoi(env_value); } + + env_value = getenv("ROCSHMEM_RO_PROGRESS_DELAY"); + if (nullptr != env_value) { + rocshmem_env_config.ro_progress_delay = atoi(env_value); + } + } } // namespace rocshmem diff --git a/src/util.hpp b/src/util.hpp index 3f47c55d89..fcf3e9e496 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -271,6 +271,7 @@ uint64_t wallClk_freq_mhz(); struct rocshmem_env_config_t { int ro_disable_ipc = 0; + int ro_progress_delay = 3; }; extern struct rocshmem_env_config_t rocshmem_env_config;