diff --git a/README.md b/README.md index 1dfeb0b0d7..ed8bff6821 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ rocSHMEM has the following enviroment variables: ROCSHMEM_HEAP_SIZE (default : 1 GB) Defines the size of the rocSHMEM symmetric heap Note the heap is on the GPU memory. + + ROCSHMEM_RO_DISABLE_IPC (default : 0) + Disables IPC support for the reverse offload backend. ``` ## Examples diff --git a/src/ipc_policy.cpp b/src/ipc_policy.cpp index 41ec4fa05b..cadd5435ba 100644 --- a/src/ipc_policy.cpp +++ b/src/ipc_policy.cpp @@ -106,6 +106,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, * addresses. */ free(vec_ipc_handle); + + if (0 == rocshmem_env_config.ro_disable_ipc) { + int thread_comm_rank; + + CHECK_HIP(hipMalloc(reinterpret_cast(&pes_with_ipc_avail), shm_size * sizeof(int))); + + MPI_Comm_rank(thread_comm, &thread_comm_rank); + MPI_Allgather(&thread_comm_rank, 1, MPI_INT, pes_with_ipc_avail, 1, MPI_INT, shmcomm); + } } __host__ void IpcOnImpl::ipcHostStop() { @@ -115,6 +124,10 @@ __host__ void IpcOnImpl::ipcHostStop() { } } CHECK_HIP(hipFree(ipc_bases)); + + if (nullptr != pes_with_ipc_avail) { + CHECK_HIP(hipFree(pes_with_ipc_avail)); + } } __device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) { diff --git a/src/ipc_policy.hpp b/src/ipc_policy.hpp index 45d1a004ea..1c44aad67a 100644 --- a/src/ipc_policy.hpp +++ b/src/ipc_policy.hpp @@ -48,14 +48,23 @@ class IpcOnImpl { char **ipc_bases{nullptr}; + int *pes_with_ipc_avail{nullptr}; + __host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases, MPI_Comm thread_comm); __host__ void ipcHostStop(); __device__ bool isIpcAvailable(int my_pe, int target_pe) { - return my_pe / shm_size == target_pe / shm_size; + if (nullptr == pes_with_ipc_avail) { return false; } + + for (int i=0; iipcImpl.ipc_bases; ipcImpl_.shm_size = b->ipcImpl.shm_size; + ipcImpl_.pes_with_ipc_avail = b->ipcImpl.pes_with_ipc_avail; } __device__ void ROContext::putmem(void *dest, const void *source, size_t nelems, diff --git a/src/rocshmem.cpp b/src/rocshmem.cpp index 5b2a595cc4..e9106c8fb4 100644 --- a/src/rocshmem.cpp +++ b/src/rocshmem.cpp @@ -84,6 +84,8 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT; rocm_init(); + rocshmem_env_config_init(); + #ifdef USE_GPU_IB CHECK_HIP(hipHostMalloc(&backend, sizeof(GPUIBBackend))); backend = new (backend) GPUIBBackend(comm); diff --git a/src/util.cpp b/src/util.cpp index e2f05faf4b..e2af50ea64 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -146,4 +146,15 @@ uint64_t wallClk_freq_mhz() { return 0; } +struct rocshmem_env_config_t rocshmem_env_config; + +void rocshmem_env_config_init(void) { + char* env_value = NULL; + + env_value = getenv("ROCSHMEM_RO_DISABLE_IPC"); + if (NULL != env_value) { + rocshmem_env_config.ro_disable_ipc = atoi(env_value); + } +} + } // namespace rocshmem diff --git a/src/util.hpp b/src/util.hpp index 3ca945ec6d..3f47c55d89 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -269,6 +269,13 @@ void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr, // Returns clock frequency used by s_memrealtime() in Mhz uint64_t wallClk_freq_mhz(); +struct rocshmem_env_config_t { + int ro_disable_ipc = 0; +}; +extern struct rocshmem_env_config_t rocshmem_env_config; + +void rocshmem_env_config_init(void); + } // namespace rocshmem #endif // LIBRARY_SRC_UTIL_HPP_