Updated IPC detection logic (#51)

* Added environment variable to enable/disable IPC at runtime

* Fixed IPC detection logic allow for difference process mappings

* Updated README.md
Cette révision appartient à :
Yiltan
2025-03-17 11:36:11 -04:00
révisé par GitHub
Parent df4ad2c04d
révision b7f3839f27
7 fichiers modifiés avec 47 ajouts et 1 suppressions
+3
Voir le fichier
@@ -106,6 +106,9 @@ rocSHMEM has the following enviroment variables:
ROCSHMEM_HEAP_SIZE (default : 1 GB)
Defines the size of the rocSHMEM symmetric heap
Note the heap is on the GPU memory.
ROCSHMEM_RO_DISABLE_IPC (default : 0)
Disables IPC support for the reverse offload backend.
```
## Examples
+13
Voir le fichier
@@ -106,6 +106,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
* addresses.
*/
free(vec_ipc_handle);
if (0 == rocshmem_env_config.ro_disable_ipc) {
int thread_comm_rank;
CHECK_HIP(hipMalloc(reinterpret_cast<void**>(&pes_with_ipc_avail), shm_size * sizeof(int)));
MPI_Comm_rank(thread_comm, &thread_comm_rank);
MPI_Allgather(&thread_comm_rank, 1, MPI_INT, pes_with_ipc_avail, 1, MPI_INT, shmcomm);
}
}
__host__ void IpcOnImpl::ipcHostStop() {
@@ -115,6 +124,10 @@ __host__ void IpcOnImpl::ipcHostStop() {
}
}
CHECK_HIP(hipFree(ipc_bases));
if (nullptr != pes_with_ipc_avail) {
CHECK_HIP(hipFree(pes_with_ipc_avail));
}
}
__device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) {
+10 -1
Voir le fichier
@@ -48,14 +48,23 @@ class IpcOnImpl {
char **ipc_bases{nullptr};
int *pes_with_ipc_avail{nullptr};
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
MPI_Comm thread_comm);
__host__ void ipcHostStop();
__device__ bool isIpcAvailable(int my_pe, int target_pe) {
return my_pe / shm_size == target_pe / shm_size;
if (nullptr == pes_with_ipc_avail) { return false; }
for (int i=0; i<shm_size; i++) {
if (pes_with_ipc_avail[i] == target_pe) { return true; }
}
return false;
}
__device__ void ipcGpuInit(Backend *gpu_backend, Context *ctx, int thread_id);
__device__ void ipcCopy(void *dst, void *src, size_t size);
+1
Voir le fichier
@@ -54,6 +54,7 @@ __host__ ROContext::ROContext(Backend *b, size_t block_id)
ipcImpl_.ipc_bases = b->ipcImpl.ipc_bases;
ipcImpl_.shm_size = b->ipcImpl.shm_size;
ipcImpl_.pes_with_ipc_avail = b->ipcImpl.pes_with_ipc_avail;
}
__device__ void ROContext::putmem(void *dest, const void *source, size_t nelems,
+2
Voir le fichier
@@ -84,6 +84,8 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
rocm_init();
rocshmem_env_config_init();
#ifdef USE_GPU_IB
CHECK_HIP(hipHostMalloc(&backend, sizeof(GPUIBBackend)));
backend = new (backend) GPUIBBackend(comm);
+11
Voir le fichier
@@ -146,4 +146,15 @@ uint64_t wallClk_freq_mhz() {
return 0;
}
struct rocshmem_env_config_t rocshmem_env_config;
void rocshmem_env_config_init(void) {
char* env_value = NULL;
env_value = getenv("ROCSHMEM_RO_DISABLE_IPC");
if (NULL != env_value) {
rocshmem_env_config.ro_disable_ipc = atoi(env_value);
}
}
} // namespace rocshmem
+7
Voir le fichier
@@ -269,6 +269,13 @@ void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr,
// Returns clock frequency used by s_memrealtime() in Mhz
uint64_t wallClk_freq_mhz();
struct rocshmem_env_config_t {
int ro_disable_ipc = 0;
};
extern struct rocshmem_env_config_t rocshmem_env_config;
void rocshmem_env_config_init(void);
} // namespace rocshmem
#endif // LIBRARY_SRC_UTIL_HPP_