Updated IPC detection logic (#51)
* Added environment variable to enable/disable IPC at runtime * Fixed IPC detection logic allow for difference process mappings * Updated README.md
Cette révision appartient à :
@@ -106,6 +106,9 @@ rocSHMEM has the following enviroment variables:
|
||||
ROCSHMEM_HEAP_SIZE (default : 1 GB)
|
||||
Defines the size of the rocSHMEM symmetric heap
|
||||
Note the heap is on the GPU memory.
|
||||
|
||||
ROCSHMEM_RO_DISABLE_IPC (default : 0)
|
||||
Disables IPC support for the reverse offload backend.
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -106,6 +106,15 @@ __host__ void IpcOnImpl::ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
* addresses.
|
||||
*/
|
||||
free(vec_ipc_handle);
|
||||
|
||||
if (0 == rocshmem_env_config.ro_disable_ipc) {
|
||||
int thread_comm_rank;
|
||||
|
||||
CHECK_HIP(hipMalloc(reinterpret_cast<void**>(&pes_with_ipc_avail), shm_size * sizeof(int)));
|
||||
|
||||
MPI_Comm_rank(thread_comm, &thread_comm_rank);
|
||||
MPI_Allgather(&thread_comm_rank, 1, MPI_INT, pes_with_ipc_avail, 1, MPI_INT, shmcomm);
|
||||
}
|
||||
}
|
||||
|
||||
__host__ void IpcOnImpl::ipcHostStop() {
|
||||
@@ -115,6 +124,10 @@ __host__ void IpcOnImpl::ipcHostStop() {
|
||||
}
|
||||
}
|
||||
CHECK_HIP(hipFree(ipc_bases));
|
||||
|
||||
if (nullptr != pes_with_ipc_avail) {
|
||||
CHECK_HIP(hipFree(pes_with_ipc_avail));
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void IpcOnImpl::ipcCopy(void *dst, void *src, size_t size) {
|
||||
|
||||
@@ -48,14 +48,23 @@ class IpcOnImpl {
|
||||
|
||||
char **ipc_bases{nullptr};
|
||||
|
||||
int *pes_with_ipc_avail{nullptr};
|
||||
|
||||
__host__ void ipcHostInit(int my_pe, const HEAP_BASES_T &heap_bases,
|
||||
MPI_Comm thread_comm);
|
||||
|
||||
__host__ void ipcHostStop();
|
||||
|
||||
__device__ bool isIpcAvailable(int my_pe, int target_pe) {
|
||||
return my_pe / shm_size == target_pe / shm_size;
|
||||
if (nullptr == pes_with_ipc_avail) { return false; }
|
||||
|
||||
for (int i=0; i<shm_size; i++) {
|
||||
if (pes_with_ipc_avail[i] == target_pe) { return true; }
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
__device__ void ipcGpuInit(Backend *gpu_backend, Context *ctx, int thread_id);
|
||||
|
||||
__device__ void ipcCopy(void *dst, void *src, size_t size);
|
||||
|
||||
@@ -54,6 +54,7 @@ __host__ ROContext::ROContext(Backend *b, size_t block_id)
|
||||
|
||||
ipcImpl_.ipc_bases = b->ipcImpl.ipc_bases;
|
||||
ipcImpl_.shm_size = b->ipcImpl.shm_size;
|
||||
ipcImpl_.pes_with_ipc_avail = b->ipcImpl.pes_with_ipc_avail;
|
||||
}
|
||||
|
||||
__device__ void ROContext::putmem(void *dest, const void *source, size_t nelems,
|
||||
|
||||
@@ -84,6 +84,8 @@ rocshmem_ctx_t ROCSHMEM_HOST_CTX_DEFAULT;
|
||||
|
||||
rocm_init();
|
||||
|
||||
rocshmem_env_config_init();
|
||||
|
||||
#ifdef USE_GPU_IB
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GPUIBBackend)));
|
||||
backend = new (backend) GPUIBBackend(comm);
|
||||
|
||||
@@ -146,4 +146,15 @@ uint64_t wallClk_freq_mhz() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct rocshmem_env_config_t rocshmem_env_config;
|
||||
|
||||
void rocshmem_env_config_init(void) {
|
||||
char* env_value = NULL;
|
||||
|
||||
env_value = getenv("ROCSHMEM_RO_DISABLE_IPC");
|
||||
if (NULL != env_value) {
|
||||
rocshmem_env_config.ro_disable_ipc = atoi(env_value);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -269,6 +269,13 @@ void rocm_memory_lock_to_fine_grain(void* ptr, size_t size, void** gpu_ptr,
|
||||
// Returns clock frequency used by s_memrealtime() in Mhz
|
||||
uint64_t wallClk_freq_mhz();
|
||||
|
||||
struct rocshmem_env_config_t {
|
||||
int ro_disable_ipc = 0;
|
||||
};
|
||||
extern struct rocshmem_env_config_t rocshmem_env_config;
|
||||
|
||||
void rocshmem_env_config_init(void);
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_SRC_UTIL_HPP_
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur