Update Barrier_All and Sync_All APIs (#72)

* Fix deadlock in `rocshmem_ctx_wg_barrier_all` API in IPC conduit by adding per-context pSync buffers and context IDs
  - Added separate pSync buffers for each device context
  - Resolved deadlock when invoking barrier API (`rocshmem_ctx_wg_barrier_all`) concurrently from multiple contexts

* Update barrier_all functional tests for multi-context support

* Add thread, wavefront, and workgroup-level barrier_all APIs in IPC and RO conduits
  - Implemented barrier_all APIs at thread, wavefront, and workgroup granularity
  - Added support in both IPC and RO conduits
  - Updated functional tests to cover all `barrier_all` APIs

* Add thread, wavefront, and workgroup-level sync_all APIs in IPC and RO conduits
  - Implemented sync_all APIs for thread, wavefront, and workgroup scopes
  - Added support in both IPC and RO conduits
  - Added functional tests to cover all `sync_all` APIs
Этот коммит содержится в:
Avinash Kethineedi
2025-04-02 11:58:55 -05:00
коммит произвёл GitHub
родитель e16ca7a1e3
Коммит c652f58cef
22 изменённых файлов: 508 добавлений и 53 удалений
+27 -3
Просмотреть файл
@@ -148,6 +148,18 @@ __device__ void Context::barrier_all() {
DISPATCH(barrier_all());
}
// Wavefront-granularity barrier_all entry point on the device Context.
// Records the call in the per-context statistics and then hands off through
// the DISPATCH macro.
__device__ void Context::barrier_all_wave() {
// Count this invocation under the NUM_BARRIER_ALL_WAVE statistic.
ctxStats.incStat(NUM_BARRIER_ALL_WAVE);
// DISPATCH is defined outside this view; presumably it forwards to the
// active conduit's barrier_all_wave() implementation — confirm.
DISPATCH(barrier_all_wave());
}
// Workgroup-granularity barrier_all entry point on the device Context.
// Records the call in the per-context statistics and then hands off through
// the DISPATCH macro.
__device__ void Context::barrier_all_wg() {
// Count this invocation under the NUM_BARRIER_ALL_WG statistic.
ctxStats.incStat(NUM_BARRIER_ALL_WG);
// DISPATCH is defined outside this view; presumably it forwards to the
// active conduit's barrier_all_wg() implementation — confirm.
DISPATCH(barrier_all_wg());
}
__device__ void Context::barrier(rocshmem_team_t team) {
ctxStats.incStat(NUM_BARRIER_ALL);
@@ -160,10 +172,22 @@ __device__ void Context::sync_all() {
DISPATCH(sync_all());
}
__device__ void Context::sync(rocshmem_team_t team) {
ctxStats.incStat(NUM_SYNC_ALL);
__device__ void Context::sync_all_wave() {
ctxStats.incStat(NUM_SYNC_ALL_WAVE);
DISPATCH(sync(team));
DISPATCH(sync_all_wave());
}
// Workgroup-granularity sync_all entry point on the device Context.
// Records the call in the per-context statistics and then hands off through
// the DISPATCH macro.
__device__ void Context::sync_all_wg() {
// Count this invocation under the NUM_SYNC_ALL_WG statistic.
ctxStats.incStat(NUM_SYNC_ALL_WG);
// DISPATCH is defined outside this view; presumably it forwards to the
// active conduit's sync_all_wg() implementation — confirm.
DISPATCH(sync_all_wg());
}
// Team-scoped sync entry point (workgroup variant) on the device Context.
// Records the call in the per-context statistics and then hands off through
// the DISPATCH macro, passing `team` along unchanged.
//
// team: handle of the team to synchronize; it is not inspected here.
__device__ void Context::sync_wg(rocshmem_team_t team) {
// NOTE(review): this team-scoped call is counted under NUM_SYNC_ALL_WG, the
// same statistic sync_all_wg() increments, so the two are indistinguishable
// in the stats — confirm the shared counter is intended.
ctxStats.incStat(NUM_SYNC_ALL_WG);
// DISPATCH is defined outside this view; presumably it forwards to the
// active conduit's sync_wg(team) implementation — confirm.
DISPATCH(sync_wg(team));
}
__device__ void Context::putmem_wg(void* dest, const void* source,