Add back __syncthreads() in barrier and adjust stack size (#688)
[ROCm/rccl commit: 1c166046a2]
This commit is contained in:
@@ -7,6 +7,7 @@ Full documentation for RCCL is available at [https://rccl.readthedocs.io](https:
|
||||
- Compatibility with NCCL 2.16.2
|
||||
### Added
|
||||
### Fixed
|
||||
- Remove workaround and use indirect function call
|
||||
### Removed
|
||||
|
||||
## Unreleased - RCCL 2.15.5 for ROCm 5.5.0
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
|
||||
#ifdef __GFX9__
|
||||
#define STORE(DST, SRC) \
|
||||
{ atomicExch((unsigned long long *)(DST), (SRC)); }
|
||||
{ __threadfence(); atomicExch((unsigned long long *)(DST), (SRC)); }
|
||||
#else
|
||||
#define STORE(DST, SRC) \
|
||||
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
|
||||
|
||||
@@ -17,12 +17,16 @@
|
||||
#define barrier_by_group() do { \
|
||||
const int w = threadIdx.x/WARP_SIZE; \
|
||||
const int wid = threadIdx.x%WARP_SIZE; \
|
||||
if (wid == 0) { \
|
||||
__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)"); \
|
||||
barrier_next[w] += nthreads/WARP_SIZE; \
|
||||
atomicAdd((unsigned long long *)barriers, 1); \
|
||||
while (atomicAdd((unsigned long long *)barriers, 0) < barrier_next[w]) __builtin_amdgcn_s_sleep(1); \
|
||||
__asm__ __volatile__("s_wakeup"); \
|
||||
__threadfence(); \
|
||||
if (nthreads == NCCL_MAX_NTHREADS) { \
|
||||
__syncthreads(); \
|
||||
} else { \
|
||||
if (wid == 0) { \
|
||||
barrier_next[w] += nthreads/WARP_SIZE; \
|
||||
atomicAdd((unsigned long long *)barriers, 1); \
|
||||
while (atomicAdd((unsigned long long *)barriers, 0) < barrier_next[w]) __builtin_amdgcn_s_sleep(1); \
|
||||
__asm__ __volatile__("s_wakeup"); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
@@ -1415,7 +1415,7 @@ fail:
|
||||
|
||||
#ifdef USE_INDIRECT_FUNCTION_CALL
|
||||
NCCL_PARAM(SetStackSize, "SET_STACK_SIZE", 1);
|
||||
RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 8);
|
||||
RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 256);
|
||||
#else
|
||||
NCCL_PARAM(SetStackSize, "SET_STACK_SIZE", 0);
|
||||
RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 0);
|
||||
|
||||
Reference in New Issue
Block a user