Fix gfx950 gating conditions to match ROCm 7.0.2 (#2003)
[ROCm/rccl commit: 8444b3c6e9]
Este cometimento está contido em:
cometido por GitHub · ascendente eb0b1387b7 · cometimento 03d37f6305
@@ -270,8 +270,8 @@ private:
|
||||
i4.flag2 = flag;
|
||||
*((u64_gptr) dst->v) = *((u64_gptr) i4.v);
|
||||
*((u64_gptr) dst->v+1) = *((u64_gptr) i4.v+1);
|
||||
-#if defined(__gfx950__) && ROCM_VERSION < 70200
-__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache
+#if defined(__gfx950__) && ROCM_VERSION < 70002
+__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache on gfx950 if ROCr fix for hipHostMallocUncached is not available (ROCm version < 7.0.2)
|
||||
#endif
|
||||
#else
|
||||
asm volatile("st.volatile.global.v4.u32 [%0], {%1,%2,%3,%4};" :: "l"(&dst->i4), "r"((uint32_t)val), "r"(flag), "r"((uint32_t)(val >> 32)), "r"(flag) : "memory");
|
||||
@@ -346,8 +346,8 @@ private:
|
||||
__builtin_nontemporal_store(u4, (uint32_t*)dst);
|
||||
else
|
||||
__builtin_nontemporal_store(u8, (uint64_t*)dst);
|
||||
-#if defined(__gfx950__) && ROCM_VERSION < 70200
-__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache
+#if defined(__gfx950__) && ROCM_VERSION < 70002
+__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache on gfx950 if ROCr fix for hipHostMallocUncached is not available (ROCm version < 7.0.2)
|
||||
#endif
|
||||
#else
|
||||
if(sizeof(U) == 1)
|
||||
|
||||
@@ -1410,7 +1410,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
|
||||
comm -> gfx9CheapFenceOff = 0;
|
||||
}
|
||||
else if(IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx950")){
|
||||
-comm -> gfx9CheapFenceOff = ROCM_VERSION < 70200 && nNodes > 1; // Enable for single node only prior to ROCm 7.0.2
+comm -> gfx9CheapFenceOff = ROCM_VERSION < 70002 && nNodes > 1; // Enable for single node only prior to ROCm 7.0.2
|
||||
}
|
||||
}
|
||||
INFO(NCCL_INIT, "GFX9 cheap fence is %s", comm -> gfx9CheapFenceOff ? "OFF" : "ON");
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador