[DEVICE] Fix validation errors for multi-node LL with gfx950 non-coherent system memory (#1795)

This commit is contained in:
Nilesh M Negi
2025-07-10 09:05:46 -05:00
committed by GitHub
parent 058264b3f3
commit 68d6f99e0f
+1 -1
View File
@@ -260,7 +260,7 @@ private:
__device__ void storeLL(union ncclLLFifoLine* dst, uint64_t val, uint32_t flag) {
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
-#if defined(__gfx950__)
+#if (defined(__gfx950__) && defined(HIP_HOST_UNCACHED_MEMORY))
using Vec = uint32_t __attribute__((ext_vector_type(4)));
Vec i4;
i4[0] = val & 0xffffffff;