Gate code by rocm_version (#1945)

This commit is contained in:
alex-breslow-amd
2025-09-26 13:28:41 -07:00
committed by GitHub
parent 0dd2b2f65e
commit 45166f6586
+2 -2
View File
@@ -268,7 +268,7 @@ private:
i4.flag2 = flag;
__builtin_nontemporal_store(i4.v[0], dst->v);
__builtin_nontemporal_store(i4.v[1], dst->v+1);
-#if defined(__gfx950__)
+#if defined(__gfx950__) && ROCM_VERSION < 70200
__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache
#endif
#else
@@ -344,7 +344,7 @@ private:
__builtin_nontemporal_store(u4, (uint32_t*)dst);
else
__builtin_nontemporal_store(u8, (uint64_t*)dst);
-#if defined(__gfx950__)
+#if defined(__gfx950__) && ROCM_VERSION < 70200
__builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); // flush cache
#endif
#else