reenable gfx1100 (#328)

* reenable gfx1100

use the modified version of the flat_store_short assembly instruction as suggested by the compiler team (32bit input value instead of 16bit)

* add fix for gfx1201

add the same fix for gfx1201 that was introduced for gfx1100

[ROCm/rocshmem commit: 224c969bef]
This commit is contained in:
Edgar Gabriel
2025-12-03 13:49:38 -06:00
committad av GitHub
förälder 4b04b540bf
incheckning 3d658b558b
2 ändrade filer med 24 tillägg och 17 borttagningar
+1
Visa fil
@@ -108,6 +108,7 @@ include(cmake/rocm_local_targets.cmake)
set(DEFAULT_GPUS
gfx90a:xnack-;
gfx90a:xnack+;
gfx1100;
gfx1201;
gfx942)
+23 -17
Visa fil
@@ -44,9 +44,9 @@ __device__ __forceinline__ int uncached_load_ubyte(uint8_t* src) {
int ret;
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile(
"global_load_ubyte %0 %1 off glc slc \n"
"s_waitcnt vmcnt(0)"
@@ -74,9 +74,9 @@ __device__ __forceinline__ void refresh_volatile_sbyte(volatile int *assigned_va
volatile char *read_value) {
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile(
"global_load_sbyte %0 %1 off glc slc\n "
"s_waitcnt vmcnt(0)"
@@ -103,9 +103,9 @@ __device__ __forceinline__ void refresh_volatile_dwordx2(volatile uint64_t *assi
volatile uint64_t *read_value) {
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile(
"global_load_dwordx2 %0 %1 off glc slc\n "
"s_waitcnt vmcnt(0)"
@@ -141,9 +141,9 @@ NOWARN(-Wdeprecated-volatile,
case 4:
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile(
"global_load_dword %0 %1 off glc slc \n"
"s_waitcnt vmcnt(0)"
@@ -168,9 +168,9 @@ NOWARN(-Wdeprecated-volatile,
case 8:
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile(
"global_load_dwordx2 %0 %1 off glc slc \n"
"s_waitcnt vmcnt(0)"
@@ -221,19 +221,25 @@ __device__ __forceinline__ void store_asm(uint8_t* val, uint8_t* dst,
int size) {
switch (size) {
case 2: {
int16_t val16{*(reinterpret_cast<int16_t*>(val))};
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
int16_t val16{*(reinterpret_cast<int16_t*>(val))};
asm volatile("flat_store_short %0 %1 glc slc" : : "v"(dst), "v"(val16));
#endif
#if defined(__gfx942__) || defined(__gfx950__)
int16_t val16{*(reinterpret_cast<int16_t*>(val))};
asm volatile("flat_store_short %0 %1 sc0 sc1" : : "v"(dst), "v"(val16));
#endif
#if defined(__gfx1100__)
int32_t val32{*(reinterpret_cast<int32_t*>(val))};
asm volatile("flat_store_short %0 %1 glc slc" : : "v"(dst), "v"(val32));
#endif
#if defined(__gfx1201__)
asm volatile("flat_store_b16 %0 %1 scope:SCOPE_SYS" : : "v"(dst), "v"(val16));
int32_t val32{*(reinterpret_cast<int32_t*>(val))};
asm volatile("flat_store_b16 %0 %1 scope:SCOPE_SYS" : : "v"(dst), "v"(val32));
#endif
break;
}
@@ -241,9 +247,9 @@ __device__ __forceinline__ void store_asm(uint8_t* val, uint8_t* dst,
int32_t val32{*(reinterpret_cast<int32_t*>(val))};
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile("flat_store_dword %0 %1 glc slc" : : "v"(dst), "v"(val32));
#endif
#if defined(__gfx942__) || defined(__gfx950__)
@@ -258,9 +264,9 @@ __device__ __forceinline__ void store_asm(uint8_t* val, uint8_t* dst,
int64_t val64{*(reinterpret_cast<int64_t*>(val))};
#if defined(__gfx906__)
#endif
#if defined(__gfx908__) || defined(__gfx1100__)
#if defined(__gfx908__)
#endif
#if defined(__gfx90a__)
#if defined(__gfx90a__) || defined(__gfx1100__)
asm volatile("flat_store_dwordx2 %0 %1 glc slc" : : "v"(dst), "v"(val64));
#endif
#if defined(__gfx942__) || defined(__gfx950__)