SWDEV-516564 - SWDEV-512817 - Remove mentions of gfx940 and gfx941

Change-Id: Ia069fcb9c6948c3fc9a00961593c9dcc59609375
此提交包含在:
Branislav Brzak
2025-02-26 11:49:41 +00:00
提交者 Branislav Brzak
父節點 e03e4f3b5d
當前提交 c2d1776ebd
共有 10 個檔案被更改,包括 10 行新增469 行删除
+4 -4
查看文件
@@ -828,14 +828,14 @@ int __syncthreads_or(int predicate)
CU_ID 11:8 Compute Unit the wave is assigned to.
SH_ID 12 Shader Array (within an SE) the wave is assigned to.
SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a
14:13 Shader Engine the wave is assigned to for gfx940-942
14:13 Shader Engine the wave is assigned to for 942
TG_ID 19:16 Thread-group ID
VM_ID 23:20 Virtual Memory ID
QUEUE_ID 26:24 Queue from which this wave was dispatched.
STATE_ID 29:27 State ID (graphics only, not compute).
ME_ID 31:30 Micro-engine ID.
XCC_ID Register bit structure for gfx940/941/942/950
XCC_ID Register bit structure for 942/950
XCC_ID 3:0 XCC the wave is assigned to.
*/
@@ -860,7 +860,7 @@ int __syncthreads_or(int predicate)
#if (defined(__gfx908__) || defined(__gfx90a__) || \
defined(__GFX11__))
#define HW_ID_SE_ID_SIZE 3
#else //4 SEs/XCC for gfx940-942
#else //4 SEs/XCC for 942
#define HW_ID_SE_ID_SIZE 2
#endif
#if (defined(__GFX10__) || defined(__GFX11__))
@@ -871,7 +871,7 @@ int __syncthreads_or(int predicate)
#define HW_ID_SE_ID_OFFSET 13
#endif
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__))
#if (defined(__gfx942__) || defined(__gfx950__))
#define __gfx94plus_clr__
#define XCC_ID 20
#define XCC_ID_XCC_ID_SIZE 4
+1 -407
查看文件
@@ -476,154 +476,62 @@ double atomicExch_system(double* address, double val) {
__device__
inline
int atomicMin(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int x, int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicMin_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int x, int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMin(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long x, unsigned long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long x, unsigned long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long x, unsigned long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long x, unsigned long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
long long atomicMin(long long* address, long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](long long x, long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
long long atomicMin_system(long long* address, long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](long long x, long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
@@ -721,153 +629,61 @@ double atomicMin_system(double* addr, double val) {
__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int x, int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int x, int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long x, unsigned long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long x, unsigned long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long x, unsigned long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long x, unsigned long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
long long atomicMax(long long* address, long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](long long x, long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
long long atomicMax_system(long long* address, long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](long long x, long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
@@ -970,18 +786,7 @@ __device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
[=]() {
return
__builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
});
#else
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}
@@ -989,356 +794,145 @@ __device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
[=]() {
return
__builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
});
#else
return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
}
__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x &= y; },
[=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x &= y; },
[=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x |= y; },
[=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x |= y; },
[=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x ^= y; },
[=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
+1 -1
查看文件
@@ -37,7 +37,7 @@
#define HIP_FP8_CVT_FAST_PATH 0
#endif
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && __HIP_DEVICE_COMPILE__
#if defined(__gfx942__) && __HIP_DEVICE_COMPILE__
#define HIP_FP8_TYPE_OCP 0
#define HIP_FP8_TYPE_FNUZ 1
#elif (defined(__gfx1200__) || defined(__gfx1201__)) && __HIP_DEVICE_COMPILE__
+3 -5
查看文件
@@ -303,14 +303,12 @@ __device__ inline double unsafeAtomicMin(double* addr, double val) {
* @return Original value contained in \p addr.
*/
__device__ inline float safeAtomicAdd(float* addr, float value) {
#if defined(__gfx908__) || defined(__gfx941__) \
|| ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__) || \
#if defined(__gfx908__) \
|| ((defined(__gfx90a__) || defined(__gfx942__) || \
defined(__gfx950__)) && !__has_builtin(__hip_atomic_fetch_add))
// On gfx908, we can generate unsafe FP32 atomic add that does not follow all
// IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead.
// On gfx941, we can generate unsafe FP32 atomic add that may not always happen atomically,
// so we need to force a CAS loop emulation to ensure safety.
// On gfx90a, gfx940, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we
// On gfx90a, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we
// need to force a CAS loop here.
float old_val;
#if __has_builtin(__hip_atomic_load)
-12
查看文件
@@ -224,16 +224,6 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
sramEccSupported = false;
proc_name = "gfx90c";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX940:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx940";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX941:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx941";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX942:
xnackSupported = true;
sramEccSupported = true;
@@ -467,8 +457,6 @@ static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& ag
{"gfx909", "gfx9-generic"},
{"gfx90c", "gfx9-generic"},
// "gfx9-4-generic"
{"gfx940", "gfx9-4-generic"},
{"gfx941", "gfx9-4-generic"},
{"gfx942", "gfx9-4-generic"},
{"gfx950", "gfx9-4-generic"},
// "gfx10-1-generic"
-10
查看文件
@@ -160,16 +160,6 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
sramEccSupported = false;
proc_name = "gfx90c";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX940:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx940";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX941:
xnackSupported = true;
sramEccSupported = true;
proc_name = "gfx941";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX942:
xnackSupported = true;
sramEccSupported = true;
-2
查看文件
@@ -58,8 +58,6 @@ static const AMDDeviceInfo DeviceInfo[] = {
{"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx950", "gfx950", 4, 16, 1, 256, 160 * Ki, 64, 9, 5},
{"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
-18
查看文件
@@ -170,24 +170,6 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx90a:sramecc-:xnack+", nullptr, true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90a:sramecc+:xnack-", nullptr, true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90a:sramecc+:xnack+", nullptr, true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940", nullptr, true, false, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc-", nullptr, true, false, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc+", nullptr, true, false, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:xnack-", nullptr, true, false, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:xnack+", nullptr, true, false, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc-:xnack-", nullptr, true, false, 9, 4, 0, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc-:xnack+", nullptr, true, false, 9, 4, 0, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc+:xnack-", nullptr, true, false, 9, 4, 0, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx940:sramecc+:xnack+", nullptr, true, false, 9, 4, 0, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941", nullptr, true, false, 9, 4, 1, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc-", nullptr, true, false, 9, 4, 1, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc+", nullptr, true, false, 9, 4, 1, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:xnack-", nullptr, true, false, 9, 4, 1, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:xnack+", nullptr, true, false, 9, 4, 1, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc-:xnack-", nullptr, true, false, 9, 4, 1, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc-:xnack+", nullptr, true, false, 9, 4, 1, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc+:xnack-", nullptr, true, false, 9, 4, 1, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx941:sramecc+:xnack+", nullptr, true, false, 9, 4, 1, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx942", nullptr, true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx942:sramecc-", nullptr, true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx942:sramecc+", nullptr, true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
+1 -1
查看文件
@@ -98,7 +98,7 @@ const char* TrapHandlerCode = RUNTIME_KERNEL(
// ttmp1 = 0[2:0], PCRewind[3:0], HostTrap[0], TrapId[7:0], PC[47:32]
// gfx906/gfx908/gfx90a:
// ttmp11 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
// gfx940/gfx941/gfx942/gfx950:
// gfx942/gfx950:
// ttmp13 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[22:0]
// gfx10:
// ttmp1 = 0[0], PCRewind[5:0], HostTrap[0], TrapId[7:0], PC[47:32]
-9
查看文件
@@ -1251,15 +1251,6 @@ bool Device::populateOCLDeviceConstants() {
GPU_SINGLE_ALLOC_PERCENT = 75;
}
}
// Limit gpu single allocation percentage for gfx940
if ((isa().versionMajor() == 9) && (isa().versionMinor() == 4) &&
(isa().versionStepping() == 0) && (info_.hostUnifiedMemory_ == 1)) {
if (gpu_agents_.size() == 1 || p2p_agents_.size() == 0) {
if (flagIsDefault(GPU_SINGLE_ALLOC_PERCENT)) {
GPU_SINGLE_ALLOC_PERCENT = 60;
}
}
}
gpuvm_segment_max_alloc_ =
uint64_t(info_.globalMemSize_ * std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);