SWDEV-501963 - Add missing codes for gfx950

Cherry-pick https://gerrit-git.amd.com/c/compute/ec/clr/+/1162997

Change-Id: I6b3c6bf55c61cffd43cd6f17b75998f751b75723
Этот коммит содержится в:
taosang2
2024-12-02 15:17:47 -05:00
коммит произвёл Tao Sang
родитель 4ec274c7d4
Коммит 32daa8f384
6 изменённых файлов: 19 добавлений и 19 удалений
+6 -6
Просмотреть файл
@@ -835,7 +835,7 @@ int __syncthreads_or(int predicate)
STATE_ID 29:27 State ID (graphics only, not compute).
ME_ID 31:30 Micro-engine ID.
XCC_ID Register bit structure for gfx940
XCC_ID Register bit structure for gfx940/941/942/950
XCC_ID 3:0 XCC the wave is assigned to.
*/
@@ -871,14 +871,14 @@ int __syncthreads_or(int predicate)
#define HW_ID_SE_ID_OFFSET 13
#endif
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__))
#define __gfx94plus_clr__
#define XCC_ID 20
#define XCC_ID_XCC_ID_SIZE 4
#define XCC_ID_XCC_ID_OFFSET 0
#endif
#if (!defined(__HIP_NO_IMAGE_SUPPORT) && \
(defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)))
#if !defined(__HIP_NO_IMAGE_SUPPORT) && defined(__gfx94plus_clr__)
#define __HIP_NO_IMAGE_SUPPORT 1
#endif
@@ -913,7 +913,7 @@ unsigned __smid(void)
GETREG_IMMED(HW_ID_CU_ID_SIZE - 1, HW_ID_CU_ID_OFFSET, HW_ID));
#endif
#else
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
#if defined(__gfx94plus_clr__)
unsigned xcc_id = __builtin_amdgcn_s_getreg(
GETREG_IMMED(XCC_ID_XCC_ID_SIZE - 1, XCC_ID_XCC_ID_OFFSET, XCC_ID));
#endif
@@ -929,7 +929,7 @@ unsigned __smid(void)
#endif
return temp;
//TODO : CU Mode impl
#elif (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
#elif defined(__gfx94plus_clr__)
unsigned temp = xcc_id;
temp = (temp << HW_ID_SE_ID_SIZE) | se_id;
temp = (temp << HW_ID_CU_ID_SIZE) | cu_id;
+3 -4
Просмотреть файл
@@ -30,9 +30,8 @@
#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx1200__) || \
defined(__gfx1201__)) && \
__HIP_DEVICE_COMPILE__
#if (defined(__gfx94plus_clr__) || defined(__gfx1200__) || defined(__gfx1201__)) && \
__HIP_DEVICE_COMPILE__
#define HIP_FP8_CVT_FAST_PATH 1
#else
#define HIP_FP8_CVT_FAST_PATH 0
@@ -3173,4 +3172,4 @@ struct __hip_fp8x4_e5m2 {
}
};
#endif // ENABLE_OCP_HIPRTC
#endif // _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
#endif // _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
+7 -7
Просмотреть файл
@@ -209,8 +209,8 @@ __device__ inline double unsafeAtomicAdd(double* addr, double value) {
* @return Original value contained at \p addr.
*/
__device__ inline double unsafeAtomicMax(double* addr, double val) {
#if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
__has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64)
#if (defined(__gfx90a__) || defined(__gfx94plus_clr__)) && \
__has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64)
return __builtin_amdgcn_flat_atomic_fmax_f64(addr, val);
#else
#if __has_builtin(__hip_atomic_load) && \
@@ -262,8 +262,8 @@ __device__ inline double unsafeAtomicMax(double* addr, double val) {
* @return Original value contained at \p addr.
*/
__device__ inline double unsafeAtomicMin(double* addr, double val) {
#if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
__has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64)
#if (defined(__gfx90a__) || defined(__gfx94plus_clr__)) && \
__has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64)
return __builtin_amdgcn_flat_atomic_fmin_f64(addr, val);
#else
#if __has_builtin(__hip_atomic_load) && \
@@ -304,13 +304,13 @@ __device__ inline double unsafeAtomicMin(double* addr, double val) {
*/
__device__ inline float safeAtomicAdd(float* addr, float value) {
#if defined(__gfx908__) || defined(__gfx941__) \
|| ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__)) \
&& !__has_builtin(__hip_atomic_fetch_add))
|| ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__) || \
defined(__gfx950__)) && !__has_builtin(__hip_atomic_fetch_add))
// On gfx908, we can generate unsafe FP32 atomic add that does not follow all
// IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead.
// On gfx941, we can generate unsafe FP32 atomic add that may not always happen atomically,
// so we need to force a CAS loop emulation to ensure safety.
// On gfx90a, gfx940 and gfx942 if we do not have the __hip_atomic_fetch_add builtin, we
// On gfx90a, gfx940, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we
// need to force a CAS loop here.
float old_val;
#if __has_builtin(__hip_atomic_load)
+1
Просмотреть файл
@@ -61,6 +61,7 @@ static const AMDDeviceInfo DeviceInfo[] = {
{"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx950", "gfx950", 4, 16, 1, 256, 160 * Ki, 64, 9, 5},
{"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
{"gfx1011", "gfx1011", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
{"gfx1012", "gfx1012", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
+1 -1
Просмотреть файл
@@ -98,7 +98,7 @@ const char* TrapHandlerCode = RUNTIME_KERNEL(
// ttmp1 = 0[2:0], PCRewind[3:0], HostTrap[0], TrapId[7:0], PC[47:32]
// gfx906/gfx908/gfx90a:
// ttmp11 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
// gfx940/gfx941/gfx942:
// gfx940/gfx941/gfx942/gfx950:
// ttmp13 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[22:0]
// gfx10:
// ttmp1 = 0[0], PCRewind[5:0], HostTrap[0], TrapId[7:0], PC[47:32]
+1 -1
Просмотреть файл
@@ -246,7 +246,7 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH
const uint32_t gfxipMinor = isa.versionMinor();
const uint32_t gfxStepping = isa.versionStepping();
const bool isGfx94x = gfxipMajor == 9 && gfxipMinor == 4 &&
const bool isGfx94x = gfxipMajor == 9 && (gfxipMinor == 4 || gfxipMinor == 5) &&
(gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2);
const bool isGfx90a = (gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10);
const bool isPreGfx908 =