diff --git a/hipamd/include/hip/amd_detail/amd_device_functions.h b/hipamd/include/hip/amd_detail/amd_device_functions.h index 4c55103e21..36353096f3 100644 --- a/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -828,14 +828,14 @@ int __syncthreads_or(int predicate) CU_ID 11:8 Compute Unit the wave is assigned to. SH_ID 12 Shader Array (within an SE) the wave is assigned to. SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a - 14:13 Shader Engine the wave is assigned to for gfx940-942 + 14:13 Shader Engine the wave is assigned to for 942 TG_ID 19:16 Thread-group ID VM_ID 23:20 Virtual Memory ID QUEUE_ID 26:24 Queue from which this wave was dispatched. STATE_ID 29:27 State ID (graphics only, not compute). ME_ID 31:30 Micro-engine ID. - XCC_ID Register bit structure for gfx940/941/942/950 + XCC_ID Register bit structure for 942/950 XCC_ID 3:0 XCC the wave is assigned to. */ @@ -860,7 +860,7 @@ int __syncthreads_or(int predicate) #if (defined(__gfx908__) || defined(__gfx90a__) || \ defined(__GFX11__)) #define HW_ID_SE_ID_SIZE 3 -#else //4 SEs/XCC for gfx940-942 +#else //4 SEs/XCC for 942 #define HW_ID_SE_ID_SIZE 2 #endif #if (defined(__GFX10__) || defined(__GFX11__)) @@ -871,7 +871,7 @@ int __syncthreads_or(int predicate) #define HW_ID_SE_ID_OFFSET 13 #endif -#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)) +#if (defined(__gfx942__) || defined(__gfx950__)) #define __gfx94plus_clr__ #define XCC_ID 20 #define XCC_ID_XCC_ID_SIZE 4 diff --git a/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/hipamd/include/hip/amd_detail/amd_hip_atomic.h index fb0f31fb75..ba5019b984 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_atomic.h +++ b/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -476,154 +476,62 @@ double atomicExch_system(double* address, double val) { __device__ inline int atomicMin(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](int x, int y) { return x < y; }, [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline int atomicMin_system(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](int x, int y) { return x < y; }, [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned int atomicMin_system(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long atomicMin(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long x, unsigned long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long atomicMin_system(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long x, unsigned long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long long atomicMin(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long long x, unsigned long long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long long x, unsigned long long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline long long atomicMin(long long* address, long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](long long x, long long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline long long atomicMin_system(long long* address, long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](long long x, long long y) { return x < y; }, - [=]() { - return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ @@ -721,153 +629,61 @@ double atomicMin_system(double* addr, double val) { __device__ inline int atomicMax(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](int x, int y) { return y < x; }, [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline int atomicMax_system(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](int x, int y) { return y < x; }, [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned int atomicMax_system(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long atomicMax(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long x, unsigned long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long atomicMax_system(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long x, unsigned long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long long atomicMax(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long long x, unsigned long long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long long x, unsigned long long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline long long atomicMax(long long* address, long long val) { - #if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](long long x, long long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline long long atomicMax_system(long long* address, long long val) { -#if defined(__gfx941__) - return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](long long x, long long y) { return y < x; }, - [=]() { - return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ @@ -970,18 +786,7 @@ __device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); }, - [=]() { - return - __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); - }); -#else - return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); -#endif // __gfx941__ + return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); } @@ -989,356 +794,145 @@ __device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); }, - [=]() { - return - __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent"); - }); -#else return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent"); -#endif // __gfx941__ - } __device__ inline int atomicAnd(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](int& x, int y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline int atomicAnd_system(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](int& x, int y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned int atomicAnd_system(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long atomicAnd(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long atomicAnd_system(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long long& x, unsigned long long y) { x &= y; }, - [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long long& x, unsigned long long y) { x &= y; }, - [=]() { - return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline int atomicOr(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](int& x, int y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline int atomicOr_system(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](int& x, int y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned int atomicOr_system(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long atomicOr(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long atomicOr_system(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long long atomicOr(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long long& x, unsigned long long y) { x |= y; }, - [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, - val, - [](unsigned long long& x, unsigned long long y) { x |= y; }, - [=]() { - return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline int atomicXor(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](int& x, int y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline int atomicXor_system(int* address, int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](int& x, int y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned int atomicXor_system(unsigned int* address, unsigned int val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long atomicXor(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ inline unsigned long atomicXor_system(unsigned long* address, unsigned long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( - address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_SYSTEM); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); -#endif // __gfx941__ } __device__ inline unsigned long long atomicXor(unsigned long long* address, unsigned long long val) { -#if defined(__gfx941__) - return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( - address, - val, - [](unsigned long long& x, unsigned long long y) { x ^= y; }, - [=]() { - return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, - __HIP_MEMORY_SCOPE_AGENT); - }); -#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); -#endif // __gfx941__ } __device__ diff --git a/hipamd/include/hip/amd_detail/amd_hip_fp8.h b/hipamd/include/hip/amd_detail/amd_hip_fp8.h index e730ff3e16..08757cedca 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_fp8.h +++ b/hipamd/include/hip/amd_detail/amd_hip_fp8.h @@ -37,7 +37,7 @@ #define HIP_FP8_CVT_FAST_PATH 0 #endif -#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && __HIP_DEVICE_COMPILE__ +#if defined(__gfx942__) && __HIP_DEVICE_COMPILE__ #define HIP_FP8_TYPE_OCP 0 #define HIP_FP8_TYPE_FNUZ 1 #elif (defined(__gfx1200__) || defined(__gfx1201__)) && __HIP_DEVICE_COMPILE__ diff --git a/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h b/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h index 8c01cc5529..c7cd9ec400 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +++ b/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h @@ -303,14 +303,12 @@ __device__ inline double unsafeAtomicMin(double* addr, double val) { * @return Original value contained in \p addr. */ __device__ inline float safeAtomicAdd(float* addr, float value) { -#if defined(__gfx908__) || defined(__gfx941__) \ - || ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__) || \ +#if defined(__gfx908__) \ + || ((defined(__gfx90a__) || defined(__gfx942__) || \ defined(__gfx950__)) && !__has_builtin(__hip_atomic_fetch_add)) // On gfx908, we can generate unsafe FP32 atomic add that does not follow all // IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead. - // On gfx941, we can generate unsafe FP32 atomic add that may not always happen atomically, - // so we need to force a CAS loop emulation to ensure safety. - // On gfx90a, gfx940, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we + // On gfx90a, gfx942 and gfx950 if we do not have the __hip_atomic_fetch_add builtin, we // need to force a CAS loop here. float old_val; #if __has_builtin(__hip_atomic_load) diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp index 2cee6a585a..8700531484 100644 --- a/hipamd/src/hip_code_object.cpp +++ b/hipamd/src/hip_code_object.cpp @@ -224,16 +224,6 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp sramEccSupported = false; proc_name = "gfx90c"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX940: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx940"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX941: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx941"; - break; case EF_AMDGPU_MACH_AMDGCN_GFX942: xnackSupported = true; sramEccSupported = true; @@ -467,8 +457,6 @@ static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& ag {"gfx909", "gfx9-generic"}, {"gfx90c", "gfx9-generic"}, // "gfx9-4-generic" - {"gfx940", "gfx9-4-generic"}, - {"gfx941", "gfx9-4-generic"}, {"gfx942", "gfx9-4-generic"}, {"gfx950", "gfx9-4-generic"}, // "gfx10-1-generic" diff --git a/hipamd/src/hip_comgr_helper.cpp b/hipamd/src/hip_comgr_helper.cpp index 5e336df75e..755875c287 100644 --- a/hipamd/src/hip_comgr_helper.cpp +++ b/hipamd/src/hip_comgr_helper.cpp @@ -160,16 +160,6 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp sramEccSupported = false; proc_name = "gfx90c"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX940: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx940"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX941: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx941"; - break; case EF_AMDGPU_MACH_AMDGCN_GFX942: xnackSupported = true; sramEccSupported = true; diff --git a/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp b/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp index 127add49e9..667420b408 100644 --- a/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp +++ b/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp @@ -58,8 +58,6 @@ static const AMDDeviceInfo DeviceInfo[] = { {"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, {"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - {"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, - {"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, {"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, {"gfx950", "gfx950", 4, 16, 1, 256, 160 * Ki, 64, 9, 5}, {"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index ba512962cc..423b40312b 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -170,24 +170,6 @@ std::pair Isa::supportedIsas() { {"gfx90a:sramecc-:xnack+", nullptr, true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+:xnack-", nullptr, true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+:xnack+", nullptr, true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940", nullptr, true, false, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc-", nullptr, true, false, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc+", nullptr, true, false, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:xnack-", nullptr, true, false, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:xnack+", nullptr, true, false, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc-:xnack-", nullptr, true, false, 9, 4, 0, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc-:xnack+", nullptr, true, false, 9, 4, 0, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc+:xnack-", nullptr, true, false, 9, 4, 0, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx940:sramecc+:xnack+", nullptr, true, false, 9, 4, 0, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941", nullptr, true, false, 9, 4, 1, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc-", nullptr, true, false, 9, 4, 1, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc+", nullptr, true, false, 9, 4, 1, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:xnack-", nullptr, true, false, 9, 4, 1, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:xnack+", nullptr, true, false, 9, 4, 1, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc-:xnack-", nullptr, true, false, 9, 4, 1, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc-:xnack+", nullptr, true, false, 9, 4, 1, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc+:xnack-", nullptr, true, false, 9, 4, 1, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx941:sramecc+:xnack+", nullptr, true, false, 9, 4, 1, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx942", nullptr, true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx942:sramecc-", nullptr, true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx942:sramecc+", nullptr, true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, diff --git a/rocclr/device/pal/palblitcl.cpp b/rocclr/device/pal/palblitcl.cpp index 007ec8ba0b..4f8eb5a2df 100644 --- a/rocclr/device/pal/palblitcl.cpp +++ b/rocclr/device/pal/palblitcl.cpp @@ -98,7 +98,7 @@ const char* TrapHandlerCode = RUNTIME_KERNEL( // ttmp1 = 0[2:0], PCRewind[3:0], HostTrap[0], TrapId[7:0], PC[47:32] // gfx906/gfx908/gfx90a: // ttmp11 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0] -// gfx940/gfx941/gfx942/gfx950: +// gfx942/gfx950: // ttmp13 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[22:0] // gfx10: // ttmp1 = 0[0], PCRewind[5:0], HostTrap[0], TrapId[7:0], PC[47:32] diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 6d44d2f080..219cc3a46c 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -1251,15 +1251,6 @@ bool Device::populateOCLDeviceConstants() { GPU_SINGLE_ALLOC_PERCENT = 75; } } - // Limit gpu single allocation percentage for gfx940 - if ((isa().versionMajor() == 9) && (isa().versionMinor() == 4) && - (isa().versionStepping() == 0) && (info_.hostUnifiedMemory_ == 1)) { - if (gpu_agents_.size() == 1 || p2p_agents_.size() == 0) { - if (flagIsDefault(GPU_SINGLE_ALLOC_PERCENT)) { - GPU_SINGLE_ALLOC_PERCENT = 60; - } - } - } gpuvm_segment_max_alloc_ = uint64_t(info_.globalMemSize_ * std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);