Switch to using ROCDL directly, as opposed to via HC. Add missing bits.
Этот коммит содержится в:
@@ -943,97 +943,6 @@ __device__ void* __get_dynamicgroupbaseptr() {
|
||||
|
||||
// Host-side overload: always yields a null pointer.
__host__ void* __get_dynamicgroupbaseptr() {
    return nullptr;
}
|
||||
|
||||
// Precise Math Functions
|
||||
// Device wrapper: returns hc::precise_math::cosf(x).
__device__ float __hip_precise_cosf(float x) {
    return hc::precise_math::cosf(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::exp10f(x).
__device__ float __hip_precise_exp10f(float x) {
    return hc::precise_math::exp10f(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::expf(x).
__device__ float __hip_precise_expf(float x) {
    return hc::precise_math::expf(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::rsqrt(x).
__device__ float __hip_precise_frsqrt_rn(float x) {
    return hc::precise_math::rsqrt(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::sqrt(x).
// Note: the body is the same call used by the _rn/_ru/_rz variants; no
// rounding-mode selection is visible here.
__device__ float __hip_precise_fsqrt_rd(float x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::sqrt(x).
// Note: the body is the same call used by the _rd/_ru/_rz variants; no
// rounding-mode selection is visible here.
__device__ float __hip_precise_fsqrt_rn(float x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::sqrt(x).
// Note: the body is the same call used by the _rd/_rn/_rz variants; no
// rounding-mode selection is visible here.
__device__ float __hip_precise_fsqrt_ru(float x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::sqrt(x).
// Note: the body is the same call used by the _rd/_rn/_ru variants; no
// rounding-mode selection is visible here.
__device__ float __hip_precise_fsqrt_rz(float x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::log10(x).
__device__ float __hip_precise_log10f(float x) {
    return hc::precise_math::log10(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::log2(x).
__device__ float __hip_precise_log2f(float x) {
    return hc::precise_math::log2(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::logf(x).
__device__ float __hip_precise_logf(float x) {
    return hc::precise_math::logf(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::powf(base, exponent).
__device__ float __hip_precise_powf(float base, float exponent)
{
    const float raised = hc::precise_math::powf(base, exponent);
    return raised;
}
|
||||
|
||||
// Device wrapper: passes x and the two output pointers straight through to
// hc::precise_math::sincosf, which is expected to fill *s and *c.
__device__ void __hip_precise_sincosf(float x, float* s, float* c)
{
    hc::precise_math::sincosf(x, s, c);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::sinf(x).
__device__ float __hip_precise_sinf(float x) {
    return hc::precise_math::sinf(x);
}
|
||||
|
||||
// Device wrapper: returns hc::precise_math::tanf(x).
__device__ float __hip_precise_tanf(float x) {
    return hc::precise_math::tanf(x);
}
|
||||
|
||||
// Double Precision Math
|
||||
// Device wrapper (double): returns hc::precise_math::sqrt(x).
// Note: identical body to the _rn/_ru/_rz variants; no rounding-mode
// selection is visible here.
__device__ double __hip_precise_dsqrt_rd(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper (double): returns hc::precise_math::sqrt(x).
// Note: identical body to the _rd/_ru/_rz variants; no rounding-mode
// selection is visible here.
__device__ double __hip_precise_dsqrt_rn(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper (double): returns hc::precise_math::sqrt(x).
// Note: identical body to the _rd/_rn/_rz variants; no rounding-mode
// selection is visible here.
__device__ double __hip_precise_dsqrt_ru(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Device wrapper (double): returns hc::precise_math::sqrt(x).
// Note: identical body to the _rd/_rn/_ru variants; no rounding-mode
// selection is visible here.
__device__ double __hip_precise_dsqrt_rz(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
#define LOG_BASE2_E 1.4426950408889634
|
||||
#define LOG_BASE2_10 3.32192809488736
|
||||
#define ONE_DIV_LOG_BASE2_E 0.69314718056
|
||||
#define ONE_DIV_LOG_BASE2_10 0.30102999566
|
||||
|
||||
// Fast Math Intrinsics
|
||||
// Fast base-10 exponential built on the fast base-2 exponential.
// Identity used: 10^x = 2^(x * log2(10)); LOG_BASE2_10 is log2(10).
// (The previous code scaled by LOG_BASE2_E, which evaluates e^x instead.)
__device__ float __hip_fast_exp10f(float x) { return __hip_fast_exp2f(x * LOG_BASE2_10); }
|
||||
|
||||
// Fast natural exponential built on the fast base-2 exponential.
// Identity used: e^x = 2^(x * log2(e)); LOG_BASE2_E is log2(e).
// (The previous code scaled by LOG_BASE2_10, which evaluates 10^x instead.)
__device__ float __hip_fast_expf(float x) { return __hip_fast_exp2f(x * LOG_BASE2_E); }
|
||||
|
||||
// Fast reciprocal square root: one divided by __hip_fast_fsqrt_rd(x).
__device__ float __hip_fast_frsqrt_rn(float x)
{
    const float root = __hip_fast_fsqrt_rd(x);
    return 1.0f / root;
}
|
||||
|
||||
// Forwards to __hip_fast_fsqrt_rd(x); the _rn suffix does not change the
// computation in this body.
__device__ float __hip_fast_fsqrt_rn(float x) {
    return __hip_fast_fsqrt_rd(x);
}
|
||||
|
||||
// Forwards to __hip_fast_fsqrt_rd(x); the _ru suffix does not change the
// computation in this body.
__device__ float __hip_fast_fsqrt_ru(float x) {
    return __hip_fast_fsqrt_rd(x);
}
|
||||
|
||||
// Forwards to __hip_fast_fsqrt_rd(x); the _rz suffix does not change the
// computation in this body.
__device__ float __hip_fast_fsqrt_rz(float x) {
    return __hip_fast_fsqrt_rd(x);
}
|
||||
|
||||
// Fast base-10 logarithm built on the fast base-2 logarithm.
// Identity used: log10(x) = log2(x) / log2(10); ONE_DIV_LOG_BASE2_10 is
// 1 / log2(10). (The previous code used ONE_DIV_LOG_BASE2_E, which yields
// the natural logarithm instead.)
__device__ float __hip_fast_log10f(float x) { return ONE_DIV_LOG_BASE2_10 * __hip_fast_log2f(x); }
|
||||
|
||||
// Fast natural logarithm built on the fast base-2 logarithm.
// Identity used: ln(x) = log2(x) / log2(e); ONE_DIV_LOG_BASE2_E is
// 1 / log2(e). (The previous code used ONE_DIV_LOG_BASE2_10, which yields
// the base-10 logarithm instead.)
__device__ float __hip_fast_logf(float x) { return ONE_DIV_LOG_BASE2_E * __hip_fast_log2f(x); }
|
||||
|
||||
// Device wrapper: returns hc::fast_math::powf(base, exponent).
__device__ float __hip_fast_powf(float base, float exponent)
{
    const float raised = hc::fast_math::powf(base, exponent);
    return raised;
}
|
||||
|
||||
// Writes __hip_fast_sinf(x) to *s and then __hip_fast_cosf(x) to *c.
// Both pointers are dereferenced unconditionally.
__device__ void __hip_fast_sincosf(float x, float* s, float* c)
{
    s[0] = __hip_fast_sinf(x);
    c[0] = __hip_fast_cosf(x);
}
|
||||
|
||||
// Device wrapper: returns hc::fast_math::tanf(x).
__device__ float __hip_fast_tanf(float x) {
    return hc::fast_math::tanf(x);
}
|
||||
|
||||
// Double Precision Math
|
||||
// FIXME - HCC doesn't have a fast_math version of double-precision sqrt.
|
||||
// Another issue is that these intrinsics call for a specific rounding mode;
|
||||
// however, their implementations all map to the same sqrt builtin.
|
||||
// "Fast" double sqrt, _rd variant: actually calls hc::precise_math::sqrt(x),
// the same call as the _rn/_ru/_rz variants.
__device__ double __hip_fast_dsqrt_rd(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// "Fast" double sqrt, _rn variant: actually calls hc::precise_math::sqrt(x),
// the same call as the _rd/_ru/_rz variants.
__device__ double __hip_fast_dsqrt_rn(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// "Fast" double sqrt, _ru variant: actually calls hc::precise_math::sqrt(x),
// the same call as the _rd/_rn/_rz variants.
__device__ double __hip_fast_dsqrt_ru(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// "Fast" double sqrt, _rz variant: actually calls hc::precise_math::sqrt(x),
// the same call as the _rd/_rn/_ru variants.
__device__ double __hip_fast_dsqrt_rz(double x) {
    return hc::precise_math::sqrt(x);
}
|
||||
|
||||
// Issues a sequentially-consistent fence via std::atomic_thread_fence.
__device__ void __threadfence_system(void)
{
    std::atomic_thread_fence(std::memory_order_seq_cst);
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user