Switch to using ROCDL directly, as opposed to via HC. Add missing bits.

Этот коммит содержится в:
Alex Voicu
2018-05-31 03:17:26 +01:00
родитель fea366cc89
Коммит 59db16fd36
10 изменённых файлов: 1550 добавлений и 1045 удалений
-91
Просмотреть файл
@@ -943,97 +943,6 @@ __device__ void* __get_dynamicgroupbaseptr() {
// Host-side version of __get_dynamicgroupbaseptr: always yields a null
// pointer.
__host__ void* __get_dynamicgroupbaseptr() {
    void* const no_base = nullptr;
    return no_base;
}
// Precise Math Functions
// Single-precision cosine; forwards to hc::precise_math::cosf.
__device__ float __hip_precise_cosf(float x) {
    const float cosine = hc::precise_math::cosf(x);
    return cosine;
}
// Single-precision base-10 exponential; forwards to
// hc::precise_math::exp10f.
__device__ float __hip_precise_exp10f(float x) {
    const float power_of_ten = hc::precise_math::exp10f(x);
    return power_of_ten;
}
// Single-precision exponential; forwards to hc::precise_math::expf.
__device__ float __hip_precise_expf(float x) {
    const float exponential = hc::precise_math::expf(x);
    return exponential;
}
// Single-precision reciprocal square root; forwards to
// hc::precise_math::rsqrt.
__device__ float __hip_precise_frsqrt_rn(float x) {
    const float inv_root = hc::precise_math::rsqrt(x);
    return inv_root;
}
// Single-precision square root, "_rd" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ float __hip_precise_fsqrt_rd(float x) {
    const float root = hc::precise_math::sqrt(x);
    return root;
}
// Single-precision square root, "_rn" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ float __hip_precise_fsqrt_rn(float x) {
    const float root = hc::precise_math::sqrt(x);
    return root;
}
// Single-precision square root, "_ru" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ float __hip_precise_fsqrt_ru(float x) {
    const float root = hc::precise_math::sqrt(x);
    return root;
}
// Single-precision square root, "_rz" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ float __hip_precise_fsqrt_rz(float x) {
    const float root = hc::precise_math::sqrt(x);
    return root;
}
// Single-precision base-10 logarithm; forwards to
// hc::precise_math::log10 with a float argument.
__device__ float __hip_precise_log10f(float x) {
    const float log_base10 = hc::precise_math::log10(x);
    return log_base10;
}
// Single-precision base-2 logarithm; forwards to
// hc::precise_math::log2 with a float argument.
__device__ float __hip_precise_log2f(float x) {
    const float log_base2 = hc::precise_math::log2(x);
    return log_base2;
}
// Single-precision logarithm; forwards to hc::precise_math::logf.
__device__ float __hip_precise_logf(float x) {
    const float logarithm = hc::precise_math::logf(x);
    return logarithm;
}
// Single-precision power function: passes (base, exponent) through to
// hc::precise_math::powf and returns its result.
__device__ float __hip_precise_powf(float base, float exponent) {
    const float raised = hc::precise_math::powf(base, exponent);
    return raised;
}
// Combined sine/cosine: hands x and both output pointers straight to
// hc::precise_math::sincosf.
//   s - output pointer forwarded as the second argument
//   c - output pointer forwarded as the third argument
__device__ void __hip_precise_sincosf(float x, float* s, float* c)
{
    hc::precise_math::sincosf(x, s, c);
    return;
}
// Single-precision sine; forwards to hc::precise_math::sinf.
__device__ float __hip_precise_sinf(float x) {
    const float sine = hc::precise_math::sinf(x);
    return sine;
}
// Single-precision tangent; forwards to hc::precise_math::tanf.
__device__ float __hip_precise_tanf(float x) {
    const float tangent = hc::precise_math::tanf(x);
    return tangent;
}
// Double Precision Math
// Double-precision square root, "_rd" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ double __hip_precise_dsqrt_rd(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// Double-precision square root, "_rn" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ double __hip_precise_dsqrt_rn(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// Double-precision square root, "_ru" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ double __hip_precise_dsqrt_ru(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// Double-precision square root, "_rz" variant.
// No rounding-mode handling is done here: the call is the plain
// hc::precise_math::sqrt.
__device__ double __hip_precise_dsqrt_rz(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// Base-2 logarithm conversion factors. All four are unsuffixed literals,
// i.e. of type double.
// log2(e)
#define LOG_BASE2_E 1.4426950408889634
// log2(10)
#define LOG_BASE2_10 3.32192809488736
// 1 / log2(e), i.e. ln(2)
#define ONE_DIV_LOG_BASE2_E 0.69314718056
// 1 / log2(10), i.e. log10(2)
#define ONE_DIV_LOG_BASE2_10 0.30102999566
// Fast Math Intrinsics
// Fast base-10 exponential built on the fast base-2 exponential:
//   10^x == 2^(x * log2(10))
// The scale factor therefore has to be LOG_BASE2_10. Scaling by LOG_BASE2_E
// would compute e^x instead. The constant is narrowed to float so the
// multiply stays in single precision.
__device__ float __hip_fast_exp10f(float x) {
    return __hip_fast_exp2f(x * static_cast<float>(LOG_BASE2_10));
}
// Fast natural exponential built on the fast base-2 exponential:
//   e^x == 2^(x * log2(e))
// The scale factor therefore has to be LOG_BASE2_E. Scaling by LOG_BASE2_10
// would compute 10^x instead. The constant is narrowed to float so the
// multiply stays in single precision.
__device__ float __hip_fast_expf(float x) {
    return __hip_fast_exp2f(x * static_cast<float>(LOG_BASE2_E));
}
// Fast reciprocal square root: one divided by the result of
// __hip_fast_fsqrt_rd(x). A float literal is used for the numerator so the
// expression is visibly single precision, and the stray empty statement that
// followed the return has been dropped.
__device__ float __hip_fast_frsqrt_rn(float x) {
    return 1.0f / __hip_fast_fsqrt_rd(x);
}
// Fast single-precision square root, "_rn" variant.
// Simply forwards to __hip_fast_fsqrt_rd; nothing rn-specific is done here.
__device__ float __hip_fast_fsqrt_rn(float x) {
    const float root = __hip_fast_fsqrt_rd(x);
    return root;
}
// Fast single-precision square root, "_ru" variant.
// Simply forwards to __hip_fast_fsqrt_rd; nothing ru-specific is done here.
__device__ float __hip_fast_fsqrt_ru(float x) {
    const float root = __hip_fast_fsqrt_rd(x);
    return root;
}
// Fast single-precision square root, "_rz" variant.
// Simply forwards to __hip_fast_fsqrt_rd; nothing rz-specific is done here.
__device__ float __hip_fast_fsqrt_rz(float x) {
    const float root = __hip_fast_fsqrt_rd(x);
    return root;
}
// Fast base-10 logarithm built on the fast base-2 logarithm:
//   log10(x) == log2(x) / log2(10)
// The factor therefore has to be ONE_DIV_LOG_BASE2_10 (log10(2)).
// Multiplying by ONE_DIV_LOG_BASE2_E (ln 2) would compute ln(x) instead.
// The constant is narrowed to float so the multiply stays in single
// precision.
__device__ float __hip_fast_log10f(float x) {
    return static_cast<float>(ONE_DIV_LOG_BASE2_10) * __hip_fast_log2f(x);
}
// Fast natural logarithm built on the fast base-2 logarithm:
//   ln(x) == log2(x) / log2(e)
// The factor therefore has to be ONE_DIV_LOG_BASE2_E (ln 2).
// Multiplying by ONE_DIV_LOG_BASE2_10 (log10(2)) would compute log10(x)
// instead. The constant is narrowed to float so the multiply stays in single
// precision.
__device__ float __hip_fast_logf(float x) {
    return static_cast<float>(ONE_DIV_LOG_BASE2_E) * __hip_fast_log2f(x);
}
// Fast power function: passes (base, exponent) through to
// hc::fast_math::powf and returns its result.
__device__ float __hip_fast_powf(float base, float exponent) {
    const float raised = hc::fast_math::powf(base, exponent);
    return raised;
}
// Fast combined sine/cosine.
//   s - receives __hip_fast_sinf(x)
//   c - receives __hip_fast_cosf(x)
// The sine is computed and stored first, then the cosine.
__device__ void __hip_fast_sincosf(float x, float* s, float* c) {
    const float sine = __hip_fast_sinf(x);
    *s = sine;

    const float cosine = __hip_fast_cosf(x);
    *c = cosine;
}
// Fast tangent; forwards to hc::fast_math::tanf.
__device__ float __hip_fast_tanf(float x) {
    const float tangent = hc::fast_math::tanf(x);
    return tangent;
}
// Double Precision Math
// FIXME - HCC doesn't have a fast_math version of double-precision sqrt.
// Another issue is that these intrinsics call for a specific rounding mode;
// however, their implementations all map to the same sqrt builtin.
// "Fast" double-precision square root, "_rd" variant.
// Forwards to hc::precise_math::sqrt (not a fast_math routine) and does no
// rounding-mode handling.
__device__ double __hip_fast_dsqrt_rd(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// "Fast" double-precision square root, "_rn" variant.
// Forwards to hc::precise_math::sqrt (not a fast_math routine) and does no
// rounding-mode handling.
__device__ double __hip_fast_dsqrt_rn(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// "Fast" double-precision square root, "_ru" variant.
// Forwards to hc::precise_math::sqrt (not a fast_math routine) and does no
// rounding-mode handling.
__device__ double __hip_fast_dsqrt_ru(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// "Fast" double-precision square root, "_rz" variant.
// Forwards to hc::precise_math::sqrt (not a fast_math routine) and does no
// rounding-mode handling.
__device__ double __hip_fast_dsqrt_rz(double x) {
    const double root = hc::precise_math::sqrt(x);
    return root;
}
// Implements __threadfence_system as a sequentially consistent
// std::atomic_thread_fence.
__device__ void __threadfence_system() {
    const std::memory_order ordering = std::memory_order_seq_cst;
    std::atomic_thread_fence(ordering);
}