From 16d2b986aaaf59739cb46529cf3add8b289e1ece Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Tue, 24 Apr 2018 18:10:07 +0000 Subject: [PATCH 1/3] Checkin to fix bugs in math functions. This change fixes the following bugs that were discovered while debuggnig TF unit test failures (cwise_ops_test) 1. __hisinf and __hisnan routines Both had incorrect implementations. 2. abs A "long long" (64bit int) version was missing, resulting in the 32bit version being used for 64bit ints (which resulted in incorrect results, when the value passed in was outside the 32bit int range) 3. lgamma We seemed to have a custom version for the 'double' datatype (which was giving incorrect results). Replaced it with a call to the 'double' version of the underlying 'hc::precision_math::lgamma' [ROCm/hip commit: af586bbbf292d01dd6d305bc637aeefcaf2e0c86] --- .../include/hip/hcc_detail/math_functions.h | 1 + projects/hip/src/hip_fp16.cpp | 24 ++++++++++++------- projects/hip/src/math_functions.cpp | 12 ++++------ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index 5482f34093..e717df07c1 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -57,6 +57,7 @@ __device__ float exp2f(float x); __device__ float expf(float x); __device__ float expm1f(float x); __device__ int abs(int x); +__device__ long long abs(long long x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); __device__ float fdividef(float x, float y); diff --git a/projects/hip/src/hip_fp16.cpp b/projects/hip/src/hip_fp16.cpp index e4c1b43786..3004b7805d 100644 --- a/projects/hip/src/hip_fp16.cpp +++ b/projects/hip/src/hip_fp16.cpp @@ -29,10 +29,6 @@ struct hipHalfHolder { }; }; -#define HINF 65504 - -__device__ static struct hipHalfHolder __hInfValue = {HINF}; - __device__ __half __hadd(__half a, __half b) { return a + b; } __device__ __half __hadd_sat(__half a, __half b) { return a + b; } @@ -63,9 +59,21 @@ __device__ bool __hge(__half a, __half b) { return a >= b ? true : false; } __device__ bool __hgt(__half a, __half b) { return a > b ? true : false; } -__device__ bool __hisinf(__half a) { return a == HINF ? true : false; } +__device__ bool __hisinf(__half a) { + hipHalfHolder hH; + hH.h = a; + // mask with 0x7fff to drop the sign bit + // 0x7c00 is bit pattern for inf (exp = 11111, significand = 0) + return ((hH.s & 0x7fff) == 0x7c00) ? true : false; +} -__device__ bool __hisnan(__half a) { return a > HINF ? true : false; } +__device__ bool __hisnan(__half a) { + hipHalfHolder hH; + hH.h = a; + // mask with 0x7fff to drop the sign bit + // 0x7cXX is bit pattern for inf (exp = 11111, significand = 0) + return ((hH.s & 0x7fff) > 0x7c00) ? true : false; +} __device__ bool __hle(__half a, __half b) { return a <= b ? true : false; } @@ -124,8 +132,8 @@ __device__ __half2 __hgt2(__half2 a, __half2 b) { __device__ __half2 __hisnan2(__half2 a) { __half2 c; - c.x = (a.x > HINF) ? (__half)1 : (__half)0; - c.y = (a.y > HINF) ? (__half)1 : (__half)0; + c.x = (__hisnan(a.x)) ? (__half)1 : (__half)0; + c.y = (__hisnan(a.y)) ? (__half)1 : (__half)0; return c; } diff --git a/projects/hip/src/math_functions.cpp b/projects/hip/src/math_functions.cpp index 3c0a7f6541..dedc40f2ae 100644 --- a/projects/hip/src/math_functions.cpp +++ b/projects/hip/src/math_functions.cpp @@ -56,6 +56,9 @@ __device__ float expm1f(float x) { return hc::precise_math::expm1f(x); } __device__ int abs(int x) { return x >= 0 ? x : -x; // TODO - optimize with OCML } +__device__ long long abs(long long x) { + return x >= 0 ? x : -x; +} __device__ float fabsf(float x) { return hc::precise_math::fabsf(x); } __device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); } __device__ float fdividef(float x, float y) { return x / y; } @@ -220,14 +223,7 @@ __device__ double j0(double x) { return __hip_j0(x); } __device__ double j1(double x) { return __hip_j1(x); } __device__ double jn(int n, double x) { return __hip_jn(n, x); } __device__ double ldexp(double x, int exp) { return hc::precise_math::ldexp(x, exp); } -__device__ double lgamma(double x) { - double val = 0.0; - double y = x - 1; - while (y > 0) { - val += log(y--); - } - return val; -} +__device__ double lgamma(double x) { return hc::precise_math::lgamma(x); } __device__ long long int llrint(double x) { long long int y = hc::precise_math::round(x); return y; From 2b3332ac726c620abf16ddbb77a4d48dbdebfa0e Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 25 Apr 2018 17:53:21 +0000 Subject: [PATCH 2/3] Checkin to add unit tests for fixes in my previous commit This change adds unit tests for 1. __hisinf 2. __hisnan 2. abs(long long) 3. lgamma(double) [ROCm/hip commit: 4be4cf644f576aa3b5f0455b7e38add755dbecb7] --- .../tests/src/deviceLib/hipMathFunctions.cpp | 138 +++++++++++++++++ .../hip/tests/src/deviceLib/hipTestHalf.cpp | 145 ++++++++++++++++++ 2 files changed, 283 insertions(+) diff --git a/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp b/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp index ba3707bc59..ff69087f82 100644 --- a/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp +++ b/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp @@ -20,12 +20,150 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ #include "hip/hip_runtime.h" #include "test_common.h" +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ + +__global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) { + int tx = threadIdx.x; + output[tx] = abs(input[tx]); +} + +__global__ void kernel_lgamma_double(hipLaunchParm lp, double *input, double *output) { + int tx = threadIdx.x; + output[tx] = lgamma(input[tx]); +} + +#endif + +#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \ + { \ + if (OUT != EXP) { \ + failed("check_abs_int64 failed on %f (output = %f, expected = %fd)\n", IN, OUT, EXP); \ + } \ + } + +#define CHECK_ABS_INT64(IN, OUT, EXP) \ + { \ + if (OUT != EXP) { \ + failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, EXP); \ + } \ + } + +void check_lgamma_double() { + + using datatype_t = double; + + const int NUM_INPUTS = 8; + auto memsize = NUM_INPUTS * sizeof(datatype_t); + + // allocate memories + datatype_t *inputCPU = (datatype_t *) malloc(memsize); + datatype_t *outputCPU = (datatype_t *) malloc(memsize); + datatype_t *inputGPU = nullptr; hipMalloc((void**)&inputGPU, memsize); + datatype_t *outputGPU = nullptr; hipMalloc((void**)&outputGPU, memsize); + + // populate input + inputCPU[0] = -3.5; + inputCPU[0] = -2.5; + inputCPU[0] = -1.5; + inputCPU[0] = -0.5; + inputCPU[0] = 0.5; + inputCPU[0] = 1.5; + inputCPU[0] = 2.5; + inputCPU[0] = 3.5; + + // copy inputs to device + hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice); + + // launch kernel + hipLaunchKernel(kernel_lgamma_double, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); + + // copy outputs from device + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + + // check outputs + for (int i=0; i #include #include "hip/hip_runtime.h" @@ -59,8 +65,143 @@ __global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C c = __hmul2_sat(b, c); } +__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) { + int tx = threadIdx.x; + output[tx] = __hisnan(input[tx]); +} + +__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) { + int tx = threadIdx.x; + output[tx] = __hisinf(input[tx]); +} + #endif + +__half host_ushort_as_half(unsigned short s) { + union {__half h; unsigned short s; } converter; + converter.s = s; + return converter.h; +} + + +void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) { + + // allocate memory + auto memsize = NUM_INPUTS * sizeof(int); + int* outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + + // launch the kernel + hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); + + // copy output from device + int* outputCPU = (int*) malloc(memsize); + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + + // check output + for (int i=0; i Date: Fri, 27 Apr 2018 12:59:51 +0000 Subject: [PATCH 3/3] Fixing a copy/paste error in my previous checkin [ROCm/hip commit: 76a7d7e374ff193dfa73dcdc0504895f23aaada6] --- projects/hip/tests/src/deviceLib/hipMathFunctions.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp b/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp index ff69087f82..7fe0003672 100644 --- a/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp +++ b/projects/hip/tests/src/deviceLib/hipMathFunctions.cpp @@ -71,14 +71,9 @@ void check_lgamma_double() { datatype_t *outputGPU = nullptr; hipMalloc((void**)&outputGPU, memsize); // populate input - inputCPU[0] = -3.5; - inputCPU[0] = -2.5; - inputCPU[0] = -1.5; - inputCPU[0] = -0.5; - inputCPU[0] = 0.5; - inputCPU[0] = 1.5; - inputCPU[0] = 2.5; - inputCPU[0] = 3.5; + for (int i=0; i