diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp
index ba3707bc59..ff69087f82 100644
--- a/tests/src/deviceLib/hipMathFunctions.cpp
+++ b/tests/src/deviceLib/hipMathFunctions.cpp
@@ -20,12 +20,150 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
 
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
 #include "hip/hip_runtime.h"
 #include "test_common.h"
 
+// NOTE(review): the two kernels below are only compiled when one of these
+// architecture macros is non-zero, while check_lgamma_double() further down
+// references kernel_lgamma_double unconditionally -- confirm that builds for
+// other targets still compile.
+#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
+
+// Each thread tx stores abs(input[tx]) into output[tx]. There is no bounds
+// check, so both arrays must hold at least blockDim.x elements.
+__global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) {
+  int tx = threadIdx.x;
+  output[tx] = abs(input[tx]);
+}
+
+// Each thread tx stores lgamma(input[tx]) into output[tx]. There is no bounds
+// check, so both arrays must hold at least blockDim.x elements.
+__global__ void kernel_lgamma_double(hipLaunchParm lp, double *input, double *output) {
+  int tx = threadIdx.x;
+  output[tx] = lgamma(input[tx]);
+}
+
+#endif
+
+// Reports a failure through failed() when OUT differs from EXP; IN is only
+// used in the message.
+// NOTE(review): the comparison is exact. If EXP is computed on the host, a
+// device lgamma() result may legitimately differ in the last bits -- consider
+// a tolerance-based comparison.
+#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \
+  { \
+    if ((OUT) != (EXP)) { \
+      failed("check_lgamma_double failed on %f (output = %f, expected = %f)\n", IN, OUT, EXP); \
+    } \
+  }
+
+// Reports a failure through failed() when OUT differs from EXP; IN is only
+// used in the message.
+#define CHECK_ABS_INT64(IN, OUT, EXP) \
+  { \
+    if ((OUT) != (EXP)) { \
+      failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, EXP); \
+    } \
+  }
+
+// Reports through failed() when a HIP runtime call did not return hipSuccess.
+// `what` names the call for the message.
+static void check_hip_status(hipError_t status, const char *what) {
+  if (status != hipSuccess) {
+    failed("%s failed: %s\n", what, hipGetErrorString(status));
+  }
+}
+
+// Host-side driver for kernel_lgamma_double: fills NUM_INPUTS doubles, copies
+// them to the device, runs the kernel with one block of NUM_INPUTS threads and
+// copies the results back for checking.
+void check_lgamma_double() {
+
+  using datatype_t = double;
+
+  const int NUM_INPUTS = 8;
+  auto memsize = NUM_INPUTS * sizeof(datatype_t);
+
+  // allocate memories
+  datatype_t *inputCPU = (datatype_t *) malloc(memsize);
+  datatype_t *outputCPU = (datatype_t *) malloc(memsize);
+  if (inputCPU == nullptr || outputCPU == nullptr) {
+    failed("check_lgamma_double: host allocation of %zu bytes failed\n", memsize);
+  }
+  datatype_t *inputGPU = nullptr;
+  check_hip_status(hipMalloc((void**)&inputGPU, memsize), "hipMalloc(inputGPU)");
+  datatype_t *outputGPU = nullptr;
+  check_hip_status(hipMalloc((void**)&outputGPU, memsize), "hipMalloc(outputGPU)");
+
+  // populate input: one value per element (previously every value was
+  // written to index 0, leaving elements 1..7 uninitialised)
+  inputCPU[0] = -3.5;
+  inputCPU[1] = -2.5;
+  inputCPU[2] = -1.5;
+  inputCPU[3] = -0.5;
+  inputCPU[4] = 0.5;
+  inputCPU[5] = 1.5;
+  inputCPU[6] = 2.5;
+  inputCPU[7] = 3.5;
+
+  // copy inputs to device
+  check_hip_status(hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice),
+                   "hipMemcpy(host to device)");
+
+  // launch kernel
+  hipLaunchKernel(kernel_lgamma_double, dim3(1), dim3(NUM_INPUTS), 0, 0,
+                  inputGPU, outputGPU);
+  check_hip_status(hipGetLastError(), "kernel_lgamma_double launch");
+
+  // copy outputs from device
+  check_hip_status(hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost),
+                   "hipMemcpy(device to host)");
+
+  // check outputs
+  for (int i=0; i #include #include "hip/hip_runtime.h"
@@ -59,8 +65,143 @@ __global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C) {
   c = __hmul2_sat(b, c);
 }
 
+// Each thread tx stores the result of __hisnan(input[tx]) into output[tx].
+// There is no bounds check, so both arrays must hold at least blockDim.x elements.
+__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) {
+  int tx = threadIdx.x;
+  output[tx] = __hisnan(input[tx]);
+}
+
+// Each thread tx stores the result of __hisinf(input[tx]) into output[tx].
+// There is no bounds check, so both arrays must hold at least blockDim.x elements.
+__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) {
+  int tx = threadIdx.x;
+  output[tx] = __hisinf(input[tx]);
+}
+
 #endif
+
+// Returns a __half whose storage holds the bits of `s`, by writing one union
+// member and reading the other.
+// NOTE(review): reading the inactive union member is not guaranteed by ISO
+// C++ (compilers commonly allow it as an extension); a memcpy-based copy
+// would be the portable form.
+__half host_ushort_as_half(unsigned short s) {
+  union {__half h; unsigned short s; } converter;
+  converter.s = s;
+  return converter.h;
+}
+
+
+// Runs kernel_hisnan over the NUM_INPUTS device values in inputGPU using one
+// block of NUM_INPUTS threads and copies the int results back to the host.
+// NOTE(review): the return values of hipMalloc, malloc and hipMemcpy are
+// ignored here -- consider checking them with whatever failure helper this
+// file provides.
+void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
+
+  // allocate memory
+  auto memsize = NUM_INPUTS * sizeof(int);
+  int* outputGPU = nullptr;
+  hipMalloc((void**)&outputGPU, memsize);
+
+  // launch the kernel
+  hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
+
+  // copy output from device
+  int* outputCPU = (int*) malloc(memsize);
+  hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
+
+  // check output
+  for (int i=0; i