Enable template max and min for HIP-Clang (#2028)

Previously the template max and min were enabled for HCC only. HIP-Clang also needs them for __fp16, since AMDMIGraphX uses them.

Change-Id: Id49322b7b89ef799accdf6b47627a6fce51d1ab5

[ROCm/hip commit: 808dae6813]
This commit is contained in:
Yaxun (Sam) Liu
2020-04-24 15:30:28 -04:00
committato da GitHub
parent 5b9a4d2a8e
commit f79898e90e
2 ha cambiato i file con 38 aggiunte e 7 eliminazioni
@@ -1410,12 +1410,18 @@ float func(float x, int y) \
}
__DEF_FLOAT_FUN2I(scalbn)
#if __HCC__
// Returns the smaller of two values of the same type.
// The operands are compared with operator<; whenever `arg1 < arg2` is false
// (for example when the operands compare equal), arg2 is returned.
template <class T>
__DEVICE__ inline static T min(T arg1, T arg2) {
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
// Returns the larger of two values of the same type.
// The operands are compared with operator>; whenever `arg1 > arg2` is false
// (for example when the operands compare equal), arg2 is returned.
template <class T>
__DEVICE__ inline static T max(T arg1, T arg2) {
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
#if __HCC__
// Mixed-signedness overload of min.
// The signed operand is converted to uint32_t first (so a negative arg2
// becomes a large unsigned value), then both uint32_t operands are forwarded
// to min.
__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
    return min(arg1, static_cast<uint32_t>(arg2));
}
@@ -1437,11 +1443,6 @@ __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long lo
return min((unsigned long long) arg1, arg2);
}*/
// Returns the larger of two values of the same type.
// The operands are compared with operator>; whenever `arg1 > arg2` is false
// (for example when the operands compare equal), arg2 is returned.
template <class T>
__DEVICE__ inline static T max(T arg1, T arg2) {
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
// Mixed-signedness overload of max.
// The signed operand is converted to uint32_t first (so a negative arg2
// becomes a large unsigned value), then both uint32_t operands are forwarded
// to max.
__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
    return max(arg1, static_cast<uint32_t>(arg2));
}
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc
* TEST: %t
* HIT_END
*/
@@ -158,6 +158,34 @@ void check_abs_int64() {
}
// Kernel that invokes the callable f once per executing thread and stores the
// result in the first element pointed to by out.
// The value returned by f() is converted to T by the assignment.
template <class T, class F>
__global__ void kernel_simple(F f, T* out) {
    out[0] = f();
}
template<class T, class F>
void check_simple(F f, T expected, const char* file, unsigned line) {
auto memsize = sizeof(T);
T *outputCPU = (T *) malloc(memsize);
T *outputGPU = nullptr;
hipMalloc((void**)&outputGPU, memsize);
hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU);
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
if (*outputCPU != expected) {
failed("%s line %u : check failed (output = %lf, expected = %lf)\n",
file, line, (double)(*outputCPU), (double)expected);
}
hipFree(outputGPU);
free(outputCPU);
}
// Calls check_simple on a callable and its expected result, passing the call
// site's file name and line number so that a failure can be located.
// The expansion intentionally has no trailing semicolon: the caller writes it,
// so `CHECK_SIMPLE(...);` is exactly one statement and stays valid inside an
// unbraced if/else.
#define CHECK_SIMPLE(lambda, expected) \
    check_simple(lambda, expected, __FILE__, __LINE__)
void test_fp16() {
CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f);
CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f);
}
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
@@ -165,5 +193,7 @@ int main(int argc, char* argv[]) {
// check_lgamma_double();
test_fp16();
passed();
}