Merge "Enable template max and min for HIP-Clang" into amd-master-next
This commit is contained in:
@@ -1397,12 +1397,18 @@ float func(float x, int y) \
|
||||
}
|
||||
__DEF_FLOAT_FUN2I(scalbn)
|
||||
|
||||
#if __HCC__
|
||||
template<class T>
|
||||
__DEVICE__ inline static T min(T arg1, T arg2) {
|
||||
return (arg1 < arg2) ? arg1 : arg2;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__DEVICE__ inline static T max(T arg1, T arg2) {
|
||||
return (arg1 > arg2) ? arg1 : arg2;
|
||||
}
|
||||
|
||||
#if __HCC__
|
||||
|
||||
// Mixed-signedness minimum: the signed operand is converted to uint32_t
// (a negative value wraps to a large unsigned value) and the comparison is
// then done by calling min with two uint32_t arguments.
__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
    return min(arg1, static_cast<uint32_t>(arg2));
}
|
||||
@@ -1424,11 +1430,6 @@ __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long lo
|
||||
return min((unsigned long long) arg1, arg2);
|
||||
}*/
|
||||
|
||||
template<class T>
|
||||
__DEVICE__ inline static T max(T arg1, T arg2) {
|
||||
return (arg1 > arg2) ? arg1 : arg2;
|
||||
}
|
||||
|
||||
// Mixed-signedness maximum: the signed operand is converted to uint32_t
// (a negative value wraps to a large unsigned value) and the comparison is
// then done by calling max with two uint32_t arguments.
__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
    return max(arg1, static_cast<uint32_t>(arg2));
}
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -146,17 +146,45 @@ void check_abs_int64() {
|
||||
CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]);
|
||||
CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]);
|
||||
CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]);
|
||||
|
||||
|
||||
// free memories
|
||||
hipFree(inputGPU);
|
||||
hipFree(outputGPU);
|
||||
free(inputCPU);
|
||||
free(outputCPU);
|
||||
|
||||
|
||||
// done
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class T, class F>
|
||||
__global__ void kernel_simple(F f, T *out) {
|
||||
*out = f();
|
||||
}
|
||||
|
||||
template<class T, class F>
|
||||
void check_simple(F f, T expected, const char* file, unsigned line) {
|
||||
auto memsize = sizeof(T);
|
||||
T *outputCPU = (T *) malloc(memsize);
|
||||
T *outputGPU = nullptr;
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU);
|
||||
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
|
||||
if (*outputCPU != expected) {
|
||||
failed("%s line %u : check failed (output = %lf, expected = %lf)\n",
|
||||
file, line, (double)(*outputCPU), (double)expected);
|
||||
}
|
||||
hipFree(outputGPU);
|
||||
free(outputCPU);
|
||||
}
|
||||
// Calls check_simple with the caller's file name and line number attached.
// The expansion deliberately has no trailing semicolon: the caller writes
// `CHECK_SIMPLE(...);`, which then expands to exactly one statement and is
// safe inside an unbraced if/else.
#define CHECK_SIMPLE(lambda, expected) \
    check_simple(lambda, expected, __FILE__, __LINE__)
|
||||
|
||||
void test_fp16() {
|
||||
CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f);
|
||||
CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
@@ -164,6 +192,8 @@ int main(int argc, char* argv[]) {
|
||||
check_abs_int64();
|
||||
|
||||
// check_lgamma_double();
|
||||
|
||||
|
||||
test_fp16();
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
Reference in new issue
Block a user