diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 28d3ae7051..a166935823 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -392,10 +392,53 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri typedef int hipLaunchParm; +template ::type* = nullptr> +void pArgs(const std::tuple&, void*) {} + +template ::type* = nullptr> +void pArgs(const std::tuple& formals, void** _vargs) { + using T = typename std::tuple_element >::type; + + static_assert(!std::is_reference{}, + "A __global__ function cannot have a reference as one of its " + "arguments."); +#if defined(HIP_STRICT) + static_assert(std::is_trivially_copyable{}, + "Only TriviallyCopyable types can be arguments to a __global__ " + "function"); +#endif + _vargs[n] = const_cast(reinterpret_cast(&std::get(formals))); + return pArgs(formals, _vargs); +} + +template +std::tuple validateArgsCountType(void (*kernel)(Formals...), std::tuple(actuals)) { + static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch"); + std::tuple to_formals{std::move(actuals)}; + return to_formals; +} + +#if defined(HIP_TEMPLATE_KERNEL_LAUNCH) +template +void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast(kernel); + hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream); +} +#else #define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) \ do { \ kernelName<<<(numblocks), (numthreads), (memperblock), (streamId)>>>(__VA_ARGS__); \ } while (0) +#endif #include diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h index df5c4e5b60..e374707cb3 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h @@ -3378,6 +3378,11 @@ hipError_t hipLaunchKernel(const void* function_address, hipStream_t stream __dparm(0)); #if __HIP_ROCclr__ || !defined(__HCC__) +//TODO: Move this to hip_ext.h +hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); + hipError_t hipBindTexture( size_t* offset, const textureReference* tex, diff --git a/projects/hip/include/hip/hip_ext.h b/projects/hip/include/hip/hip_ext.h index 90d1e34d2d..c16e841719 100644 --- a/projects/hip/include/hip/hip_ext.h +++ b/projects/hip/include/hip/hip_ext.h @@ -23,6 +23,10 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_HIP_EXT_H #define HIP_INCLUDE_HIP_HIP_EXT_H #include "hip/hip_runtime.h" +#if defined(__cplusplus) +#include +#include +#endif #ifdef __HCC__ // Forward declarations: @@ -109,8 +113,29 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t stopEvent = nullptr) __attribute__((deprecated("use hipExtModuleLaunchKernel instead"))); -//#if !__HIP_ROCclr__ && defined(__cplusplus) -#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) +#if defined(__HIP_ROCclr__) && defined(__cplusplus) + +extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags); + +template +inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags, + Args... args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast(kernel); + hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent, + stopEvent, (int)flags); +} +#elif defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) //kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp" namespace hip_impl { diff --git a/projects/hip/rocclr/hip_hcc.def.in b/projects/hip/rocclr/hip_hcc.def.in index a3476c7f33..3a54928764 100755 --- a/projects/hip/rocclr/hip_hcc.def.in +++ b/projects/hip/rocclr/hip_hcc.def.in @@ -51,6 +51,7 @@ hipExtGetLinkTypeAndHopCount hipExtLaunchMultiKernelMultiDevice hipExtMallocWithFlags hipExtModuleLaunchKernel +hipExtLaunchKernel hipFree hipFreeArray hipFuncSetCacheConfig diff --git a/projects/hip/rocclr/hip_hcc.map.in b/projects/hip/rocclr/hip_hcc.map.in index 11637f2696..42c5335f35 100755 --- a/projects/hip/rocclr/hip_hcc.map.in +++ b/projects/hip/rocclr/hip_hcc.map.in @@ -52,6 +52,7 @@ global: hipExtLaunchMultiKernelMultiDevice; hipExtMallocWithFlags; hipExtModuleLaunchKernel; + hipExtLaunchKernel; hipFree; hipFreeArray; hipFuncSetCacheConfig; diff --git a/projects/hip/rocclr/hip_module.cpp b/projects/hip/rocclr/hip_module.cpp index 8f3d4ca936..a29ad9a962 100755 --- a/projects/hip/rocclr/hip_module.cpp +++ b/projects/hip/rocclr/hip_module.cpp @@ -29,6 +29,16 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size); +extern hipError_t ihipLaunchKernel(const void* hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream, + hipEvent_t startEvent, + hipEvent_t stopEvent, + int flags); + const std::string& FunctionName(const hipFunction_t f) { return hip::Function::asFunction(f)->function_->name(); @@ -539,6 +549,31 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } +extern "C" hipError_t hipLaunchKernel(const void *hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream) +{ + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr, nullptr, 0)); +} + +extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream, + hipEvent_t startEvent, + hipEvent_t stopEvent, + int flags) +{ + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags)); +} + hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/rocclr/hip_platform.cpp index d23197bd9e..55bc3e8e7b 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/rocclr/hip_platform.cpp @@ -62,6 +62,14 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes); +extern hipError_t ihipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, + uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0); static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) { // Workaround for device name mismatch. @@ -1339,16 +1347,16 @@ void hipLaunchCooperativeKernelGGLImpl( #endif // defined(ATI_OS_LINUX) -extern "C" hipError_t hipLaunchKernel(const void *hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream) +hipError_t ihipLaunchKernel(const void* hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream, + hipEvent_t startEvent, + hipEvent_t stopEvent, + int flags) { - HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, - stream); - hip::Stream* s = reinterpret_cast(stream); int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice(); if (deviceId == -1) { @@ -1368,10 +1376,10 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, HIP_RETURN(hipErrorInvalidDeviceFunction); #endif } - - HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, - blockDim.x, blockDim.y, blockDim.z, - sharedMemBytes, stream, args, nullptr)); + HIP_RETURN(ihipModuleLaunchKernel(func, (gridDim.x * blockDim.x), (gridDim.y * blockDim.y), + (gridDim.z * blockDim.z), blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, stream, args, nullptr, startEvent, stopEvent, + flags)); } // conversion routines between float and half precision diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index 0f608d9843..c2ecff3366 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -1709,12 +1709,33 @@ hipError_t hipLaunchKernel( const void* func_addr, dim3 numBlocks, dim3 dimBlocks, void** args, size_t sharedMemBytes, hipStream_t stream) { - HIP_INIT_API(hipLaunchKernel,func_addr,numBlocks,dimBlocks,args,sharedMemBytes,stream); + HIP_INIT_API(hipLaunchKernel,func_addr,numBlocks,dimBlocks,args,sharedMemBytes,stream); - hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr, + hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr, hip_impl::target_agent(stream)); - return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z, + return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, stream, args, nullptr); } + +hipError_t hipExtLaunchKernel(const void* function, dim3 numBlocks, dim3 dimBlocks, void** args, + size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags) { + HIP_INIT_API(hipExtLaunchKernel,function,numBlocks,dimBlocks,args,sharedMemBytes,stream,startEvent,stopEvent,flags); + + hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)function, + hip_impl::target_agent(stream)); + + uint32_t globalWorkSizeX = numBlocks.x * dimBlocks.x; + uint32_t globalWorkSizeY = numBlocks.y * dimBlocks.y; + uint32_t globalWorkSizeZ = numBlocks.z * dimBlocks.z; + if (globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || + globalWorkSizeZ > UINT32_MAX) { + return hipErrorInvalidConfiguration; + } + + return ihipLogStatus(ihipModuleLaunchKernel( + tls, kd, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, dimBlocks.x, dimBlocks.y, + dimBlocks.z, sharedMemBytes, stream, args, nullptr, startEvent, stopEvent, flags)); +} diff --git a/projects/hip/tests/src/deviceLib/hipLaunchKernelFunc.cpp b/projects/hip/tests/src/deviceLib/hipLaunchKernelFunc.cpp new file mode 100644 index 0000000000..75dbc81da7 --- /dev/null +++ b/projects/hip/tests/src/deviceLib/hipLaunchKernelFunc.cpp @@ -0,0 +1,193 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +#define HIP_TEMPLATE_KERNEL_LAUNCH +#include "hip/hip_runtime.h" +#include "test_common.h" + +__global__ void kernel_abs_int64(long long* input, long long* output) { + int tx = threadIdx.x; + output[tx] = abs(input[tx]); +} + +__global__ void kernel_lgamma_double(double* input, double* output) { + int tx = threadIdx.x; + output[tx] = lgamma(input[tx]); +} + +#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \ + { \ + if (OUT != EXP) { \ + failed("check_abs_int64 failed on %f (output = %f, expected = %fd)\n", IN, OUT, EXP); \ + } \ + } + +#define CHECK_ABS_INT64(IN, OUT, EXP) \ + { \ + if (OUT != EXP) { \ + failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, \ + EXP); \ + } \ + } + +void check_lgamma_double() { + using datatype_t = double; + + const int NUM_INPUTS = 8; + auto memsize = NUM_INPUTS * sizeof(datatype_t); + + // allocate memories + datatype_t* inputCPU = (datatype_t*)malloc(memsize); + datatype_t* outputCPU = (datatype_t*)malloc(memsize); + datatype_t* inputGPU = nullptr; + hipMalloc((void**)&inputGPU, memsize); + datatype_t* outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + + // populate input + for (int i = 0; i < NUM_INPUTS; i++) { + inputCPU[i] = -3.5 + i; + } + + // copy inputs to device + hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice); + + // launch kernel + hipLaunchKernelGGL(kernel_lgamma_double, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); + + // copy outputs from device + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + + // check outputs + for (int i = 0; i < NUM_INPUTS; i++) { + CHECK_LGAMMA_DOUBLE(inputCPU[i], outputCPU[i], lgamma(inputCPU[i])); + } + + // free memories + hipFree(inputGPU); + hipFree(outputGPU); + free(inputCPU); + free(outputCPU); + + // done + return; +} + + +void check_abs_int64() { + using datatype_t = long long; + + const int NUM_INPUTS = 8; + auto memsize = NUM_INPUTS * sizeof(datatype_t); + + // allocate memories + datatype_t* inputCPU = (datatype_t*)malloc(memsize); + datatype_t* outputCPU = (datatype_t*)malloc(memsize); + datatype_t* inputGPU = nullptr; + hipMalloc((void**)&inputGPU, memsize); + datatype_t* outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + + // populate input + inputCPU[0] = -81985529216486895ll; + inputCPU[1] = 81985529216486895ll; + inputCPU[2] = -1250999896491ll; + inputCPU[3] = 1250999896491ll; + inputCPU[4] = -19088743ll; + inputCPU[5] = 19088743ll; + inputCPU[6] = -291ll; + inputCPU[7] = 291ll; + + // copy inputs to device + hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice); + + // launch kernel + hipLaunchKernelGGL(kernel_abs_int64, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); + + // copy outputs from device + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + + // check outputs + CHECK_ABS_INT64(inputCPU[0], outputCPU[0], outputCPU[1]); + CHECK_ABS_INT64(inputCPU[1], outputCPU[1], outputCPU[1]); + CHECK_ABS_INT64(inputCPU[2], outputCPU[2], outputCPU[3]); + CHECK_ABS_INT64(inputCPU[3], outputCPU[3], outputCPU[3]); + CHECK_ABS_INT64(inputCPU[4], outputCPU[4], outputCPU[5]); + CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]); + CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]); + CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]); + + // free memories + hipFree(inputGPU); + hipFree(outputGPU); + free(inputCPU); + free(outputCPU); + + // done + return; +} + + +template +__global__ void kernel_simple(F f, T* out) { + *out = f(); +} + +template +void check_simple(F f, T expected, const char* file, unsigned line) { + auto memsize = sizeof(T); + T* outputCPU = (T*)malloc(memsize); + T* outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + if (*outputCPU != expected) { + failed("%s line %u : check failed (output = %lf, expected = %lf)\n", file, line, + (double)(*outputCPU), (double)expected); + } + hipFree(outputGPU); + free(outputCPU); +} +#define CHECK_SIMPLE(lambda, expected) check_simple(lambda, expected, __FILE__, __LINE__); + +void test_fp16() { + CHECK_SIMPLE([] __device__() { return max<__fp16>(1.0f, 2.0f); }, 2.0f); + CHECK_SIMPLE([] __device__() { return min<__fp16>(1.0f, 2.0f); }, 1.0f); +} + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + check_abs_int64(); + + // check_lgamma_double(); + + test_fp16(); + + passed(); +} diff --git a/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp b/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp index 39c660322b..12b96578de 100644 --- a/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp +++ b/projects/hip/tests/src/kernel/hipExtLaunchKernelGGL.cpp @@ -28,9 +28,52 @@ THE SOFTWARE. #include "hip/hip_ext.h" #include "test_common.h" +struct _t { + double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j; +}; + +typedef struct _t _T; + +__global__ void sKernel(_T s, double *a) { + *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j; +} + +__global__ void mKernel(char f, short a, int b, double c, short d, int e, double* res) { + *res = a + b + c + d + e + f; +} + +void testMixData() { + double m = 0; + double *d_m; + HIPCHECK(hipMalloc(&d_m, sizeof(double))); + int a = 1, e = 10; + short b = 2, d = 4; + double c = 3.0; + char ff = 10; + hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff, b, a, c, d, e, d_m); + HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); + if (m != 30.0) { + std::cout << "M is:: " << m << std::endl; + failed("Mismatch"); + } + hipFree(d_m); +} +void testStruct() { + double m = 0; + double *d_m; + HIPCHECK(hipMalloc(&d_m, sizeof(double))); + _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m); + HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); + if (m != 55.0) { + std::cout << "M is:: " << m << std::endl; + failed("Mismatch"); + } + hipFree(d_m); +} + void test(size_t N) { size_t Nbytes = N * sizeof(int); -#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; @@ -51,13 +94,13 @@ void test(size_t N) { HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); -#endif } int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, argv, true); test(N); - + testStruct(); + testMixData(); passed(); }