Adding changes for hipExtLaunchKernel for rocCLR
Change-Id: Iba52bc3bde7c37f3fb375a55ba0947e87b3cdc9b
[ROCm/hip commit: 2d517fdcc6]
Bu işleme şunda yer alıyor:
@@ -392,10 +392,53 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
|
||||
|
||||
typedef int hipLaunchParm;
|
||||
|
||||
template <std::size_t n, typename... Ts,
|
||||
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
|
||||
void pArgs(const std::tuple<Ts...>&, void*) {}
|
||||
|
||||
template <std::size_t n, typename... Ts,
|
||||
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
|
||||
void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
|
||||
using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
|
||||
|
||||
static_assert(!std::is_reference<T>{},
|
||||
"A __global__ function cannot have a reference as one of its "
|
||||
"arguments.");
|
||||
#if defined(HIP_STRICT)
|
||||
static_assert(std::is_trivially_copyable<T>{},
|
||||
"Only TriviallyCopyable types can be arguments to a __global__ "
|
||||
"function");
|
||||
#endif
|
||||
_vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
|
||||
return pArgs<n + 1>(formals, _vargs);
|
||||
}
|
||||
|
||||
template <typename... Formals, typename... Actuals>
|
||||
std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
|
||||
static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
|
||||
std::tuple<Formals...> to_formals{std::move(actuals)};
|
||||
return to_formals;
|
||||
}
|
||||
|
||||
#if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
|
||||
template <typename... Args, typename F = void (*)(Args...)>
|
||||
void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
|
||||
constexpr size_t count = sizeof...(Args);
|
||||
auto tup_ = std::tuple<Args...>{args...};
|
||||
auto tup = validateArgsCountType(kernel, tup_);
|
||||
void* _Args[count];
|
||||
pArgs<0>(tup, _Args);
|
||||
|
||||
auto k = reinterpret_cast<void*>(kernel);
|
||||
hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
|
||||
}
|
||||
#else
|
||||
#define hipLaunchKernelGGL(kernelName, numblocks, numthreads, memperblock, streamId, ...) \
|
||||
do { \
|
||||
kernelName<<<(numblocks), (numthreads), (memperblock), (streamId)>>>(__VA_ARGS__); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
|
||||
@@ -3378,6 +3378,11 @@ hipError_t hipLaunchKernel(const void* function_address,
|
||||
hipStream_t stream __dparm(0));
|
||||
|
||||
#if __HIP_ROCclr__ || !defined(__HCC__)
|
||||
//TODO: Move this to hip_ext.h
|
||||
hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks,
|
||||
void** args, size_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, int flags);
|
||||
|
||||
hipError_t hipBindTexture(
|
||||
size_t* offset,
|
||||
const textureReference* tex,
|
||||
|
||||
@@ -23,6 +23,10 @@ THE SOFTWARE.
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
#define HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
#include "hip/hip_runtime.h"
|
||||
#if defined(__cplusplus)
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#endif
|
||||
#ifdef __HCC__
|
||||
|
||||
// Forward declarations:
|
||||
@@ -109,8 +113,29 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipEvent_t stopEvent = nullptr)
|
||||
__attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
|
||||
|
||||
//#if !__HIP_ROCclr__ && defined(__cplusplus)
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
#if defined(__HIP_ROCclr__) && defined(__cplusplus)
|
||||
|
||||
extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks,
|
||||
dim3 dimBlocks, void** args, size_t sharedMemBytes,
|
||||
hipStream_t stream, hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent, int flags);
|
||||
|
||||
template <typename... Args, typename F = void (*)(Args...)>
|
||||
inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
||||
Args... args) {
|
||||
constexpr size_t count = sizeof...(Args);
|
||||
auto tup_ = std::tuple<Args...>{args...};
|
||||
auto tup = validateArgsCountType(kernel, tup_);
|
||||
void* _Args[count];
|
||||
pArgs<0>(tup, _Args);
|
||||
|
||||
auto k = reinterpret_cast<void*>(kernel);
|
||||
hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent,
|
||||
stopEvent, (int)flags);
|
||||
}
|
||||
#elif defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp"
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
@@ -51,6 +51,7 @@ hipExtGetLinkTypeAndHopCount
|
||||
hipExtLaunchMultiKernelMultiDevice
|
||||
hipExtMallocWithFlags
|
||||
hipExtModuleLaunchKernel
|
||||
hipExtLaunchKernel
|
||||
hipFree
|
||||
hipFreeArray
|
||||
hipFuncSetCacheConfig
|
||||
|
||||
@@ -52,6 +52,7 @@ global:
|
||||
hipExtLaunchMultiKernelMultiDevice;
|
||||
hipExtMallocWithFlags;
|
||||
hipExtModuleLaunchKernel;
|
||||
hipExtLaunchKernel;
|
||||
hipFree;
|
||||
hipFreeArray;
|
||||
hipFuncSetCacheConfig;
|
||||
|
||||
@@ -29,6 +29,16 @@
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size);
|
||||
|
||||
extern hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags);
|
||||
|
||||
const std::string& FunctionName(const hipFunction_t f)
|
||||
{
|
||||
return hip::Function::asFunction(f)->function_->name();
|
||||
@@ -539,6 +549,31 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr, nullptr, 0));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernel(const void* f,
|
||||
dim3 gridDim, dim3 blockDim,
|
||||
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
|
||||
|
||||
@@ -62,6 +62,14 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
||||
hipDeviceptr_t* dptr, size_t* bytes);
|
||||
|
||||
extern hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
|
||||
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
|
||||
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0);
|
||||
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
|
||||
const char* device_name) {
|
||||
// Workaround for device name mismatch.
|
||||
@@ -1339,16 +1347,16 @@ void hipLaunchCooperativeKernelGGLImpl(
|
||||
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes,
|
||||
stream);
|
||||
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice();
|
||||
if (deviceId == -1) {
|
||||
@@ -1368,10 +1376,10 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
#endif
|
||||
}
|
||||
|
||||
HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z,
|
||||
blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, stream, args, nullptr));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(func, (gridDim.x * blockDim.x), (gridDim.y * blockDim.y),
|
||||
(gridDim.z * blockDim.z), blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, stream, args, nullptr, startEvent, stopEvent,
|
||||
flags));
|
||||
}
|
||||
|
||||
// conversion routines between float and half precision
|
||||
|
||||
@@ -1709,12 +1709,33 @@ hipError_t hipLaunchKernel(
|
||||
const void* func_addr, dim3 numBlocks, dim3 dimBlocks, void** args,
|
||||
size_t sharedMemBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchKernel,func_addr,numBlocks,dimBlocks,args,sharedMemBytes,stream);
|
||||
HIP_INIT_API(hipLaunchKernel,func_addr,numBlocks,dimBlocks,args,sharedMemBytes,stream);
|
||||
|
||||
hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr,
|
||||
hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr,
|
||||
hip_impl::target_agent(stream));
|
||||
|
||||
return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
|
||||
stream, args, nullptr);
|
||||
}
|
||||
|
||||
hipError_t hipExtLaunchKernel(const void* function, dim3 numBlocks, dim3 dimBlocks, void** args,
|
||||
size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent, int flags) {
|
||||
HIP_INIT_API(hipExtLaunchKernel,function,numBlocks,dimBlocks,args,sharedMemBytes,stream,startEvent,stopEvent,flags);
|
||||
|
||||
hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)function,
|
||||
hip_impl::target_agent(stream));
|
||||
|
||||
uint32_t globalWorkSizeX = numBlocks.x * dimBlocks.x;
|
||||
uint32_t globalWorkSizeY = numBlocks.y * dimBlocks.y;
|
||||
uint32_t globalWorkSizeZ = numBlocks.z * dimBlocks.z;
|
||||
if (globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX ||
|
||||
globalWorkSizeZ > UINT32_MAX) {
|
||||
return hipErrorInvalidConfiguration;
|
||||
}
|
||||
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
tls, kd, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, dimBlocks.x, dimBlocks.y,
|
||||
dimBlocks.z, sharedMemBytes, stream, args, nullptr, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#define HIP_TEMPLATE_KERNEL_LAUNCH
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
__global__ void kernel_abs_int64(long long* input, long long* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = abs(input[tx]);
|
||||
}
|
||||
|
||||
__global__ void kernel_lgamma_double(double* input, double* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = lgamma(input[tx]);
|
||||
}
|
||||
|
||||
#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \
|
||||
{ \
|
||||
if (OUT != EXP) { \
|
||||
failed("check_abs_int64 failed on %f (output = %f, expected = %fd)\n", IN, OUT, EXP); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_ABS_INT64(IN, OUT, EXP) \
|
||||
{ \
|
||||
if (OUT != EXP) { \
|
||||
failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, \
|
||||
EXP); \
|
||||
} \
|
||||
}
|
||||
|
||||
void check_lgamma_double() {
|
||||
using datatype_t = double;
|
||||
|
||||
const int NUM_INPUTS = 8;
|
||||
auto memsize = NUM_INPUTS * sizeof(datatype_t);
|
||||
|
||||
// allocate memories
|
||||
datatype_t* inputCPU = (datatype_t*)malloc(memsize);
|
||||
datatype_t* outputCPU = (datatype_t*)malloc(memsize);
|
||||
datatype_t* inputGPU = nullptr;
|
||||
hipMalloc((void**)&inputGPU, memsize);
|
||||
datatype_t* outputGPU = nullptr;
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// populate input
|
||||
for (int i = 0; i < NUM_INPUTS; i++) {
|
||||
inputCPU[i] = -3.5 + i;
|
||||
}
|
||||
|
||||
// copy inputs to device
|
||||
hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice);
|
||||
|
||||
// launch kernel
|
||||
hipLaunchKernelGGL(kernel_lgamma_double, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy outputs from device
|
||||
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
|
||||
|
||||
// check outputs
|
||||
for (int i = 0; i < NUM_INPUTS; i++) {
|
||||
CHECK_LGAMMA_DOUBLE(inputCPU[i], outputCPU[i], lgamma(inputCPU[i]));
|
||||
}
|
||||
|
||||
// free memories
|
||||
hipFree(inputGPU);
|
||||
hipFree(outputGPU);
|
||||
free(inputCPU);
|
||||
free(outputCPU);
|
||||
|
||||
// done
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void check_abs_int64() {
|
||||
using datatype_t = long long;
|
||||
|
||||
const int NUM_INPUTS = 8;
|
||||
auto memsize = NUM_INPUTS * sizeof(datatype_t);
|
||||
|
||||
// allocate memories
|
||||
datatype_t* inputCPU = (datatype_t*)malloc(memsize);
|
||||
datatype_t* outputCPU = (datatype_t*)malloc(memsize);
|
||||
datatype_t* inputGPU = nullptr;
|
||||
hipMalloc((void**)&inputGPU, memsize);
|
||||
datatype_t* outputGPU = nullptr;
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// populate input
|
||||
inputCPU[0] = -81985529216486895ll;
|
||||
inputCPU[1] = 81985529216486895ll;
|
||||
inputCPU[2] = -1250999896491ll;
|
||||
inputCPU[3] = 1250999896491ll;
|
||||
inputCPU[4] = -19088743ll;
|
||||
inputCPU[5] = 19088743ll;
|
||||
inputCPU[6] = -291ll;
|
||||
inputCPU[7] = 291ll;
|
||||
|
||||
// copy inputs to device
|
||||
hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice);
|
||||
|
||||
// launch kernel
|
||||
hipLaunchKernelGGL(kernel_abs_int64, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy outputs from device
|
||||
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
|
||||
|
||||
// check outputs
|
||||
CHECK_ABS_INT64(inputCPU[0], outputCPU[0], outputCPU[1]);
|
||||
CHECK_ABS_INT64(inputCPU[1], outputCPU[1], outputCPU[1]);
|
||||
CHECK_ABS_INT64(inputCPU[2], outputCPU[2], outputCPU[3]);
|
||||
CHECK_ABS_INT64(inputCPU[3], outputCPU[3], outputCPU[3]);
|
||||
CHECK_ABS_INT64(inputCPU[4], outputCPU[4], outputCPU[5]);
|
||||
CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]);
|
||||
CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]);
|
||||
CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]);
|
||||
|
||||
// free memories
|
||||
hipFree(inputGPU);
|
||||
hipFree(outputGPU);
|
||||
free(inputCPU);
|
||||
free(outputCPU);
|
||||
|
||||
// done
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
template <class T, class F>
|
||||
__global__ void kernel_simple(F f, T* out) {
|
||||
*out = f();
|
||||
}
|
||||
|
||||
template <class T, class F>
|
||||
void check_simple(F f, T expected, const char* file, unsigned line) {
|
||||
auto memsize = sizeof(T);
|
||||
T* outputCPU = (T*)malloc(memsize);
|
||||
T* outputGPU = nullptr;
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU);
|
||||
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost);
|
||||
if (*outputCPU != expected) {
|
||||
failed("%s line %u : check failed (output = %lf, expected = %lf)\n", file, line,
|
||||
(double)(*outputCPU), (double)expected);
|
||||
}
|
||||
hipFree(outputGPU);
|
||||
free(outputCPU);
|
||||
}
|
||||
#define CHECK_SIMPLE(lambda, expected) check_simple(lambda, expected, __FILE__, __LINE__);
|
||||
|
||||
void test_fp16() {
|
||||
CHECK_SIMPLE([] __device__() { return max<__fp16>(1.0f, 2.0f); }, 2.0f);
|
||||
CHECK_SIMPLE([] __device__() { return min<__fp16>(1.0f, 2.0f); }, 1.0f);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
check_abs_int64();
|
||||
|
||||
// check_lgamma_double();
|
||||
|
||||
test_fp16();
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -28,9 +28,52 @@ THE SOFTWARE.
|
||||
#include "hip/hip_ext.h"
|
||||
#include "test_common.h"
|
||||
|
||||
struct _t {
|
||||
double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
|
||||
};
|
||||
|
||||
typedef struct _t _T;
|
||||
|
||||
__global__ void sKernel(_T s, double *a) {
|
||||
*a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j;
|
||||
}
|
||||
|
||||
__global__ void mKernel(char f, short a, int b, double c, short d, int e, double* res) {
|
||||
*res = a + b + c + d + e + f;
|
||||
}
|
||||
|
||||
void testMixData() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIPCHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
int a = 1, e = 10;
|
||||
short b = 2, d = 4;
|
||||
double c = 3.0;
|
||||
char ff = 10;
|
||||
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff, b, a, c, d, e, d_m);
|
||||
HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
if (m != 30.0) {
|
||||
std::cout << "M is:: " << m << std::endl;
|
||||
failed("Mismatch");
|
||||
}
|
||||
hipFree(d_m);
|
||||
}
|
||||
void testStruct() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIPCHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
|
||||
HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
if (m != 55.0) {
|
||||
std::cout << "M is:: " << m << std::endl;
|
||||
failed("Mismatch");
|
||||
}
|
||||
hipFree(d_m);
|
||||
}
|
||||
|
||||
void test(size_t N) {
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
|
||||
@@ -51,13 +94,13 @@ void test(size_t N) {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
test(N);
|
||||
|
||||
testStruct();
|
||||
testMixData();
|
||||
passed();
|
||||
}
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle