Merge branch 'amd-master-next' into amd-npi-next
Change-Id: I75be5fbe5ddd6552a6a316ec99ca8833ee02cb6c
[ROCm/hip commit: e19c8e176f]
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -620,7 +620,17 @@ foreach $arg (@ARGV)
|
||||
$toolArgs = substr $toolArgs, 0, -8;
|
||||
chomp $toolArgs;
|
||||
}
|
||||
} elsif ($arg eq 'c' and $prevArg eq '-x') {
|
||||
$hasC = 1;
|
||||
$hasCXX = 0;
|
||||
$hasHIP = 0;
|
||||
} elsif ($arg eq 'c++' and $prevArg eq '-x') {
|
||||
$hasC = 0;
|
||||
$hasCXX = 1;
|
||||
$hasHIP = 0;
|
||||
} elsif ($arg eq 'hip' and $prevArg eq '-x') {
|
||||
$hasC = 0;
|
||||
$hasCXX = 0;
|
||||
$hasHIP = 1;
|
||||
} elsif ($arg =~ m/^-/) {
|
||||
# options start with -
|
||||
@@ -647,28 +657,35 @@ foreach $arg (@ARGV)
|
||||
#print "O: <$arg>\n";
|
||||
} elsif ($prevArg ne '-o') {
|
||||
# input files and libraries
|
||||
if ($arg =~ /\.c$/) {
|
||||
$hasC = 1;
|
||||
# Skip guessing if `-x {c|c++|hip}` is already specified.
|
||||
if (not ($hasC or $hasCXX or $hasHIP)) {
|
||||
if ($arg =~ /\.c$/) {
|
||||
$hasC = 1;
|
||||
$needCFLAGS = 1;
|
||||
$toolArgs .= " -x c"
|
||||
} elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) {
|
||||
$needCXXFLAGS = 1;
|
||||
if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") {
|
||||
$hasCXX = 1;
|
||||
} else {
|
||||
$hasHIP = 1;
|
||||
$toolArgs .= " -x hip";
|
||||
}
|
||||
} elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) {
|
||||
$needCXXFLAGS = 1;
|
||||
if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") {
|
||||
$hasHIP = 1;
|
||||
$toolArgs .= " -x hip";
|
||||
} else {
|
||||
$hasCU = 1;
|
||||
}
|
||||
}
|
||||
} elsif ($hasC) {
|
||||
$needCFLAGS = 1;
|
||||
$toolArgs .= " -x c"
|
||||
}
|
||||
elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) {
|
||||
} elsif ($hasCXX) {
|
||||
$needCXXFLAGS = 1;
|
||||
if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") {
|
||||
$hasCXX = 1;
|
||||
} else {
|
||||
$hasHIP = 1;
|
||||
$toolArgs .= " -x hip";
|
||||
}
|
||||
}
|
||||
elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) {
|
||||
} else {
|
||||
$needCXXFLAGS = 1;
|
||||
if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") {
|
||||
$hasHIP = 1;
|
||||
$toolArgs .= " -x hip";
|
||||
} else {
|
||||
$hasCU = 1;
|
||||
}
|
||||
}
|
||||
push (@inputs, $arg);
|
||||
#print "I: <$arg>\n";
|
||||
|
||||
@@ -35,7 +35,8 @@ set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "@HIP_CLANG_PARALLEL_BUILD_COMPILE_
|
||||
@_HIP_HCC_FLAGS@
|
||||
@_HIP_CLANG_FLAGS@
|
||||
@_HIP_NVCC_FLAGS@
|
||||
set(HIP_HIPCC_INCLUDE_ARGS "@HIP_HIPCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly)
|
||||
#Needed to bring the HIP_HIPCC_INCLUDE_ARGS variable in scope
|
||||
set(HIP_HIPCC_INCLUDE_ARGS @HIP_HIPCC_INCLUDE_ARGS@) # list
|
||||
|
||||
set(cmake_dependency_file "@cmake_dependency_file@") # path
|
||||
set(source_file "@source_file@") # path
|
||||
|
||||
@@ -513,7 +513,7 @@ hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig);
|
||||
*
|
||||
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
|
||||
*/
|
||||
hipError_t hipGetDeviceFlags(unsigned *flags);
|
||||
hipError_t hipGetDeviceFlags(unsigned int* flags);
|
||||
|
||||
/**
|
||||
* @brief The bank width of shared memory on current device is set
|
||||
|
||||
@@ -132,6 +132,8 @@ target_include_directories(hip64
|
||||
${PROJECT_SOURCE_DIR}
|
||||
${PROJECT_SOURCE_DIR}/amdocl
|
||||
${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/elfio
|
||||
|
||||
# FIXME: Remove ROCclr_DIr explicit references
|
||||
${ROCclr_DIR}
|
||||
${ROCclr_DIR}/include
|
||||
${ROCclr_DIR}/compiler/lib
|
||||
@@ -139,6 +141,7 @@ target_include_directories(hip64
|
||||
${ROCclr_DIR}/elf/utils/common
|
||||
${ROCclr_DIR}/elf/utils/libelf
|
||||
${ROCR_INCLUDES}
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
$<TARGET_PROPERTY:amd_comgr,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
target_compile_definitions(hip64
|
||||
PRIVATE
|
||||
|
||||
@@ -99,7 +99,7 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCount(int* count) {
|
||||
HIP_INIT_API(NONE, count);
|
||||
HIP_INIT_API(hipDeviceGetCount, count);
|
||||
|
||||
HIP_RETURN(ihipDeviceGetCount(count));
|
||||
}
|
||||
@@ -238,7 +238,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device )
|
||||
}
|
||||
|
||||
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) {
|
||||
HIP_INIT_API(NONE, deviceId, acc);
|
||||
HIP_INIT_API(hipHccGetAccelerator, deviceId, acc);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
@@ -246,7 +246,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) {
|
||||
}
|
||||
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) {
|
||||
HIP_INIT_API(NONE, stream, av);
|
||||
HIP_INIT_API(hipHccGetAcceleratorView, stream, av);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
|
||||
@@ -480,7 +480,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
|
||||
}
|
||||
|
||||
hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) {
|
||||
HIP_INIT_API(NONE, handle, event);
|
||||
HIP_INIT_API(hipIpcGetEventHandle, handle, event);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
@@ -488,7 +488,7 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event
|
||||
}
|
||||
|
||||
hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) {
|
||||
HIP_INIT_API(NONE, event, handle);
|
||||
HIP_INIT_API(hipIpcOpenEventHandle, event, handle);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
@@ -542,7 +542,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
}
|
||||
|
||||
hipError_t hipSetValidDevices ( int* device_arr, int len ) {
|
||||
HIP_INIT_API(NONE, device_arr, len);
|
||||
HIP_INIT_API(hipSetValidDevices, device_arr, len);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
|
||||
@@ -90,9 +90,15 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) {
|
||||
return hipErrorNotReady;
|
||||
}
|
||||
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
// For certain HIP Api's that take start and stop event
|
||||
// the command is the same
|
||||
if (event_ == eStop.event_) {
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().start_))/1000000.f;
|
||||
} else {
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().end_))/1000000.f;
|
||||
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -110,6 +110,9 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
// Skip if nothing needs writing.
|
||||
return hipSuccess;
|
||||
}
|
||||
if (dst == nullptr || src == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
amd::Command* command = nullptr;
|
||||
amd::Command::EventWaitList waitList;
|
||||
@@ -1717,7 +1720,9 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
|
||||
}
|
||||
|
||||
size_t offset = 0;
|
||||
amd::Memory* memory = getMemoryObject(dst, offset);
|
||||
auto aligned_dst = amd::alignUp(reinterpret_cast<address>(dst), sizeof(uint64_t));
|
||||
|
||||
amd::Memory* memory = getMemoryObject(aligned_dst, offset);
|
||||
if (memory == nullptr) {
|
||||
// Host alloced memory
|
||||
memset(dst, value, sizeBytes);
|
||||
@@ -1727,44 +1732,52 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
|
||||
hipError_t hip_error = hipSuccess;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
|
||||
size_t n_head_bytes = 0;
|
||||
size_t n_tail_bytes = 0;
|
||||
int64_t value64 = 0;
|
||||
const size_t uint64ModSize = (sizeBytes % sizeof(int64_t));
|
||||
|
||||
if (sizeBytes/sizeof(int64_t) > 0) {
|
||||
n_head_bytes = static_cast<uint8_t*>(aligned_dst) - static_cast<uint8_t*>(dst);
|
||||
if (valueSize == sizeof(int8_t)) {
|
||||
value = value & 0xff;
|
||||
value64 = ((value << 56) | (value << 48) | (value << 40) | (value << 32)
|
||||
| (value << 24) | (value << 16) | (value << 8) | (value));
|
||||
} else if (valueSize == sizeof(int16_t)) {
|
||||
value = value & 0xffff;
|
||||
value64 = ((value << 48) | (value << 32) | (value<<16) | (value));
|
||||
} else if(valueSize == sizeof(int32_t)) {
|
||||
value64 = ((value << 48) | (value << 32) | (value << 16) | (value));
|
||||
} else if (valueSize == sizeof(int32_t)) {
|
||||
value = value & 0xffffffff;
|
||||
value64 = ((value<<32) | (value));
|
||||
value64 = ((value << 32) | (value));
|
||||
} else if (valueSize == sizeof(int64_t)) {
|
||||
value64 = value;
|
||||
} else {
|
||||
LogPrintfError("Unsupported Pattern size: %u \n", valueSize);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
// If uint64ModSize is != 0 then we will do a second fillBuffer Command
|
||||
n_tail_bytes = ((sizeBytes - n_head_bytes) % sizeof(int64_t));
|
||||
// If n_tail_bytes is != 0 then we will do a second fillBuffer Command
|
||||
// on the same stream below, dont wait, do the first call async.
|
||||
hip_error = packFillMemoryCommand(memory, offset, value64, sizeof(int64_t),
|
||||
sizeBytes - uint64ModSize, queue,
|
||||
((uint64ModSize != 0) || isAsync));
|
||||
if(hip_error != hipSuccess) {
|
||||
sizeBytes - n_tail_bytes - n_head_bytes, queue,
|
||||
((n_head_bytes != 0) || (n_tail_bytes != 0) || isAsync));
|
||||
if (hip_error != hipSuccess) {
|
||||
return hip_error;
|
||||
}
|
||||
} else {
|
||||
n_head_bytes = sizeBytes;
|
||||
}
|
||||
|
||||
if (uint64ModSize != 0) {
|
||||
void* new_dst = reinterpret_cast<void*>((reinterpret_cast<address>(dst)
|
||||
+ sizeBytes) - uint64ModSize);
|
||||
if (n_head_bytes != 0) {
|
||||
memory = getMemoryObject(dst, offset);
|
||||
hip_error = packFillMemoryCommand(memory, offset, value, valueSize,
|
||||
n_head_bytes , queue, isAsync);
|
||||
}
|
||||
|
||||
if (n_tail_bytes != 0) {
|
||||
void* new_dst = (reinterpret_cast<address>(dst) + sizeBytes) - n_tail_bytes;
|
||||
memory = getMemoryObject(new_dst, offset);
|
||||
hip_error = packFillMemoryCommand(memory, offset, value, valueSize,
|
||||
uint64ModSize, queue, isAsync);
|
||||
n_tail_bytes, queue, isAsync);
|
||||
}
|
||||
|
||||
return hip_error;
|
||||
}
|
||||
|
||||
@@ -2090,7 +2103,7 @@ hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor,
|
||||
|
||||
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy,
|
||||
hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpyParam2D, pCopy);
|
||||
HIP_INIT_API(hipMemcpyParam2DAsync, pCopy);
|
||||
|
||||
HIP_RETURN(ihipMemcpyParam2D(pCopy, stream, true));
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
|
||||
void** optionsValues)
|
||||
{
|
||||
/* TODO: Pass options to Program */
|
||||
HIP_INIT_API(hipModuleLoadData, module, image);
|
||||
HIP_INIT_API(hipModuleLoadDataEx, module, image);
|
||||
|
||||
HIP_RETURN(ihipModuleLoadData(module, image, 0));
|
||||
}
|
||||
@@ -369,7 +369,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
|
||||
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
|
||||
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) {
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
|
||||
HIP_INIT_API(ihipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params);
|
||||
|
||||
hip::Function* function = hip::Function::asFunction(f);
|
||||
@@ -493,7 +493,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent, flags);
|
||||
@@ -512,7 +512,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
|
||||
HIP_INIT_API(hipHccModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
@@ -529,7 +529,7 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
|
||||
HIP_INIT_API(hipModuleLaunchKernelExt, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
|
||||
@@ -191,13 +191,6 @@ void PlatformState::init()
|
||||
for (auto& it : vars_) {
|
||||
it.second.rvars.resize(g_devices.size());
|
||||
}
|
||||
if (!HIP_ENABLE_LAZY_KERNEL_LOADING) {
|
||||
for (size_t i = 0; i < g_devices.size(); ++i) {
|
||||
for (auto& it: functions_) {
|
||||
getFunc(it.first, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool PlatformState::unregisterFunc(hipModule_t hmod) {
|
||||
@@ -599,6 +592,13 @@ void PlatformState::popExec(ihipExec_t& exec) {
|
||||
execStack_.pop();
|
||||
}
|
||||
|
||||
namespace {
|
||||
const int HIP_ENABLE_DEFERRED_LOADING{[] () {
|
||||
char *var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
||||
return var ? atoi(var) : 1;
|
||||
}()};
|
||||
} /* namespace */
|
||||
|
||||
extern "C" void __hipRegisterFunction(
|
||||
std::vector<std::pair<hipModule_t,bool> >* modules,
|
||||
const void* hostFunction,
|
||||
@@ -613,9 +613,12 @@ extern "C" void __hipRegisterFunction(
|
||||
{
|
||||
PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector<hipFunction_t>{g_devices.size()}};
|
||||
PlatformState::instance().registerFunction(hostFunction, func);
|
||||
// for (size_t i = 0; i < g_devices.size(); ++i) {
|
||||
// PlatformState::instance().getFunc(hostFunction, i);
|
||||
// }
|
||||
if (!HIP_ENABLE_DEFERRED_LOADING) {
|
||||
HIP_INIT();
|
||||
for (size_t i = 0; i < g_devices.size(); ++i) {
|
||||
PlatformState::instance().getFunc(hostFunction, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Registers a device-side global variable.
|
||||
@@ -706,7 +709,7 @@ extern "C" hipError_t hipConfigureCall(
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
@@ -719,7 +722,7 @@ extern "C" hipError_t __hipPushCallConfiguration(
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
@@ -730,7 +733,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
||||
dim3 *blockDim,
|
||||
size_t *sharedMem,
|
||||
hipStream_t *stream) {
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
ihipExec_t exec;
|
||||
PlatformState::instance().popExec(exec);
|
||||
@@ -747,7 +750,7 @@ extern "C" hipError_t hipSetupArgument(
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
HIP_INIT_API(NONE, arg, size, offset);
|
||||
HIP_INIT_API(hipSetupArgument, arg, size, offset);
|
||||
|
||||
PlatformState::instance().setupArgument(arg, size, offset);
|
||||
|
||||
@@ -756,7 +759,7 @@ extern "C" hipError_t hipSetupArgument(
|
||||
|
||||
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction);
|
||||
HIP_INIT_API(hipLaunchByPtr, hostFunction);
|
||||
|
||||
ihipExec_t exec;
|
||||
PlatformState::instance().popExec(exec);
|
||||
@@ -1301,7 +1304,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes,
|
||||
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes,
|
||||
stream);
|
||||
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
|
||||
@@ -68,8 +68,8 @@ def filtr_api_name(name):
|
||||
return name
|
||||
|
||||
def filtr_api_decl(record):
|
||||
record = re.sub("\s__dparm\([^\)]*\)", '', record);
|
||||
record = re.sub("\(void\*\)", '', record);
|
||||
record = re.sub("\s__dparm\([^\)]*\)", r'', record);
|
||||
record = re.sub("\(void\*\)", r'', record);
|
||||
return record
|
||||
|
||||
# Normalizing API arguments
|
||||
@@ -216,6 +216,8 @@ def parse_content(inp_file_p, api_map, out):
|
||||
api_name = ""
|
||||
# Valid public API found flag
|
||||
api_valid = 0
|
||||
# API overload (parameters mismatch)
|
||||
api_overload = 0
|
||||
|
||||
# Input file patched content
|
||||
content = ''
|
||||
@@ -240,6 +242,7 @@ def parse_content(inp_file_p, api_map, out):
|
||||
|
||||
# Looking for API begin
|
||||
if found == 0:
|
||||
record = re.sub(r'\s*extern\s+"C"\s+', r'', record);
|
||||
if beg_pattern.match(record):
|
||||
found = 1
|
||||
record = filtr_api_decl(record)
|
||||
@@ -255,6 +258,8 @@ def parse_content(inp_file_p, api_map, out):
|
||||
# Checking if complete API matched
|
||||
if m:
|
||||
found = 2
|
||||
api_valid = 0
|
||||
api_overload = 0
|
||||
api_name = filtr_api_name(m.group(2))
|
||||
# Checking if API name is in the API map
|
||||
if (private_check_mode == 0) or (api_name in api_map):
|
||||
@@ -280,48 +285,34 @@ def parse_content(inp_file_p, api_map, out):
|
||||
out[api_name] = filtr_api_opts(api_args)
|
||||
# Register missmatched API methods
|
||||
else:
|
||||
api_overload = 1
|
||||
# Warning about mismatched API, possible non public overloaded version
|
||||
api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types
|
||||
message("\t" + api_name + ' args mismatch:\n' + api_diff + '\n')
|
||||
|
||||
if hip_patch_mode != 0:
|
||||
# Looking for INIT macro
|
||||
m = init_pattern.match(line)
|
||||
if m:
|
||||
if api_valid == 0: api_name = 'NONE'
|
||||
|
||||
if api_name == m.group(3):
|
||||
if hip_patch_mode == 1: hip_patch_mode = 0
|
||||
else: fatal("patching failed")
|
||||
else:
|
||||
hip_patch_mode = 2
|
||||
init_args = m.group(2)
|
||||
if init_args != '': init_args = ', ' + init_args
|
||||
line = m.group(1) + '(' + api_name + init_args + m.group(5) + '\n'
|
||||
non_public_api = 0
|
||||
|
||||
# API found action
|
||||
if found == 2:
|
||||
# Looking for INIT macro
|
||||
if hip_patch_mode != 0:
|
||||
# Looking for INIT macro
|
||||
m = init_pattern.match(line)
|
||||
if m:
|
||||
init_name = api_name
|
||||
if api_overload == 1: init_name = 'NONE'
|
||||
init_args = m.group(4)
|
||||
line = m.group(1) + '(' + init_name + init_args + m.group(5) + '\n'
|
||||
|
||||
m = init_pattern.match(line)
|
||||
if m:
|
||||
found = 0
|
||||
non_public_api = 0
|
||||
|
||||
if api_valid == 1:
|
||||
api_valid = 0
|
||||
message("\t" + api_name)
|
||||
else:
|
||||
non_public_api = 1
|
||||
|
||||
if non_public_api == 1:
|
||||
if api_valid == 1: message("\t" + api_name)
|
||||
# Ignore if it is initialized as NONE
|
||||
init_name = m.group(3)
|
||||
if init_name != 'NONE':
|
||||
# Check if init name matching API name
|
||||
if init_name != api_name:
|
||||
fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'")
|
||||
# Registering dummy API for non public API if the name in INIT is not NONE
|
||||
init_name = m.group(3)
|
||||
# Ignore if it is initialized as NONE
|
||||
if init_name != 'NONE':
|
||||
# Check if init name matching API name
|
||||
if init_name != api_name:
|
||||
fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'")
|
||||
if api_valid == 0:
|
||||
# If init name is not in public API map then it is private API
|
||||
# else it was not identified and will be checked on finish
|
||||
if not init_name in api_map:
|
||||
@@ -333,7 +324,6 @@ def parse_content(inp_file_p, api_map, out):
|
||||
# Expect INIT macro for valid public API
|
||||
# Removing and registering non-conformant APIs with missing HIP_INIT macro
|
||||
if api_valid == 1:
|
||||
api_valid = 0
|
||||
if api_name in out:
|
||||
del out[api_name]
|
||||
del api_map[api_name]
|
||||
|
||||
@@ -44,8 +44,6 @@ void multiplyCPU(float* C, float* A, float* B, int N){
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
|
||||
void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
@@ -95,10 +93,8 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int main() {
|
||||
#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
|
||||
float *A, *B, *C0, *C1, *cpuC;
|
||||
float *Ad, *Bd, *C0d, *C1d;
|
||||
int errors=0;
|
||||
@@ -177,8 +173,5 @@ int main() {
|
||||
free(C0);
|
||||
free(C1);
|
||||
free(cpuC);
|
||||
#else
|
||||
std::cout <<"hipOccupancyMaxPotentialBlockSize template not support for Clang compiler"<<std::endl;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -33,236 +30,181 @@ THE SOFTWARE.
|
||||
* TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA
|
||||
* HIT_END
|
||||
*/
|
||||
#define MAX_OFFSET 3
|
||||
// To test memset on unaligned pointer
|
||||
#define loop(offset, offsetMax) for (int offset = offsetMax; offset >= 0; offset --)
|
||||
|
||||
#include <vector>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
enum MemsetType {
|
||||
hipMemsetTypeDefault,
|
||||
hipMemsetTypeD8,
|
||||
hipMemsetTypeD16,
|
||||
hipMemsetTypeD32
|
||||
};
|
||||
|
||||
bool testhipMemset(int memsetval,int p_gpuDevice)
|
||||
bool testhipMemsetSmallSize(int memsetval, int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(char);
|
||||
printf ("testhipMemset N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
for ( size_t iSize = 1; iSize < 4; iSize++ ) {
|
||||
size_t Nbytes = iSize * sizeof(char);
|
||||
HIPCHECK(hipMalloc(&A_d, Nbytes));
|
||||
A_h = reinterpret_cast<char*> (malloc(Nbytes));
|
||||
printf("testhipMemsetSmallSize N=%zu memsetval=%2x device=%d\n",
|
||||
iSize, memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemset(A_d, memsetval, Nbytes));
|
||||
HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemset(A_d, memsetval, Nbytes) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", i, (int)A_h[i], (int)memsetval);
|
||||
break;
|
||||
}
|
||||
for ( int i = 0; i < iSize; i++ ) {
|
||||
if ( A_h[i] != memsetval ) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetval:%02x\n",
|
||||
i, static_cast<int> (A_h[i]), static_cast<int> (memsetval));
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD32(int memsetD32val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n", N, memsetD32val, p_gpuDevice);
|
||||
int *A_d;
|
||||
int *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (int*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemsetD32((hipDeviceptr_t)A_d, memsetD32val, N) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD32val) {
|
||||
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD32val:%08x\n", i, A_h[i], memsetD32val);
|
||||
break;
|
||||
}
|
||||
template<typename T>
|
||||
bool testhipMemset(T*A_h, T*A_d, T memsetval, enum MemsetType type,
|
||||
int p_gpuDevice) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
bool testResult = true;
|
||||
HIPCHECK(hipMalloc(&A_d, Nbytes));
|
||||
A_h = reinterpret_cast<T*> (malloc(Nbytes));
|
||||
loop(offset, MAX_OFFSET) {
|
||||
if (type == hipMemsetTypeDefault) {
|
||||
printf("testhipMemset N=%zu memsetval=%2x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemset(A_d + offset, memsetval, N - offset));
|
||||
} else if (type == hipMemsetTypeD8) {
|
||||
printf("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD8((hipDeviceptr_t)(A_d + offset), memsetval, N - offset));
|
||||
} else if (type == hipMemsetTypeD16) {
|
||||
printf("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD16((hipDeviceptr_t)(A_d + offset), memsetval, N - offset));
|
||||
} else if (type == hipMemsetTypeD32) {
|
||||
printf("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD32((hipDeviceptr_t)(A_d + offset), memsetval, N - offset));
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD16(short memsetD16val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", N, memsetD16val, p_gpuDevice);
|
||||
short *A_d;
|
||||
short *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (short*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemsetD16((hipDeviceptr_t)A_d, memsetD16val, N) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD16val) {
|
||||
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD16val:%08x\n", i, A_h[i], memsetD32val);
|
||||
break;
|
||||
}
|
||||
HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost) );
|
||||
for ( int i = offset; i < N; i++ ) {
|
||||
if (A_h[i] != memsetval) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetval:%02x\n",
|
||||
i, static_cast<int> (A_h[i]), static_cast<int> (memsetval));
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD8(char memsetD8val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", N, memsetD8val, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemsetD8((hipDeviceptr_t)A_d, memsetD8val, N) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD8val) {
|
||||
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD8val:%08x\n", i, A_h[i], memsetD8val);
|
||||
break;
|
||||
}
|
||||
template<typename T>
|
||||
bool testhipMemsetAsync(T*A_h, T*A_d, T memsetval, enum MemsetType type,
|
||||
int p_gpuDevice) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
bool testResult = true;
|
||||
HIPCHECK(hipMalloc(reinterpret_cast<void**> (&A_d), Nbytes));
|
||||
A_h = reinterpret_cast<T*> (malloc(Nbytes));
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
loop(offset, MAX_OFFSET) {
|
||||
if (type == hipMemsetTypeDefault) {
|
||||
printf("testhipMemsetAsync N=%zu memsetval=%2x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetAsync(A_d+offset, memsetval, Nbytes-offset, stream));
|
||||
} else if (type == hipMemsetTypeD8) {
|
||||
printf("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream));
|
||||
} else if (type == hipMemsetTypeD16) {
|
||||
printf("testhipMemsetD16Async N=%zu memsetD16val=%8x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream));
|
||||
} else if (type == hipMemsetTypeD32) {
|
||||
printf("testhipMemsetD32Async N=%zu memsetD32val=%8x device=%d\n",
|
||||
(N - offset), memsetval, p_gpuDevice);
|
||||
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream));
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
HIPCHECK(hipMemcpy(A_h, reinterpret_cast<void*> (A_d), Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
bool testhipMemsetAsync(int memsetval,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetAsync N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetAsync(A_d, memsetval, Nbytes, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", i, (int)A_h[i], (int)memsetval);
|
||||
break;
|
||||
}
|
||||
for ( int i = offset; i < N; i++ ) {
|
||||
if (A_h[i] != memsetval) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x\n", i, static_cast<int> (A_h[i]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
HIPCHECK(hipFree(reinterpret_cast<void*> (A_d)) );
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD32Async(int memsetD32val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD32Async N=%zu memsetval=%8x device=%d\n", N, memsetD32val, p_gpuDevice);
|
||||
int *A_d;
|
||||
int *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (int*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetD32Async((hipDeviceptr_t)A_d, memsetD32val, N, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD32val) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetD32val:%02x\n", i, A_h[i], memsetD32val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
bool testhipMemset2AsyncOps() {
|
||||
printf("testhipMemset2AsyncOps 2 memset operations at the same time\n");
|
||||
std::vector<float> v;
|
||||
v.resize(2048);
|
||||
float* p2, *p3;
|
||||
hipMalloc(reinterpret_cast<void**>(&p2), 4096 + 4096*2);
|
||||
p3 = p2+2048;
|
||||
hipStream_t s;
|
||||
hipStreamCreate(&s);
|
||||
hipMemsetAsync(p2, 0, 32*32*4, s);
|
||||
hipMemsetD32Async(p3, 0x3fe00000, 32*32, s );
|
||||
hipStreamSynchronize(s);
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
hipMemsetAsync(p2, 0, 32*32*4, s);
|
||||
hipMemsetD32Async(p3, 0x3fe00000, 32*32, s );
|
||||
}
|
||||
hipStreamSynchronize(s);
|
||||
hipDeviceSynchronize();
|
||||
hipMemcpy(&v[0], p2, 1024, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(&v[1024], p3, 1024, hipMemcpyDeviceToHost);
|
||||
if ((v[0] != 0) || (v[1024] != 1.75f)) {
|
||||
printf("mismatch (%f != 0) or (%f != 1.75f)\n", v[0], v[1024]);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool testhipMemsetD16Async(short memsetD16val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD16Async N=%zu memsetval=%8x device=%d\n", N, memsetD16val, p_gpuDevice);
|
||||
short *A_d;
|
||||
short *A_h;
|
||||
bool testResult = true;
|
||||
int main(int argc, char *argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
bool testResult = true;
|
||||
char * cA_d;
|
||||
char * cA_h;
|
||||
int16_t * siA_d;
|
||||
int16_t * siA_h;
|
||||
int32_t * iA_d;
|
||||
int32_t * iA_h;
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
testResult &= testhipMemsetSmallSize(memsetval, p_gpuDevice);
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (short*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetD16Async((hipDeviceptr_t)A_d, memsetD16val, N, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
testResult &= testhipMemset(cA_h, cA_d, memsetval, hipMemsetTypeDefault, p_gpuDevice);
|
||||
testResult &= testhipMemset(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice);
|
||||
testResult &= testhipMemset(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice);
|
||||
testResult &= testhipMemset(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice);
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD16val) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetD16val:%02x\n", i, A_h[i], memsetD16val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD8Async(char memsetD8val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n", N, memsetD8val, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetD8Async((hipDeviceptr_t)A_d, memsetD8val, N, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD8val) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetD8val:%02x\n", i, A_h[i], memsetD8val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
bool testResult = true;
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
testResult &= testhipMemset(memsetval, p_gpuDevice);
|
||||
testResult &= testhipMemsetAsync(memsetval, p_gpuDevice);
|
||||
testResult &= testhipMemsetD32(memsetD32val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD32Async(memsetD32val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD16(memsetD16val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD16Async(memsetD16val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD8(memsetD8val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD8Async(memsetD8val, p_gpuDevice);
|
||||
if (testResult) passed();
|
||||
failed("Output Mismatch\n");
|
||||
testResult &= testhipMemsetAsync(cA_h, cA_d, memsetval, hipMemsetTypeDefault, p_gpuDevice);
|
||||
testResult &= testhipMemsetAsync(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice);
|
||||
testResult &= testhipMemsetAsync(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice);
|
||||
testResult &= testhipMemsetAsync(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice);
|
||||
testResult &= testhipMemset2AsyncOps();
|
||||
if (testResult) passed();
|
||||
failed("Output Mismatch\n");
|
||||
}
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user