SWDEV-289405 - [catch2][dtest][module] Migration of Module files to CATCH2 framework (#2351)
Migrated all module related files to CATCH2 framework and optimized to have single module kernel file Change-Id: I39aa28ef22c1b2f4d0014ca32b59b9c645b725dc
Этот коммит содержится в:
@@ -15,6 +15,7 @@ target_link_libraries(UnitTests PRIVATE UnitDeviceTests
|
||||
EventTest
|
||||
OccupancyTest
|
||||
DeviceTest
|
||||
ModuleTest
|
||||
RTC
|
||||
stdc++fs)
|
||||
|
||||
@@ -36,6 +37,7 @@ target_link_libraries(ABMTests PRIVATE ABMAddKernels
|
||||
stdc++fs)
|
||||
|
||||
catch_discover_tests(ABMTests PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
|
||||
add_dependencies(UnitTests module_kernels.code)
|
||||
|
||||
add_dependencies(build_tests UnitTests ABMTests)
|
||||
|
||||
@@ -63,7 +65,7 @@ else()
|
||||
target_compile_options(StressTest PUBLIC -std=c++17)
|
||||
endif()
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
target_link_libraries(StressTest PRIVATE printf stream)
|
||||
target_link_libraries(StressTest PRIVATE printf stream module)
|
||||
endif()
|
||||
target_link_libraries(StressTest PRIVATE memory stdc++fs)
|
||||
add_dependencies(build_stress_test StressTest)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -23,6 +22,13 @@ THE SOFTWARE.
|
||||
#pragma once
|
||||
#include "hip_test_context.hh"
|
||||
#include <catch.hpp>
|
||||
#ifdef __linux__
|
||||
#include <sys/sysinfo.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
|
||||
|
||||
@@ -72,6 +78,27 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
|
||||
#if HT_NVIDIA
|
||||
#define CTX_CREATE() \
|
||||
hipCtx_t context;\
|
||||
initHipCtx(&context);
|
||||
#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context));
|
||||
#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array));
|
||||
#define HIP_TEX_REFERENCE hipTexRef
|
||||
#define HIP_ARRAY hiparray
|
||||
// Initializes the HIP driver API, looks up device 0 and creates a context on
// it, storing the new handle in *pcontext. Every call is wrapped in HIPCHECK.
static void initHipCtx(hipCtx_t *pcontext) {
  hipDevice_t dev;
  HIPCHECK(hipInit(0));
  HIPCHECK(hipDeviceGet(&dev, 0));
  HIPCHECK(hipCtxCreate(pcontext, 0, dev));
}
|
||||
#else
|
||||
#define CTX_CREATE()
|
||||
#define CTX_DESTROY()
|
||||
#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array));
|
||||
#define HIP_TEX_REFERENCE textureReference*
|
||||
#define HIP_ARRAY hipArray*
|
||||
#endif
|
||||
|
||||
// Utility Functions
|
||||
namespace HipTest {
|
||||
@@ -104,4 +131,34 @@ static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlo
|
||||
|
||||
return blocks;
|
||||
}
|
||||
// Returns the amount of free physical host memory, in MB.
// Returns 0 when the amount cannot be determined (the OS query failed, or the
// platform is neither Linux nor Windows).
static size_t getMemoryAmount() {
#if defined(__linux__)
  struct sysinfo info;
  if (sysinfo(&info) != 0) return 0;
  // sysinfo reports freeram in units of mem_unit bytes, so scale to bytes
  // before converting to MB.
  const unsigned long long freeBytes =
      static_cast<unsigned long long>(info.freeram) * info.mem_unit;
  return static_cast<size_t>(freeBytes / (1024ULL * 1024ULL));  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  if (!GlobalMemoryStatusEx(&statex)) return 0;
  return static_cast<size_t>(statex.ullAvailPhys / (1024ULL * 1024ULL));  // MB
#else
  return 0;
#endif
}
|
||||
|
||||
static inline size_t getHostThreadCount(const size_t memPerThread = 200, const size_t maxThreads = 0) {
|
||||
if (memPerThread == 0) return 0;
|
||||
auto memAmount = getMemoryAmount();
|
||||
const auto processor_count = std::thread::hardware_concurrency();
|
||||
if (processor_count == 0 || memAmount == 0) return 0;
|
||||
size_t thread_count = 0;
|
||||
if ((processor_count * memPerThread) < memAmount)
|
||||
thread_count = processor_count;
|
||||
else
|
||||
thread_count = reinterpret_cast<size_t>(memAmount / memPerThread);
|
||||
if (maxThreads > 0) {
|
||||
return (thread_count > maxThreads) ? maxThreads : thread_count;
|
||||
}
|
||||
return thread_count;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -72,6 +72,35 @@ __global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count)
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__device__ void waitKernel(uint64_t wait_sec, T clockrate) {
|
||||
uint64_t start = clock64()/clockrate, cur;
|
||||
do { cur = clock64()/clockrate-start;}while (cur < (wait_sec*1000));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void TwoSecKernel_GlobalVar(int globalvar, int clockrate) {
|
||||
if (globalvar == 0x2222) {
|
||||
globalvar = 0x3333;
|
||||
}
|
||||
waitKernel(2, clockrate);
|
||||
if (globalvar != 0x3333) {
|
||||
globalvar = 0x5555;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void FourSecKernel_GlobalVar(int globalvar, int clockrate) {
|
||||
if (globalvar == 1) {
|
||||
globalvar = 0x2222;
|
||||
}
|
||||
waitKernel(4, clockrate);
|
||||
if (globalvar == 0x2222) {
|
||||
globalvar = 0x4444;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
add_subdirectory(memory)
|
||||
add_subdirectory(module)
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
add_subdirectory(printf)
|
||||
add_subdirectory(stream)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
set(TEST_SRC
|
||||
memcpy.cc
|
||||
hipMemcpyMThreadMSize.cc
|
||||
hipMemcpyBoundaryOffsetCheck.cc
|
||||
)
|
||||
|
||||
# Create shared lib of all tests
|
||||
|
||||
@@ -0,0 +1,344 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
This testcase verifies the following scenarios
|
||||
1. Boundary checks with different sizes
|
||||
2. Device offset scenario
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include "sys/types.h"
|
||||
#include "sys/sysinfo.h"
|
||||
#endif
|
||||
static constexpr auto NUM_ELM{4*1024 * 1024};
|
||||
// Owns the device buffers used by the memcpy tests: A_d, B_d, C_d and a
// secondary result buffer C_dd. Every accessor returns the buffer base plus
// the currently configured element offset.
//
// The class holds raw device pointers that are released in the destructor, so
// copying is disabled to rule out freeing the same buffers twice.
template <typename T>
class DeviceMemory {
 public:
  // Allocates buffers able to hold numElements elements each.
  explicit DeviceMemory(size_t numElements);
  DeviceMemory() = delete;
  DeviceMemory(const DeviceMemory&) = delete;
  DeviceMemory& operator=(const DeviceMemory&) = delete;
  ~DeviceMemory();

  T* A_d() const { return _A_d + _offset; }
  T* B_d() const { return _B_d + _offset; }
  T* C_d() const { return _C_d + _offset; }
  T* C_dd() const { return _C_dd + _offset; }

  // Capacity, in elements, requested at construction.
  size_t maxNumElements() const { return _maxNumElements; }

  // Sets / reads the element offset applied by the accessors above.
  void offset(int offset) { _offset = offset; }
  int offset() const { return _offset; }

 private:
  T* _A_d;
  T* _B_d;
  T* _C_d;
  T* _C_dd;
  size_t _maxNumElements;
  int _offset;
};
|
||||
template <typename T>
|
||||
DeviceMemory<T>::DeviceMemory(size_t numElements) :
|
||||
_maxNumElements(numElements), _offset(0) {
|
||||
T** np = nullptr;
|
||||
HipTest::initArrays(&_A_d, &_B_d, &_C_d, np, np, np, numElements, 0);
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
HIP_CHECK(hipMalloc(&_C_dd, sizeElements));
|
||||
}
|
||||
template <typename T>
|
||||
DeviceMemory<T>::~DeviceMemory() {
|
||||
T* np = nullptr;
|
||||
HipTest::freeArrays<T>(_A_d, _B_d, _C_d, np, np, np, 0);
|
||||
HIP_CHECK(hipFree(_C_dd));
|
||||
_C_dd = NULL;
|
||||
}
|
||||
// Owns the host buffers used by the memcpy tests: the primary arrays A_h, B_h
// and C_h (reached through accessors that add the configured element offset)
// and the secondary copies A_hh / B_hh (public, no offset applied).
//
// The class holds raw pointers that are released in the destructor, so copying
// is disabled to rule out freeing the same buffers twice.
template <typename T>
class HostMemory {
 public:
  // Allocates buffers for numElements elements; usePinnedHost selects
  // hipHostMalloc instead of malloc for the secondary copies and is forwarded
  // to HipTest::initArrays for the primary arrays.
  HostMemory(size_t numElements, bool usePinnedHost);
  HostMemory() = delete;
  HostMemory(const HostMemory&) = delete;
  HostMemory& operator=(const HostMemory&) = delete;

  // Re-initializes the secondary copies and, when full is true, the primary
  // A_h / B_h arrays as well.
  void reset(size_t numElements, bool full = false);
  ~HostMemory();

  T* A_h() const { return _A_h + _offset; }
  T* B_h() const { return _B_h + _offset; }
  T* C_h() const { return _C_h + _offset; }

  // Capacity, in elements, requested at construction.
  size_t maxNumElements() const { return _maxNumElements; }

  // Sets / reads the element offset applied by the accessors above.
  void offset(int offset) { _offset = offset; }
  int offset() const { return _offset; }

  // Host arrays, secondary copy
  T* A_hh;
  T* B_hh;
  bool _usePinnedHost;

 private:
  size_t _maxNumElements;
  int _offset;
  // Host arrays
  T* _A_h;
  T* _B_h;
  T* _C_h;
};
|
||||
template <typename T>
|
||||
HostMemory<T>::HostMemory(size_t numElements, bool usePinnedHost)
|
||||
: _usePinnedHost(usePinnedHost), _maxNumElements(numElements), _offset(0) {
|
||||
T** np = nullptr;
|
||||
HipTest::initArrays(np, np, np, &_A_h, &_B_h, &_C_h,
|
||||
numElements, usePinnedHost);
|
||||
A_hh = NULL;
|
||||
B_hh = NULL;
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
if (usePinnedHost) {
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_hh), sizeElements,
|
||||
hipHostMallocDefault));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_hh), sizeElements,
|
||||
hipHostMallocDefault));
|
||||
} else {
|
||||
A_hh = reinterpret_cast<T*>(malloc(sizeElements));
|
||||
B_hh = reinterpret_cast<T*>(malloc(sizeElements));
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
void HostMemory<T>::reset(size_t numElements, bool full) {
|
||||
// Initialize the host data:
|
||||
for (size_t i = 0; i < numElements; i++) {
|
||||
(A_hh)[i] = 1097.0 + i;
|
||||
(B_hh)[i] = 1492.0 + i; // Phi
|
||||
if (full) {
|
||||
(_A_h)[i] = 3.146f + i; // Pi
|
||||
(_B_h)[i] = 1.618f + i; // Phi
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
HostMemory<T>::~HostMemory() {
|
||||
HipTest::freeArraysForHost(_A_h, _B_h, _C_h, _usePinnedHost);
|
||||
if (_usePinnedHost) {
|
||||
HIP_CHECK(hipHostFree(A_hh));
|
||||
HIP_CHECK(hipHostFree(B_hh));
|
||||
} else {
|
||||
free(A_hh);
|
||||
free(B_hh);
|
||||
}
|
||||
}
|
||||
// Reports host physical memory, in bytes, through *free and *total.
#ifdef _WIN32
void memcpytest2_get_host_memory(size_t *free, size_t *total) {
  MEMORYSTATUSEX memStatus;
  memStatus.dwLength = sizeof(memStatus);
  GlobalMemoryStatusEx(&memStatus);
  // Windows doesn't allow allocating more than half of system memory to the
  // gpu. Since the runtime also needs space for its internal allocations, we
  // should not try to allocate more than 40% of reported system memory,
  // otherwise we can run into OOM issues.
  *total = static_cast<size_t>(0.4 * memStatus.ullTotalPhys);
  *free = static_cast<size_t>(0.4 * memStatus.ullAvailPhys);
}
#else
struct sysinfo memInfo;
void memcpytest2_get_host_memory(size_t *free, size_t *total) {
  sysinfo(&memInfo);
  // sysinfo counts are expressed in units of mem_unit bytes.
  const uint64_t unit = memInfo.mem_unit;
  *free = static_cast<uint64_t>(memInfo.freeram) * unit;
  *total = static_cast<uint64_t>(memInfo.totalram) * unit;
}
#endif
|
||||
//---
|
||||
// Test many different kinds of memory copies.
|
||||
// The subroutine allocates memory , copies to device, runs a vector
|
||||
// add kernel, copies back, and
|
||||
// checks the result.
|
||||
//
|
||||
// IN: numElements controls the number of elements used for allocations.
|
||||
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned
|
||||
// else allocate host
|
||||
// memory with malloc. IN: useHostToHost : If true, add an extra
|
||||
// host-to-host copy. IN:
|
||||
// useDeviceToDevice : If true, add an extra deviceto-device copy after
|
||||
// result is produced. IN:
|
||||
// useMemkindDefault : If true, use memkinddefault
|
||||
// (runtime figures out direction). if false, use
|
||||
// explicit memcpy direction.
|
||||
//
|
||||
template <typename T>
|
||||
void memcpytest2(DeviceMemory<T>* dmem, HostMemory<T>* hmem,
|
||||
size_t numElements, bool useHostToHost,
|
||||
bool useDeviceToDevice, bool useMemkindDefault) {
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
hmem->reset(numElements);
|
||||
assert(numElements <= dmem->maxNumElements());
|
||||
assert(numElements <= hmem->maxNumElements());
|
||||
if (useHostToHost) {
|
||||
// Do some extra host-to-host copies here to mix things up:
|
||||
HIP_CHECK(hipMemcpy(hmem->A_hh, hmem->A_h(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost));
|
||||
HIP_CHECK(hipMemcpy(hmem->B_hh, hmem->B_h(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost));
|
||||
HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_hh, sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_hh, sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_h(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_h(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
}
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
|
||||
static_cast<const T*>(dmem->A_d()), static_cast<const T*>(dmem->B_d()),
|
||||
dmem->C_d(), numElements);
|
||||
if (useDeviceToDevice) {
|
||||
// Do an extra device-to-device copy here to mix things up:
|
||||
HIP_CHECK(hipMemcpy(dmem->C_dd(), dmem->C_d(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToDevice));
|
||||
// Destroy the original dmem->C_d():
|
||||
HIP_CHECK(hipMemset(dmem->C_d(), 0x5A, sizeElements));
|
||||
HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_dd(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_d(), sizeElements,
|
||||
useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost));
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(hmem->A_h(), hmem->B_h(), hmem->C_h(), numElements);
|
||||
}
|
||||
// Try all the 16 possible combinations to memcpytest2 - usePinnedHost,
|
||||
// useHostToHost,
|
||||
// useDeviceToDevice, useMemkindDefault
|
||||
template <typename T>
|
||||
void memcpytest2_for_type(size_t numElements) {
|
||||
DeviceMemory<T> memD(numElements);
|
||||
HostMemory<T> memU(numElements, 0 /*usePinnedHost*/);
|
||||
HostMemory<T> memP(numElements, 1 /*usePinnedHost*/);
|
||||
for (int usePinnedHost = 0; usePinnedHost <= 1; usePinnedHost++) {
|
||||
for (int useHostToHost = 0; useHostToHost <= 1; useHostToHost++) {
|
||||
for (int useDeviceToDevice = 0; useDeviceToDevice <= 1;
|
||||
useDeviceToDevice++) {
|
||||
for (int useMemkindDefault = 0; useMemkindDefault <= 1;
|
||||
useMemkindDefault++) {
|
||||
memcpytest2<T>(&memD, usePinnedHost ? &memP : &memU,
|
||||
numElements, useHostToHost,
|
||||
useDeviceToDevice, useMemkindDefault);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Try many different sizes to memory copy.
|
||||
template <typename T>
|
||||
void memcpytest2_sizes(size_t maxElem = 0) {
|
||||
int deviceId;
|
||||
HIP_CHECK(hipGetDevice(&deviceId));
|
||||
size_t free, total, freeCPU, totalCPU;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
memcpytest2_get_host_memory(&freeCPU, &totalCPU);
|
||||
if (maxElem == 0) {
|
||||
// Use lesser maxElem if not enough host memory available
|
||||
size_t maxElemGPU = free / sizeof(T) / 8;
|
||||
size_t maxElemCPU = freeCPU / sizeof(T) / 8;
|
||||
maxElem = maxElemGPU < maxElemCPU ? maxElemGPU : maxElemCPU;
|
||||
}
|
||||
HIP_CHECK(hipDeviceReset());
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0 /*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1 /*usePinnedHost*/);
|
||||
for (size_t elem = 1; elem <= maxElem; elem *= 2) {
|
||||
memcpytest2<T>(&memD, &memU, elem, 1, 1, 0); // unpinned host
|
||||
memcpytest2<T>(&memD, &memP, elem, 1, 1, 0); // pinned host
|
||||
}
|
||||
}
|
||||
// Try many different sizes to memory copy.
|
||||
template <typename T>
|
||||
void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) {
|
||||
int deviceId;
|
||||
HIP_CHECK(hipGetDevice(&deviceId));
|
||||
size_t free, total;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
HIP_CHECK(hipDeviceReset());
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0 /*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1 /*usePinnedHost*/);
|
||||
size_t elem = maxElem / 2;
|
||||
for (size_t offset = 0; offset < 512; offset++) {
|
||||
assert(elem + offset < maxElem);
|
||||
if (devOffsets) {
|
||||
memD.offset(offset);
|
||||
}
|
||||
if (hostOffsets) {
|
||||
memU.offset(offset);
|
||||
memP.offset(offset);
|
||||
}
|
||||
memcpytest2<T>(&memD, &memU, elem, 1, 1, 0); // unpinned host
|
||||
memcpytest2<T>(&memD, &memP, elem, 1, 1, 0); // pinned host
|
||||
}
|
||||
for (size_t offset = 512; offset < elem; offset *= 2) {
|
||||
assert(elem + offset < maxElem);
|
||||
if (devOffsets) {
|
||||
memD.offset(offset);
|
||||
}
|
||||
if (hostOffsets) {
|
||||
memU.offset(offset);
|
||||
memP.offset(offset);
|
||||
}
|
||||
memcpytest2<T>(&memD, &memU, elem, 1, 1, 0); // unpinned host
|
||||
memcpytest2<T>(&memD, &memP, elem, 1, 1, 0); // pinned host
|
||||
}
|
||||
}
|
||||
// Create multiple threads to stress multi-thread locking behavior in the
|
||||
// allocation/deallocation/tracking logic:
|
||||
template <typename T>
|
||||
void multiThread_1(bool serialize, bool usePinnedHost) {
|
||||
DeviceMemory<T> memD(NUM_ELM);
|
||||
HostMemory<T> mem1(NUM_ELM, usePinnedHost);
|
||||
HostMemory<T> mem2(NUM_ELM, usePinnedHost);
|
||||
std::thread t1(memcpytest2<T>, &memD, &mem1, NUM_ELM, 0, 0, 0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
}
|
||||
std::thread t2(memcpytest2<T>, &memD, &mem2, NUM_ELM, 0, 0, 0);
|
||||
if (serialize) {
|
||||
t2.join();
|
||||
}
|
||||
}
|
||||
/*
|
||||
This testcase verfies the boundary checks of hipMemcpy API for different sizes
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy_BoundaryCheck") {
|
||||
size_t maxElem = 32 * 1024 * 1024;
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0 /*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0 /*usePinnedHost*/);
|
||||
memcpytest2<float>(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0);
|
||||
auto sizes = GENERATE(15 * 1024 * 1024, 16 * 1024 * 1024,
|
||||
16 * 1024 * 1024 + 16 * 1024,
|
||||
16 * 1024 * 1024 + 512 * 1024,
|
||||
17 * 1024 * 1024 + 1024,
|
||||
32 * 1024 * 1024);
|
||||
memcpytest2<float>(&memD, &memP, sizes, 0, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the device offsets
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) {
|
||||
HIP_CHECK(hipDeviceReset());
|
||||
size_t maxSize = 256 * 1024;
|
||||
memcpytest2_offsets<TestType>(maxSize, true, false);
|
||||
memcpytest2_offsets<TestType>(maxSize, false, true);
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
# Module stress tests.
# hipModuleLaunchKernel_CornerTests.cc is built on every platform; the
# hipExtModuleLaunchKernel corner test is added (first in the list) on AMD only.
set(TEST_SRC hipModuleLaunchKernel_CornerTests.cc)
if(HIP_PLATFORM MATCHES "amd")
  list(INSERT TEST_SRC 0 hipExtModuleLaunchKernel_CornerTest.cc)
endif()

# Compile kernels.cc into the kernels.code code object next to the test binary.
add_custom_target(kernels.code
                  COMMAND ${CMAKE_CXX_COMPILER} --genco
                          ${HIP_COMMON_DIR}/tests/catch/stress/module/kernels.cc
                          -o ${HIP_PATH}/catch/hipTestMain/kernels.code
                          -I${HIP_PATH}/include/
                          -I${HIP_COMMON_DIR}/tests/catch/include)

# Create shared lib of all tests
add_library(module SHARED EXCLUDE_FROM_ALL ${TEST_SRC})

# Make build_stress_test build the library and the code object
add_dependencies(build_stress_test module kernels.code)
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Test Scenario
|
||||
hipExtModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions
|
||||
*/
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_kernels.hh"
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
#define fileName "kernels.code"
|
||||
#define dummyKernel "EmptyKernel"
|
||||
|
||||
// One launch configuration: grid dimensions followed by block dimensions.
// Member order matters because instances are aggregate-initialized.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
|
||||
|
||||
/*
|
||||
This testcase verifies hipExtModuleLaunchKernel API Corner
|
||||
cases
|
||||
*/
|
||||
TEST_CASE("Stress_hipExtModuleLaunchKernel_CornerCases") {
|
||||
hipModule_t Module;
|
||||
hipFunction_t DummyKernel;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
|
||||
constexpr auto gridblocksize{6};
|
||||
struct {
|
||||
} args;
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
size_t size = sizeof(args);
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
unsigned int maxblockX = deviceProp.maxThreadsDim[0];
|
||||
unsigned int maxblockY = deviceProp.maxThreadsDim[1];
|
||||
unsigned int maxblockZ = deviceProp.maxThreadsDim[2];
|
||||
struct gridblockDim test[gridblocksize] = {{1, 1, 1, maxblockX, 1, 1},
|
||||
{1, 1, 1, 1, maxblockY, 1},
|
||||
{1, 1, 1, 1, 1, maxblockZ},
|
||||
{UINT32_MAX, 1, 1, 1, 1, 1},
|
||||
{1, UINT32_MAX, 1, 1, 1, 1},
|
||||
{1, 1, UINT32_MAX, 1, 1, 1}};
|
||||
|
||||
// Launching kernel with corner cases in grid and block dimensions
|
||||
for (int i = 0; i < gridblocksize; i++) {
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel,
|
||||
test[i].gridX,
|
||||
test[i].gridY,
|
||||
test[i].gridZ,
|
||||
test[i].blockX,
|
||||
test[i].blockY,
|
||||
test[i].blockZ,
|
||||
0,
|
||||
stream, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0));
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Test Scenario
|
||||
hipModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions
|
||||
*/
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_kernels.hh"
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
#define fileName "kernels.code"
|
||||
#define dummyKernel "EmptyKernel"
|
||||
|
||||
// One launch configuration: grid dimensions followed by block dimensions.
// Member order matters because instances are aggregate-initialized.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
|
||||
|
||||
/*
|
||||
This testcase verifies hipModuleLaunchKernel API Corner
|
||||
cases
|
||||
*/
|
||||
TEST_CASE("Stress_hipModuleLaunchKernel_CornerCases") {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipStream_t stream1;
|
||||
CTX_CREATE()
|
||||
hipModule_t Module;
|
||||
hipFunction_t DummyKernel;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
|
||||
// Passing Max int value to block dimensions
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
unsigned int maxblockX = deviceProp.maxThreadsDim[0];
|
||||
unsigned int maxblockY = deviceProp.maxThreadsDim[1];
|
||||
unsigned int maxblockZ = deviceProp.maxThreadsDim[2];
|
||||
#if HT_NVIDIA
|
||||
unsigned int maxgridX = deviceProp.maxGridSize[0];
|
||||
unsigned int maxgridY = deviceProp.maxGridSize[1];
|
||||
unsigned int maxgridZ = deviceProp.maxGridSize[2];
|
||||
#else
|
||||
unsigned int maxgridX = UINT32_MAX;
|
||||
unsigned int maxgridY = UINT32_MAX;
|
||||
unsigned int maxgridZ = UINT32_MAX;
|
||||
#endif
|
||||
struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1},
|
||||
{1, 1, 1, 1, maxblockY, 1},
|
||||
{1, 1, 1, 1, 1, maxblockZ},
|
||||
{maxgridX, 1, 1, 1, 1, 1},
|
||||
{1, maxgridY, 1, 1, 1, 1},
|
||||
{1, 1, maxgridZ, 1, 1, 1}};
|
||||
for (int i = 0; i < 6; i++) {
|
||||
HIP_CHECK(hipModuleLaunchKernel(DummyKernel,
|
||||
test[i].gridX,
|
||||
test[i].gridY,
|
||||
test[i].gridZ,
|
||||
test[i].blockX,
|
||||
test[i].blockY,
|
||||
test[i].blockZ,
|
||||
0,
|
||||
stream1, NULL, NULL));
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY();
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
// Kernel with an empty body and no parameters. Declared extern "C" so the
// symbol keeps the unmangled name "EmptyKernel".
extern "C" __global__ void EmptyKernel() {}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
add_subdirectory(module)
|
||||
add_subdirectory(memory)
|
||||
add_subdirectory(deviceLib)
|
||||
add_subdirectory(stream)
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
# Module API unit tests.
# Sources that are built on every platform.
set(_module_common_src
    hipModuleLaunchKernel.cc
    hipFuncSetCacheConfig.cc
    hipModuleUnload.cc
    hipFuncSetAttribute.cc
    hipModuleLoadData.cc
    hipFuncSetSharedMemConfig.cc
    hipManagedKeyword.cc
    hipModuleGetGlobal.cc
    hipFuncGetAttributes.cc
    hipModule.cc
    hipModuleLoadDataMultThreadOnMultGPU.cc
    hipModuleLoadDataMultThreaded.cc
    hipModuleLoadMultiThreaded.cc
    hipModuleLoadUnloadStress.cc
    hipModuleNegative.cc
    hipModuleOccupancyMaxPotentialBlockSize.cc)

if(HIP_PLATFORM MATCHES "amd")
  # AMD additionally builds the hipExt* launch tests (listed first) and the
  # texture / OpenCL code-object tests (listed last).
  set(TEST_SRC
      hipExtLaunchKernelGGL.cc
      hipExtModuleLaunchKernel.cc
      hipExtLaunchMultiKernelMultiDevice.cc
      ${_module_common_src}
      hipModuleTexture2dDrv.cc
      hipOpenCLCOTest.cc)
else()
  set(TEST_SRC ${_module_common_src})
endif()

# Compile module_kernels.cc into the module_kernels.code code object next to
# the test binary.
add_custom_target(module_kernels.code
                  COMMAND ${CMAKE_CXX_COMPILER} --genco
                          ${HIP_COMMON_DIR}/tests/catch/unit/module/module_kernels.cc
                          -o ${HIP_PATH}/catch/hipTestMain/module_kernels.code
                          -I${HIP_PATH}/include/
                          -I${HIP_COMMON_DIR}/tests/catch/include)

# Create shared lib of all tests
add_library(ModuleTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})

# Make build_tests build the library and the code object
add_dependencies(build_tests ModuleTest module_kernels.code)
|
||||
Исполняемый файл
+129
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
* Test Scenarios
|
||||
1. Verify kernel execution time of the particular kernel
|
||||
2. Verify hipExtLaunchKernelGGL API by disabling time flag in event creation
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
#define FOURSEC_KERNEL 4999
|
||||
#define TWOSEC_KERNEL 2999
|
||||
|
||||
__device__ int globalvar = 1;
|
||||
/*
 * Updates globalvar, calls HipTest::waitKernel(2, clockrate), then updates
 * globalvar again:
 *   - before the wait: 0x2222 becomes 0x3333
 *   - after the wait : anything other than 0x3333 becomes 0x5555
 * NOTE(review): waitKernel presumably busy-waits for about 2 seconds based on
 * clockrate - confirm against hip_test_kernels.hh.
 */
__global__ void TwoSecKernel_GlobalVar(int clockrate) {
  const bool markerSeen = (globalvar == 0x2222);
  if (markerSeen) {
    globalvar = 0x3333;
  }

  HipTest::waitKernel(2, clockrate);

  const bool stillAdvanced = (globalvar == 0x3333);
  if (!stillAdvanced) {
    globalvar = 0x5555;
  }
}
|
||||
|
||||
/*
 * Updates globalvar, calls HipTest::waitKernel(4, clockrate), then updates
 * globalvar again:
 *   - before the wait: the initial value 1 becomes 0x2222
 *   - after the wait : 0x2222 becomes 0x4444
 * NOTE(review): waitKernel presumably busy-waits for about 4 seconds based on
 * clockrate - confirm against hip_test_kernels.hh.
 */
__global__ void FourSecKernel_GlobalVar(int clockrate) {
  const bool untouched = (globalvar == 1);
  if (untouched) {
    globalvar = 0x2222;
  }

  HipTest::waitKernel(4, clockrate);

  const bool markerIntact = (globalvar == 0x2222);
  if (markerIntact) {
    globalvar = 0x4444;
  }
}
|
||||
|
||||
|
||||
/*
|
||||
* In this Scenario, we create events by disabling the timing flag
|
||||
* We then Launch the kernel using hipExtModuleLaunchKernel by passing
|
||||
* disabled events and try to fetch kernel execution time using
|
||||
* hipEventElapsedTime API which would fail as the flag is disabled.
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtLaunchKernelGGL_TimeFlagDisabled") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
float time_2sec;
|
||||
hipEvent_t start_event, end_event;
|
||||
int clkRate = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
|
||||
// Event Creation and Launching kernels
|
||||
HIP_CHECK(hipEventCreateWithFlags(&start_event,
|
||||
hipEventDisableTiming));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&end_event,
|
||||
hipEventDisableTiming));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0,
|
||||
stream, start_event, end_event, 0, clkRate);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
REQUIRE(hipEventElapsedTime(&time_2sec, start_event, end_event)
|
||||
!= hipSuccess);
|
||||
|
||||
// Destroying the events and streams
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipEventDestroy(start_event));
|
||||
HIP_CHECK(hipEventDestroy(end_event));
|
||||
}
|
||||
/*
|
||||
* Launching FourSecKernel and TwoSecKernel and then we try to
|
||||
* get the event elapsed time of each kernel using the start and
|
||||
* end events.The event elapsed time should return us the kernel
|
||||
* execution time for that particular kernel
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtLaunchKernelGGL_KernelTimeExecution") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipEvent_t start_event1, end_event1, start_event2, end_event2;
|
||||
float time_4sec, time_2sec;
|
||||
int clkRate = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
|
||||
// Creating streams and events
|
||||
HIP_CHECK(hipEventCreate(&start_event1));
|
||||
HIP_CHECK(hipEventCreate(&end_event1));
|
||||
HIP_CHECK(hipEventCreate(&start_event2));
|
||||
HIP_CHECK(hipEventCreate(&end_event2));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Launching 4sec and 2sec kernels
|
||||
hipExtLaunchKernelGGL(FourSecKernel_GlobalVar, dim3(1), dim3(1), 0,
|
||||
stream, start_event1, end_event1, 0, clkRate);
|
||||
hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0,
|
||||
stream, start_event2, end_event2, 0, clkRate);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1));
|
||||
HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2));
|
||||
|
||||
INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs" << time_4sec
|
||||
<< "Kernel2-<" << TWOSEC_KERNEL << "Vs" << time_2sec);
|
||||
// Verifying the kernel execution time
|
||||
REQUIRE(time_4sec < static_cast<float>(FOURSEC_KERNEL));
|
||||
REQUIRE(time_2sec < static_cast<float>(TWOSEC_KERNEL));
|
||||
|
||||
// Destroying streams and events
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipEventDestroy(start_event1));
|
||||
HIP_CHECK(hipEventDestroy(end_event1));
|
||||
HIP_CHECK(hipEventDestroy(start_event2));
|
||||
HIP_CHECK(hipEventDestroy(end_event2));
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* This testfile verifies the basic functionality of
|
||||
hipExtLaunchMultiKernelMultiDevice API.
|
||||
It can be tested on single GPU or multi GPUs.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define MAX_GPUS 8
|
||||
#define NUM_KERNEL_ARGS 3
|
||||
|
||||
/*
|
||||
This testcase verifies hipExtLaunchMultiKernelMultiDevice API for different
|
||||
datatypes where
|
||||
1. Intitialize device variables
|
||||
2. Initializing hipLaunchParams structure to pass it to
|
||||
hipExtLaunchMultiKernelMultiDevice API
|
||||
3. Launches vector_square kernel which performs square of the variable
|
||||
4. Validates the result with the square of variable.
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Basic", "", int
|
||||
, float, double) {
|
||||
TestType *A_d[MAX_GPUS], *C_d[MAX_GPUS];
|
||||
TestType *A_h, *C_h;
|
||||
size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(TestType);
|
||||
int nGpu = 0;
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
SUCCEED("info: didn't find any GPU! Skipping the testcase");
|
||||
} else {
|
||||
if (nGpu > MAX_GPUS) {
|
||||
nGpu = MAX_GPUS;
|
||||
}
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h, N, false);
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
// Allocating and initializing device variables
|
||||
hipStream_t stream[MAX_GPUS];
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[i], hipStreamNonBlocking));
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, i/*deviceID*/));
|
||||
INFO("Running on bus 0x" << props.pciBusID << " " << props.name);
|
||||
INFO("Allocate device mem " << 2*Nbytes/1024.0/1024.0);
|
||||
HIP_CHECK(hipMalloc(&A_d[i], Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d[i], Nbytes));
|
||||
HIP_CHECK(hipMemcpy(A_d[i], A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
hipLaunchParams *launchParamsList = reinterpret_cast<hipLaunchParams *>(
|
||||
malloc(sizeof(hipLaunchParams)*nGpu));
|
||||
void *args[MAX_GPUS * NUM_KERNEL_ARGS];
|
||||
|
||||
// Intializing the hipLaunchParams structure with device variables
|
||||
// ,kernel and launching hipExtLaunchMultiKernelMultiDevice API
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
args[i * NUM_KERNEL_ARGS] = &A_d[i];
|
||||
args[i * NUM_KERNEL_ARGS + 1] = &C_d[i];
|
||||
args[i * NUM_KERNEL_ARGS + 2] = &N;
|
||||
launchParamsList[i].func =
|
||||
reinterpret_cast<void *>(HipTest::vector_square<TestType>);
|
||||
launchParamsList[i].gridDim = dim3(blocks);
|
||||
launchParamsList[i].blockDim = dim3(threadsPerBlock);
|
||||
launchParamsList[i].sharedMem = 0;
|
||||
launchParamsList[i].stream = stream[i];
|
||||
launchParamsList[i].args = args + i * NUM_KERNEL_ARGS;
|
||||
}
|
||||
|
||||
hipExtLaunchMultiKernelMultiDevice(launchParamsList, nGpu, 0);
|
||||
|
||||
// Validating the result
|
||||
for (int j = 0; j < nGpu; j++) {
|
||||
hipStreamSynchronize(stream[j]);
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, j/*deviceID*/));
|
||||
INFO("Checking result on bus " << props.pciBusID << props.name);
|
||||
|
||||
HIP_CHECK(hipSetDevice(j));
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d[j], Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
INFO("validation failed " << C_h[i] << A_h[i]*A_h[i]);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DeAllocating memory
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
for (int j = 0; j < nGpu; j++) {
|
||||
HIP_CHECK(hipFree(A_d[j]));
|
||||
HIP_CHECK(hipFree(C_d[j]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
Исполняемый файл
+433
@@ -0,0 +1,433 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* Test Scenarios
|
||||
1. hipExtModuleLaunchKernel Negative Scenarios
|
||||
2. hipExtModuleLaunchKernel API verifying the kernel execution time of a particular kernel.
|
||||
3. hipExtModuleLaunchKernel API verifying the kernel execution time by disabling the time flag
|
||||
4. hipExtModuleLaunchKernel Work Group tests =>
|
||||
- (block.x * block.y * block.z) <= Work Group Size
|
||||
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
- (block.x * block.y * block.z) > Work Group Size
|
||||
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_kernels.hh"
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
// Code object loaded by ModuleLoad().
#define fileName        "module_kernels.code"

// Symbol names looked up inside the loaded module.
#define matmulK         "matmulK"
#define SixteenSec      "SixteenSecKernel"
#define KernelandExtra  "KernelandExtraParams"
#define FourSec         "FourSecKernel"
#define TwoSec          "TwoSecKernel"
#define globalDevVar    "deviceGlobal"
#define dummyKernel     "EmptyKernel"

// Upper bounds that the elapsed times reported by hipEventElapsedTime for the
// FourSec / TwoSec kernels are compared against.
#define FOURSEC_KERNEL  4999
#define TWOSEC_KERNEL   2999
|
||||
|
||||
// Plain aggregate of grid and block extents in x, y and z.
struct gridblockDim {
  // Grid extents.
  unsigned int gridX, gridY, gridZ;
  // Block extents.
  unsigned int blockX, blockY, blockZ;
};
|
||||
|
||||
class ModuleLaunchKernel {
|
||||
int N = 64;
|
||||
int SIZE = N*N;
|
||||
int *A, *B, *C;
|
||||
hipDeviceptr_t *Ad, *Bd;
|
||||
hipStream_t stream1, stream2;
|
||||
hipEvent_t start_event1, end_event1, start_event2, end_event2,
|
||||
start_timingDisabled, end_timingDisabled;
|
||||
hipModule_t Module;
|
||||
hipDeviceptr_t deviceGlobal;
|
||||
hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel,
|
||||
TwoSecKernel, KernelandExtraParamKernel, DummyKernel;
|
||||
struct {
|
||||
int clockRate;
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
void* _Cd;
|
||||
int _n;
|
||||
} args1, args2;
|
||||
struct {
|
||||
} args3;
|
||||
size_t size1;
|
||||
size_t size2;
|
||||
size_t size3;
|
||||
size_t deviceGlobalSize;
|
||||
public :
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void ModuleLoad();
|
||||
void Module_Negative_tests();
|
||||
void ExtModule_Negative_tests();
|
||||
void Module_WorkGroup_Test();
|
||||
void ExtModule_KernelExecutionTime();
|
||||
void ExtModule_Disabled_Timingflag();
|
||||
};
|
||||
|
||||
void ModuleLaunchKernel::AllocateMemory() {
|
||||
A = new int[N*N*sizeof(int)];
|
||||
B = new int[N*N*sizeof(int)];
|
||||
for (int i=0; i < N; i++) {
|
||||
for (int j=0; j < N; j++) {
|
||||
A[i*N +j] = 1;
|
||||
B[i*N +j] = 1;
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad),
|
||||
SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd),
|
||||
SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C), SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
int clkRate = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
args1._Ad = Ad;
|
||||
args1._Bd = Bd;
|
||||
args1._Cd = C;
|
||||
args1._n = N;
|
||||
args1.clockRate = clkRate;
|
||||
args2._Ad = NULL;
|
||||
args2._Bd = NULL;
|
||||
args2._Cd = NULL;
|
||||
args2._n = 0;
|
||||
args2.clockRate = clkRate;
|
||||
size1 = sizeof(args1);
|
||||
size2 = sizeof(args2);
|
||||
size3 = sizeof(args3);
|
||||
HIP_CHECK(hipEventCreate(&start_event1));
|
||||
HIP_CHECK(hipEventCreate(&end_event1));
|
||||
HIP_CHECK(hipEventCreate(&start_event2));
|
||||
HIP_CHECK(hipEventCreate(&end_event2));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&start_timingDisabled,
|
||||
hipEventDisableTiming));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&end_timingDisabled,
|
||||
hipEventDisableTiming));
|
||||
}
|
||||
|
||||
void ModuleLaunchKernel::ModuleLoad() {
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK));
|
||||
HIP_CHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel,
|
||||
Module, KernelandExtra));
|
||||
HIP_CHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
|
||||
HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
|
||||
Module, globalDevVar));
|
||||
}
|
||||
|
||||
void ModuleLaunchKernel::DeAllocateMemory() {
|
||||
HIP_CHECK(hipEventDestroy(start_event1));
|
||||
HIP_CHECK(hipEventDestroy(end_event1));
|
||||
HIP_CHECK(hipEventDestroy(start_event2));
|
||||
HIP_CHECK(hipEventDestroy(end_event2));
|
||||
HIP_CHECK(hipEventDestroy(start_timingDisabled));
|
||||
HIP_CHECK(hipEventDestroy(end_timingDisabled));
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipHostFree(C));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
/*
|
||||
* In this scenario,We launch the 4 sec kernel and 2 sec kernel
|
||||
* and we fetch the event execution time of each kernel and it
|
||||
* should not exceed the execution time of that particular kernel
|
||||
*/
|
||||
void ModuleLaunchKernel::ExtModule_KernelExecutionTime() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
float time_4sec, time_2sec;
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
// Launching kernels
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event1, end_event1, 0));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event2, end_event2, 0));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1));
|
||||
HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2));
|
||||
|
||||
INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs" << time_4sec
|
||||
<< "Kernel2-<" << TWOSEC_KERNEL << "Vs" << time_2sec);
|
||||
// Verifying the kernel execution time
|
||||
REQUIRE(time_4sec < static_cast<float>(FOURSEC_KERNEL));
|
||||
REQUIRE(time_2sec < static_cast<float>(TWOSEC_KERNEL));
|
||||
|
||||
DeAllocateMemory();
|
||||
}
|
||||
/*
|
||||
* In this Scenario, we create events by disabling the timing flag
|
||||
* We then Launch the kernel using hipExtModuleLaunchKernel by passing
|
||||
* disabled events and try to fetch kernel execution time using
|
||||
* hipEventElapsedTime API which would fail as the flag is disabled.
|
||||
*/
|
||||
void ModuleLaunchKernel::ExtModule_Disabled_Timingflag() {
|
||||
// Allocating Memory and Loading kernel
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
float time_2sec;
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
// Launching Kernel
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_timingDisabled,
|
||||
end_timingDisabled, 0));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
|
||||
REQUIRE(hipEventElapsedTime(&time_2sec, start_timingDisabled,
|
||||
end_timingDisabled) != hipSuccess);
|
||||
|
||||
// DeAllocating the memory
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies negative scenarios of hipExtModuleLaunchKernel API
|
||||
*/
|
||||
void ModuleLaunchKernel::ExtModule_Negative_tests() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
// Allocating memeory and loading kernel
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
void *params[] = {Ad};
|
||||
|
||||
SECTION("Nullptr to kernel function") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Max int value to block dimensions") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
std::numeric_limits<uint32_t>::max(),
|
||||
std::numeric_limits<uint32_t>::max(),
|
||||
std::numeric_limits<uint32_t>::max(), 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Null values to all dimensions") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 0, 0,
|
||||
0,
|
||||
0,
|
||||
0, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing 0 for x dimension") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 1, 1,
|
||||
0,
|
||||
1,
|
||||
1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing 0 for y dimension") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 0, 1,
|
||||
1,
|
||||
0,
|
||||
1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing 0 for Z dimension") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 0,
|
||||
1,
|
||||
1,
|
||||
0, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both kernel and extra params") {
|
||||
REQUIRE(hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1,
|
||||
1, 1, 0,
|
||||
stream1,
|
||||
reinterpret_cast<void**>(¶ms),
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both than maxthreadsperblock to block dimensions") {
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
deviceProp.maxThreadsPerBlock+1,
|
||||
deviceProp.maxThreadsPerBlock+1,
|
||||
deviceProp.maxThreadsPerBlock+1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Block dimension x = Max alloweed + 1") {
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
deviceProp.maxThreadsDim[0]+1,
|
||||
1,
|
||||
1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Block dimension Y = Max alloweed + 1") {
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
1,
|
||||
deviceProp.maxThreadsDim[1]+1,
|
||||
1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Block dimension Z = Max alloweed + 1") {
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
1,
|
||||
1,
|
||||
deviceProp.maxThreadsDim[2]+1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing invalid config data in extra params") {
|
||||
void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config3),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
void ModuleLaunchKernel::Module_WorkGroup_Test() {
|
||||
// Allocate memory and load modules
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
double cuberootVal =
|
||||
cbrt(static_cast<double>(deviceProp.maxThreadsPerBlock));
|
||||
uint32_t cuberoot_floor = floor(cuberootVal);
|
||||
uint32_t cuberoot_ceil = ceil(cuberootVal);
|
||||
|
||||
// Scenario: (block.x * block.y * block.z) <= Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_floor, cuberoot_floor, cuberoot_floor,
|
||||
0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0));
|
||||
|
||||
// Scenario: (block.x * block.y * block.z) > Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
REQUIRE(hipExtModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_ceil, cuberoot_ceil, cuberoot_ceil+1,
|
||||
0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0) != hipSuccess);
|
||||
|
||||
// DeAllocating memory
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the negative scenarios of
|
||||
hipExtModuleLaunchKernel API
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative") {
|
||||
ModuleLaunchKernel Ext_obj;
|
||||
Ext_obj.ExtModule_Negative_tests();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies hipExtModuleLaunchKernel API by
|
||||
disabling the timing flag
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_TimingflagDisabled") {
|
||||
ModuleLaunchKernel Ext_obj;
|
||||
Ext_obj.ExtModule_Disabled_Timingflag();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies hipExtModuleLaunchKernel API kernel
|
||||
execution time
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_KernelExecutionTime") {
|
||||
ModuleLaunchKernel Ext_obj;
|
||||
Ext_obj.ExtModule_KernelExecutionTime();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies workgroup of hipExtModuleLaunchKernel API
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_WorkGroup") {
|
||||
ModuleLaunchKernel Ext_obj;
|
||||
Ext_obj.Module_WorkGroup_Test();
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <utility>
|
||||
|
||||
// Code object loaded by the hipFuncGetAttribute tests.
#define fileName     "module_kernels.code"
// Kernel symbol looked up inside that code object.
#define kernel_name  "hello_world"
|
||||
|
||||
namespace testhipFuncGetAttributesApi {
|
||||
__global__
|
||||
void fn(float* px, float* py) {
|
||||
bool a[42];
|
||||
__shared__ double b[69];
|
||||
for (auto&& x : b) x = *py++;
|
||||
for (auto&& x : a) x = *px++ > 0.0;
|
||||
for (auto&& x : a) if (x) *--py = *--px;
|
||||
}
|
||||
template <int WGSIZE, int LDS>
|
||||
__launch_bounds__(WGSIZE, 1) __global__ void kernelfn(int *x) {
|
||||
__shared__ int lds[LDS];
|
||||
for (int i = 0; i < LDS; ++i) {
|
||||
lds[i] = x[i];
|
||||
}
|
||||
x[LDS - 1] = lds[0] / lds[LDS - 1];
|
||||
}
|
||||
template <int WGSIZE, int LDS> bool test_Attributes_Values() {
|
||||
bool TestPassed = true;
|
||||
hipFuncAttributes attr{};
|
||||
hipFuncGetAttributes(&attr,
|
||||
reinterpret_cast<void const *>(kernelfn<WGSIZE, LDS>));
|
||||
if (attr.maxThreadsPerBlock != WGSIZE) {
|
||||
TestPassed = false;
|
||||
}
|
||||
if (attr.sharedSizeBytes != LDS * sizeof(int)) {
|
||||
TestPassed = false;
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
} // namespace testhipFuncGetAttributesApi
|
||||
/**
|
||||
* hipFuncGetAttributes and hipModuleGetFunction functional tests
|
||||
* Scenario1: Validates the value of attribute "maxThreadsPerBlock" should be non zero.
|
||||
* Scenario2: Validates the value of attribute
|
||||
* "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK" should be non zero.
|
||||
*/
|
||||
// scenario 1
|
||||
TEST_CASE("Unit_hipFuncGetAttributes_FuncTst") {
|
||||
hipFuncAttributes attr{};
|
||||
auto r = hipFuncGetAttributes(&attr,
|
||||
reinterpret_cast<const void*>(&testhipFuncGetAttributesApi::fn));
|
||||
REQUIRE_FALSE(r != hipSuccess);
|
||||
REQUIRE_FALSE(attr.maxThreadsPerBlock == 0);
|
||||
}
|
||||
// scenario 2
|
||||
TEST_CASE("Unit_hipFuncGetAttribute_FuncTst") {
|
||||
hipModule_t Module;
|
||||
int attrib_val;
|
||||
CTX_CREATE()
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
auto r = hipFuncGetAttribute(&attrib_val,
|
||||
HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function);
|
||||
REQUIRE_FALSE(r != hipSuccess);
|
||||
REQUIRE_FALSE(attrib_val == 0);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* hipFuncGetAttributes negative tests
|
||||
* Scenario1: Validates returned error code for attr = nullptr
|
||||
* Scenario2: Validates returned error code for function = nullptr
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncGetAttributes_NegTst") {
|
||||
SECTION("attr is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess == hipFuncGetAttributes(nullptr,
|
||||
reinterpret_cast<const void*>(&testhipFuncGetAttributesApi::fn)));
|
||||
}
|
||||
SECTION("function is nullptr") {
|
||||
hipFuncAttributes attr{};
|
||||
REQUIRE_FALSE(hipSuccess == hipFuncGetAttributes(&attr, nullptr));
|
||||
}
|
||||
}
|
||||
/**
|
||||
* hipFuncGetAttribute negative tests
|
||||
* Scenario1: Validates returned error code for attrib_val = nullptr
|
||||
* Scenario2: Validates returned error code for attrib = invalid = 0xff
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncGetAttribute_NegTst") {
  hipModule_t Module;
  hipFunction_t Function;
  CTX_CREATE()
  // Shared setup: a valid function handle, so that only the argument under
  // test is wrong in each section.
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  SECTION("attr is nullptr") {
    const hipError_t status = hipFuncGetAttribute(
        nullptr, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function);
    REQUIRE_FALSE(hipSuccess == status);
  }
  SECTION("attr is invalid") {
    int attrib_val;
    // 0xff is used as an attribute id that is expected to be rejected.
    const hipFunction_attribute bogusAttr =
        static_cast<hipFunction_attribute>(0xff);
    const hipError_t status =
        hipFuncGetAttribute(&attrib_val, bogusAttr, Function);
    REQUIRE_FALSE(hipSuccess == status);
  }

  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
|
||||
/**
|
||||
* hipFuncGetAttributes
|
||||
* Scenario4: Validates the value of attribute "maxThreadsPerBlock".
|
||||
* Scenario5: Validates the value of attribute "sharedSizeBytes".
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncGetAttributes_AttrTest") {
  bool TestPassed = true;
  // Folds one result into the overall verdict. Every call below is still
  // executed, in the same order, even after an earlier one has failed.
  auto record = [&TestPassed](auto result) { TestPassed &= result; };

  // Second template argument = 64
  record(testhipFuncGetAttributesApi::test_Attributes_Values<64, 64>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<128, 64>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<256, 64>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<512, 64>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<1024, 64>());

  // Second template argument = 128
  record(testhipFuncGetAttributesApi::test_Attributes_Values<64, 128>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<128, 128>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<256, 128>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<512, 128>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<1024, 128>());

  // Second template argument = 256
  record(testhipFuncGetAttributesApi::test_Attributes_Values<64, 256>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<128, 256>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<256, 256>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<512, 256>());
  record(testhipFuncGetAttributesApi::test_Attributes_Values<1024, 256>());

  REQUIRE(TestPassed);
}
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
// Kernel whose address is handed to hipFuncSetAttribute() by the test in
// this file; no launch of it is visible here. It uses a 42-element local
// bool array and a 69-element __shared__ double array.
__global__ void fn(float* px, float* py) {
  bool flags[42];
  __shared__ double staged[69];

  // Fill the shared array from py, advancing py past each element read.
  for (int i = 0; i < 69; ++i) {
    staged[i] = *py++;
  }
  // Record for each value read through px whether it is greater than zero;
  // px advances as well.
  for (int i = 0; i < 42; ++i) {
    flags[i] = *px++ > 0.0;
  }
  // For every set flag, step both pointers back by one element and copy the
  // value px then points at into the slot py then points at.
  for (int i = 0; i < 42; ++i) {
    if (flags[i]) {
      *--py = *--px;
    }
  }
}
|
||||
|
||||
/*
|
||||
This testcase verifies the basic functionality of the hipFuncSetAttribute API, where
|
||||
we need to pass function that executes on device
|
||||
hipFuncAttributeMaxDynamicSharedMemorySize -->
|
||||
The sum of this value + sharedSizeBytes should not exceed device attribute
|
||||
hipFuncAttributePreferredSharedMemoryCarveout -->
|
||||
Carving out the shared memory.
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncSetAttribute_Basic") {
  // Both attributes are set to 0 on the same kernel; only the returned
  // status is verified.
  const void* kernel = reinterpret_cast<const void*>(&fn);
  HIP_CHECK(hipFuncSetAttribute(
      kernel, hipFuncAttributeMaxDynamicSharedMemorySize, 0));
  HIP_CHECK(hipFuncSetAttribute(
      kernel, hipFuncAttributePreferredSharedMemoryCarveout, 0));
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Kernel with an empty body; the test in this file only passes its address
// to hipFuncSetCacheConfig().
__global__ void Empty_Kernel() {}
|
||||
|
||||
/*
|
||||
This testcase verifies the basic functionality of the hipFuncSetCacheConfig API.
|
||||
On GPU devices, where L1 and shared memory uses same resources
|
||||
This sets the preferred cache configuration for the kernel function
|
||||
In this testcases we are setting hipFuncCachePreferL1 where L1 is
|
||||
preferred more than shared memory
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncSetCacheConfig_Basic") {
  // Request the hipFuncCachePreferL1 configuration for Empty_Kernel and
  // verify that the call reports success.
  void* kernel = reinterpret_cast<void*>(Empty_Kernel);
  const hipFuncCache_t preferred = hipFuncCachePreferL1;
  HIP_CHECK(hipFuncSetCacheConfig(kernel, preferred));
}
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Test Description:
|
||||
// This test case verifies the working of hipFuncSetSharedMemConfig() api and
|
||||
// the flag parameter
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
|
||||
// Writes A reversed into B via dynamic shared memory: thread t stages A[t]
// in SMem[t] and, after the block barrier, stores SMem[N - t - 1] to B[t].
// Indexing uses threadIdx.x only and there is no bounds check, so the
// launch must supply room for every index touched (the test in this file
// launches one block of N threads with N * sizeof(int) bytes of dynamic
// shared memory).
__global__ void ReverseSeq(int *A, int *B, int N) {
  extern __shared__ int SMem[];
  const int tid = threadIdx.x;
  SMem[tid] = A[tid];
  // All writes to SMem must be visible before any thread reads the
  // mirrored slot written by another thread.
  __syncthreads();
  B[tid] = SMem[N - tid - 1];
}
|
||||
/*
|
||||
This testcase verifies the basic functionality of hipFuncSetSharedMemConfig API
|
||||
by setting shared memory bank size
|
||||
|
||||
1. hipSharedMemBankSizeDefault
|
||||
2. hipSharedMemBankSizeFourByte
|
||||
3. hipSharedMemBankSizeEightByte
|
||||
|
||||
*/
|
||||
TEST_CASE("Unit_hipFuncSetSharedMemConfig_Basic") {
  int *Ah{nullptr}, *RAh{nullptr}, NumElms = 128;
  int *Ad{nullptr}, *RAd{nullptr};
  const size_t NumBytes = NumElms * sizeof(int);

  HipTest::initArrays<int>(&Ad, &RAd, nullptr,
                           &Ah, &RAh, nullptr, NumElms, false);
  // Ah holds the input sequence, RAh the expected (reversed) sequence.
  for (int i = 0; i < NumElms; ++i) {
    Ah[i] = i;
    RAh[i] = NumElms - i - 1;
  }
  HIP_CHECK(hipMemcpy(Ad, Ah, NumBytes, hipMemcpyHostToDevice));

  // The same set-config / launch / verify sequence is run once per bank
  // size setting.
  const hipSharedMemConfig bankConfigs[] = {hipSharedMemBankSizeDefault,
                                            hipSharedMemBankSizeFourByte,
                                            hipSharedMemBankSizeEightByte};
  for (const hipSharedMemConfig bankConfig : bankConfigs) {
    HIP_CHECK(hipFuncSetSharedMemConfig(
        reinterpret_cast<const void*>(&ReverseSeq), bankConfig));
    // Clear the output so a stale result from the previous configuration
    // cannot satisfy the comparison below.
    HIP_CHECK(hipMemset(RAd, 0, NumBytes));

    // Kernel launch with shared mem size of NumElms * sizeof(int)
    ReverseSeq<<<1, NumElms, NumBytes>>>(Ad, RAd, NumElms);
    // A kernel launch returns no status of its own; pick up launch errors
    // explicitly.
    HIP_CHECK(hipGetLastError());

    // Verifying the results
    memset(Ah, 0, NumBytes);
    HIP_CHECK(hipMemcpy(Ah, RAd, NumBytes, hipMemcpyDeviceToHost));
    for (int i = 0; i < NumElms; ++i) {
      REQUIRE(Ah[i] == RAh[i]);
    }
  }

  HipTest::freeArrays<int>(Ad, RAd, nullptr,
                           Ah, RAh, nullptr, false);
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
hipManagedKeyword API Scenario
|
||||
1. Test hipModuleLoad on multiple GPUs
|
||||
*/
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_kernels.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define MANAGED_VAR_INIT_VALUE 10
|
||||
#define fileName "module_kernels.code"
|
||||
|
||||
// For every visible device: load the module, launch "GPU_func" with a
// single thread, then read the module global "x" back and expect it to be
// MANAGED_VAR_INIT_VALUE + 1.
TEST_CASE("Unit_hipMangedKeyword_ModuleLoadMultiGPU") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  for (int i = 0; i < numDevices; i++) {
    HIP_CHECK(hipSetDevice(i));
    CTX_CREATE()
    hipModule_t Module;
    HIP_CHECK(hipModuleLoad(&Module, fileName));
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "GPU_func"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, 1, 1,
                                    1, 0, 0, NULL, NULL));
    // Make sure the kernel has finished (and surface any execution error)
    // before the global is read back.
    HIP_CHECK(hipDeviceSynchronize());

    hipDeviceptr_t x{};
    size_t xSize = 0;
    HIP_CHECK(hipModuleGetGlobal(&x, &xSize, Module, "x"));

    int data = 0;
    // Never copy more bytes than `data` can hold, whatever size the
    // runtime reports for the symbol.
    const size_t copyBytes = xSize < sizeof(data) ? xSize : sizeof(data);
    HIP_CHECK(hipMemcpyDtoH(&data, x, copyBytes));
    REQUIRE(data == (1 + MANAGED_VAR_INIT_VALUE));

    HIP_CHECK(hipModuleUnload(Module));
    CTX_DESTROY()
  }
}
|
||||
Исполняемый файл
+183
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the hipModuleLoad API On
|
||||
1. Single code object
|
||||
2. Multi Target architecture code object
|
||||
*/
|
||||
#include <cstdlib>
#include <fstream>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
#ifdef __linux__
#include <unistd.h>
#endif
|
||||
#define LEN 64
|
||||
#define SIZE (LEN << 2)
|
||||
#define COMMAND_LEN 256
|
||||
#define CODE_OBJ_SINGLEARCH "module_kernels.code"
|
||||
#define kernel_name "hello_world"
|
||||
#define CODE_OBJ_MULTIARCH "vcpy_kernel_multarch.code"
|
||||
|
||||
/*
|
||||
This API loads the kernel function, Launches the kernel
|
||||
which copies one variable to another and validates both
|
||||
the device variables for the current GPU architecture
|
||||
*/
|
||||
void testCodeObjFile(const char *codeObjFile) {
|
||||
float *A, *B;
|
||||
float *Ad, *Bd;
|
||||
HipTest::initArrays<float>(&Ad, &Bd, nullptr,
|
||||
&A, &B, nullptr, LEN, false);
|
||||
|
||||
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Ad), A, SIZE));
|
||||
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Bd), B, SIZE));
|
||||
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, codeObjFile));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
} args;
|
||||
args._Ad = reinterpret_cast<void*>(Ad);
|
||||
args._Bd = reinterpret_cast<void*>(Bd);
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0,
|
||||
stream, NULL,
|
||||
reinterpret_cast<void**>(&config)));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIP_CHECK(hipMemcpyDtoH(B, reinterpret_cast<hipDeviceptr_t>(Bd), SIZE));
|
||||
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
REQUIRE(A[i] == B[i]);
|
||||
}
|
||||
|
||||
HipTest::freeArrays<float>(Ad, Bd, nullptr,
|
||||
A, B, nullptr,
|
||||
false);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
/**
 * Check if environment variable $ROCM_PATH is defined.
 *
 * @return true when ROCM_PATH is present in the environment with a
 *         non-empty value, false otherwise.
 */
bool isRocmPathSet() {
  // Ask the environment directly rather than spawning a shell to echo the
  // variable: no pipe to manage and no dependency on a shell being present.
  const char* rocmPath = std::getenv("ROCM_PATH");
  return rocmPath != nullptr && rocmPath[0] != '\0';
}
|
||||
#endif
|
||||
/*
|
||||
This testcase checks the hipModuleLoad API for the
|
||||
current GPU architecture.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModule_TestCodeObjFile") {
  // Run the shared load/launch/verify helper on the single-architecture
  // code object.
  const char* const codeObj = CODE_OBJ_SINGLEARCH;
  testCodeObjFile(codeObj);
}
|
||||
|
||||
/*
|
||||
This testcase
|
||||
1. Creates kernel file and copies to tmp folder
|
||||
2. Checks for Rocm path and generates code file for
|
||||
multiple target architectures.
|
||||
*/
|
||||
// Writes a small kernel source file to /tmp, compiles it with hipcc into a
// code object covering a fixed list of architectures plus the architecture
// of device 0, and runs testCodeObjFile() on the result. If the source
// cannot be written or the code object cannot be produced, the scenario is
// reported with a warning and skipped rather than failed.
TEST_CASE("Unit_hipModule_MultiTargArchCodeObj") {
#ifdef __linux__
  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, 0));

  // Kernel source; adjacent literals keep every line short without needing
  // a multi-line string literal.
  const char* const kernelSrcPath = "/tmp/vcpy_kernel.cpp";
  const std::string kernelSrc =
      "#include \"hip/hip_runtime.h\"\n"
      "extern \"C\" __global__ void hello_world(float* a, float* b) {\n"
      " int tx = hipThreadIdx_x;\n"
      " b[tx] = a[tx];\n"
      "}";
  {
    std::ofstream ofs(kernelSrcPath, std::ofstream::out);
    ofs << kernelSrc;
  }  // stream is flushed and closed here
  if (access(kernelSrcPath, F_OK) == -1) {
    WARN("Code Object File: /tmp/vcpy_kernel.cpp not found");
    SUCCEED("Skipped: kernel source file could not be created");
    return;
  }

  // Build the hipcc command line as a std::string so that a long
  // architecture name or install path cannot be truncated.
  const std::string hipcc_path =
      isRocmPathSet() ? "$ROCM_PATH/bin/hipcc" : "/opt/rocm/bin/hipcc";
  const std::string command =
      hipcc_path + " --genco --offload-arch=" +
      "gfx801,gfx802,gfx803,gfx900,gfx908,gfx1030,gfx90a," +
      props.gcnArchName + " " + kernelSrcPath + " -o " + CODE_OBJ_MULTIARCH;

  if (system(command.c_str()) != 0) {
    WARN("hipcc could not generate the multi-architecture code object");
    SUCCEED("Skipped: code object generation failed");
    return;
  }

  // Check if the code object file is created
  if (access("./" CODE_OBJ_MULTIARCH, F_OK) == -1) {
    WARN("Code Object File not found");
    SUCCEED("Skipped: code object file is missing");
    return;
  }
  testCodeObjFile(CODE_OBJ_MULTIARCH);
#else
  SUCCEED("This test is skipped due to non linux environment");
#endif
}
|
||||
Исполняемый файл
+120
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN * sizeof(float)
|
||||
#define ARRAY_SIZE 16
|
||||
#define fileName "module_kernels.code"
|
||||
|
||||
/*
|
||||
This testcase verifies the basic functionality of hipModuleGetGlobal API
|
||||
1. Simple kernel
|
||||
2. Global variables
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetGlobal_Basic") {
  float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
  HipTest::initArrays<float>(&Ad, &Bd, nullptr, &A, &B, nullptr, LEN,
                             false);
  CTX_CREATE()
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Ad), A, SIZE));
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Bd), B, SIZE));
  hipModule_t Module;
  HIP_CHECK(hipModuleLoad(&Module, fileName));

  // Scalar module global: look it up and overwrite it from the host.
  float myDeviceGlobal_h = 42.0;
  hipDeviceptr_t deviceGlobal;
  size_t deviceGlobalSize;
  HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
                               Module, "myDeviceGlobal"));
  HIP_CHECK(hipMemcpyHtoD(deviceGlobal, &myDeviceGlobal_h,
                          deviceGlobalSize));

  // Array module global: fill the complete host copy first, then upload it
  // once. Copying inside the fill loop would transfer the array ARRAY_SIZE
  // times, most of them with elements that have not been written yet.
  float myDeviceGlobalArray_h[ARRAY_SIZE];
  hipDeviceptr_t myDeviceGlobalArray;
  size_t myDeviceGlobalArraySize;
  HIP_CHECK(hipModuleGetGlobal(&myDeviceGlobalArray,
                               &myDeviceGlobalArraySize, Module,
                               "myDeviceGlobalArray"));
  for (int i = 0; i < ARRAY_SIZE; i++) {
    myDeviceGlobalArray_h[i] = i * 1000.0f;
  }
  HIP_CHECK(hipMemcpyHtoD(myDeviceGlobalArray, myDeviceGlobalArray_h,
                          myDeviceGlobalArraySize));

  // Kernel arguments are passed as one packed buffer through the "extra"
  // launch parameter.
  struct {
    void* _Ad;
    void* _Bd;
  } args;
  args._Ad = reinterpret_cast<void*>(Ad);
  args._Bd = reinterpret_cast<void*>(Bd);
  size_t size = sizeof(args);
  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                    HIP_LAUNCH_PARAM_END};

  SECTION("Testing with simple kernel") {
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0,
                                    NULL, config));

    HIP_CHECK(hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE));

    for (uint32_t i = 0; i < LEN; i++) {
      REQUIRE(A[i] == B[i]);
    }
  }

  SECTION("Testing global variables") {
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0,
                                    NULL, config));

    HIP_CHECK(hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE));

    for (uint32_t i = 0; i < LEN; i++) {
      float expected = A[i] + myDeviceGlobal_h +
                       myDeviceGlobalArray_h[i % ARRAY_SIZE];
      REQUIRE(expected == B[i]);
    }
  }

  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             A, B, nullptr,
                             false);
}
|
||||
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* Test Scenarios
|
||||
1. hipModuleLaunchKernel Negative Scenarios
|
||||
2. hipModuleLaunchKernel Work Group tests =>
|
||||
- (block.x * block.y * block.z) <= Work Group Size
|
||||
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
- (block.x * block.y * block.z) > Work Group Size
|
||||
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#define fileName "module_kernels.code"
|
||||
#define matmulK "matmulK"
|
||||
#define SixteenSec "SixteenSecKernel"
|
||||
#define KernelandExtra "KernelandExtraParams"
|
||||
#define FourSec "FourSecKernel"
|
||||
#define TwoSec "TwoSecKernel"
|
||||
#define dummyKernel "EmptyKernel"
|
||||
|
||||
// Six unsigned values, named for a grid extent and a block extent along
// X, Y and Z. Not referenced in the visible part of this file.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
|
||||
|
||||
/*
|
||||
This testcase verifies the negative scenarios of
|
||||
hipModuleLaunchKernel API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLaunchKernel_Negative") {
  HIP_CHECK(hipSetDevice(0));

  // Packed argument buffer for the "extra" launch path. All fields are
  // null/zero; every launch below is expected to be rejected.
  struct {
    void* _Ad;
    void* _Bd;
    void* _Cd;
    int _n;
  } args1;
  args1._Ad = nullptr;
  args1._Bd = nullptr;
  args1._Cd = nullptr;
  args1._n = 0;
  size_t size1 = sizeof(args1);

  hipFunction_t MultKernel, KernelandExtraParamKernel;
  hipModule_t Module;
  hipStream_t stream1;
  hipDeviceptr_t *Ad{nullptr};
  CTX_CREATE()

  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK));
  HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel,
                                 Module, KernelandExtra));
  void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
                     HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                     HIP_LAUNCH_PARAM_END};
  void *params[] = {Ad};
  HIP_CHECK(hipStreamCreate(&stream1));

  // Limits of device 0, queried once and checked, for the sections that
  // deliberately exceed them.
  hipDeviceProp_t deviceProp;
  HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));

  // Launches `fn` on stream1 with no dynamic shared memory and returns the
  // status, so each section only spells out what it varies.
  const auto launch = [stream1](hipFunction_t fn,
                                unsigned int gridX, unsigned int gridY,
                                unsigned int gridZ,
                                unsigned int blockX, unsigned int blockY,
                                unsigned int blockZ,
                                void** kernelParams, void** extra) {
    return hipModuleLaunchKernel(fn, gridX, gridY, gridZ,
                                 blockX, blockY, blockZ,
                                 0, stream1, kernelParams, extra);
  };

  SECTION("Passing nullptr to kernel function") {
    REQUIRE(launch(nullptr, 1, 1, 1, 1, 1, 1, NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing Max int value to block dim") {
    const uint32_t maxDim = std::numeric_limits<uint32_t>::max();
    REQUIRE(launch(MultKernel, 1, 1, 1, maxDim, maxDim, maxDim,
                   NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing 0 to all value dim") {
    REQUIRE(launch(MultKernel, 0, 0, 0, 0, 0, 0, NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing 0 for X dim") {
    REQUIRE(launch(MultKernel, 0, 1, 1, 0, 1, 1, NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing 0 for Y dim") {
    REQUIRE(launch(MultKernel, 1, 0, 1, 1, 0, 1, NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing 0 for Z dim") {
    REQUIRE(launch(MultKernel, 1, 1, 0, 1, 1, 0, NULL, config1)
                          != hipSuccess);
  }

  SECTION("Passing both kernel and extra params") {
    // Supplying kernelParams and extra at the same time must be rejected.
    REQUIRE(launch(KernelandExtraParamKernel, 1, 1, 1, 1, 1, 1,
                   params, config1)
                          != hipSuccess);
  }

  SECTION("Passing more than maxthreadsperblock to block dim") {
    REQUIRE(launch(MultKernel, 1, 1, 1,
                   deviceProp.maxThreadsPerBlock + 1,
                   deviceProp.maxThreadsPerBlock + 1,
                   deviceProp.maxThreadsPerBlock + 1,
                   NULL, config1)
                          != hipSuccess);
  }

  SECTION("Block dim X is more than max allowed") {
    REQUIRE(launch(MultKernel, 1, 1, 1,
                   deviceProp.maxThreadsDim[0] + 1, 1, 1,
                   NULL, config1)
                          != hipSuccess);
  }

  SECTION("Block dim Y is more than max allowed") {
    REQUIRE(launch(MultKernel, 1, 1, 1,
                   1, deviceProp.maxThreadsDim[1] + 1, 1,
                   NULL, config1)
                          != hipSuccess);
  }

  SECTION("Block dim Z is more than max allowed") {
    REQUIRE(launch(MultKernel, 1, 1, 1,
                   1, 1, deviceProp.maxThreadsDim[2] + 1,
                   NULL, config1)
                          != hipSuccess);
  }

  SECTION("Block invalid config to extra params") {
    // Deliberately malformed: the buffer pointer value is left out after
    // HIP_LAUNCH_PARAM_BUFFER_POINTER.
    void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
                       HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                       HIP_LAUNCH_PARAM_END};
    REQUIRE(launch(MultKernel, 1, 1, 1, 1, 1, 1, NULL, config3)
                          != hipSuccess);
  }

  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
|
||||
|
||||
/*
|
||||
This testcase verifies the work group scenarios of
|
||||
hipModuleLaunchKernel API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLaunchKernel_WorkGroup") {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipFunction_t DummyKernel;
|
||||
hipModule_t Module;
|
||||
hipStream_t stream1;
|
||||
CTX_CREATE()
|
||||
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
// Passing Max int value to block dimensions
|
||||
hipDeviceProp_t deviceProp;
|
||||
hipGetDeviceProperties(&deviceProp, 0);
|
||||
double cuberootVal =
|
||||
cbrt(static_cast<double>(deviceProp.maxThreadsPerBlock));
|
||||
uint32_t cuberoot_floor = floor(cuberootVal);
|
||||
uint32_t cuberoot_ceil = ceil(cuberootVal);
|
||||
// Scenario: (block.x * block.y * block.z) <= Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
HIP_CHECK(hipModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_floor, cuberoot_floor, cuberoot_floor,
|
||||
0, stream1, NULL, NULL));
|
||||
// Scenario: (block.x * block.y * block.z) > Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
REQUIRE(hipModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1,
|
||||
0, stream1, NULL, NULL) != hipSuccess);
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
hipModuleLoadData scenarios
|
||||
|
||||
1. Loads the kernel and the corresponding kernel function
|
||||
which copies the data from one device variable to another.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN << 2
|
||||
#define FILENAME "module_kernels.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
static std::vector<char> load_file() {
|
||||
std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
INFO("could not open code object" << FILENAME);
|
||||
REQUIRE(false);
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
/*
Loads the code object from memory with hipModuleLoadData, launches
kernel_name with LEN threads in a single block and requires that the second
device buffer, copied back into B, matches host buffer A element by element.
*/
TEST_CASE("Unit_hipModuleLoadData_Basic") {
  auto buffer = load_file();
  float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
  // NOTE(review): initArrays is expected to allocate and fill all four
  // buffers with LEN floats - confirm against hip_test_checkers.hh.
  HipTest::initArrays<float>(&Ad, &Bd, nullptr, &A, &B, nullptr,
                             LEN, false);
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  hipModule_t Module;
  hipFunction_t Function{nullptr};

  HIP_CHECK(hipModuleLoadData(&Module, &buffer[0]));
  HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Kernel arguments are handed over as one packed buffer through the
  // "extra" launch parameter instead of the kernelParams array.
  struct {
    void* _Ad;
    void* _Bd;
  } args;
  args._Ad = reinterpret_cast<void*>(Ad);
  args._Bd = reinterpret_cast<void*>(Bd);
  size_t size = sizeof(args);

  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                    HIP_LAUNCH_PARAM_END};
  HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0,
                                  stream, NULL,
                                  reinterpret_cast<void**>(&config)));

  // Wait for the kernel before releasing the stream and the module, the
  // same order the multi-threaded variants of this test use.
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  // The module was previously never unloaded.
  HIP_CHECK(hipModuleUnload(Module));

  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  for (uint32_t i = 0; i < LEN; i++) {
    REQUIRE(A[i] == B[i]);
  }
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             A, B,
                             nullptr, false);
}
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the multithreaded scenario of
|
||||
hipModuleLoadData API on MultiGPU system
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN << 2
|
||||
#define THREADS 8
|
||||
|
||||
#define FILENAME "module_kernels.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
/*
|
||||
This function reads the kernel code object file into buffer
|
||||
*/
|
||||
static std::vector<char> load_file() {
|
||||
std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
INFO("could not open code object " << FILENAME);
|
||||
REQUIRE(false);
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*
|
||||
Thread function
|
||||
1. Loads the module using hipModuleLoadData API
|
||||
2. Initializes 2 device variables.
|
||||
3. Launches kernel which copies one data into another.
|
||||
4. validates the result and returns it to the caller using
|
||||
std::ref variable.
|
||||
*/
|
||||
static void run(const std::vector<char>& buffer, int deviceNo,
|
||||
bool &testResult) {
|
||||
hipSetDevice(deviceNo);
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
|
||||
testResult = true;
|
||||
HipTest::initArrays<float>(&Ad, &Bd, nullptr,
|
||||
&A, &B, nullptr,
|
||||
LEN, false);
|
||||
HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
|
||||
|
||||
HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
|
||||
HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
} args;
|
||||
args._Ad = static_cast<void*>(Ad);
|
||||
args._Bd = static_cast<void*>(Bd);
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN,
|
||||
1, 1, 0, stream, NULL,
|
||||
reinterpret_cast<void**>(&config)));
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
|
||||
HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
|
||||
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
REQUIRE(A[i] == B[i]);
|
||||
}
|
||||
HipTest::freeArrays<float>(Ad, Bd, nullptr,
|
||||
A, B, nullptr,
|
||||
false);
|
||||
}
|
||||
|
||||
/*
std::thread derivative that joins in its destructor when it still owns a
running thread, so destroying it never terminates the program.
*/
struct joinable_thread : std::thread {
  // Forwards every argument to the matching std::thread constructor.
  template <class... Args>
  joinable_thread(Args&&... args)  // NOLINT
      : std::thread(std::forward<Args>(args)...) {}

  // Move-only, exactly like std::thread.
  joinable_thread(joinable_thread&& other) = default;
  joinable_thread& operator=(joinable_thread&& other) = default;

  ~joinable_thread() {
    if (joinable()) {
      join();
    }
  }
};
|
||||
|
||||
/*
|
||||
This API is triggered form the test case where in
|
||||
1. Creates the thread object.
|
||||
2. Loops through the number of GPUs and launches multiple threads.
|
||||
*/
|
||||
static void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
|
||||
int numDevices = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
bool testResult = false;
|
||||
std::vector<joinable_thread> threads;
|
||||
|
||||
for (int deviceNo=0; deviceNo < numDevices; ++deviceNo) {
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
threads.emplace_back(std::thread{[&, buffer] {
|
||||
run(buffer, deviceNo, std::ref(testResult));
|
||||
}});
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
The testcase verifies the multithreaded funtionality on MGPU system
|
||||
1. Loads the kernel file by calling load_file API
|
||||
2. Gets the host thread count
|
||||
3. Creates multiple threads in parallel where in each thread initializes
|
||||
2 device variables and loads the kernel using hipModuleLoadData API.
|
||||
The kernel copies the data from one variable to another.Then the thread
|
||||
validates both the variables.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoadData_MGpuMultiThread") {
|
||||
auto buffer = load_file();
|
||||
auto file_size = buffer.size() / (1024 * 1024);
|
||||
auto thread_count = HipTest::getHostThreadCount(file_size + 10);
|
||||
run_multi_threads(thread_count, buffer);
|
||||
}
|
||||
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the multithreaded scenario of hipModuleLoadData API
|
||||
*/
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN << 2
|
||||
#define THREADS 8
|
||||
#define MAX_THREADS 512
|
||||
|
||||
#define FILENAME "module_kernels.code"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
/*
|
||||
This function reads the kernel code object file into buffer
|
||||
*/
|
||||
std::vector<char> load_file() {
|
||||
std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
INFO("could not open code object" << FILENAME);
|
||||
REQUIRE(false);
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*
|
||||
Thread function
|
||||
1. Loads the module using hipModuleLoadData API
|
||||
2. Initializes 2 device variables.
|
||||
3. Launches kernel which copies one data into another.
|
||||
4. validates the result and returns it to the caller using
|
||||
std::ref variable.
|
||||
*/
|
||||
void run(const std::vector<char>& buffer, bool &testResult) {
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
|
||||
float *A, *B, *Ad, *Bd;
|
||||
testResult = true;
|
||||
HipTest::initArrays<float>(&Ad, &Bd, nullptr,
|
||||
&A, &B, nullptr,
|
||||
LEN, false);
|
||||
|
||||
|
||||
HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
|
||||
|
||||
HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
|
||||
HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
} args;
|
||||
args._Ad = static_cast<void*>(Ad);
|
||||
args._Bd = static_cast<void*>(Bd);
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1,
|
||||
LEN, 1, 1, 0, stream,
|
||||
NULL, reinterpret_cast<void**>(&config)));
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
|
||||
HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
|
||||
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
REQUIRE(A[i] == B[i]);
|
||||
}
|
||||
|
||||
HipTest::freeArrays<float>(Ad, Bd, nullptr,
|
||||
A, B, nullptr,
|
||||
false);
|
||||
}
|
||||
|
||||
/*
std::thread derivative that joins in its destructor when it still owns a
running thread, so destroying it never terminates the program.
*/
struct joinable_thread : std::thread {
  // Forwards every argument to the matching std::thread constructor.
  template <class... Args>
  joinable_thread(Args&&... args)  // NOLINT
      : std::thread(std::forward<Args>(args)...) {}

  // Move-only, exactly like std::thread.
  joinable_thread(joinable_thread&& other) = default;
  joinable_thread& operator=(joinable_thread&& other) = default;

  ~joinable_thread() {
    if (joinable()) {
      join();
    }
  }
};
|
||||
|
||||
/*
|
||||
This API is triggered form the test case where in
|
||||
1. Creates the thread object.
|
||||
2. Loops through the number of GPUs and launches multiple threads.
|
||||
*/
|
||||
void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
|
||||
std::vector<joinable_thread> threads;
|
||||
bool testResult = false;
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
threads.emplace_back(std::thread{[&] {
|
||||
run(buffer, std::ref(testResult));
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
The testcase verifies the multithreaded funtionality
|
||||
1. Loads the kernel file by calling load_file API
|
||||
2. Gets the host thread count
|
||||
3. Creates multiple threads in parallel where in each thread initializes
|
||||
2 device variables and loads the kernel using hipModuleLoadData API.
|
||||
The kernel copies the data from one variable to another.Then the thread
|
||||
validates both the variables.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoadData_MultiThreaded") {
|
||||
HIPCHECK(hipInit(0));
|
||||
auto buffer = load_file();
|
||||
auto file_size = buffer.size() / (1024 * 1024);
|
||||
auto thread_count = HipTest::getHostThreadCount(file_size + 10);
|
||||
if (thread_count == 0) {
|
||||
INFO("Thread Count is zero");
|
||||
REQUIRE(false);
|
||||
}
|
||||
|
||||
run_multi_threads(thread_count, buffer);
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies hipModuleLoad API in multithreaded scenario
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#if HT_AMD
|
||||
#include "hip/hip_ext.h"
|
||||
#endif
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <future>
|
||||
#define THREADS 8
|
||||
#define MAX_NUM_THREADS 128
|
||||
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define NUM_GROUPS 1
|
||||
#define GROUP_SIZE 1
|
||||
#define WARMUP_RUN_COUNT 10
|
||||
#define TIMING_RUN_COUNT 100
|
||||
#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT
|
||||
#define FILENAME "module_kernels.code"
|
||||
#define kernel_name "EmptyKernel"
|
||||
|
||||
/*
|
||||
This thread function loads the kernel file , synchronizes the threads
|
||||
and Launches the kernel .
|
||||
*/
|
||||
void hipModuleLaunchKernel_enqueue(std::atomic_int* shared, int max_threads) {
|
||||
// resources necessary for this thread
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
hipModule_t module;
|
||||
hipFunction_t function;
|
||||
|
||||
HIPCHECK(hipModuleLoad(&module, FILENAME));
|
||||
HIPCHECK(hipModuleGetFunction(&function, module, kernel_name));
|
||||
|
||||
void* kernel_params = nullptr;
|
||||
|
||||
// synchronize all threads, before running
|
||||
shared->fetch_add(1, std::memory_order_release);
|
||||
while (max_threads != shared->load(std::memory_order_acquire)) {}
|
||||
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
HIPCHECK(hipModuleLaunchKernel(function, 1, 1,
|
||||
1, 1, 1, 1, 0, stream,
|
||||
&kernel_params, nullptr));
|
||||
}
|
||||
HIPCHECK(hipModuleUnload(module));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/*
thread pool class launching the threads using std::async API and keeping
their futures in "threads".
The start API launches max_threads asynchronous tasks and the finish API
waits for all of them to end.
*/
struct thread_pool {
  explicit thread_pool(int total_threads) : max_threads(total_threads) {
  }

  // Launches max_threads tasks; each one receives a pointer to the shared
  // counter and the total number of tasks.
  void start(std::function<void(std::atomic_int*, int)> f) {
    for (int i = 0; i < max_threads; ++i) {
      threads.push_back(std::async(std::launch::async, f,
                                   &shared, max_threads));
    }
  }

  // Waits for every outstanding task (get() rethrows a task's exception),
  // then resets the pool so that start() can be called again.
  void finish() {
    for (auto&& thread : threads) {
      thread.get();
    }
    threads.clear();
    shared = 0;
  }

  // No-op when finish() has already been called explicitly.
  ~thread_pool() {
    finish();
  }

 private:
  std::atomic_int shared {0};               // counter handed to every task
  std::vector<std::future<void>> threads;   // one future per running task
  int max_threads = 1;                      // number of tasks per start()
};
|
||||
|
||||
/*
|
||||
This testcase verifies the Multithreaded scenario of hipModule API
|
||||
where in threadpool object is created and the object invokes start API
|
||||
which launches multiple threads where each thread loads the kernel object
|
||||
using hipModuleLoad API and launches the kernel in parallel.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoad_MultiThread") {
|
||||
int max_threads = min(THREADS * std::thread::hardware_concurrency(),
|
||||
MAX_NUM_THREADS);
|
||||
thread_pool task(max_threads);
|
||||
task.start(hipModuleLaunchKernel_enqueue);
|
||||
task.finish();
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
#define TEST_ITERATIONS 1000
|
||||
#define CODEOBJ_FILE "module_kernels.code"
|
||||
/**
|
||||
* Run Valgrind tool with these test cases to validate memory leakage.
|
||||
* E.g. valgrind --leak-check=yes ./a.out
|
||||
*/
|
||||
|
||||
/**
|
||||
* Internal Function
|
||||
*/
|
||||
static std::vector<char> load_file() {
|
||||
std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
WARN("could not open code object " << CODEOBJ_FILE);
|
||||
}
|
||||
file.close();
|
||||
return buffer;
|
||||
}
|
||||
/**
|
||||
* Validates no memory leakage for hipModuleLoad
|
||||
*/
|
||||
TEST_CASE("Unit_hipModule_LoadUnloadStress") {
|
||||
CTX_CREATE()
|
||||
for (int count = 0; count < TEST_ITERATIONS; count++) {
|
||||
hipModule_t Module;
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy"));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* Validates no memory leakage for hipModuleLoadData
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoadData_LoadUnloadStress") {
|
||||
CTX_CREATE()
|
||||
auto buffer = load_file();
|
||||
for (int count = 0; count < TEST_ITERATIONS; count++) {
|
||||
hipModule_t Module;
|
||||
HIP_CHECK(hipModuleLoadData(&Module, &buffer[0]));
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy"));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* Validates no memory leakage for hipModuleLoadDataEx
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoadDataEx_UnloadStress") {
|
||||
CTX_CREATE()
|
||||
auto buffer = load_file();
|
||||
for (int count = 0; count < TEST_ITERATIONS; count++) {
|
||||
hipModule_t Module;
|
||||
HIP_CHECK(hipModuleLoadDataEx(&Module, &buffer[0], 0,
|
||||
nullptr, nullptr));
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy"));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
CTX_DESTROY()
|
||||
}
|
||||
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
This testcase verifies the negative scenarios of
|
||||
1. hipModuleLoad API
|
||||
2. hipModuleLoadData API
|
||||
3. hipModuleGetFunction API
|
||||
4. hipModuleGetGlobal API
|
||||
*/
|
||||
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
#define FILENAME_NONEXST "sample_nonexst.code"
|
||||
#define FILENAME_EMPTY "emptyfile.code"
|
||||
#define FILENAME_RAND "rand_file.code"
|
||||
#define RANDOMFILE_LEN 2048
|
||||
#define CODEOBJ_FILE "module_kernels.code"
|
||||
#define KERNEL_NAME "hello_world"
|
||||
#define KERNEL_NAME_NONEXST "xyz"
|
||||
#define CODEOBJ_GLOBAL "module_kernels.code"
|
||||
#define DEVGLOB_VAR_NONEXIST "xyz"
|
||||
#define DEVGLOB_VAR "myDeviceGlobal"
|
||||
/**
|
||||
* Internal Function
|
||||
* Loads the kernel file into buffer
|
||||
*/
|
||||
std::vector<char> load_file(const char* filename) {
|
||||
std::ifstream file(filename, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
INFO("could not open code object " << filename);
|
||||
}
|
||||
file.close();
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal Function
|
||||
Create Randome file
|
||||
*/
|
||||
void createRandomFile(const char* filename) {
|
||||
std::ofstream outfile(filename, std::ios::binary);
|
||||
char buf[RANDOMFILE_LEN];
|
||||
unsigned int seed = 1;
|
||||
for (int i = 0; i < RANDOMFILE_LEN; i++) {
|
||||
buf[i] = rand_r(&seed) % 256;
|
||||
}
|
||||
outfile.write(buf, RANDOMFILE_LEN);
|
||||
outfile.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates negative scenarios for hipModuleLoad API
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipModuleLoad_Negative") {
|
||||
CTX_CREATE()
|
||||
hipModule_t Module;
|
||||
|
||||
SECTION("Nullptr to module") {
|
||||
REQUIRE(hipModuleLoad(nullptr, CODEOBJ_FILE)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to Fname") {
|
||||
REQUIRE(hipModuleLoad(&Module, nullptr)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Empty fname") {
|
||||
std::fstream fs;
|
||||
fs.open(FILENAME_EMPTY, std::ios::out);
|
||||
fs.close();
|
||||
REQUIRE(hipModuleLoad(&Module, FILENAME_EMPTY)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Binary file with random number") {
|
||||
createRandomFile(FILENAME_RAND);
|
||||
REQUIRE(hipModuleLoad(&Module, FILENAME_RAND)
|
||||
!= hipSuccess);
|
||||
remove(FILENAME_RAND);
|
||||
}
|
||||
|
||||
SECTION("Non Existent file") {
|
||||
REQUIRE(hipModuleLoad(&Module, FILENAME_NONEXST)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Empty string to file name") {
|
||||
REQUIRE(hipModuleLoad(&Module, "")
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
CTX_DESTROY()
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates negative scenarios for hipModuleLoadData API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoadData_Negative") {
|
||||
CTX_CREATE()
|
||||
hipModule_t Module;
|
||||
|
||||
SECTION("Nullptr to module") {
|
||||
auto buffer = load_file(CODEOBJ_FILE);
|
||||
REQUIRE(hipModuleLoadData(nullptr, &buffer[0])
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to image") {
|
||||
REQUIRE(hipModuleLoadData(&Module, nullptr)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Random file to image") {
|
||||
createRandomFile(FILENAME_RAND);
|
||||
auto buffer = load_file(FILENAME_RAND);
|
||||
REQUIRE(hipModuleLoadData(&Module, &buffer[0])
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to Module") {
|
||||
auto buffer = load_file(CODEOBJ_FILE);
|
||||
REQUIRE(hipModuleLoadDataEx(nullptr, &buffer[0], 0, nullptr, nullptr)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to image") {
|
||||
REQUIRE(hipModuleLoadDataEx(&Module, nullptr, 0, nullptr, nullptr)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Random image file") {
|
||||
// Create a binary file with random numbers
|
||||
createRandomFile(FILENAME_RAND);
|
||||
// Open the code object file and copy it in a buffer
|
||||
auto buffer = load_file(FILENAME_RAND);
|
||||
REQUIRE(hipModuleLoadDataEx(&Module, &buffer[0], 0, nullptr, nullptr)
|
||||
!= hipSuccess);
|
||||
}
|
||||
|
||||
CTX_DESTROY()
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates negative scenarios for hipModuleGetFunction API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetFunction_Negative") {
|
||||
CTX_CREATE()
|
||||
hipFunction_t Function;
|
||||
hipModule_t Module;
|
||||
|
||||
SECTION("Nullptr to function name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
REQUIRE(hipModuleGetFunction(nullptr, Module, KERNEL_NAME) != hipSuccess);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
SECTION("Uninitialized module") {
|
||||
REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Non existing function kernel name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME_NONEXST)
|
||||
!= hipSuccess);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
SECTION("Nullptr to kernel name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
REQUIRE(hipModuleGetFunction(&Function, Module, nullptr) != hipSuccess);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
#if HT_AMD
|
||||
SECTION("Unloaded module") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Empty string to kernel name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
REQUIRE(hipModuleGetFunction(&Function, Module, "") != hipSuccess);
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
CTX_DESTROY()
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates negative scenarios for hipModuleGetGlobal API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetGlobal_Negative") {
|
||||
CTX_CREATE()
|
||||
hipModule_t Module;
|
||||
hipDeviceptr_t deviceGlobal;
|
||||
size_t deviceGlobalSize;
|
||||
|
||||
SECTION("Nullptr to varname") {
|
||||
HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
|
||||
REQUIRE(hipModuleGetGlobal(&deviceGlobal,
|
||||
&deviceGlobalSize, Module, nullptr)
|
||||
!= hipSuccess);
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
SECTION("Wrong variable name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
|
||||
REQUIRE(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
|
||||
Module, DEVGLOB_VAR_NONEXIST) != hipSuccess);
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
SECTION("Empty string to module name") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
|
||||
REQUIRE(hipModuleGetGlobal(&deviceGlobal,
|
||||
&deviceGlobalSize, Module, "") != hipSuccess);
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
}
|
||||
|
||||
#if HT_AMD
|
||||
SECTION("Unloaded Module") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
REQUIRE(hipModuleGetGlobal(&deviceGlobal,
|
||||
&deviceGlobalSize, Module,
|
||||
DEVGLOB_VAR) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Unload an Unloaded module") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
REQUIRE(hipModuleUnload(Module) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Uninitialized module") {
|
||||
REQUIRE(hipModuleGetGlobal(&deviceGlobal,
|
||||
&deviceGlobalSize, Module,
|
||||
DEVGLOB_VAR) != hipSuccess);
|
||||
}
|
||||
SECTION("Unload Uninitialized module") {
|
||||
REQUIRE(hipModuleUnload(Module) != hipSuccess);
|
||||
}
|
||||
#endif
|
||||
|
||||
CTX_DESTROY()
|
||||
}
|
||||
@@ -0,0 +1,267 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <limits>
|
||||
|
||||
#define fileName "module_kernels.code"
|
||||
#define kernel_name "hello_world"
|
||||
/**
|
||||
* hipModuleOccupancyMaxPotentialBlockSize and hipModuleOccupancyMaxPotentialBlockSizeWithFlags
|
||||
* corner tests.
|
||||
* Scenario1:
|
||||
* Validates the value of gridSize, which should be always non zero +ve integer and blockSize
|
||||
* range returned for dynSharedMemPerBlk = 0 and blockSizeLimit = 0.
|
||||
* Scenario2:
|
||||
* Validates the value of gridSize, which should be always non zero +ve integer and blockSize
|
||||
* range returned for dynSharedMemPerBlk = devProp.sharedMemPerBlock and
|
||||
* blockSizeLimit = devProp.maxThreadsPerBlock.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_FuncTst") {
|
||||
// Initialize
|
||||
hipDeviceProp_t devProp;
|
||||
int gridSize = 0;
|
||||
int blockSize = 0;
|
||||
hipModule_t Module;
|
||||
CTX_CREATE()
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
// Scenario1
|
||||
SECTION("without flag - gridSize when input params are 0") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize,
|
||||
&blockSize, Function, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("without flag - gridSize when input params are maximum") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize,
|
||||
&blockSize, Function,
|
||||
devProp.sharedMemPerBlock, devProp.maxThreadsPerBlock));
|
||||
}
|
||||
// Scenario1
|
||||
SECTION("with flag - gridSize when input params are 0") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize,
|
||||
&blockSize, Function, 0, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("with flag - gridSize when input params are maximum") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize,
|
||||
&blockSize, Function, devProp.sharedMemPerBlock,
|
||||
devProp.maxThreadsPerBlock, 0));
|
||||
}
|
||||
// Check if blockSize doen't exceed maxThreadsPerBlock
|
||||
REQUIRE_FALSE(gridSize <= 0);
|
||||
REQUIRE_FALSE(blockSize <= 0);
|
||||
REQUIRE_FALSE(blockSize > devProp.maxThreadsPerBlock);
|
||||
// Un-initialize
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessor and
|
||||
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags Corner tests.
|
||||
* Scenario1:
|
||||
* Validates numBlock value range is within expected limit when sharedMemPerBlock
|
||||
* is 0.
|
||||
* Scenario2:
|
||||
* Validates numBlock value range is within expected limit when
|
||||
* dynSharedMemPerBlk = devProp.sharedMemPerBlock.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_FuncTst") {
|
||||
// Initialize
|
||||
hipDeviceProp_t devProp;
|
||||
int gridSize = 0;
|
||||
int blockSize = 0;
|
||||
int numBlock = 0;
|
||||
hipModule_t Module;
|
||||
CTX_CREATE()
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize,
|
||||
&blockSize, Function, 0, 0));
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
// Scenario1
|
||||
SECTION("without flag - gridSize when input params are 0") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock,
|
||||
Function, blockSize, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("without flag - gridSize when input params are maximum") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock,
|
||||
Function, blockSize, devProp.sharedMemPerBlock));
|
||||
}
|
||||
// Scenario1
|
||||
SECTION("with flag - gridSize when input params are 0") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
&numBlock, Function, blockSize, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("with flag - gridSize when input params are maximum") {
|
||||
HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
&numBlock, Function, blockSize, devProp.sharedMemPerBlock, 0))
|
||||
}
|
||||
// Check if numBlocks are within limits
|
||||
int temp_val = (numBlock * blockSize);
|
||||
REQUIRE_FALSE(numBlock <= 0);
|
||||
REQUIRE_FALSE(temp_val > devProp.maxThreadsPerMultiProcessor);
|
||||
// Un-initialize
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* hipModuleOccupancyMaxPotentialBlockSize negative tests.
|
||||
* Scenario1: gridSize is nullptr.
|
||||
* Scenario2: blocksize is nullptr.
|
||||
* Scenario3: blockSizeLimit < 0.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_NegTst") {
|
||||
int gridSize = 0;
|
||||
int blockSize = 0;
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
CTX_CREATE()
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
// Scenario1
|
||||
SECTION("without flag - gridSize is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
|
||||
nullptr, &blockSize, Function, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("without flag - blocksize is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
|
||||
&gridSize, nullptr, Function, 0, 0));
|
||||
}
|
||||
// Scenario3
|
||||
SECTION("without flag - blockSizeLimit is less than 0") {
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
#if HT_NVIDIA
|
||||
REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
|
||||
&gridSize, &blockSize, Function, 0, -1));
|
||||
#else
|
||||
// As discussed in SWDEV-269400
|
||||
// with developers this difference in behavior between NVIDIA and AMD
|
||||
// is retained.
|
||||
REQUIRE_FALSE(hipSuccess != hipModuleOccupancyMaxPotentialBlockSize(
|
||||
&gridSize, &blockSize, Function, 0, -1));
|
||||
#endif
|
||||
}
|
||||
// Scenario1
|
||||
SECTION("with flag - gridSize is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxPotentialBlockSizeWithFlags(nullptr,
|
||||
&blockSize, Function, 0, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("with flag - blocksize is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize,
|
||||
nullptr, Function, 0, 0, 0));
|
||||
}
|
||||
// Scenario3
|
||||
SECTION("with flag - blockSizeLimit is less than 0") {
|
||||
#if HT_NVIDIA
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize,
|
||||
&blockSize, Function, 0, -1, 0));
|
||||
#else
|
||||
// As discussed in SWDEV-269400
|
||||
// with developers this difference in behavior between NVIDIA and AMD
|
||||
// is retained.
|
||||
REQUIRE_FALSE(hipSuccess !=
|
||||
hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize,
|
||||
&blockSize, Function, 0, -1, 0));
|
||||
#endif
|
||||
}
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessor negative tests.
|
||||
* Scenario1: numBlocks is nullptr.
|
||||
* Scenario2: Check the behavior for blockSize < 0.
|
||||
* Scenario3: Check error code returned for dynSharedMemPerBlk = 0 and blockSize = 0.
|
||||
* Scenario4: dynSharedMemPerBlk = size_t numeric limit.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_NegTst") {
|
||||
int gridSize = 0;
|
||||
int blockSize = 0;
|
||||
int numBlocks = 0;
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
CTX_CREATE()
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize,
|
||||
Function, 0, 0));
|
||||
// Scenario1
|
||||
SECTION("without flag - numBlocks is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(nullptr,
|
||||
Function, blockSize, 0));
|
||||
}
|
||||
// Scenario3
|
||||
SECTION("without flag - dynSharedMemPerBlk = 0 and blockSize = 0") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,
|
||||
Function, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("without flag - blockSize is less than 0") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,
|
||||
Function, -1, 0));
|
||||
}
|
||||
// Scenario4
|
||||
SECTION("without flag - dynSharedMemPerBlk = max_numerical_limit") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks,
|
||||
Function, 0, std::numeric_limits<std::size_t>::max()));
|
||||
}
|
||||
// Scenario1
|
||||
SECTION("with flag - numBlocks is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(nullptr,
|
||||
Function, blockSize, 0, 0));
|
||||
}
|
||||
// Scenario3
|
||||
SECTION("with flag - dynSharedMemPerBlk = 0 and blockSize = 0") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks,
|
||||
Function, 0, 0, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("with flag - blockSize is less than 0") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks,
|
||||
Function, -1, 0, 0));
|
||||
}
|
||||
// Scenario4
|
||||
SECTION("with flag - dynSharedMemPerBlk = max_numerical_limit") {
|
||||
REQUIRE_FALSE(hipSuccess ==
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks,
|
||||
Function, 0, std::numeric_limits<std::size_t>::max(), 0));
|
||||
}
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
|
||||
Исполняемый файл
+561
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
This testcase verifies the following scenarios of hipModuleGetTexRef API
|
||||
1. Negative
|
||||
2. Basic functionality using different data types
|
||||
3. Multiple streams
|
||||
4. MultiThreaded - MultStreamMultGPU
|
||||
5. MultiThreaded - SingleStreamMultGPU
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include <limits>
|
||||
#include <atomic>
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define CODEOBJ_FILE "module_kernels.code"
|
||||
#define NON_EXISTING_TEX_NAME "xyz"
|
||||
#define EMPTY_TEX_NAME ""
|
||||
#define GLOBAL_KERNEL_VAR "deviceGlobalFloat"
|
||||
#define TEX_REF "ftex"
|
||||
#define WIDTH 256
|
||||
#define HEIGHT 256
|
||||
#define MAX_STREAMS 4
|
||||
#define GRIDDIMX 16
|
||||
#define GRIDDIMY 16
|
||||
#define GRIDDIMZ 1
|
||||
#define BLOCKDIMZ 1
|
||||
#define MAX_GPU 16
|
||||
|
||||
std::atomic<int> g_thTestPassed(1);
|
||||
|
||||
|
||||
/**
|
||||
* Internal Functions
|
||||
* Loads the kernel file
|
||||
*/
|
||||
static std::vector<char> load_file() {
|
||||
std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate);
|
||||
std::streamsize fsize = file.tellg();
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::vector<char> buffer(fsize);
|
||||
if (!file.read(buffer.data(), fsize)) {
|
||||
INFO("could not open code object " << CODEOBJ_FILE);
|
||||
REQUIRE(false);
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*
|
||||
Initializes the array
|
||||
*/
|
||||
template<typename T>
|
||||
void allocInitArray(unsigned int width,
|
||||
unsigned int height,
|
||||
hipArray_Format format,
|
||||
HIP_ARRAY* array
|
||||
) {
|
||||
HIP_ARRAY_DESCRIPTOR desc;
|
||||
desc.Format = format;
|
||||
desc.NumChannels = 1;
|
||||
desc.Width = width * sizeof(T);
|
||||
desc.Height = height;
|
||||
HIPCHECK(hipArrayCreate(array, &desc));
|
||||
}
|
||||
|
||||
/*
|
||||
Copies buffer to array using hipMemcpyParam2D API
|
||||
*/
|
||||
template <class T, class T1> void copyBuffer2Array(unsigned int width,
|
||||
unsigned int height,
|
||||
T* hData,
|
||||
T1 array
|
||||
) {
|
||||
hip_Memcpy2D copyParam;
|
||||
memset(©Param, 0, sizeof(copyParam));
|
||||
#if HT_NVIDIA
|
||||
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
copyParam.dstArray = *array;
|
||||
#else
|
||||
copyParam.dstMemoryType = hipMemoryTypeArray;
|
||||
copyParam.srcMemoryType = hipMemoryTypeHost;
|
||||
copyParam.dstArray = array;
|
||||
#endif
|
||||
copyParam.srcHost = hData;
|
||||
copyParam.srcPitch = width * sizeof(T);
|
||||
copyParam.WidthInBytes = width * sizeof(T);
|
||||
copyParam.Height = height;
|
||||
HIPCHECK(hipMemcpyParam2D(©Param));
|
||||
}
|
||||
|
||||
/*
|
||||
Assigns array to texture ref
|
||||
*/
|
||||
template <class T> void assignArray2TexRef(hipArray_Format format,
|
||||
const char* texRefName,
|
||||
hipModule_t Module,
|
||||
T array
|
||||
) {
|
||||
HIP_TEX_REFERENCE texref;
|
||||
#if HT_NVIDIA
|
||||
HIPCHECK(hipModuleGetTexRef(&texref, Module, texRefName));
|
||||
HIPCHECK(hipTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP));
|
||||
HIPCHECK(hipTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP));
|
||||
HIPCHECK(hipTexRefSetFilterMode(texref, HIP_TR_FILTER_MODE_POINT));
|
||||
HIPCHECK(hipTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
|
||||
HIPCHECK(hipTexRefSetFormat(texref, format, 1));
|
||||
HIPCHECK(hipTexRefSetArray(texref, *array, CU_TRSA_OVERRIDE_FORMAT));
|
||||
#else
|
||||
HIPCHECK(hipModuleGetTexRef(&texref, Module, texRefName));
|
||||
HIPCHECK(hipTexRefSetAddressMode(texref, 0, hipAddressModeWrap));
|
||||
HIPCHECK(hipTexRefSetAddressMode(texref, 1, hipAddressModeWrap));
|
||||
HIPCHECK(hipTexRefSetFilterMode(texref, hipFilterModePoint));
|
||||
HIPCHECK(hipTexRefSetFlags(texref, HIP_TRSF_READ_AS_INTEGER));
|
||||
HIPCHECK(hipTexRefSetFormat(texref, format, 1));
|
||||
HIPCHECK(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
Element-wise comparison of two row-major width x height buffers.
Returns true when every element of hOutputData equals the corresponding
element of hData, false at the first mismatch.
*/
template <class T> bool validateOutput(unsigned int width,
                                       unsigned int height,
                                       T* hData,
                                       T* hOutputData) {
  for (unsigned int row = 0; row < height; ++row) {
    for (unsigned int col = 0; col < width; ++col) {
      const unsigned int idx = row * width + col;
      const bool same = (hData[idx] == hOutputData[idx]);
      if (!same) {
        return false;
      }
    }
  }
  return true;
}
|
||||
|
||||
/**
|
||||
* Validates texture functionality with multiple streams for hipModuleGetTexRef
|
||||
*
|
||||
*/
|
||||
template <class T> bool testTexMultStream(const std::vector<char>& buffer,
|
||||
hipArray_Format format,
|
||||
const char* texRefName,
|
||||
const char* kerFuncName,
|
||||
unsigned int numOfStreams) {
|
||||
bool TestPassed = true;
|
||||
unsigned int width = WIDTH;
|
||||
unsigned int height = HEIGHT;
|
||||
unsigned int size = width * height * sizeof(T);
|
||||
T* hData = reinterpret_cast<T*>(malloc(size));
|
||||
CTX_CREATE()
|
||||
HipTest::setDefaultData<T>(width * height, hData, nullptr, nullptr);
|
||||
|
||||
// Load Kernel File and create hipArray
|
||||
hipModule_t Module;
|
||||
HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
|
||||
HIP_ARRAY array;
|
||||
allocInitArray<T>(width, height, format, &array);
|
||||
#if HT_NVIDIA
|
||||
// Copy from hData to array using hipMemcpyParam2D
|
||||
copyBuffer2Array<T, HIP_ARRAY*>(width, height, hData, &array);
|
||||
// Get tex reference from the loaded kernel file
|
||||
// Assign array to the tex reference
|
||||
assignArray2TexRef<HIP_ARRAY*>(format, texRefName, Module, &array);
|
||||
#else
|
||||
// Copy from hData to array using hipMemcpyParam2D
|
||||
copyBuffer2Array<T, HIP_ARRAY>(width, height, hData, array);
|
||||
// Get tex reference from the loaded kernel file
|
||||
// Assign array to the tex reference
|
||||
assignArray2TexRef<HIP_ARRAY>(format, texRefName, Module, array);
|
||||
#endif
|
||||
hipFunction_t Function;
|
||||
HIPCHECK(hipModuleGetFunction(&Function, Module, kerFuncName));
|
||||
|
||||
// Create Multiple Strings
|
||||
hipStream_t streams[MAX_STREAMS]={0};
|
||||
T* dData[MAX_STREAMS] = {NULL};
|
||||
T* hOutputData[MAX_STREAMS] = {NULL};
|
||||
if (numOfStreams > MAX_STREAMS) {
|
||||
numOfStreams = MAX_STREAMS;
|
||||
}
|
||||
unsigned int totalStreamsCreated = 0;
|
||||
for (unsigned int stream_num = 0; stream_num < numOfStreams; stream_num++) {
|
||||
hOutputData[stream_num] = reinterpret_cast<T*>(malloc(size));
|
||||
if (NULL == hOutputData[stream_num]) {
|
||||
WARN("Failed to allocate using malloc in testTexMultStream");
|
||||
TestPassed &= false;
|
||||
}
|
||||
HIPCHECK(hipStreamCreate(&streams[stream_num]));
|
||||
HIPCHECK(hipMalloc(reinterpret_cast<void**>(&dData[stream_num]), size));
|
||||
memset(hOutputData[stream_num], 0, size);
|
||||
struct {
|
||||
void* _Ad;
|
||||
unsigned int _Bd;
|
||||
unsigned int _Cd;
|
||||
} args;
|
||||
args._Ad = reinterpret_cast<void*>(dData[stream_num]);
|
||||
args._Bd = width;
|
||||
args._Cd = height;
|
||||
|
||||
size_t sizeTemp = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
&args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE,
|
||||
&sizeTemp,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
int temp1 = width / GRIDDIMX;
|
||||
int temp2 = height / GRIDDIMY;
|
||||
HIPCHECK(hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ,
|
||||
temp1, temp2, BLOCKDIMZ, 0,
|
||||
streams[stream_num],
|
||||
NULL, reinterpret_cast<void**>(&config)));
|
||||
totalStreamsCreated++;
|
||||
}
|
||||
// Check the kernel results separately
|
||||
for (unsigned int stream_num = 0; stream_num < totalStreamsCreated;
|
||||
stream_num++) {
|
||||
HIPCHECK(hipStreamSynchronize(streams[stream_num]));
|
||||
HIPCHECK(hipMemcpy(hOutputData[stream_num], dData[stream_num], size,
|
||||
hipMemcpyDeviceToHost));
|
||||
TestPassed &= validateOutput<T>(width, height, hData,
|
||||
hOutputData[stream_num]);
|
||||
}
|
||||
for (unsigned int i = 0; i < totalStreamsCreated; i++) {
|
||||
HIPCHECK(hipFree(dData[i]));
|
||||
HIPCHECK(hipStreamDestroy(streams[i]));
|
||||
free(hOutputData[i]);
|
||||
}
|
||||
ARRAY_DESTROY(array)
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
free(hData);
|
||||
CTX_DESTROY()
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal Thread Functions
|
||||
*
|
||||
*/
|
||||
void launchSingleStreamMultGPU(int gpu, const std::vector<char>& buffer) {
|
||||
bool TestPassed = true;
|
||||
HIPCHECK(hipSetDevice(gpu));
|
||||
TestPassed = testTexMultStream<float>(buffer,
|
||||
HIP_AD_FORMAT_FLOAT,
|
||||
"ftex",
|
||||
"tex2dKernelFloat", 1);
|
||||
g_thTestPassed &= static_cast<int>(TestPassed);
|
||||
}
|
||||
|
||||
/**
 * Thread entry point: selects device gpu, runs the float texture test on
 * three streams and folds the outcome into the shared g_thTestPassed flag.
 */
void launchMultStreamMultGPU(int gpu, const std::vector<char>& buffer) {
  HIPCHECK(hipSetDevice(gpu));
  const bool passed = testTexMultStream<float>(buffer,
                                               HIP_AD_FORMAT_FLOAT,
                                               "ftex",
                                               "tex2dKernelFloat", 3);
  // AND-ing keeps the flag at 0 once any thread has failed.
  g_thTestPassed &= static_cast<int>(passed);
}
|
||||
/**
|
||||
* Validates texture functionality with Multiple Streams on multuple GPU
|
||||
* for hipModuleGetTexRef
|
||||
*
|
||||
*/
|
||||
bool testTexMultStreamMultGPU(unsigned int numOfGPUs,
|
||||
const std::vector<char>& buffer) {
|
||||
bool TestPassed = true;
|
||||
std::thread T[MAX_GPU];
|
||||
|
||||
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
T[gpu] = std::thread(launchMultStreamMultGPU, gpu, buffer);
|
||||
}
|
||||
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
T[gpu].join();
|
||||
}
|
||||
|
||||
if (g_thTestPassed) {
|
||||
TestPassed = true;
|
||||
} else {
|
||||
TestPassed = false;
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates texture functionality with Single Stream on multuple GPU
|
||||
* for hipModuleGetTexRef
|
||||
*
|
||||
*/
|
||||
bool testTexSingleStreamMultGPU(unsigned int numOfGPUs,
|
||||
const std::vector<char>& buffer) {
|
||||
bool TestPassed = true;
|
||||
std::thread T[MAX_GPU];
|
||||
|
||||
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
T[gpu] = std::thread(launchSingleStreamMultGPU, gpu, buffer);
|
||||
}
|
||||
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
T[gpu].join();
|
||||
}
|
||||
|
||||
if (g_thTestPassed) {
|
||||
TestPassed = true;
|
||||
} else {
|
||||
TestPassed = false;
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the negative scenarios of hipModuleGetTexRef API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetTexRef_Negative") {
|
||||
hipModule_t Module;
|
||||
HIP_TEX_REFERENCE texref;
|
||||
CTX_CREATE()
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
|
||||
SECTION("TexRef as nullptr") {
|
||||
REQUIRE(hipModuleGetTexRef(nullptr, Module, "tex") != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Name as nullptr") {
|
||||
REQUIRE(hipModuleGetTexRef(&texref, Module, nullptr) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Name as non existing TexName") {
|
||||
REQUIRE(hipModuleGetTexRef(&texref, Module,
|
||||
NON_EXISTING_TEX_NAME) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Empty tex name") {
|
||||
REQUIRE(hipModuleGetTexRef(&texref, Module, EMPTY_TEX_NAME) != hipSuccess);
|
||||
}
|
||||
#if HT_NVIDIA
|
||||
SECTION("Name as Global kernel Var") {
|
||||
REQUIRE(hipModuleGetTexRef(&texref, Module,
|
||||
GLOBAL_KERNEL_VAR) != hipSuccess);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Unload Module") {
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
REQUIRE(hipModuleGetTexRef(&texref, Module, TEX_REF) != hipSuccess);
|
||||
}
|
||||
|
||||
CTX_DESTROY()
|
||||
}
|
||||
/**
|
||||
* Validates texture type data functionality for hipModuleGetTexRef
|
||||
* 1.Loads the code object file
|
||||
* 2.Based on the template type texRefName,KernelFuncName and format are assigned.
|
||||
* 3.Allocate array based on format.
|
||||
* 4.Assigns array to texRef
|
||||
* 5.Launches the kernel based on the template type which invokes text2D API
|
||||
and copies the data to output variable.
|
||||
* 6.Validates the data.
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipModuleGetTexRef_Basic", "", int,
|
||||
char, uint16_t, float) {
|
||||
bool TestPassed = true;
|
||||
constexpr unsigned int width = WIDTH;
|
||||
constexpr unsigned int height = HEIGHT;
|
||||
constexpr unsigned int size = width * height * sizeof(TestType);
|
||||
const char *texRefName, *kerFuncName;
|
||||
hipArray_Format format;
|
||||
|
||||
TestType* hData = reinterpret_cast<TestType*>(malloc(size));
|
||||
if (NULL == hData) {
|
||||
INFO("Failed to allocate using malloc in testTexType.\n");
|
||||
REQUIRE(false);
|
||||
}
|
||||
CTX_CREATE()
|
||||
HipTest::setDefaultData<TestType>(width * height, hData, nullptr, nullptr);
|
||||
// Load Kernel File and create hipArray
|
||||
hipModule_t Module;
|
||||
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
|
||||
HIP_ARRAY array;
|
||||
|
||||
if (std::is_same<TestType, char>::value) {
|
||||
texRefName = "ctex";
|
||||
kerFuncName = "tex2dKernelInt8";
|
||||
format = HIP_AD_FORMAT_SIGNED_INT8;
|
||||
} else if (std::is_same<TestType, uint16_t>::value) {
|
||||
texRefName = "stex";
|
||||
kerFuncName = "tex2dKernelInt16";
|
||||
format = HIP_AD_FORMAT_SIGNED_INT16;
|
||||
} else if (std::is_same<TestType, int>::value) {
|
||||
texRefName = "itex";
|
||||
kerFuncName = "tex2dKernelInt";
|
||||
format = HIP_AD_FORMAT_SIGNED_INT32;
|
||||
} else if (std::is_same<TestType, float>::value) {
|
||||
texRefName = "ftex";
|
||||
kerFuncName = "tex2dKernelFloat";
|
||||
format = HIP_AD_FORMAT_FLOAT;
|
||||
}
|
||||
allocInitArray<TestType>(width, height, format, &array);
|
||||
|
||||
#if HT_NVIDIA
|
||||
// Copy from hData to array using hipMemcpyParam2D
|
||||
copyBuffer2Array<TestType, HIP_ARRAY*>(width, height, hData, &array);
|
||||
// Get tex reference from the loaded kernel file
|
||||
// Assign array to the tex reference
|
||||
assignArray2TexRef<HIP_ARRAY*>(format, texRefName, Module, &array);
|
||||
#else
|
||||
// Copy from hData to array using hipMemcpyParam2D
|
||||
copyBuffer2Array<TestType, HIP_ARRAY>(width, height, hData, array);
|
||||
// Get tex reference from the loaded kernel file
|
||||
// Assign array to the tex reference
|
||||
assignArray2TexRef<HIP_ARRAY>(format, texRefName, Module, array);
|
||||
#endif
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kerFuncName));
|
||||
|
||||
TestType* dData = NULL;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dData), size));
|
||||
|
||||
struct {
|
||||
void* _Ad;
|
||||
unsigned int _Bd;
|
||||
unsigned int _Cd;
|
||||
} args;
|
||||
args._Ad = reinterpret_cast<void*>(dData);
|
||||
args._Bd = width;
|
||||
args._Cd = height;
|
||||
|
||||
size_t sizeTemp = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
&args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE,
|
||||
&sizeTemp,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
int temp1 = width / GRIDDIMX;
|
||||
int temp2 = height / GRIDDIMY;
|
||||
HIP_CHECK(
|
||||
hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ,
|
||||
temp1, temp2, BLOCKDIMZ, 0, 0,
|
||||
NULL, reinterpret_cast<void**>(&config)));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
TestType* hOutputData = reinterpret_cast<TestType*>(malloc(size));
|
||||
if (NULL == hOutputData) {
|
||||
INFO("Failed to allocate using malloc in testTexType");
|
||||
REQUIRE(false);
|
||||
} else {
|
||||
memset(hOutputData, 0, size);
|
||||
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
|
||||
TestPassed = validateOutput<TestType>(width, height, hData, hOutputData);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
free(hOutputData);
|
||||
HIP_CHECK(hipFree(dData));
|
||||
ARRAY_DESTROY(array)
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
free(hData);
|
||||
CTX_DESTROY()
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies hipModuleGetTexRef on multiple streams
|
||||
where
|
||||
* 1..Loads the code object file
|
||||
* 2.Allocate array and initializes it with hData
|
||||
* 3.Assigns array to texRef
|
||||
4.Creates multiple streams
|
||||
* 4.Launches the kernel on each stream which invokes text2D API
|
||||
and copies the data to output variable
|
||||
* 5.Validates the hData with output data in each stream.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetTexRef_TexMultStream") {
|
||||
bool TestPassed = true;
|
||||
auto buffer = load_file();
|
||||
TestPassed = testTexMultStream<float>(buffer,
|
||||
HIP_AD_FORMAT_FLOAT,
|
||||
"ftex",
|
||||
"tex2dKernelFloat",
|
||||
MAX_STREAMS);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
/*
|
||||
This testcase verifies hipModuleGetTexRef Multithreaded scenario on
|
||||
single stream and multi GPU machine.
|
||||
1. Gets the device count.
|
||||
2. Create the threads based on device count.
|
||||
3. Each thread calls the testTexMultStream which performs the same
|
||||
above funtionality on single Stream
|
||||
4. The threads are executed in parallel and are joined later.
|
||||
|
||||
This testcase ensures that the multi thread execution on single stream
|
||||
in parallel is successful
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexSingleStreamMultiGPU") {
|
||||
bool TestPassed = true;
|
||||
// Testcase skipped on nvidia with CUDA API version 11.2,
|
||||
// as hipModuleLoadData returning error code
|
||||
// 'a PTX JIT compilation failed'(218), which is invalid
|
||||
// behavior. Test passes with AMD and previous CUDA versions.
|
||||
#if HT_NVIDIA
|
||||
INFO("Testcase skipped on CUDA version 11.2\n");
|
||||
REQUIRE(true);
|
||||
#else
|
||||
int gpu_cnt = 0;
|
||||
auto buffer = load_file();
|
||||
HIP_CHECK(hipGetDeviceCount(&gpu_cnt));
|
||||
TestPassed = testTexSingleStreamMultGPU(gpu_cnt, buffer);
|
||||
REQUIRE(TestPassed);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
This testcase verifies hipModuleGetTexRef Multithreaded scenario on
|
||||
single stream and multi GPU machine.
|
||||
1. Gets the device count.
|
||||
2. Create the threads based on device count.
|
||||
3. Each thread calls the testTexMultStream which performs the same
|
||||
above funtionality on multiple Stream
|
||||
4. The threads are executed in parallel and are joined later.
|
||||
|
||||
This testcase ensures that the multi thread execution on multiple streams
|
||||
in parallel is successful
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexMultiStreamMultiGPU") {
|
||||
bool TestPassed = true;
|
||||
// Testcase skipped on nvidia with CUDA API version 11.2,
|
||||
// as hipModuleLoadData returning error code
|
||||
// 'a PTX JIT compilation failed'(218), which is invalid
|
||||
// behavior. Test passes with AMD and previous CUDA versions.
|
||||
#if HT_NVIDIA
|
||||
INFO("Testcase skipped on CUDA version 11.2\n");
|
||||
REQUIRE(true);
|
||||
#else
|
||||
int gpu_cnt = 0;
|
||||
auto buffer = load_file();
|
||||
HIP_CHECK(hipGetDeviceCount(&gpu_cnt));
|
||||
TestPassed = testTexMultStreamMultGPU(gpu_cnt, buffer);
|
||||
REQUIRE(TestPassed);
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#define fileName "module_kernels.code"
|
||||
/*
|
||||
This testcase verifies the basic functionality of hipModuleUnload API
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleUnload_Basic") {
|
||||
CTX_CREATE()
|
||||
hipModule_t module;
|
||||
HIP_CHECK(hipModuleLoad(&module, fileName));
|
||||
HIP_CHECK(hipModuleUnload(module));
|
||||
CTX_DESTROY()
|
||||
}
|
||||
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
This testcase reads the OpenCL kernel file and generates the code object
file, which is then executed through the HIP interface.
This testcase verifies it for the
1. Current GPU architecture
2. Code object version v3
*/
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <experimental/filesystem>
|
||||
#endif
|
||||
#include <string.h>
|
||||
#include "hip_test_common.hh"
|
||||
#include "hip_test_checkers.hh"
|
||||
|
||||
#define OPENCL_OBJ_FILE "opencl_add.cc"
|
||||
#define HIP_CODEOBJ_FILE_DEFAULT "opencl_add.co"
|
||||
#define HIP_CODEOBJ_FILE_V3 "opencl_add_v3.co"
|
||||
#define COMMAND_LEN 256
|
||||
#define BUFFER_LEN 256
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
/* Check if environment variable $ROCM_PATH is defined */
|
||||
static bool isRocmPathSet() {
|
||||
FILE *fpipe;
|
||||
char const *command = "echo $ROCM_PATH";
|
||||
fpipe = popen(command, "r");
|
||||
|
||||
if (fpipe == nullptr) {
|
||||
WARN("Unable to create command");
|
||||
return false;
|
||||
}
|
||||
char command_op[BUFFER_LEN];
|
||||
if (fgets(command_op, BUFFER_LEN, fpipe)) {
|
||||
size_t len = strlen(command_op);
|
||||
if (len > 1) { // This is because fgets always adds newline character
|
||||
pclose(fpipe);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
pclose(fpipe);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Gets the sramecc/xnack settings from rocm info */
|
||||
|
||||
int getV3TargetIdFeature(char* feature, bool rocmPathSet) {
|
||||
FILE *fpipe;
|
||||
char command[COMMAND_LEN] = "";
|
||||
const char *rocmpath = nullptr;
|
||||
if (rocmPathSet) {
|
||||
// For STG2 testing where /opt/rocm path is not present
|
||||
rocmpath = "$ROCM_PATH/bin/rocminfo";
|
||||
} else {
|
||||
// Check if the rocminfo tool exists
|
||||
rocmpath = "/opt/rocm/bin/rocminfo";
|
||||
}
|
||||
snprintf(command, COMMAND_LEN, "%s", rocmpath);
|
||||
strncat(command, " | grep -m1 \"sramecc.:xnack.\"", COMMAND_LEN);
|
||||
fpipe = popen(command, "r");
|
||||
|
||||
if (fpipe == nullptr) {
|
||||
WARN("Unable to create command file");
|
||||
return -1;
|
||||
}
|
||||
char command_op[BUFFER_LEN];
|
||||
const char* pOpt1 = nullptr;
|
||||
const char *pOpt2 = nullptr;
|
||||
if (fgets(command_op, BUFFER_LEN, fpipe)) {
|
||||
if (strstr(command_op, "sramecc+")) {
|
||||
pOpt1 = "-msram-ecc";
|
||||
} else if (strstr(command_op, "sramecc-")) {
|
||||
pOpt1 = "-mno-sram-ecc";
|
||||
} else {
|
||||
pclose(fpipe);
|
||||
return -1;
|
||||
}
|
||||
if (strstr(command_op, "xnack+")) {
|
||||
pOpt2 = " -mxnack";
|
||||
} else if (strstr(command_op, "xnack-")) {
|
||||
pOpt2 = " -mno-xnack";
|
||||
} else {
|
||||
pclose(fpipe);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
printf("No sramecc/xnack settings found.\n");
|
||||
pclose(fpipe);
|
||||
return -1;
|
||||
}
|
||||
strncpy(feature, pOpt1, strlen(pOpt1));
|
||||
strncat(feature, pOpt2, strlen(pOpt2));
|
||||
pclose(fpipe);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* Validates OpenCL Static Lds Code Object where
|
||||
* 1. Tries to access opencl kernel file
|
||||
* 2. Copies it to current folder
|
||||
* 3. Tries to get RocmPath and execute the kernel file to
|
||||
generate the code object file.code-object-version argument
|
||||
specifies the code object version
|
||||
* 4. Launch the kernel which copies one variable to another
|
||||
* 5. Validates the result.
|
||||
*/
|
||||
TEST_CASE("Unit_hipModuleLoad_OpenCLStaticCodeObjV3") {
|
||||
#ifdef __linux__
|
||||
auto codeobj_type = GENERATE(0, 1);
|
||||
char command[COMMAND_LEN] = "";
|
||||
char v3option[32] = "";
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, 0);
|
||||
std::string path = std::experimental::filesystem::current_path();
|
||||
WARN("path is " << path.c_str());
|
||||
if (access("./opencl_add.cc", F_OK) == -1) {
|
||||
system("cp ./../../../../hip-on-rocclr/tests/catch/unit/module/opencl_add.cc .");
|
||||
}
|
||||
// Generate the command to translate the OpenCL code object to hip code object
|
||||
const char *pCodeObjVer = nullptr;
|
||||
const char *pCodeObjFile = nullptr;
|
||||
bool rocmPathSet = isRocmPathSet();
|
||||
if (codeobj_type == 0) {
|
||||
pCodeObjVer = "";
|
||||
pCodeObjFile = HIP_CODEOBJ_FILE_DEFAULT;
|
||||
} else {
|
||||
pCodeObjVer = "-mcode-object-version=3";
|
||||
if (-1 == getV3TargetIdFeature(v3option, rocmPathSet)) {
|
||||
INFO("Error getting V3 Option. Skipping Test. \n");
|
||||
REQUIRE(true);
|
||||
}
|
||||
pCodeObjFile = HIP_CODEOBJ_FILE_V3;
|
||||
}
|
||||
INFO("v3option "<< v3option);
|
||||
/* The command string is created using multiple concatenation instead of one go
|
||||
to avoid the following cpplint error:
|
||||
" Multi-line string ("...") found. This lint script doesn't do well with such strings,
|
||||
and may give bogus warnings. Use C++11 raw strings or concatenation instead."
|
||||
*/
|
||||
if (rocmPathSet) {
|
||||
// For STG2 testing where /opt/rocm path is not present
|
||||
snprintf(command, COMMAND_LEN,
|
||||
"$ROCM_PATH/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl ");
|
||||
} else {
|
||||
snprintf(command, COMMAND_LEN,
|
||||
"/opt/rocm/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl ");
|
||||
}
|
||||
char command_temp[COMMAND_LEN] = "";
|
||||
snprintf(command_temp, COMMAND_LEN,
|
||||
"-include `find /opt/rocm* -name opencl-c.h` %s %s -mcpu=%s -o %s %s",
|
||||
pCodeObjVer, v3option, props.gcnArchName, pCodeObjFile, OPENCL_OBJ_FILE);
|
||||
|
||||
strncat(command, command_temp, COMMAND_LEN);
|
||||
INFO("command executed "<< command);
|
||||
|
||||
system((const char*)command);
|
||||
// Check if the code object file is created
|
||||
snprintf(command, COMMAND_LEN, "./%s",
|
||||
pCodeObjFile);
|
||||
|
||||
if (access(command, F_OK) == -1) {
|
||||
INFO("Code Object File not found \n");
|
||||
REQUIRE(true);
|
||||
}
|
||||
|
||||
hipDevice_t device;
|
||||
hipModule_t Module;
|
||||
hipFunction_t Function;
|
||||
HIPCHECK(hipDeviceGet(&device, 0));
|
||||
HIPCHECK(hipModuleLoad(&Module, pCodeObjFile));
|
||||
HIPCHECK(hipModuleGetFunction(&Function, Module, "add"));
|
||||
float *Ah, *Bh, *Ad, *Bd;
|
||||
HipTest::initArrays<float>(&Ad, &Bd, nullptr, &Ah, &Bh, nullptr,
|
||||
BUFFER_LEN, false);
|
||||
|
||||
HIPCHECK(hipMemcpy(Ad, Ah, sizeof(float) * BUFFER_LEN,
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
struct {
|
||||
void* _Bd;
|
||||
void* _Ad;
|
||||
} args;
|
||||
args._Ad = static_cast<void*>(Ad);
|
||||
args._Bd = static_cast<void*>(Bd);
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void *config[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, BUFFER_LEN, 1, 1, 0, 0,
|
||||
NULL, reinterpret_cast<void**>(&config)));
|
||||
HIPCHECK(hipMemcpy(Bh, Bd, sizeof(float) * BUFFER_LEN,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (uint32_t i = 0; i < BUFFER_LEN; i++) {
|
||||
REQUIRE(Ah[i] == Bh[i]);
|
||||
}
|
||||
HipTest::freeArrays<float>(Ad, Bd, nullptr,
|
||||
Ah, Bh, nullptr, false);
|
||||
#else
|
||||
INFO("This test is skipped due to non linux environment.\n");
|
||||
REQUIRE(true);
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define GLOBAL_BUF_SIZE 2048
|
||||
#define ARRAY_SIZE (16)
|
||||
|
||||
texture<float, 2, hipReadModeElementType> ftex;
|
||||
texture<int, 2, hipReadModeElementType> itex;
|
||||
texture<uint16_t, 2, hipReadModeElementType> stex;
|
||||
texture<char, 2, hipReadModeElementType> ctex;
|
||||
|
||||
__device__ int deviceGlobal = 1;
|
||||
__managed__ int x = 10;
|
||||
__device__ float myDeviceGlobal;
|
||||
__device__ float myDeviceGlobalArray[16];
|
||||
|
||||
|
||||
__device__ float deviceGlobalFloat;
|
||||
__device__ int deviceGlobalInt1;
|
||||
__device__ int deviceGlobalInt2;
|
||||
__device__ uint16_t deviceGlobalShort;
|
||||
__device__ char deviceGlobalChar;
|
||||
|
||||
/**
 * Copies the 2D float texture `ftex` into a linear output buffer.
 *
 * Each thread derives one (col, row) coordinate from its block and thread
 * indices, samples ftex there and stores the value at
 * outputData[row * width + col].
 *
 * @param outputData  destination buffer, indexed as row * width + col
 * @param width       number of columns to copy
 * @param height      number of rows to copy
 */
extern "C" __global__ void tex2dKernelFloat(float* outputData,
                                            int width, int height) {
  const int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  const int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height. The original compared the row against width,
  // which left `height` unused and mis-guarded non-square launches.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(ftex, col, row);
  }
}
|
||||
|
||||
/**
 * Copies the 2D int texture `itex` into a linear output buffer.
 *
 * Each thread derives one (col, row) coordinate from its block and thread
 * indices, samples itex there and stores the value at
 * outputData[row * width + col].
 *
 * @param outputData  destination buffer, indexed as row * width + col
 * @param width       number of columns to copy
 * @param height      number of rows to copy
 */
extern "C" __global__ void tex2dKernelInt(int* outputData,
                                          int width, int height) {
  const int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  const int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height. The original compared the row against width,
  // which left `height` unused and mis-guarded non-square launches.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(itex, col, row);
  }
}
|
||||
|
||||
/**
 * Copies the 2D uint16_t texture `stex` into a linear output buffer.
 *
 * Each thread derives one (col, row) coordinate from its block and thread
 * indices, samples stex there and stores the value at
 * outputData[row * width + col].
 *
 * @param outputData  destination buffer, indexed as row * width + col
 * @param width       number of columns to copy
 * @param height      number of rows to copy
 */
extern "C" __global__ void tex2dKernelInt16(uint16_t* outputData,
                                            int width, int height) {
  const int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  const int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height. The original compared the row against width,
  // which left `height` unused and mis-guarded non-square launches.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(stex, col, row);
  }
}
|
||||
|
||||
/**
 * Copies the 2D char texture `ctex` into a linear output buffer.
 *
 * Each thread derives one (col, row) coordinate from its block and thread
 * indices, samples ctex there and stores the value at
 * outputData[row * width + col].
 *
 * @param outputData  destination buffer, indexed as row * width + col
 * @param width       number of columns to copy
 * @param height      number of rows to copy
 */
extern "C" __global__ void tex2dKernelInt8(char* outputData,
                                           int width, int height) {
  const int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  const int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height. The original compared the row against width,
  // which left `height` unused and mis-guarded non-square launches.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(ctex, col, row);
  }
}
|
||||
|
||||
/**
 * Integer N x N matrix product C = A * B over row-major buffers.
 *
 * Each thread computes the single element C[row][col] selected by its 2D
 * block/thread indices; threads outside the N x N range do nothing.
 *
 * @param clockrate  not used by this kernel
 * @param A, B       input matrices, indexed as [r * N + c]
 * @param C          output matrix, indexed as [r * N + c]
 * @param N          matrix dimension
 */
extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C,
                                   int N) {
  const int row = blockIdx.y * blockDim.y + threadIdx.y;
  const int col = blockIdx.x * blockDim.x + threadIdx.x;

  // Threads that fall outside the matrix have nothing to compute.
  if ((row >= N) || (col >= N)) {
    return;
  }

  // Dot product of row `row` of A with column `col` of B.
  int dot = 0;
  for (int k = 0; k < N; ++k) {
    dot += A[row * N + k] * B[k * N + col];
  }
  C[row * N + col] = dot;
}
|
||||
|
||||
/**
 * Integer N x N matrix product written to two outputs: C = D = A * B.
 *
 * Each thread computes the element selected by its 2D block/thread indices
 * and stores the same value in both C and D. Threads outside the N x N range
 * do nothing.
 *
 * @param A, B  input matrices, indexed as [r * N + c]
 * @param C, D  output matrices, indexed as [r * N + c]
 * @param N     matrix dimension
 */
extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C,
                                                int *D, int N) {
  int ROW = blockIdx.y*blockDim.y+threadIdx.y;
  int COL = blockIdx.x*blockDim.x+threadIdx.x;
  if (ROW < N && COL < N) {
    int tmpSum = 0;
    // each thread computes one element of the block sub-matrix
    for (int i = 0; i < N; i++) {
      tmpSum += A[ROW * N + i] * B[i * N + COL];
    }
    // The stores belong inside the bounds check: the original performed them
    // unconditionally, so out-of-range threads wrote past the matrices
    // whenever the launch grid was larger than N x N.
    C[ROW * N + COL] = tmpSum;
    D[ROW * N + COL] = tmpSum;
  }
}
|
||||
|
||||
/**
 * Calls HipTest::waitKernel(16, clockrate) and does nothing else.
 * Presumably this keeps the device busy for about 16 seconds; waitKernel is
 * defined outside this file, so confirm against its implementation.
 *
 * @param clockrate  forwarded unchanged to HipTest::waitKernel
 */
extern "C" __global__ void SixteenSecKernel(int clockrate) {
  constexpr int kWaitArg = 16;
  HipTest::waitKernel(kWaitArg, clockrate);
}
|
||||
|
||||
/**
 * Updates the `deviceGlobal` state word around a HipTest::waitKernel(2, ...)
 * call:
 *  - before waiting: 0x2222 (the value FourSecKernel stores on entry) is
 *    replaced by 0x3333;
 *  - after waiting: any value other than 0x3333 is replaced by 0x5555.
 *
 * @param clockrate  forwarded unchanged to HipTest::waitKernel
 */
extern "C" __global__ void TwoSecKernel(int clockrate) {
  constexpr int kFourSecEntered = 0x2222;
  constexpr int kSeenByTwoSec = 0x3333;
  constexpr int kNotSeenByTwoSec = 0x5555;

  if (deviceGlobal == kFourSecEntered) {
    deviceGlobal = kSeenByTwoSec;
  }

  HipTest::waitKernel(2, clockrate);

  if (deviceGlobal != kSeenByTwoSec) {
    deviceGlobal = kNotSeenByTwoSec;
  }
}
|
||||
|
||||
/**
 * Updates the `deviceGlobal` state word around a HipTest::waitKernel(4, ...)
 * call:
 *  - before waiting: the initial value 1 is replaced by 0x2222;
 *  - after waiting: if the word still reads 0x2222 it is replaced by 0x4444.
 *
 * @param clockrate  forwarded unchanged to HipTest::waitKernel
 */
extern "C" __global__ void FourSecKernel(int clockrate) {
  constexpr int kInitial = 1;
  constexpr int kEntered = 0x2222;
  constexpr int kFinishedUntouched = 0x4444;

  if (deviceGlobal == kInitial) {
    deviceGlobal = kEntered;
  }

  HipTest::waitKernel(4, clockrate);

  if (deviceGlobal == kEntered) {
    deviceGlobal = kFinishedUntouched;
  }
}
|
||||
|
||||
/**
 * Increments the managed global `x` by one. The update is a plain
 * (non-atomic) read-modify-write performed by every launched thread.
 */
extern "C" __global__ void GPU_func() {
  x += 1;
}
|
||||
|
||||
|
||||
/**
 * Returns deviceGlobalFloat squared, converted to int with static_cast.
 */
__device__ int getSquareOfGlobalFloat() {
  const float squared = deviceGlobalFloat * deviceGlobalFloat;
  return static_cast<int>(squared);
}
|
||||
|
||||
/**
 * Per-thread weighted copy driven by the device globals:
 *   b[t] = deviceGlobalInt1 * a[t] + deviceGlobalInt2 + deviceGlobalShort
 *          + deviceGlobalChar + getSquareOfGlobalFloat()
 * where t is the thread's x index within its block (the block index is not
 * used).
 *
 * @param a  input values, read at index t
 * @param b  output values, written at index t
 */
extern "C" __global__ void testWeightedCopy(int* a, int* b) {
  const int t = hipThreadIdx_x;

  // Accumulate the terms in the same left-to-right order as the formula.
  int weighted = deviceGlobalInt1 * a[t];
  weighted += deviceGlobalInt2;
  weighted += static_cast<int>(deviceGlobalShort);
  weighted += static_cast<int>(deviceGlobalChar);
  weighted += getSquareOfGlobalFloat();

  b[t] = weighted;
}
|
||||
|
||||
|
||||
/**
 * Element-wise copy: thread t (its x index within the block) stores a[t]
 * into b[t]. The block index is not used.
 *
 * @param a  source buffer
 * @param b  destination buffer
 */
extern "C" __global__ void hello_world(const float* a, float* b) {
  const int t = hipThreadIdx_x;
  const float value = a[t];
  b[t] = value;
}
|
||||
|
||||
/**
 * Adds the device globals to each input element:
 *   b[t] = a[t] + myDeviceGlobal + myDeviceGlobalArray[t % ARRAY_SIZE]
 * where t is the thread's x index within its block.
 *
 * @param a  source buffer
 * @param b  destination buffer
 */
extern "C" __global__ void test_globals(const float* a, float* b) {
  const int t = hipThreadIdx_x;

  // Same left-to-right float summation order as the formula above.
  float total = a[t] + myDeviceGlobal;
  total += myDeviceGlobalArray[t % ARRAY_SIZE];

  b[t] = total;
}
|
||||
|
||||
/** Kernel with an empty body; launching it performs no device work. */
extern "C" __global__ void EmptyKernel() {}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Copies `input` to `output`, routing the first 100 elements through a
 * statically sized __local (LDS) array.
 *
 * The work-item with global id 0 fills lds[0..99] from input; after a local
 * memory barrier, work-items with global id below 100 write lds[id] to
 * output[id] and all others copy input[id] directly.
 */
kernel void add(global float* output, global float* input) {
  __local float lds[100];
  const int gid = get_global_id(0);

  // Only the first work-item stages the data into local memory.
  if (gid == 0) {
    for (int k = 0; k < 100; ++k) {
      lds[k] = input[k];
    }
  }

  // Reached by every work-item, so the staged values are visible below.
  barrier(CLK_LOCAL_MEM_FENCE);

  output[gid] = (gid < 100) ? lds[gid] : input[gid];
}
|
||||
Ссылка в новой задаче
Block a user