SWDEV-289405 - [catch2][dtest][module] Migration of Module files to CATCH2 framework (#2351)

Migrated all module related files to CATCH2 framework and optimized to
have single module kernel file

Change-Id: I39aa28ef22c1b2f4d0014ca32b59b9c645b725dc
Этот коммит содержится в:
dkrottap
2021-09-17 11:39:36 +05:30
коммит произвёл GitHub
родитель 039b342e14
Коммит 4e1a3ff850
36 изменённых файлов: 4560 добавлений и 5 удалений
+3 -1
Просмотреть файл
@@ -15,6 +15,7 @@ target_link_libraries(UnitTests PRIVATE UnitDeviceTests
EventTest
OccupancyTest
DeviceTest
ModuleTest
RTC
stdc++fs)
@@ -36,6 +37,7 @@ target_link_libraries(ABMTests PRIVATE ABMAddKernels
stdc++fs)
catch_discover_tests(ABMTests PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
add_dependencies(UnitTests module_kernels.code)
add_dependencies(build_tests UnitTests ABMTests)
@@ -63,7 +65,7 @@ else()
target_compile_options(StressTest PUBLIC -std=c++17)
endif()
if(HIP_PLATFORM MATCHES "amd")
target_link_libraries(StressTest PRIVATE printf stream)
target_link_libraries(StressTest PRIVATE printf stream module)
endif()
target_link_libraries(StressTest PRIVATE memory stdc++fs)
add_dependencies(build_stress_test StressTest)
+59 -2
Просмотреть файл
@@ -1,6 +1,5 @@
/*
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -23,6 +22,13 @@ THE SOFTWARE.
#pragma once
#include "hip_test_context.hh"
#include <catch.hpp>
#ifdef __linux__
#include <sys/sysinfo.h>
#elif defined(_WIN32)
#include <windows.h>
#endif
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
@@ -72,6 +78,27 @@ THE SOFTWARE.
}
// Platform-dependent helpers for context and array handling.
// When HT_NVIDIA is non-zero an explicit context is created/destroyed; otherwise the
// context macros expand to nothing.
// NOTE: CTX_DESTROY and ARRAY_DESTROY already end with a semicolon, so a trailing
// ';' at the call site produces a harmless empty statement.
#if HT_NVIDIA
// Declares a local variable named `context` and initializes it via initHipCtx().
#define CTX_CREATE() \
hipCtx_t context;\
initHipCtx(&context);
// Destroys the `context` variable that CTX_CREATE() declared in the same scope.
#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context));
// Releases an array through hipArrayDestroy on this path.
#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array));
#define HIP_TEX_REFERENCE hipTexRef
#define HIP_ARRAY hiparray
// Initializes HIP, fetches device 0 and creates a context on it, storing the
// handle in *pcontext. Every call is wrapped in HIPCHECK.
static void initHipCtx(hipCtx_t *pcontext) {
HIPCHECK(hipInit(0));
hipDevice_t device;
HIPCHECK(hipDeviceGet(&device, 0));
HIPCHECK(hipCtxCreate(pcontext, 0, device));
}
#else
// No explicit context handling on this path: both macros expand to nothing.
#define CTX_CREATE()
#define CTX_DESTROY()
// Releases an array through hipFreeArray on this path.
#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array));
#define HIP_TEX_REFERENCE textureReference*
#define HIP_ARRAY hipArray*
#endif
// Utility Functions
namespace HipTest {
@@ -104,4 +131,34 @@ static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlo
return blocks;
}
// Returns the amount of free physical system memory in MiB.
// Returns 0 when the amount cannot be determined (query failure or an
// unsupported platform), which callers already treat as "no memory available".
static size_t getMemoryAmount() {
#if defined(__linux__)
  struct sysinfo info;
  if (sysinfo(&info) != 0) return 0;
  // sysinfo reports memory sizes in multiples of mem_unit bytes, so the raw
  // freeram field has to be scaled before converting to MiB.
  const unsigned long long freeBytes =
      static_cast<unsigned long long>(info.freeram) * info.mem_unit;
  return static_cast<size_t>(freeBytes / (1024ULL * 1024ULL));  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  if (!GlobalMemoryStatusEx(&statex)) return 0;
  return static_cast<size_t>(statex.ullAvailPhys / (1024 * 1024));  // MB
#else
  // No implementation for this platform: report "unknown" instead of falling
  // off the end of a non-void function.
  return 0;
#endif
}
// Computes how many host threads a test may start.
//   memPerThread - memory budget per thread, in the same unit getMemoryAmount()
//                  returns (MB); 0 yields 0 threads.
//   maxThreads   - optional upper bound on the result; 0 means "no bound".
// The result is the smaller of the hardware thread count and the number of
// threads whose budget fits into the currently free memory, capped by
// maxThreads. Returns 0 when either quantity cannot be determined.
static inline size_t getHostThreadCount(const size_t memPerThread = 200, const size_t maxThreads = 0) {
  if (memPerThread == 0) return 0;
  const size_t memAmount = getMemoryAmount();
  const size_t processor_count = std::thread::hardware_concurrency();
  if (processor_count == 0 || memAmount == 0) return 0;
  // Dividing (instead of multiplying processor_count by memPerThread) cannot
  // overflow and yields the same selection as the original comparison.
  const size_t memLimitedCount = memAmount / memPerThread;
  size_t thread_count =
      (processor_count < memLimitedCount) ? processor_count : memLimitedCount;
  if (maxThreads > 0 && thread_count > maxThreads) {
    thread_count = maxThreads;
  }
  return thread_count;
}
}
+1 -1
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+30 -1
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -72,6 +72,35 @@ __global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count)
}
}
// Busy-waits on the device until clock64()/clockrate has advanced by at least
// wait_sec * 1000 relative to its value on entry.
// NOTE(review): presumably clockrate is given in kHz so the quotient counts
// milliseconds - confirm against the callers.
template <typename T>
__device__ void waitKernel(uint64_t wait_sec, T clockrate) {
  const uint64_t begin = clock64() / clockrate;
  for (;;) {
    const uint64_t elapsed = clock64() / clockrate - begin;
    if (!(elapsed < (wait_sec * 1000))) {
      break;
    }
  }
}
// Kernel that updates its by-value `globalvar` argument around a call to
// waitKernel(2, clockrate).
// NOTE(review): `globalvar` is a by-value parameter, so the assignments below
// are not observable outside this kernel; T is not used by the signature and
// must be supplied explicitly by the caller.
template <typename T>
__global__ void TwoSecKernel_GlobalVar(int globalvar, int clockrate) {
  constexpr int kValueBeforeWait = 0x2222;
  constexpr int kValueAfterMatch = 0x3333;
  constexpr int kValueOnMismatch = 0x5555;

  if (globalvar == kValueBeforeWait) {
    globalvar = kValueAfterMatch;
  }
  waitKernel(2, clockrate);
  if (globalvar != kValueAfterMatch) {
    globalvar = kValueOnMismatch;
  }
}
// Kernel that updates its by-value `globalvar` argument around a call to
// waitKernel(4, clockrate).
// NOTE(review): `globalvar` is a by-value parameter, so the assignments below
// are not observable outside this kernel; T is not used by the signature and
// must be supplied explicitly by the caller.
template <typename T>
__global__ void FourSecKernel_GlobalVar(int globalvar, int clockrate) {
  constexpr int kInitialValue = 1;
  constexpr int kValueBeforeWait = 0x2222;
  constexpr int kValueAfterWait = 0x4444;

  if (globalvar == kInitialValue) {
    globalvar = kValueBeforeWait;
  }
  waitKernel(4, clockrate);
  if (globalvar == kValueBeforeWait) {
    globalvar = kValueAfterWait;
  }
}
template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x;
+1
Просмотреть файл
@@ -1,4 +1,5 @@
add_subdirectory(memory)
add_subdirectory(module)
if(HIP_PLATFORM MATCHES "amd")
add_subdirectory(printf)
add_subdirectory(stream)
+1
Просмотреть файл
@@ -2,6 +2,7 @@
set(TEST_SRC
memcpy.cc
hipMemcpyMThreadMSize.cc
hipMemcpyBoundaryOffsetCheck.cc
)
# Create shared lib of all tests
+344
Просмотреть файл
@@ -0,0 +1,344 @@
/*
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the following scenarios
1. Boundary checks with different sizes
2. Device offset scenario
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include "sys/types.h"
#include "sys/sysinfo.h"
#endif
static constexpr auto NUM_ELM{4*1024 * 1024};
// Holder for the device-side buffers used by the memcpy tests.
// The accessors return the base pointer of each buffer advanced by the
// currently configured element offset. The object owns raw device pointers
// that its destructor releases, so copying is disabled to prevent a second
// release of the same pointers.
template <typename T>
class DeviceMemory {
 public:
  // Allocates the buffers for numElements elements (see the constructor definition).
  explicit DeviceMemory(size_t numElements);
  DeviceMemory() = delete;
  DeviceMemory(const DeviceMemory&) = delete;
  DeviceMemory& operator=(const DeviceMemory&) = delete;
  ~DeviceMemory();

  // Buffer accessors; each applies the current offset (in elements).
  T* A_d() const { return _A_d + _offset; }
  T* B_d() const { return _B_d + _offset; }
  T* C_d() const { return _C_d + _offset; }
  T* C_dd() const { return _C_dd + _offset; }

  // Element count the buffers were created with.
  size_t maxNumElements() const { return _maxNumElements; }

  // Sets / reads the element offset applied by the accessors above.
  void offset(int offset) { _offset = offset; }
  int offset() const { return _offset; }

 private:
  T* _A_d;
  T* _B_d;
  T* _C_d;
  T* _C_dd;  // secondary copy of C, allocated separately in the constructor
  size_t _maxNumElements;
  int _offset;
};
// Allocates the A/B/C device buffers through HipTest::initArrays (no host
// arrays are requested) and a separate C_dd buffer of the same byte size.
template <typename T>
DeviceMemory<T>::DeviceMemory(size_t numElements)
    : _maxNumElements(numElements), _offset(0) {
  // Null placeholders for the three host-array slots of initArrays.
  T** noHostArray = nullptr;
  HipTest::initArrays(&_A_d, &_B_d, &_C_d, noHostArray, noHostArray,
                      noHostArray, numElements, 0);

  HIP_CHECK(hipMalloc(&_C_dd, numElements * sizeof(T)));
}
// Releases the A/B/C device buffers through HipTest::freeArrays and frees the
// separately allocated C_dd buffer.
template <typename T>
DeviceMemory<T>::~DeviceMemory() {
  // Null placeholder for the three host-array slots of freeArrays.
  T* noHostArray = nullptr;
  HipTest::freeArrays<T>(_A_d, _B_d, _C_d, noHostArray, noHostArray,
                         noHostArray, 0);

  HIP_CHECK(hipFree(_C_dd));
  _C_dd = nullptr;
}
// Holder for the host-side buffers used by the memcpy tests.
// The primary buffers are reached through accessors that apply the current
// element offset; the secondary copies (A_hh / B_hh) are public raw pointers.
// The object owns raw pointers that its destructor releases, so copying is
// disabled to prevent a second release of the same pointers.
template <typename T>
class HostMemory {
 public:
  // usePinnedHost selects hipHostMalloc (true) or malloc (false) for the
  // secondary buffers; see the constructor definition.
  HostMemory(size_t numElements, bool usePinnedHost);
  HostMemory() = delete;
  HostMemory(const HostMemory&) = delete;
  HostMemory& operator=(const HostMemory&) = delete;

  // Re-initializes the first numElements entries of the secondary buffers and,
  // when `full` is true, of the primary A/B buffers as well.
  void reset(size_t numElements, bool full = false);
  ~HostMemory();

  // Primary buffer accessors; each applies the current offset (in elements).
  T* A_h() const { return _A_h + _offset; }
  T* B_h() const { return _B_h + _offset; }
  T* C_h() const { return _C_h + _offset; }

  // Element count the buffers were created with.
  size_t maxNumElements() const { return _maxNumElements; }

  // Sets / reads the element offset applied by the accessors above.
  void offset(int offset) { _offset = offset; }
  int offset() const { return _offset; }

  // Host arrays, secondary copy
  T* A_hh;
  T* B_hh;
  bool _usePinnedHost;

 private:
  size_t _maxNumElements;
  int _offset;
  // Host arrays
  T* _A_h;
  T* _B_h;
  T* _C_h;
};
// Allocates the primary host buffers through HipTest::initArrays (no device
// arrays are requested) and the secondary A_hh / B_hh buffers either as
// pinned memory (hipHostMalloc) or with malloc, depending on usePinnedHost.
// A failed malloc is reported as a test failure instead of being dereferenced
// later by reset().
template <typename T>
HostMemory<T>::HostMemory(size_t numElements, bool usePinnedHost)
    : _usePinnedHost(usePinnedHost), _maxNumElements(numElements), _offset(0) {
  // Null placeholders for the three device-array slots of initArrays.
  T** np = nullptr;
  HipTest::initArrays(np, np, np, &_A_h, &_B_h, &_C_h,
                      numElements, usePinnedHost);
  A_hh = nullptr;
  B_hh = nullptr;
  const size_t sizeElements = numElements * sizeof(T);
  if (usePinnedHost) {
    HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_hh), sizeElements,
                            hipHostMallocDefault));
    HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_hh), sizeElements,
                            hipHostMallocDefault));
  } else {
    A_hh = static_cast<T*>(malloc(sizeElements));
    B_hh = static_cast<T*>(malloc(sizeElements));
    // malloc(0) may legitimately return a null pointer, so only non-empty
    // requests are required to succeed.
    REQUIRE((sizeElements == 0 || (A_hh != nullptr && B_hh != nullptr)));
  }
}
// Fills the first numElements entries of the secondary host buffers with
// index-dependent values; when `full` is set, the primary A/B buffers are
// refilled as well.
template <typename T>
void HostMemory<T>::reset(size_t numElements, bool full) {
  // Secondary copies are always re-initialized.
  for (size_t idx = 0; idx != numElements; ++idx) {
    A_hh[idx] = 1097.0 + idx;
    B_hh[idx] = 1492.0 + idx;
  }
  if (!full) {
    return;
  }
  // Primary buffers: Pi- and Phi-based ramps.
  for (size_t idx = 0; idx != numElements; ++idx) {
    _A_h[idx] = 3.146f + idx;  // Pi
    _B_h[idx] = 1.618f + idx;  // Phi
  }
}
// Releases the primary host buffers through HipTest::freeArraysForHost and the
// secondary buffers with the deallocator matching how they were allocated.
template <typename T>
HostMemory<T>::~HostMemory() {
  HipTest::freeArraysForHost(_A_h, _B_h, _C_h, _usePinnedHost);

  if (!_usePinnedHost) {
    // Secondary buffers came from malloc.
    free(A_hh);
    free(B_hh);
    return;
  }
  // Secondary buffers came from hipHostMalloc.
  HIP_CHECK(hipHostFree(A_hh));
  HIP_CHECK(hipHostFree(B_hh));
}
// Reports host memory figures through *free and *total (in bytes).
// On Windows both figures are scaled to 40% of what the OS reports; elsewhere
// the values come from sysinfo(), scaled by mem_unit.
#ifdef _WIN32
void memcpytest2_get_host_memory(size_t *free, size_t *total) {
  MEMORYSTATUSEX memStatus;
  memStatus.dwLength = sizeof(memStatus);
  GlobalMemoryStatusEx(&memStatus);
  // Windows doesn't allow allocating more than half of system memory to the gpu
  // Since the runtime also needs space for its internal allocations,
  // we should not try to allocate more than 40% of reported system memory,
  // otherwise we can run into OOM issues.
  const double usableFraction = 0.4;
  *free = static_cast<size_t>(usableFraction * memStatus.ullAvailPhys);
  *total = static_cast<size_t>(usableFraction * memStatus.ullTotalPhys);
}
#else
struct sysinfo memInfo;
void memcpytest2_get_host_memory(size_t *free, size_t *total) {
  sysinfo(&memInfo);
  // freeram / totalram are expressed in multiples of mem_unit bytes.
  const uint64_t unitBytes = memInfo.mem_unit;
  *free = memInfo.freeram * unitBytes;
  *total = memInfo.totalram * unitBytes;
}
#endif
//---
// Exercises one combination of memory copies around a vector-add kernel:
// copies the host inputs to the device, launches HipTest::vectorADD, copies
// the result back and validates it with HipTest::checkVectorADD.
//
// IN: dmem              : device buffers to use.
// IN: hmem              : host buffers to use (pinned or unpinned, as created
//                         by the caller).
// IN: numElements       : number of elements to copy and add; must not exceed
//                         the capacity of dmem or hmem.
// IN: useHostToHost     : if true, stage the inputs through an extra
//                         host-to-host copy before the host-to-device copy.
// IN: useDeviceToDevice : if true, add an extra device-to-device copy after
//                         the result is produced and read back from that copy.
// IN: useMemkindDefault : if true, pass hipMemcpyDefault (runtime figures out
//                         the direction); if false, pass the explicit
//                         direction for every copy.
//
template <typename T>
void memcpytest2(DeviceMemory<T>* dmem, HostMemory<T>* hmem,
                 size_t numElements, bool useHostToHost,
                 bool useDeviceToDevice, bool useMemkindDefault) {
  // Validate the request before any buffer is written: reset() below already
  // touches numElements entries of the host buffers.
  assert(numElements <= dmem->maxNumElements());
  assert(numElements <= hmem->maxNumElements());

  const size_t sizeElements = numElements * sizeof(T);
  hmem->reset(numElements);

  // Copy kinds for this run: either the generic default or the explicit one.
  const hipMemcpyKind kindH2H =
      useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost;
  const hipMemcpyKind kindH2D =
      useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice;
  const hipMemcpyKind kindD2D =
      useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToDevice;
  const hipMemcpyKind kindD2H =
      useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost;

  if (useHostToHost) {
    // Do some extra host-to-host copies here to mix things up:
    HIP_CHECK(hipMemcpy(hmem->A_hh, hmem->A_h(), sizeElements, kindH2H));
    HIP_CHECK(hipMemcpy(hmem->B_hh, hmem->B_h(), sizeElements, kindH2H));
    HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_hh, sizeElements, kindH2D));
    HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_hh, sizeElements, kindH2D));
  } else {
    HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_h(), sizeElements, kindH2D));
    HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_h(), sizeElements, kindH2D));
  }

  hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
                     static_cast<const T*>(dmem->A_d()),
                     static_cast<const T*>(dmem->B_d()),
                     dmem->C_d(), numElements);
  // A kernel launch does not return a status; pick up launch errors here.
  HIP_CHECK(hipGetLastError());

  if (useDeviceToDevice) {
    // Do an extra device-to-device copy here to mix things up:
    HIP_CHECK(hipMemcpy(dmem->C_dd(), dmem->C_d(), sizeElements, kindD2D));
    // Destroy the original dmem->C_d() so only the copy can satisfy the check:
    HIP_CHECK(hipMemset(dmem->C_d(), 0x5A, sizeElements));
    HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_dd(), sizeElements, kindD2H));
  } else {
    HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_d(), sizeElements, kindD2H));
  }

  HIP_CHECK(hipDeviceSynchronize());
  HipTest::checkVectorADD(hmem->A_h(), hmem->B_h(), hmem->C_h(), numElements);
}
// Try all the 16 possible combinations to memcpytest2 - usePinnedHost,
// useHostToHost,
// useDeviceToDevice, useMemkindDefault
template <typename T>
void memcpytest2_for_type(size_t numElements) {
DeviceMemory<T> memD(numElements);
HostMemory<T> memU(numElements, 0 /*usePinnedHost*/);
HostMemory<T> memP(numElements, 1 /*usePinnedHost*/);
for (int usePinnedHost = 0; usePinnedHost <= 1; usePinnedHost++) {
for (int useHostToHost = 0; useHostToHost <= 1; useHostToHost++) {
for (int useDeviceToDevice = 0; useDeviceToDevice <= 1;
useDeviceToDevice++) {
for (int useMemkindDefault = 0; useMemkindDefault <= 1;
useMemkindDefault++) {
memcpytest2<T>(&memD, usePinnedHost ? &memP : &memU,
numElements, useHostToHost,
useDeviceToDevice, useMemkindDefault);
}
}
}
}
}
// Runs memcpytest2 with power-of-two element counts from 1 up to maxElem,
// once with unpinned and once with pinned host memory.
// When maxElem is 0, it is derived from one eighth of the free device memory
// or one eighth of the free host memory, whichever is smaller.
template <typename T>
void memcpytest2_sizes(size_t maxElem = 0) {
  int deviceId;
  HIP_CHECK(hipGetDevice(&deviceId));

  size_t gpuFree, gpuTotal, cpuFree, cpuTotal;
  HIP_CHECK(hipMemGetInfo(&gpuFree, &gpuTotal));
  memcpytest2_get_host_memory(&cpuFree, &cpuTotal);

  if (maxElem == 0) {
    // Use lesser maxElem if not enough host memory available
    const size_t gpuLimit = gpuFree / sizeof(T) / 8;
    const size_t cpuLimit = cpuFree / sizeof(T) / 8;
    maxElem = cpuLimit;
    if (gpuLimit < cpuLimit) {
      maxElem = gpuLimit;
    }
  }

  HIP_CHECK(hipDeviceReset());
  DeviceMemory<T> memD(maxElem);
  HostMemory<T> memU(maxElem, 0 /*usePinnedHost*/);
  HostMemory<T> memP(maxElem, 1 /*usePinnedHost*/);

  size_t elem = 1;
  while (elem <= maxElem) {
    memcpytest2<T>(&memD, &memU, elem, 1, 1, 0);  // unpinned host
    memcpytest2<T>(&memD, &memP, elem, 1, 1, 0);  // pinned host
    elem *= 2;
  }
}
// Runs memcpytest2 on maxElem / 2 elements while shifting the device and/or
// host buffers by a range of element offsets: every offset in [0, 512), then
// 512, 1024, ... doubling while the offset stays below maxElem / 2.
// devOffsets / hostOffsets select which side the offset is applied to.
template <typename T>
void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) {
  int deviceId;
  HIP_CHECK(hipGetDevice(&deviceId));
  size_t free, total;
  HIP_CHECK(hipMemGetInfo(&free, &total));
  HIP_CHECK(hipDeviceReset());

  DeviceMemory<T> memD(maxElem);
  HostMemory<T> memU(maxElem, 0 /*usePinnedHost*/);
  HostMemory<T> memP(maxElem, 1 /*usePinnedHost*/);
  const size_t elem = maxElem / 2;

  // Applies one offset to the selected buffers and runs both host flavours.
  auto runAtOffset = [&](size_t offset) {
    assert(elem + offset < maxElem);
    if (devOffsets) {
      memD.offset(offset);
    }
    if (hostOffsets) {
      memU.offset(offset);
      memP.offset(offset);
    }
    memcpytest2<T>(&memD, &memU, elem, 1, 1, 0);  // unpinned host
    memcpytest2<T>(&memD, &memP, elem, 1, 1, 0);  // pinned host
  };

  // Every small offset, one element at a time.
  for (size_t dense = 0; dense < 512; dense++) {
    runAtOffset(dense);
  }
  // Larger offsets, doubling each step.
  for (size_t sparse = 512; sparse < elem; sparse *= 2) {
    runAtOffset(sparse);
  }
}
// Create multiple threads to stress multi-thread locking behavior in the
// allocation/deallocation/tracking logic.
//   serialize     - if true, the first thread is joined before the second one
//                   is started; if false, both threads run concurrently.
//   usePinnedHost - forwarded to the HostMemory buffers of both threads.
// Both threads are always joined before this function returns: destroying a
// joinable std::thread calls std::terminate, and the threads use the local
// buffers declared here.
// NOTE(review): both threads share the same DeviceMemory object - confirm that
// concurrent use of the same device buffers is intended.
template <typename T>
void multiThread_1(bool serialize, bool usePinnedHost) {
  DeviceMemory<T> memD(NUM_ELM);
  HostMemory<T> mem1(NUM_ELM, usePinnedHost);
  HostMemory<T> mem2(NUM_ELM, usePinnedHost);

  std::thread t1(memcpytest2<T>, &memD, &mem1, NUM_ELM, 0, 0, 0);
  if (serialize) {
    t1.join();
  }

  std::thread t2(memcpytest2<T>, &memD, &mem2, NUM_ELM, 0, 0, 0);

  // In the non-serialized case t1 is still running (or finished but unjoined).
  if (t1.joinable()) {
    t1.join();
  }
  t2.join();
}
/*
This testcase verifies the boundary checks of hipMemcpy API for different sizes
*/
TEST_CASE("Unit_hipMemcpy_BoundaryCheck") {
  constexpr int kKi = 1024;
  constexpr int kMi = 1024 * 1024;

  const size_t maxElem = 32 * kMi;
  DeviceMemory<float> memD(maxElem);
  HostMemory<float> memU(maxElem, 0 /*usePinnedHost*/);
  // NOTE(review): memP is created with usePinnedHost == 0 even though the
  // name suggests pinned memory - confirm whether this is intended.
  HostMemory<float> memP(maxElem, 0 /*usePinnedHost*/);

  // Full-size copy first.
  memcpytest2<float>(&memD, &memU, 32 * kMi, 0, 0, 0);

  // Then one generated size per run of this test case.
  auto numElem = GENERATE(15 * kMi, 16 * kMi,
                          16 * kMi + 16 * kKi,
                          16 * kMi + 512 * kKi,
                          17 * kMi + kKi,
                          32 * kMi);
  memcpytest2<float>(&memD, &memP, numElem, 0, 0, 0);
}
/*
This testcase verifies hipMemcpy with element offsets applied first to the
device buffers only and then to the host buffers only
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) {
  HIP_CHECK(hipDeviceReset());

  const size_t elementCount = 256 * 1024;
  // Device-side offsets only.
  memcpytest2_offsets<TestType>(elementCount, true, false);
  // Host-side offsets only.
  memcpytest2_offsets<TestType>(elementCount, false, true);
}
+19
Просмотреть файл
@@ -0,0 +1,19 @@
# Stress tests for the module launch APIs.
# The hipExtModuleLaunchKernel corner test is only compiled when HIP_PLATFORM
# matches "amd"; the hipModuleLaunchKernel corner test is compiled everywhere.
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
hipExtModuleLaunchKernel_CornerTest.cc
hipModuleLaunchKernel_CornerTests.cc
)
else()
set(TEST_SRC
hipModuleLaunchKernel_CornerTests.cc
)
endif()
# Compiles kernels.cc with --genco into kernels.code inside the hipTestMain
# directory; the corner tests load that code object by file name.
add_custom_target(kernels.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${HIP_COMMON_DIR}/tests/catch/stress/module/kernels.cc -o ${HIP_PATH}/catch/hipTestMain/kernels.code -I${HIP_PATH}/include/ -I${HIP_COMMON_DIR}/tests/catch/include)
# Create shared lib of all tests
add_library(module SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
# Make build_stress_test build both the test library and the code object
add_dependencies(build_stress_test module kernels.code)
+86
Просмотреть файл
@@ -0,0 +1,86 @@
/*
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Test Scenario
hipExtModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions
*/
#include "hip_test_common.hh"
#include "hip_test_kernels.hh"
#include "hip/hip_ext.h"
#define fileName "kernels.code"
#define dummyKernel "EmptyKernel"
// One launch configuration: grid dimensions followed by block dimensions.
// The member order matters because the tests fill this struct with
// aggregate initializers.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
/*
This testcase verifies hipExtModuleLaunchKernel API Corner
cases
*/
TEST_CASE("Stress_hipExtModuleLaunchKernel_CornerCases") {
  // Load the code object and look up the empty kernel by name.
  hipModule_t Module;
  hipFunction_t DummyKernel;
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
  constexpr auto gridblocksize{6};

  // The kernel takes no arguments; an empty struct is passed as the argument
  // buffer through the launch-parameter config array.
  struct {
  } args;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  size_t size = sizeof(args);
  void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                     HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                     HIP_LAUNCH_PARAM_END};

  // The block-dimension limits come from the device; a failed query would
  // leave them uninitialized, so it is checked like every other API call.
  hipDeviceProp_t deviceProp;
  HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
  unsigned int maxblockX = deviceProp.maxThreadsDim[0];
  unsigned int maxblockY = deviceProp.maxThreadsDim[1];
  unsigned int maxblockZ = deviceProp.maxThreadsDim[2];

  // One extreme per dimension: maximum block size in X/Y/Z, then UINT32_MAX
  // as grid value in X/Y/Z.
  struct gridblockDim test[gridblocksize] = {{1, 1, 1, maxblockX, 1, 1},
                                             {1, 1, 1, 1, maxblockY, 1},
                                             {1, 1, 1, 1, 1, maxblockZ},
                                             {UINT32_MAX, 1, 1, 1, 1, 1},
                                             {1, UINT32_MAX, 1, 1, 1, 1},
                                             {1, 1, UINT32_MAX, 1, 1, 1}};

  // Launching kernel with corner cases in grid and block dimensions
  for (int i = 0; i < gridblocksize; i++) {
    HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel,
                                       test[i].gridX,
                                       test[i].gridY,
                                       test[i].gridZ,
                                       test[i].blockX,
                                       test[i].blockY,
                                       test[i].blockZ,
                                       0,
                                       stream, NULL,
                                       reinterpret_cast<void**>(&config1),
                                       nullptr, nullptr, 0));
  }

  HIP_CHECK(hipStreamDestroy(stream));
  // Release the module loaded at the top of the test.
  HIP_CHECK(hipModuleUnload(Module));
}
+90
Просмотреть файл
@@ -0,0 +1,90 @@
/*
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Test Scenario
hipModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions
*/
#include "hip_test_common.hh"
#include "hip_test_kernels.hh"
#include "hip/hip_ext.h"
#define fileName "kernels.code"
#define dummyKernel "EmptyKernel"
// One launch configuration: grid dimensions followed by block dimensions.
// The member order matters because the tests fill this struct with
// aggregate initializers.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
/*
This testcase verifies hipModuleLaunchKernel API Corner
cases
*/
TEST_CASE("Stress_hipModuleLaunchKernel_CornerCases") {
  HIP_CHECK(hipSetDevice(0));
  hipStream_t stream1;
  CTX_CREATE()

  // Load the code object and look up the empty kernel by name.
  hipModule_t Module;
  hipFunction_t DummyKernel;
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
  HIP_CHECK(hipStreamCreate(&stream1));

  // The dimension limits come from the device; a failed query would leave
  // them uninitialized, so it is checked like every other API call.
  hipDeviceProp_t deviceProp;
  HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
  unsigned int maxblockX = deviceProp.maxThreadsDim[0];
  unsigned int maxblockY = deviceProp.maxThreadsDim[1];
  unsigned int maxblockZ = deviceProp.maxThreadsDim[2];
#if HT_NVIDIA
  // Use the grid limits reported by the device.
  unsigned int maxgridX = deviceProp.maxGridSize[0];
  unsigned int maxgridY = deviceProp.maxGridSize[1];
  unsigned int maxgridZ = deviceProp.maxGridSize[2];
#else
  // Use the largest 32-bit value for every grid dimension.
  unsigned int maxgridX = UINT32_MAX;
  unsigned int maxgridY = UINT32_MAX;
  unsigned int maxgridZ = UINT32_MAX;
#endif

  // One extreme per dimension: maximum block size in X/Y/Z, then maximum
  // grid size in X/Y/Z.
  struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1},
                                 {1, 1, 1, 1, maxblockY, 1},
                                 {1, 1, 1, 1, 1, maxblockZ},
                                 {maxgridX, 1, 1, 1, 1, 1},
                                 {1, maxgridY, 1, 1, 1, 1},
                                 {1, 1, maxgridZ, 1, 1, 1}};

  for (const auto& dims : test) {
    HIP_CHECK(hipModuleLaunchKernel(DummyKernel,
                                    dims.gridX,
                                    dims.gridY,
                                    dims.gridZ,
                                    dims.blockX,
                                    dims.blockY,
                                    dims.blockZ,
                                    0,
                                    stream1, NULL, NULL));
  }

  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY();
}
+28
Просмотреть файл
@@ -0,0 +1,28 @@
/*
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include "hip/hip_runtime.h"
// Kernel with an empty body. It is declared extern "C" so the symbol keeps the
// plain name "EmptyKernel", which is the name the module tests look up.
extern "C" __global__ void EmptyKernel() {}
+1
Просмотреть файл
@@ -1,3 +1,4 @@
add_subdirectory(module)
add_subdirectory(memory)
add_subdirectory(deviceLib)
add_subdirectory(stream)
+51
Просмотреть файл
@@ -0,0 +1,51 @@
# Unit tests for the module APIs.
# The "amd" list contains every source of the generic list plus the tests that
# are only compiled when HIP_PLATFORM matches "amd" (hipExt* launch tests,
# hipModuleTexture2dDrv and hipOpenCLCOTest).
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
hipExtLaunchKernelGGL.cc
hipExtModuleLaunchKernel.cc
hipExtLaunchMultiKernelMultiDevice.cc
hipModuleLaunchKernel.cc
hipFuncSetCacheConfig.cc
hipModuleUnload.cc
hipFuncSetAttribute.cc
hipModuleLoadData.cc
hipFuncSetSharedMemConfig.cc
hipManagedKeyword.cc
hipModuleGetGlobal.cc
hipFuncGetAttributes.cc
hipModule.cc
hipModuleLoadDataMultThreadOnMultGPU.cc
hipModuleLoadDataMultThreaded.cc
hipModuleLoadMultiThreaded.cc
hipModuleLoadUnloadStress.cc
hipModuleNegative.cc
hipModuleOccupancyMaxPotentialBlockSize.cc
hipModuleTexture2dDrv.cc
hipOpenCLCOTest.cc
)
else()
# Sources compiled on every other platform.
set(TEST_SRC
hipModuleLaunchKernel.cc
hipFuncSetCacheConfig.cc
hipModuleUnload.cc
hipFuncSetAttribute.cc
hipModuleLoadData.cc
hipFuncSetSharedMemConfig.cc
hipManagedKeyword.cc
hipModuleGetGlobal.cc
hipFuncGetAttributes.cc
hipModule.cc
hipModuleLoadDataMultThreadOnMultGPU.cc
hipModuleLoadDataMultThreaded.cc
hipModuleLoadMultiThreaded.cc
hipModuleLoadUnloadStress.cc
hipModuleNegative.cc
hipModuleOccupancyMaxPotentialBlockSize.cc
)
endif()
# Compiles module_kernels.cc with --genco into module_kernels.code inside the
# hipTestMain directory.
add_custom_target(module_kernels.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${HIP_COMMON_DIR}/tests/catch/unit/module/module_kernels.cc -o ${HIP_PATH}/catch/hipTestMain/module_kernels.code -I${HIP_PATH}/include/ -I${HIP_COMMON_DIR}/tests/catch/include)
# Create shared lib of all tests
add_library(ModuleTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
# Make build_tests build both the test library and the code object
add_dependencies(build_tests ModuleTest module_kernels.code)
Исполняемый файл
+129
Просмотреть файл
@@ -0,0 +1,129 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* Test Scenarios
1. Verify kernel execution time of the particular kernel
2. Verify hipExtLaunchKernelGGL API by disabling time flag in event creation
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include "hip/hip_ext.h"
#define FOURSEC_KERNEL 4999
#define TWOSEC_KERNEL 2999
// Device-side variable read and written by the kernels in this file; starts at 1.
__device__ int globalvar = 1;

// Kernel that updates the device variable `globalvar` around a call to
// HipTest::waitKernel(2, clockrate): 0x2222 becomes 0x3333 before the wait,
// and any value other than 0x3333 becomes 0x5555 after it.
__global__ void TwoSecKernel_GlobalVar(int clockrate) {
  constexpr int kValueBeforeWait = 0x2222;
  constexpr int kValueAfterMatch = 0x3333;
  constexpr int kValueOnMismatch = 0x5555;

  if (globalvar == kValueBeforeWait) {
    globalvar = kValueAfterMatch;
  }
  HipTest::waitKernel(2, clockrate);
  if (globalvar != kValueAfterMatch) {
    globalvar = kValueOnMismatch;
  }
}
// Kernel that updates the device variable `globalvar` around a call to
// HipTest::waitKernel(4, clockrate): 1 becomes 0x2222 before the wait, and
// 0x2222 becomes 0x4444 after it.
__global__ void FourSecKernel_GlobalVar(int clockrate) {
  constexpr int kInitialValue = 1;
  constexpr int kValueBeforeWait = 0x2222;
  constexpr int kValueAfterWait = 0x4444;

  if (globalvar == kInitialValue) {
    globalvar = kValueBeforeWait;
  }
  HipTest::waitKernel(4, clockrate);
  if (globalvar == kValueBeforeWait) {
    globalvar = kValueAfterWait;
  }
}
/*
* In this scenario, we create events with the timing flag disabled.
* We then launch the kernel using hipExtLaunchKernelGGL, passing the
* timing-disabled events, and try to fetch the kernel execution time using
* the hipEventElapsedTime API, which is expected to fail because timing is
* disabled.
*/
TEST_CASE("Unit_hipExtLaunchKernelGGL_TimeFlagDisabled") {
  hipStream_t stream;
  HIP_CHECK(hipSetDevice(0));

  // Clock rate of device 0, forwarded to the kernel.
  int clkRate = 0;
  HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));

  // Both events are created with timing disabled.
  hipEvent_t start_event, end_event;
  HIP_CHECK(hipEventCreateWithFlags(&start_event, hipEventDisableTiming));
  HIP_CHECK(hipEventCreateWithFlags(&end_event, hipEventDisableTiming));
  HIP_CHECK(hipStreamCreate(&stream));

  // Launch the kernel with the timing-disabled events attached.
  hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0,
                        stream, start_event, end_event, 0, clkRate);
  HIP_CHECK(hipStreamSynchronize(stream));

  // Querying the elapsed time must not succeed for these events.
  float time_2sec;
  const hipError_t elapsedStatus =
      hipEventElapsedTime(&time_2sec, start_event, end_event);
  REQUIRE(elapsedStatus != hipSuccess);

  // Destroying the events and streams
  HIP_CHECK(hipStreamDestroy(stream));
  HIP_CHECK(hipEventDestroy(start_event));
  HIP_CHECK(hipEventDestroy(end_event));
}
/*
* Launching FourSecKernel and TwoSecKernel and then we try to
* get the event elapsed time of each kernel using the start and
* end events.The event elapsed time should return us the kernel
* execution time for that particular kernel
*/
TEST_CASE("Unit_hipExtLaunchKernelGGL_KernelTimeExecution") {
  hipStream_t stream;
  HIP_CHECK(hipSetDevice(0));

  // Clock rate of device 0, forwarded to both kernels.
  int clkRate = 0;
  HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));

  // One start/end event pair per kernel, then the stream.
  hipEvent_t start_event1, end_event1, start_event2, end_event2;
  HIP_CHECK(hipEventCreate(&start_event1));
  HIP_CHECK(hipEventCreate(&end_event1));
  HIP_CHECK(hipEventCreate(&start_event2));
  HIP_CHECK(hipEventCreate(&end_event2));
  HIP_CHECK(hipStreamCreate(&stream));

  // Launching 4sec and 2sec kernels on the same stream
  hipExtLaunchKernelGGL(FourSecKernel_GlobalVar, dim3(1), dim3(1), 0,
                        stream, start_event1, end_event1, 0, clkRate);
  hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0,
                        stream, start_event2, end_event2, 0, clkRate);
  HIP_CHECK(hipStreamSynchronize(stream));

  // Elapsed time between each kernel's own start and end events.
  float time_4sec, time_2sec;
  HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1));
  HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2));

  INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs" << time_4sec
       << "Kernel2-<" << TWOSEC_KERNEL << "Vs" << time_2sec);

  // Each measured time has to stay below its upper bound.
  const float limit4sec = static_cast<float>(FOURSEC_KERNEL);
  const float limit2sec = static_cast<float>(TWOSEC_KERNEL);
  REQUIRE(time_4sec < limit4sec);
  REQUIRE(time_2sec < limit2sec);

  // Destroying streams and events
  HIP_CHECK(hipStreamDestroy(stream));
  HIP_CHECK(hipEventDestroy(start_event1));
  HIP_CHECK(hipEventDestroy(end_event1));
  HIP_CHECK(hipEventDestroy(start_event2));
  HIP_CHECK(hipEventDestroy(end_event2));
}
+128
Просмотреть файл
@@ -0,0 +1,128 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* This testfile verifies the basic functionality of
hipExtLaunchMultiKernelMultiDevice API.
It can be tested on single GPU or multi GPUs.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include "hip/hip_runtime.h"
#define MAX_GPUS 8
#define NUM_KERNEL_ARGS 3
/*
This testcase verifies the hipExtLaunchMultiKernelMultiDevice API for
different datatypes. It
1. Initializes the device variables.
2. Initializes the hipLaunchParams structures that are passed to the
hipExtLaunchMultiKernelMultiDevice API.
3. Launches the vector_square kernel, which squares each element.
4. Validates the result against the square of each input element.
*/
TEMPLATE_TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Basic", "", int
                   , float, double) {
  // Per-device input/output buffers; only the first nGpu entries are used.
  TestType *A_d[MAX_GPUS], *C_d[MAX_GPUS];
  TestType *A_h, *C_h;
  size_t N = 1000000;
  size_t Nbytes = N * sizeof(TestType);
  int nGpu = 0;
  HIP_CHECK(hipGetDeviceCount(&nGpu));
  if (nGpu < 1) {
    SUCCEED("info: didn't find any GPU! Skipping the testcase");
  } else {
    // All per-device arrays below are sized MAX_GPUS, so clamp the count.
    if (nGpu > MAX_GPUS) {
      nGpu = MAX_GPUS;
    }
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                  &A_h, nullptr, &C_h, N, false);
    const unsigned blocks = 512;
    const unsigned threadsPerBlock = 256;

    // Allocating and initializing device variables
    hipStream_t stream[MAX_GPUS];
    for (int i = 0; i < nGpu; i++) {
      HIP_CHECK(hipSetDevice(i));
      HIP_CHECK(hipStreamCreateWithFlags(&stream[i], hipStreamNonBlocking));
      hipDeviceProp_t props;
      HIP_CHECK(hipGetDeviceProperties(&props, i/*deviceID*/));
      INFO("Running on bus 0x" << props.pciBusID << " " << props.name);
      INFO("Allocate device mem " << 2*Nbytes/1024.0/1024.0);
      HIP_CHECK(hipMalloc(&A_d[i], Nbytes));
      HIP_CHECK(hipMalloc(&C_d[i], Nbytes));
      HIP_CHECK(hipMemcpy(A_d[i], A_h, Nbytes, hipMemcpyHostToDevice));
    }

    // Fixed-size array instead of an unchecked, never-freed malloc: nGpu is
    // clamped to MAX_GPUS above, and nothing leaks if an assertion aborts
    // the test early.
    hipLaunchParams launchParamsList[MAX_GPUS];
    void *args[MAX_GPUS * NUM_KERNEL_ARGS];

    // Initializing the hipLaunchParams structure with the device variables
    // and kernel, then launching through hipExtLaunchMultiKernelMultiDevice
    for (int i = 0; i < nGpu; i++) {
      args[i * NUM_KERNEL_ARGS]     = &A_d[i];
      args[i * NUM_KERNEL_ARGS + 1] = &C_d[i];
      args[i * NUM_KERNEL_ARGS + 2] = &N;
      launchParamsList[i].func =
        reinterpret_cast<void *>(HipTest::vector_square<TestType>);
      launchParamsList[i].gridDim = dim3(blocks);
      launchParamsList[i].blockDim = dim3(threadsPerBlock);
      launchParamsList[i].sharedMem = 0;
      launchParamsList[i].stream = stream[i];
      launchParamsList[i].args = args + i * NUM_KERNEL_ARGS;
    }
    // The launch status was previously dropped; a failed launch would only
    // have surfaced as a confusing data mismatch further down.
    HIP_CHECK(hipExtLaunchMultiKernelMultiDevice(launchParamsList, nGpu, 0));

    // Validating the result
    for (int j = 0; j < nGpu; j++) {
      HIP_CHECK(hipStreamSynchronize(stream[j]));
      hipDeviceProp_t props;
      HIP_CHECK(hipGetDeviceProperties(&props, j/*deviceID*/));
      INFO("Checking result on bus " << props.pciBusID << " " << props.name);
      HIP_CHECK(hipSetDevice(j));
      HIP_CHECK(hipMemcpy(C_h, C_d[j], Nbytes, hipMemcpyDeviceToHost));
      for (size_t i = 0; i < N; i++) {
        if (C_h[i] != A_h[i] * A_h[i]) {
          INFO("validation failed at index " << i << ": got " << C_h[i]
               << ", expected " << A_h[i]*A_h[i]);
          REQUIRE(false);
        }
      }
    }

    // DeAllocating memory
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                  A_h, nullptr, C_h, false);
    for (int j = 0; j < nGpu; j++) {
      HIP_CHECK(hipFree(A_d[j]));
      HIP_CHECK(hipFree(C_d[j]));
      HIP_CHECK(hipStreamDestroy(stream[j]));
    }
  }
}
Исполняемый файл
+433
Просмотреть файл
@@ -0,0 +1,433 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* Test Scenarios
1. hipExtModuleLaunchKernel Negative Scenarios
2. hipExtModuleLaunchKernel API verifying the kernel execution time of a particular kernel.
3. hipExtModuleLaunchKernel API verifying the kernel execution time by disabling the time flag
4. hipModuleLaunchKernel Work Group tests =>
- (block.x * block.y * block.z) <= Work Group Size
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
- (block.x * block.y * block.z) > Work Group Size
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
*/
#include <math.h>
#include "hip_test_common.hh"
#include "hip_test_kernels.hh"
#include "hip/hip_ext.h"
#define fileName "module_kernels.code"
#define matmulK "matmulK"
#define SixteenSec "SixteenSecKernel"
#define KernelandExtra "KernelandExtraParams"
#define FourSec "FourSecKernel"
#define TwoSec "TwoSecKernel"
#define globalDevVar "deviceGlobal"
#define dummyKernel "EmptyKernel"
#define FOURSEC_KERNEL 4999
#define TWOSEC_KERNEL 2999
// Six unsigned launch extents. Judging by the member names these are the
// grid and block sizes along x, y and z; nothing in the visible part of this
// file uses the struct.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
// Test fixture for the hipExtModuleLaunchKernel tests in this file. It owns
// the host/device buffers, streams, events, the loaded module and the kernel
// handles. Each public test method calls AllocateMemory() and ModuleLoad()
// on entry and DeAllocateMemory() on exit.
class ModuleLaunchKernel {
// Matrix edge length and total element count used for all buffers.
int N = 64;
int SIZE = N*N;
// A and B are host arrays from new[]; C is pinned host memory obtained
// with hipHostMalloc (see AllocateMemory).
int *A, *B, *C;
// Device buffers obtained with hipMalloc, SIZE ints each.
hipDeviceptr_t *Ad, *Bd;
hipStream_t stream1, stream2;
// Two event pairs created with hipEventCreate, plus one pair created with
// hipEventDisableTiming for the timing-disabled test.
hipEvent_t start_event1, end_event1, start_event2, end_event2,
start_timingDisabled, end_timingDisabled;
// Module loaded from module_kernels.code and the address of its
// "deviceGlobal" symbol.
hipModule_t Module;
hipDeviceptr_t deviceGlobal;
// Kernel handles resolved by ModuleLoad().
hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel,
TwoSecKernel, KernelandExtraParamKernel, DummyKernel;
// Raw kernel-argument buffers passed via HIP_LAUNCH_PARAM_BUFFER_POINTER.
// args1 carries real buffers; args2 carries only the clock rate (pointers
// are NULL and _n is 0).
// NOTE(review): member order presumably has to match the parameter layout
// of the kernels in module_kernels.code - confirm before reordering.
struct {
int clockRate;
void* _Ad;
void* _Bd;
void* _Cd;
int _n;
} args1, args2;
// Empty argument buffer used for the DummyKernel launches.
struct {
} args3;
// sizeof(args1), sizeof(args2) and sizeof(args3), set in AllocateMemory().
size_t size1;
size_t size2;
size_t size3;
// Size reported by hipModuleGetGlobal for "deviceGlobal".
size_t deviceGlobalSize;
public :
void AllocateMemory();
void DeAllocateMemory();
void ModuleLoad();
// Declared here; no definition is visible in this part of the file.
void Module_Negative_tests();
void ExtModule_Negative_tests();
void Module_WorkGroup_Test();
void ExtModule_KernelExecutionTime();
void ExtModule_Disabled_Timingflag();
};
// Creates every resource the test methods need: host and device buffers,
// two streams, the kernel-argument buffers and all events. Must be paired
// with DeAllocateMemory().
void ModuleLaunchKernel::AllocateMemory() {
  // Host inputs hold SIZE (= N*N) ints. The element count passed to new[]
  // must not be multiplied by sizeof(int); doing so allocated sizeof(int)
  // times more memory than the code ever touches.
  A = new int[SIZE];
  B = new int[SIZE];
  for (int i = 0; i < N; i++) {
    for (int j = 0; j < N; j++) {
      A[i*N + j] = 1;
      B[i*N + j] = 1;
    }
  }

  HIP_CHECK(hipStreamCreate(&stream1));
  HIP_CHECK(hipStreamCreate(&stream2));

  // Device inputs and the pinned host output buffer, SIZE ints each.
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad),
                      SIZE*sizeof(int)));
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd),
                      SIZE*sizeof(int)));
  HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C), SIZE*sizeof(int)));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));

  int clkRate = 0;
  HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));

  // args1: full argument set (buffers, size and clock rate).
  args1._Ad = Ad;
  args1._Bd = Bd;
  args1._Cd = C;
  args1._n = N;
  args1.clockRate = clkRate;
  // args2: only the clock rate is meaningful; buffers are left NULL.
  args2._Ad = NULL;
  args2._Bd = NULL;
  args2._Cd = NULL;
  args2._n = 0;
  args2.clockRate = clkRate;
  size1 = sizeof(args1);
  size2 = sizeof(args2);
  size3 = sizeof(args3);

  // Timing-enabled event pairs.
  HIP_CHECK(hipEventCreate(&start_event1));
  HIP_CHECK(hipEventCreate(&end_event1));
  HIP_CHECK(hipEventCreate(&start_event2));
  HIP_CHECK(hipEventCreate(&end_event2));
  // Event pair with timing disabled, used by ExtModule_Disabled_Timingflag.
  HIP_CHECK(hipEventCreateWithFlags(&start_timingDisabled,
                                    hipEventDisableTiming));
  HIP_CHECK(hipEventCreateWithFlags(&end_timingDisabled,
                                    hipEventDisableTiming));
}
// Loads the code object named by fileName and resolves every kernel handle
// and the global symbol that the tests in this file use.
void ModuleLaunchKernel::ModuleLoad() {
  HIP_CHECK(hipModuleLoad(&Module, fileName));

  // Kernel entry points, looked up by the names defined at the top of the
  // file.
  HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK));
  HIP_CHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSec));
  HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel, Module,
                                 KernelandExtra));
  HIP_CHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSec));
  HIP_CHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSec));
  HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));

  // Address and size of the module-level device variable.
  HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, Module,
                               globalDevVar));
}
// Releases everything created by AllocateMemory() and unloads the module
// loaded by ModuleLoad().
void ModuleLaunchKernel::DeAllocateMemory() {
  // Events: both timed pairs, then the timing-disabled pair.
  HIP_CHECK(hipEventDestroy(start_event1));
  HIP_CHECK(hipEventDestroy(end_event1));
  HIP_CHECK(hipEventDestroy(start_event2));
  HIP_CHECK(hipEventDestroy(end_event2));
  HIP_CHECK(hipEventDestroy(start_timingDisabled));
  HIP_CHECK(hipEventDestroy(end_timingDisabled));

  // Streams.
  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipStreamDestroy(stream2));

  // Host arrays from new[], device buffers, then the pinned host buffer.
  delete[] A;
  delete[] B;
  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  HIP_CHECK(hipHostFree(C));

  HIP_CHECK(hipModuleUnload(Module));
}
/*
* In this scenario,We launch the 4 sec kernel and 2 sec kernel
* and we fetch the event execution time of each kernel and it
* should not exceed the execution time of that particular kernel
*/
// Queues FourSecKernel and TwoSecKernel on stream1, each with its own
// start/end event pair, and requires each measured elapsed time to stay
// below the matching upper-bound macro.
void ModuleLaunchKernel::ExtModule_KernelExecutionTime() {
  HIP_CHECK(hipSetDevice(0));
  AllocateMemory();
  ModuleLoad();

  // Both kernels receive args2 through the "extra" launch parameter.
  void *launchConfig[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
                          HIP_LAUNCH_PARAM_END};
  void **extra = reinterpret_cast<void**>(&launchConfig);

  // Single-thread launches, back to back on the same stream.
  HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0,
                                     stream1, NULL, extra,
                                     start_event1, end_event1, 0));
  HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0,
                                     stream1, NULL, extra,
                                     start_event2, end_event2, 0));
  HIP_CHECK(hipStreamSynchronize(stream1));

  float fourSecElapsed, twoSecElapsed;
  HIP_CHECK(hipEventElapsedTime(&fourSecElapsed, start_event1, end_event1));
  HIP_CHECK(hipEventElapsedTime(&twoSecElapsed, start_event2, end_event2));
  INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs"
       << fourSecElapsed << "Kernel2-<" << TWOSEC_KERNEL << "Vs"
       << twoSecElapsed);

  // Verifying the kernel execution time
  REQUIRE(fourSecElapsed < static_cast<float>(FOURSEC_KERNEL));
  REQUIRE(twoSecElapsed < static_cast<float>(TWOSEC_KERNEL));

  DeAllocateMemory();
}
/*
* In this Scenario, we create events by disabling the timing flag
* We then Launch the kernel using hipExtModuleLaunchKernel by passing
* disabled events and try to fetch kernel execution time using
* hipEventElapsedTime API which would fail as the flag is disabled.
*/
// Launches TwoSecKernel with the event pair that was created with
// hipEventDisableTiming and requires hipEventElapsedTime on that pair to
// return an error.
void ModuleLaunchKernel::ExtModule_Disabled_Timingflag() {
  AllocateMemory();
  ModuleLoad();

  // Kernel arguments are supplied through the "extra" launch parameter.
  void *launchConfig[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
                          HIP_LAUNCH_PARAM_END};
  void **extra = reinterpret_cast<void**>(&launchConfig);

  HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0,
                                     stream1, NULL, extra,
                                     start_timingDisabled,
                                     end_timingDisabled, 0));
  HIP_CHECK(hipStreamSynchronize(stream1));

  // Any status other than hipSuccess is accepted here.
  float elapsed;
  REQUIRE(hipEventElapsedTime(&elapsed, start_timingDisabled,
                              end_timingDisabled) != hipSuccess);

  DeAllocateMemory();
}
/*
This testcase verifies negative scenarios of hipExtModuleLaunchKernel API
*/
// Negative scenarios for hipExtModuleLaunchKernel: every SECTION issues one
// launch with invalid arguments and requires a status other than hipSuccess.
// Resources are set up at the top and torn down at the bottom of the
// function. The hipGetDeviceProperties calls are checked so that the launch
// dimensions derived from deviceProp are never read from an unfilled struct.
void ModuleLaunchKernel::ExtModule_Negative_tests() {
  HIP_CHECK(hipSetDevice(0));
  // Allocating memory and loading kernel
  AllocateMemory();
  ModuleLoad();
  // Valid argument buffer, passed via the "extra" launch parameter.
  void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
                     HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                     HIP_LAUNCH_PARAM_END};
  // Used only to exercise "kernelParams and extra both given".
  void *params[] = {Ad};

  SECTION("Nullptr to kernel function") {
    REQUIRE(hipExtModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Max int value to block dimensions") {
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
                                     std::numeric_limits<uint32_t>::max(),
                                     std::numeric_limits<uint32_t>::max(),
                                     std::numeric_limits<uint32_t>::max(), 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Null values to all dimensions") {
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 0, 0,
                                     0,
                                     0,
                                     0, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing 0 for x dimension") {
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 1, 1,
                                     0,
                                     1,
                                     1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing 0 for y dimension") {
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 0, 1,
                                     1,
                                     0,
                                     1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing 0 for Z dimension") {
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 0,
                                     1,
                                     1,
                                     0, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing both kernel and extra params") {
    REQUIRE(hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1,
                                     1, 1, 0,
                                     stream1,
                                     reinterpret_cast<void**>(&params),
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing both than maxthreadsperblock to block dimensions") {
    hipDeviceProp_t deviceProp;
    HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
                                     deviceProp.maxThreadsPerBlock+1,
                                     deviceProp.maxThreadsPerBlock+1,
                                     deviceProp.maxThreadsPerBlock+1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Block dimension x = Max alloweed + 1") {
    hipDeviceProp_t deviceProp;
    HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
                                     deviceProp.maxThreadsDim[0]+1,
                                     1,
                                     1, 0, stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Block dimension Y = Max alloweed + 1") {
    hipDeviceProp_t deviceProp;
    HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
                                     1,
                                     deviceProp.maxThreadsDim[1]+1,
                                     1, 0, stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Block dimension Z = Max alloweed + 1") {
    hipDeviceProp_t deviceProp;
    HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
                                     1,
                                     1,
                                     deviceProp.maxThreadsDim[2]+1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config1),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  SECTION("Passing invalid config data in extra params") {
    // Deliberately malformed: the buffer pointer that should follow
    // HIP_LAUNCH_PARAM_BUFFER_POINTER is missing.
    void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
                       HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                       HIP_LAUNCH_PARAM_END};
    REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0,
                                     stream1, NULL,
                                     reinterpret_cast<void**>(&config3),
                                     nullptr, nullptr, 0) != hipSuccess);
  }
  DeAllocateMemory();
}
void ModuleLaunchKernel::Module_WorkGroup_Test() {
// Allocate memory and load modules
AllocateMemory();
ModuleLoad();
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3,
HIP_LAUNCH_PARAM_END};
hipDeviceProp_t deviceProp;
hipGetDeviceProperties(&deviceProp, 0);
double cuberootVal =
cbrt(static_cast<double>(deviceProp.maxThreadsPerBlock));
uint32_t cuberoot_floor = floor(cuberootVal);
uint32_t cuberoot_ceil = ceil(cuberootVal);
// Scenario: (block.x * block.y * block.z) <= Work Group Size where
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel,
1, 1, 1,
cuberoot_floor, cuberoot_floor, cuberoot_floor,
0, stream1, NULL,
reinterpret_cast<void**>(&config1),
nullptr, nullptr, 0));
// Scenario: (block.x * block.y * block.z) > Work Group Size where
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
REQUIRE(hipExtModuleLaunchKernel(DummyKernel,
1, 1, 1,
cuberoot_ceil, cuberoot_ceil, cuberoot_ceil+1,
0, stream1, NULL,
reinterpret_cast<void**>(&config1),
nullptr, nullptr, 0) != hipSuccess);
// DeAllocating memory
DeAllocateMemory();
}
/*
This testcase verifies the negative scenarios of
hipExtModuleLaunchKernel API
*/
TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative") {
  // The fixture method performs its own setup and teardown.
  ModuleLaunchKernel fixture;
  fixture.ExtModule_Negative_tests();
}
/*
This testcase verifies hipExtModuleLaunchKernel API by
disabling the timing flag
*/
TEST_CASE("Unit_hipExtModuleLaunchKernel_TimingflagDisabled") {
  // The fixture method performs its own setup and teardown.
  ModuleLaunchKernel fixture;
  fixture.ExtModule_Disabled_Timingflag();
}
/*
This testcase verifies hipExtModuleLaunchKernel API kernel
execution time
*/
TEST_CASE("Unit_hipExtModuleLaunchKernel_KernelExecutionTime") {
  // The fixture method performs its own setup and teardown.
  ModuleLaunchKernel fixture;
  fixture.ExtModule_KernelExecutionTime();
}
/*
This testcase verifies workgroup of hipExtModuleLaunchKernel API
*/
TEST_CASE("Unit_hipExtModuleLaunchKernel_WorkGroup") {
  // The fixture method performs its own setup and teardown.
  ModuleLaunchKernel fixture;
  fixture.Module_WorkGroup_Test();
}
+163
Просмотреть файл
@@ -0,0 +1,163 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <utility>
#define fileName "module_kernels.code"
#define kernel_name "hello_world"
namespace testhipFuncGetAttributesApi {
__global__
void fn(float* px, float* py) {
bool a[42];
__shared__ double b[69];
for (auto&& x : b) x = *py++;
for (auto&& x : a) x = *px++ > 0.0;
for (auto&& x : a) if (x) *--py = *--px;
}
template <int WGSIZE, int LDS>
__launch_bounds__(WGSIZE, 1) __global__ void kernelfn(int *x) {
__shared__ int lds[LDS];
for (int i = 0; i < LDS; ++i) {
lds[i] = x[i];
}
x[LDS - 1] = lds[0] / lds[LDS - 1];
}
template <int WGSIZE, int LDS> bool test_Attributes_Values() {
bool TestPassed = true;
hipFuncAttributes attr{};
hipFuncGetAttributes(&attr,
reinterpret_cast<void const *>(kernelfn<WGSIZE, LDS>));
if (attr.maxThreadsPerBlock != WGSIZE) {
TestPassed = false;
}
if (attr.sharedSizeBytes != LDS * sizeof(int)) {
TestPassed = false;
}
return TestPassed;
}
} // namespace testhipFuncGetAttributesApi
/**
* hipFuncGetAttributes and hipModuleGetFunction functional tests
* Scenario1: Validates the value of attribute "maxThreadsPerBlock" should be non zero.
* Scenario2: Validates the value of attribute
* "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK" should be non zero.
*/
// scenario 1
TEST_CASE("Unit_hipFuncGetAttributes_FuncTst") {
  // Query the attributes of the namespace-level test kernel.
  const void* kernelAddr =
      reinterpret_cast<const void*>(&testhipFuncGetAttributesApi::fn);
  hipFuncAttributes kernelAttr{};
  const hipError_t status = hipFuncGetAttributes(&kernelAttr, kernelAddr);

  // The call must succeed and report a non-zero thread limit.
  REQUIRE(status == hipSuccess);
  REQUIRE(kernelAttr.maxThreadsPerBlock != 0);
}
// scenario 2
TEST_CASE("Unit_hipFuncGetAttribute_FuncTst") {
  hipModule_t Module;
  int maxThreads;
  CTX_CREATE()

  // Resolve the kernel from the shared module file.
  hipFunction_t Function;
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  // The query must succeed and report a non-zero value.
  const hipError_t status = hipFuncGetAttribute(
      &maxThreads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function);
  REQUIRE(status == hipSuccess);
  REQUIRE(maxThreads != 0);

  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
/**
* hipFuncGetAttributes negative tests
* Scenario1: Validates returned error code for attr = nullptr
* Scenario2: Validates returned error code for function = nullptr
*/
TEST_CASE("Unit_hipFuncGetAttributes_NegTst") {
  SECTION("attr is nullptr") {
    // Valid kernel address, missing output struct.
    const hipError_t status = hipFuncGetAttributes(nullptr,
        reinterpret_cast<const void*>(&testhipFuncGetAttributesApi::fn));
    REQUIRE(status != hipSuccess);
  }
  SECTION("function is nullptr") {
    // Valid output struct, missing kernel address.
    hipFuncAttributes kernelAttr{};
    const hipError_t status = hipFuncGetAttributes(&kernelAttr, nullptr);
    REQUIRE(status != hipSuccess);
  }
}
/**
* hipFuncGetAttribute negative tests
* Scenario1: Validates returned error code for attrib_val = nullptr
* Scenario2: Validates returned error code for attrib = invalid = 0xff
*/
TEST_CASE("Unit_hipFuncGetAttribute_NegTst") {
  hipModule_t Module;
  CTX_CREATE()

  // Resolve the kernel from the shared module file.
  hipFunction_t Function;
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  SECTION("attr is nullptr") {
    // Valid attribute id, missing output pointer.
    const hipError_t status = hipFuncGetAttribute(
        nullptr, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function);
    REQUIRE(status != hipSuccess);
  }
  SECTION("attr is invalid") {
    // 0xff is used as an attribute id that the API must reject.
    int value;
    const hipError_t status = hipFuncGetAttribute(
        &value, static_cast<hipFunction_attribute>(0xff), Function);
    REQUIRE(status != hipSuccess);
  }

  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
/**
* hipFuncGetAttributes
* Scenario4: Validates the value of attribute "maxThreadsPerBlock".
* Scenario5: Validates the value of attribute "sharedSizeBytes".
*/
TEST_CASE("Unit_hipFuncGetAttributes_AttrTest") {
  namespace api = testhipFuncGetAttributesApi;

  // Every (WGSIZE, LDS) combination is evaluated, in this order, before the
  // results are combined; a failure in one does not skip the others.
  const bool results[] = {
    api::test_Attributes_Values<64, 64>(),
    api::test_Attributes_Values<128, 64>(),
    api::test_Attributes_Values<256, 64>(),
    api::test_Attributes_Values<512, 64>(),
    api::test_Attributes_Values<1024, 64>(),
    api::test_Attributes_Values<64, 128>(),
    api::test_Attributes_Values<128, 128>(),
    api::test_Attributes_Values<256, 128>(),
    api::test_Attributes_Values<512, 128>(),
    api::test_Attributes_Values<1024, 128>(),
    api::test_Attributes_Values<64, 256>(),
    api::test_Attributes_Values<128, 256>(),
    api::test_Attributes_Values<256, 256>(),
    api::test_Attributes_Values<512, 256>(),
    api::test_Attributes_Values<1024, 256>(),
  };

  bool allPassed = true;
  for (bool ok : results) {
    allPassed = allPassed && ok;
  }
  REQUIRE(allPassed);
}
+46
Просмотреть файл
@@ -0,0 +1,46 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip_test_common.hh"
// Kernel used as the target of hipFuncSetAttribute in the test below. In the
// visible code only its address is taken; it is not launched.
__global__ void fn(float* px, float* py) {
// Per-thread local array of 42 flags and a block-shared array of 69 doubles.
bool a[42];
__shared__ double b[69];
// Fill b from py, advancing py by one element per entry.
for (auto&& x : b) x = *py++;
// Set each flag from the sign test of successive px elements.
for (auto&& x : a) x = *px++ > 0.0;
// For every set flag, step both pointers back and copy one element from px
// to py.
for (auto&& x : a) if (x) *--py = *--px;
}
/*
This testcase verifies the basic functionality of the hipFuncSetAttribute
API, which takes a function that executes on the device.
hipFuncAttributeMaxDynamicSharedMemorySize -->
The sum of this value + sharedSizeBytes should not exceed the device attribute.
hipFuncAttributePreferredSharedMemoryCarveout -->
Carves out the shared memory.
*/
TEST_CASE("Unit_hipFuncSetAttribute_Basic") {
  // Both attributes are set to 0 on the same kernel.
  const void* kernelAddr = reinterpret_cast<const void*>(&fn);

  HIP_CHECK(hipFuncSetAttribute(kernelAddr,
                                hipFuncAttributeMaxDynamicSharedMemorySize,
                                0));
  HIP_CHECK(hipFuncSetAttribute(kernelAddr,
                                hipFuncAttributePreferredSharedMemoryCarveout,
                                0));
}
+36
Просмотреть файл
@@ -0,0 +1,36 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Kernel with an empty body; the test below only passes its address to
// hipFuncSetCacheConfig.
__global__ void Empty_Kernel() {
}
/*
This testcase verifies the basic functionality of the hipFuncSetCacheConfig
API. On GPU devices where L1 and shared memory use the same resources,
this sets the preferred cache configuration for the kernel function.
In this testcase we set hipFuncCachePreferL1, where L1 is
preferred over shared memory.
*/
TEST_CASE("Unit_hipFuncSetCacheConfig_Basic") {
  // Request the L1-preferring configuration for the empty kernel.
  void* kernelAddr = reinterpret_cast<void*>(Empty_Kernel);
  const hipFuncCache_t preferred = hipFuncCachePreferL1;
  HIP_CHECK(hipFuncSetCacheConfig(kernelAddr, preferred));
}
+107
Просмотреть файл
@@ -0,0 +1,107 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test Description:
// This test case verifies the working of hipFuncSetSharedMemConfig() api and
// the flag parameter
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Writes the first N elements of A into B in reverse order, staging them in
// dynamically sized shared memory. Indexing uses threadIdx.x only, and the
// caller must supply at least N * sizeof(int) bytes of dynamic shared
// memory; the test in this file launches one block of N threads.
__global__ void ReverseSeq(int *A, int *B, int N) {
  extern __shared__ int SMem[];
  const int tid = threadIdx.x;

  // Stage this thread's element, then wait until the whole block has
  // written before reading the mirrored slot.
  SMem[tid] = A[tid];
  __syncthreads();
  B[tid] = SMem[N - tid - 1];
}
/*
This testcase verifies the basic functionality of hipFuncSetSharedMemConfig API
by setting shared memory bank size
1. hipSharedMemBankSizeDefault
2. hipSharedMemBankSizeFourByte
3. hipSharedMemBankSizeEightByte
*/
TEST_CASE("Unit_hipFuncSetSharedMemConfig_Basic") {
  int *Ah{nullptr}, *RAh{nullptr}, NumElms = 128;
  int *Ad{nullptr}, *RAd{nullptr};
  const size_t NumBytes = NumElms * sizeof(int);
  HipTest::initArrays<int>(&Ad, &RAd, nullptr,
                           &Ah, &RAh, nullptr, NumElms, false);

  // Ah holds 0..NumElms-1 and RAh holds the expected reversed sequence.
  for (int i = 0; i < NumElms; ++i) {
    Ah[i] = i;
    RAh[i] = NumElms - i - 1;
  }
  HIP_CHECK(hipMemcpy(Ad, Ah, NumBytes, hipMemcpyHostToDevice));

  // The same set-config / launch / verify sequence is run once per bank
  // size instead of being written out three times.
  const hipSharedMemConfig bankConfigs[] = {hipSharedMemBankSizeDefault,
                                            hipSharedMemBankSizeFourByte,
                                            hipSharedMemBankSizeEightByte};
  for (hipSharedMemConfig bankConfig : bankConfigs) {
    INFO("Shared memory bank config: " << static_cast<int>(bankConfig));
    HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast<const void*>
                                        (&ReverseSeq),
                                        bankConfig));
    // Clear the output so a stale result from the previous iteration
    // cannot satisfy the check.
    HIP_CHECK(hipMemset(RAd, 0, NumBytes));

    // Kernel Launch with shared mem size of = NumElms * sizeof(int)
    ReverseSeq<<<1, NumElms, NumBytes>>>(Ad, RAd, NumElms);
    // A launch returns no status of its own; pick up configuration errors
    // here rather than as a data mismatch below.
    HIP_CHECK(hipGetLastError());

    // Verifying the results (the blocking copy also waits for the kernel)
    memset(Ah, 0, NumBytes);
    HIP_CHECK(hipMemcpy(Ah, RAd, NumBytes, hipMemcpyDeviceToHost));
    for (int i = 0; i < NumElms; ++i) {
      REQUIRE(Ah[i] == RAh[i]);
    }
  }

  HipTest::freeArrays<int>(Ad, RAd, nullptr,
                           Ah, RAh, nullptr, false);
}
+56
Просмотреть файл
@@ -0,0 +1,56 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
hipManagedKeyword API Scenario
1. Test hipModuleLoad on multiple GPUs
*/
#include "hip_test_common.hh"
#include "hip_test_kernels.hh"
#include "hip_test_checkers.hh"
#define MANAGED_VAR_INIT_VALUE 10
#define fileName "module_kernels.code"
/*
 For every device reported by hipGetDeviceCount this test
 1. makes the device current and loads the code object named by fileName,
 2. launches the "GPU_func" kernel once with a single thread,
 3. reads the module-level variable "x" back to the host and requires it to
    equal MANAGED_VAR_INIT_VALUE + 1.
 Every runtime call is checked so that a failing device query or
 synchronization is reported instead of being silently ignored.
*/
TEST_CASE("Unit_hipMangedKeyword_ModuleLoadMultiGPU") {
  int numDevices = 0, data = 0;
  hipDeviceptr_t x;
  size_t xSize = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  for (int i = 0; i < numDevices; i++) {
    HIP_CHECK(hipSetDevice(i));
    CTX_CREATE()
    hipModule_t Module;
    HIP_CHECK(hipModuleLoad(&Module, fileName));
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "GPU_func"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, 1, 1,
                                    1, 0, 0, NULL, NULL));
    // The kernel must have finished before "x" is read back.
    HIP_CHECK(hipDeviceSynchronize());
    HIP_CHECK(hipModuleGetGlobal(&x, &xSize, Module, "x"));
    // NOTE(review): xSize bytes are copied into an int; this assumes the
    // symbol "x" is int-sized - confirm against the kernel source.
    HIP_CHECK(hipMemcpyDtoH(&data, hipDeviceptr_t(x), xSize));
    REQUIRE(data == (1 + MANAGED_VAR_INIT_VALUE));
    HIP_CHECK(hipModuleUnload(Module));
    CTX_DESTROY()
  }
}
Исполняемый файл
+183
Просмотреть файл
@@ -0,0 +1,183 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the hipModuleLoad API On
1. Single code object
2. Multi Target architecture code object
*/
#include <cstdlib>
#include <fstream>
#include <string>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
#ifdef __linux__
#include <unistd.h>
#endif
#define LEN 64
#define SIZE (LEN << 2)
#define COMMAND_LEN 256
#define CODE_OBJ_SINGLEARCH "module_kernels.code"
#define kernel_name "hello_world"
#define CODE_OBJ_MULTIARCH "vcpy_kernel_multarch.code"
/*
This API loads the kernel function, Launches the kernel
which copies one variable to another and validates both
the device variables for the current GPU architecture
*/
void testCodeObjFile(const char *codeObjFile) {
float *A, *B;
float *Ad, *Bd;
HipTest::initArrays<float>(&Ad, &Bd, nullptr,
&A, &B, nullptr, LEN, false);
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Ad), A, SIZE));
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Bd), B, SIZE));
hipModule_t Module;
hipFunction_t Function;
HIP_CHECK(hipModuleLoad(&Module, codeObjFile));
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
struct {
void* _Ad;
void* _Bd;
} args;
args._Ad = reinterpret_cast<void*>(Ad);
args._Bd = reinterpret_cast<void*>(Bd);
size_t size = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0,
stream, NULL,
reinterpret_cast<void**>(&config)));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipMemcpyDtoH(B, reinterpret_cast<hipDeviceptr_t>(Bd), SIZE));
for (uint32_t i = 0; i < LEN; i++) {
REQUIRE(A[i] == B[i]);
}
HipTest::freeArrays<float>(Ad, Bd, nullptr,
A, B, nullptr,
false);
HIP_CHECK(hipModuleUnload(Module));
}
#ifdef __linux__
/**
 * Check if environment variable $ROCM_PATH is defined.
 *
 * The variable is read directly from the process environment instead of
 * spawning a shell to echo it, so no pipe or fixed-size buffer is involved.
 *
 * @return true when ROCM_PATH is set to a non-empty value, false when it is
 *         unset or empty.
 */
bool isRocmPathSet() {
  const char* rocm_path = std::getenv("ROCM_PATH");
  return rocm_path != nullptr && rocm_path[0] != '\0';
}
#endif
/*
This testcase checks the hipModuleLoad API (through testCodeObjFile) using
the code object named by CODE_OBJ_SINGLEARCH.
*/
TEST_CASE("Unit_hipModule_TestCodeObjFile") {
// All loading, launching and validation happens inside testCodeObjFile.
testCodeObjFile(CODE_OBJ_SINGLEARCH);
}
/*
 This testcase (Linux only)
 1. Writes a small kernel source file to /tmp/vcpy_kernel.cpp.
 2. Picks hipcc from $ROCM_PATH when that variable is set, otherwise from
    /opt/rocm.
 3. Compiles the source with --genco for a fixed list of architectures plus
    the architecture name reported for device 0.
 4. Runs testCodeObjFile on the generated multi-architecture code object.
 If the source file cannot be written or the compilation does not produce a
 code object, the test prints a warning and stops without failing.
*/
TEST_CASE("Unit_hipModule_MultiTargArchCodeObj") {
#ifdef __linux__
  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, 0));

  // Hardcoding the codeobject lines in multiple string to avoid cpplint warning
  const std::string CodeObjL1 = "#include \"hip/hip_runtime.h\"\n";
  const std::string CodeObjL2 =
       "extern \"C\" __global__ void hello_world(float* a, float* b) {\n";
  const std::string CodeObjL3 = "  int tx = hipThreadIdx_x;\n";
  const std::string CodeObjL4 = "  b[tx] = a[tx];\n";
  const std::string CodeObjL5 = "}";
  // Creating the full code object string
  const std::string CodeObj = CodeObjL1 + CodeObjL2 + CodeObjL3 +
                              CodeObjL4 + CodeObjL5;

  const char* input_codeobj = "/tmp/vcpy_kernel.cpp";
  {
    // Scope the stream so the file is flushed and closed before hipcc runs.
    std::ofstream ofs(input_codeobj, std::ofstream::out);
    ofs << CodeObj;
  }
  if (access(input_codeobj, F_OK) == -1) {
    // WARN is always printed; INFO would only show up on a failing assertion.
    WARN("Code Object File: /tmp/vcpy_kernel.cpp not found");
    return;
  }

  // Generate the command to generate multi architecture code object file.
  // "$ROCM_PATH" is left for the shell started by system() to expand.
  const char* hipcc_path = nullptr;
  if (isRocmPathSet()) {
    hipcc_path = "$ROCM_PATH/bin/hipcc";
  } else {
    hipcc_path = "/opt/rocm/bin/hipcc";
  }
  const char* genco_option = "--offload-arch";
  // Built as std::string so a long architecture name cannot be truncated by
  // a fixed-size buffer.
  const std::string command = std::string(hipcc_path) + " --genco " +
      genco_option + "=gfx801,gfx802,gfx803,gfx900,gfx908,gfx1030,gfx90a," +
      props.gcnArchName + " " + input_codeobj + " -o " + CODE_OBJ_MULTIARCH;
  if (system(command.c_str()) != 0) {
    // Do not fall through to a stale code object left by an earlier run.
    WARN("Unable to generate multi architecture code object file");
    return;
  }

  // Check if the code object file is created
  if (access("./" CODE_OBJ_MULTIARCH, F_OK) == -1) {
    WARN("Code Object File not found");
    return;
  }
  testCodeObjFile(CODE_OBJ_MULTIARCH);
#else
  SUCCEED("This test is skipped due to non linux environment");
#endif
}
Исполняемый файл
+120
Просмотреть файл
@@ -0,0 +1,120 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <fstream>
#include <vector>
// Number of float elements in each test buffer.
#define LEN 64
// Size in bytes of LEN floats. Parenthesized so the macro keeps its value
// when it is expanded inside a larger expression (e.g. x / SIZE).
#define SIZE (LEN * sizeof(float))
#define ARRAY_SIZE 16
#define fileName "module_kernels.code"
/*
 This testcase verifies the basic functionality of hipModuleGetGlobal API
 1. Simple kernel: launches "hello_world" and requires B == A element-wise.
 2. Global variables: writes the module globals "myDeviceGlobal" and
    "myDeviceGlobalArray" from the host, launches "test_globals" and requires
    B[i] == A[i] + myDeviceGlobal + myDeviceGlobalArray[i % ARRAY_SIZE].
 The setup before the SECTIONs is executed once for each section.
*/
TEST_CASE("Unit_hipModuleGetGlobal_Basic") {
  float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
  HipTest::initArrays<float>(&Ad, &Bd, nullptr, &A, &B, nullptr, LEN,
                             false);
  CTX_CREATE()
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Ad), A, SIZE));
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(Bd), B, SIZE));

  hipModule_t Module;
  HIP_CHECK(hipModuleLoad(&Module, fileName));

  // Scalar module global: look it up and overwrite it from the host.
  float myDeviceGlobal_h = 42.0f;
  hipDeviceptr_t deviceGlobal;
  size_t deviceGlobalSize;
  HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
                               Module, "myDeviceGlobal"));
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(deviceGlobal),
                          &myDeviceGlobal_h, deviceGlobalSize));

  // Array module global: fill the host copy completely, then upload it once.
  // Uploading inside the fill loop would repeatedly copy elements that have
  // not been initialized yet.
  float myDeviceGlobalArray_h[ARRAY_SIZE];
  hipDeviceptr_t myDeviceGlobalArray;
  size_t myDeviceGlobalArraySize;
  HIP_CHECK(hipModuleGetGlobal(reinterpret_cast<hipDeviceptr_t*>
                               (&myDeviceGlobalArray),
                               &myDeviceGlobalArraySize, Module,
                               "myDeviceGlobalArray"));
  for (int i = 0; i < ARRAY_SIZE; i++) {
    myDeviceGlobalArray_h[i] = i * 1000.0f;
  }
  // NOTE(review): assumes the device array holds ARRAY_SIZE floats, i.e.
  // myDeviceGlobalArraySize == sizeof(myDeviceGlobalArray_h) - confirm
  // against the kernel source.
  HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>
                          (myDeviceGlobalArray),
                          &myDeviceGlobalArray_h,
                          myDeviceGlobalArraySize));

  // Kernel arguments passed through the "extra" launch-parameter buffer.
  struct {
    void* _Ad;
    void* _Bd;
  } args;
  args._Ad = reinterpret_cast<void*>(Ad);
  args._Bd = reinterpret_cast<void*>(Bd);
  size_t size = sizeof(args);
  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                    HIP_LAUNCH_PARAM_END};

  SECTION("Testing with simple kernel") {
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0,
                                    NULL,
                                    reinterpret_cast<void**>(&config)));
    HIP_CHECK(hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE));
    for (uint32_t i = 0; i < LEN; i++) {
      REQUIRE(A[i] == B[i]);
    }
  }

  SECTION("Testing global variables") {
    hipFunction_t Function;
    HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
    HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0,
                                    NULL,
                                    reinterpret_cast<void**>(&config)));
    HIP_CHECK(hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE));
    for (uint32_t i = 0; i < LEN; i++) {
      float expected = A[i] + myDeviceGlobal_h +
                       myDeviceGlobalArray_h[i % ARRAY_SIZE];
      REQUIRE(expected == B[i]);
    }
  }

  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             A, B, nullptr,
                             false);
}
+246
Просмотреть файл
@@ -0,0 +1,246 @@
/*
Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* Test Scenarios
1. hipModuleLaunchKernel Negative Scenarios
2. hipModuleLaunchKernel Work Group tests =>
- (block.x * block.y * block.z) <= Work Group Size
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
- (block.x * block.y * block.z) > Work Group Size
where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
*/
#include <math.h>
#include <hip_test_common.hh>
#define fileName "module_kernels.code"
#define matmulK "matmulK"
#define SixteenSec "SixteenSecKernel"
#define KernelandExtra "KernelandExtraParams"
#define FourSec "FourSecKernel"
#define TwoSec "TwoSecKernel"
#define dummyKernel "EmptyKernel"
// Plain aggregate of six unsigned values: three grid extents followed by
// three block extents (x, y, z order), as suggested by the member names.
struct gridblockDim {
  unsigned int gridX, gridY, gridZ;
  unsigned int blockX, blockY, blockZ;
};
/*
 This testcase verifies the negative scenarios of
 hipModuleLaunchKernel API.

 Common setup (executed once per SECTION): loads the code object, looks up
 the matmulK and KernelandExtraParams kernels, creates a stream and queries
 the properties of device 0. Each SECTION then performs one launch with an
 invalid argument and requires that the call does not return hipSuccess.
*/
TEST_CASE("Unit_hipModuleLaunchKernel_Negative") {
  HIP_CHECK(hipSetDevice(0));

  // Argument buffer handed to the kernel through the "extra" parameter; the
  // launches below are expected to be rejected before the kernel would run.
  struct {
    void* _Ad;
    void* _Bd;
    void* _Cd;
    int _n;
  } args1;
  args1._Ad = nullptr;
  args1._Bd = nullptr;
  args1._Cd = nullptr;
  args1._n  = 0;
  hipFunction_t MultKernel, KernelandExtraParamKernel;
  size_t size1;
  size1 = sizeof(args1);
  hipModule_t Module;
  hipStream_t stream1;
  hipDeviceptr_t *Ad{nullptr};
  CTX_CREATE()
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK));
  HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel,
                                 Module, KernelandExtra));
  void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
                     HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                     HIP_LAUNCH_PARAM_END};
  void *params[] = {Ad};
  HIP_CHECK(hipStreamCreate(&stream1));

  // Queried once and checked, so the limit-based sections below never read
  // an uninitialized hipDeviceProp_t.
  hipDeviceProp_t deviceProp;
  HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));

  SECTION("Passing nullptr to kernel function") {
    REQUIRE(hipModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing Max int value to block dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  std::numeric_limits<uint32_t>::max(),
                                  std::numeric_limits<uint32_t>::max(),
                                  std::numeric_limits<uint32_t>::max(),
                                  0, stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing 0 to all value dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 0, 0, 0,
                                  0, 0, 0, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing 0 for X dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 0, 1, 1,
                                  0, 1, 1, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing 0 for Y dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 0, 1,
                                  1, 0, 1, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing 0 for Z dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 0,
                                  1, 1, 0, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing both kernel and extra params") {
    // Both kernelParams and extra are non-null in this launch.
    REQUIRE(hipModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1,
                                  1, 1, 0, stream1,
                                  reinterpret_cast<void**>(&params),
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Passing more than maxthreadsperblock to block dim") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  deviceProp.maxThreadsPerBlock+1,
                                  deviceProp.maxThreadsPerBlock+1,
                                  deviceProp.maxThreadsPerBlock+1, 0,
                                  stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Block dim X is more than max allowed") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  deviceProp.maxThreadsDim[0]+1,
                                  1,
                                  1, 0, stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Block dim Y is more than max allowed") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  1,
                                  deviceProp.maxThreadsDim[1]+1,
                                  1, 0, stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Block dim Z is more than max allowed") {
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  1,
                                  1,
                                  deviceProp.maxThreadsDim[2]+1,
                                  0, stream1, NULL,
                                  reinterpret_cast<void**>(&config1))
                                  != hipSuccess);
  }

  SECTION("Block invalid config to extra params") {
    // NOTE(review): the value after HIP_LAUNCH_PARAM_BUFFER_POINTER is
    // missing here, which looks like the deliberately malformed part.
    void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
                       HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
                       HIP_LAUNCH_PARAM_END};
    REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1,
                                  1, 1, 1, 0, stream1,
                                  NULL,
                                  reinterpret_cast<void**>(&config3))
                                  != hipSuccess);
  }

  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
/*
 This testcase verifies the work group scenarios of
 hipModuleLaunchKernel API.

 The block edge lengths are derived from the cube root of
 maxThreadsPerBlock of device 0:
 - floor(cbrt)^3 threads never exceed maxThreadsPerBlock, so the launch is
   required to succeed;
 - ceil(cbrt) * ceil(cbrt) * (ceil(cbrt) + 1) threads always exceed it, so
   the launch is required to fail.
*/
TEST_CASE("Unit_hipModuleLaunchKernel_WorkGroup") {
  HIP_CHECK(hipSetDevice(0));
  hipFunction_t DummyKernel;
  hipModule_t Module;
  hipStream_t stream1;
  CTX_CREATE()
  HIP_CHECK(hipModuleLoad(&Module, fileName));
  HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
  HIP_CHECK(hipStreamCreate(&stream1));

  // Derive block dimensions around the per-block thread limit of device 0.
  hipDeviceProp_t deviceProp;
  HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
  double cuberootVal =
                cbrt(static_cast<double>(deviceProp.maxThreadsPerBlock));
  uint32_t cuberoot_floor = static_cast<uint32_t>(floor(cuberootVal));
  uint32_t cuberoot_ceil = static_cast<uint32_t>(ceil(cuberootVal));

  // Scenario: (block.x * block.y * block.z) <= Work Group Size where
  // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
  HIP_CHECK(hipModuleLaunchKernel(DummyKernel,
                            1, 1, 1,
                            cuberoot_floor, cuberoot_floor, cuberoot_floor,
                            0, stream1, NULL, NULL));
  // Surface any asynchronous execution error of the valid launch here
  // rather than at stream destruction.
  HIP_CHECK(hipStreamSynchronize(stream1));

  // Scenario: (block.x * block.y * block.z) > Work Group Size where
  // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
  REQUIRE(hipModuleLaunchKernel(DummyKernel,
                            1, 1, 1,
                            cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1,
                            0, stream1, NULL, NULL) != hipSuccess);

  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipModuleUnload(Module));
  CTX_DESTROY()
}
+91
Просмотреть файл
@@ -0,0 +1,91 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
hipModuleLoadData scenarios
1. Loads the kernel and the corresponding kernel function
which copies the data from one device variable to another.
*/
#include <fstream>
#include <vector>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
// Number of float elements in each test buffer.
#define LEN 64
// LEN * 4 bytes. Parenthesized so the shift is not re-associated when the
// macro is expanded inside a larger expression.
#define SIZE (LEN << 2)
#define FILENAME "module_kernels.code"
#define kernel_name "hello_world"
/*
 Reads the whole code object file named by FILENAME into memory.

 Returns the file contents as a byte vector. The test fails
 (INFO + REQUIRE(false)) when the file cannot be opened, is empty, or cannot
 be read completely. The size is validated before the buffer is allocated:
 on a missing file tellg() reports -1, which must not be used as a vector
 size.
*/
static std::vector<char> load_file() {
  std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
  const std::streamsize fsize =
      file.is_open() ? static_cast<std::streamsize>(file.tellg()) : -1;
  if (fsize <= 0) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  file.seekg(0, std::ios::beg);

  std::vector<char> buffer(static_cast<size_t>(fsize));
  if (!file.read(buffer.data(), fsize)) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  return buffer;
}
/*
 Loads the code object from an in-memory image with hipModuleLoadData,
 launches the kernel named by kernel_name with LEN threads in one block and
 requires that host array B (copied back from Bd) equals host array A.
*/
TEST_CASE("Unit_hipModuleLoadData_Basic") {
  auto buffer = load_file();
  float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
  HipTest::initArrays<float>(&Ad, &Bd, nullptr, &A, &B, nullptr,
                             LEN, false);

  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  hipModule_t Module;
  hipFunction_t Function{nullptr};
  HIP_CHECK(hipModuleLoadData(&Module, &buffer[0]));
  HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Kernel arguments passed through the "extra" launch-parameter buffer.
  struct {
    void* _Ad;
    void* _Bd;
  } args;
  args._Ad = reinterpret_cast<void*>(Ad);
  args._Bd = reinterpret_cast<void*>(Bd);
  size_t size = sizeof(args);

  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                    HIP_LAUNCH_PARAM_END};
  HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0,
                   stream, NULL, reinterpret_cast<void**>(&config)));

  // Wait for the asynchronous launch before tearing down the stream and
  // the module it was launched from.
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  HIP_CHECK(hipModuleUnload(Module));

  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  for (uint32_t i = 0; i < LEN; i++) {
    REQUIRE(A[i] == B[i]);
  }
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             A, B,
                             nullptr, false);
}
+161
Просмотреть файл
@@ -0,0 +1,161 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the multithreaded scenario of
hipModuleLoadData API on MultiGPU system
*/
#include <fstream>
#include <vector>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
// Number of float elements in each test buffer.
#define LEN 64
// LEN * 4 bytes. Parenthesized so the shift is not re-associated when the
// macro is expanded inside a larger expression.
#define SIZE (LEN << 2)
#define THREADS 8
#define FILENAME "module_kernels.code"
#define kernel_name "hello_world"
/*
 This function reads the kernel code object file named by FILENAME into a
 buffer and returns it.

 The test fails (INFO + REQUIRE(false)) when the file cannot be opened, is
 empty, or cannot be read completely. The size is validated before the
 buffer is allocated: on a missing file tellg() reports -1, which must not
 be used as a vector size.
*/
static std::vector<char> load_file() {
  std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
  const std::streamsize fsize =
      file.is_open() ? static_cast<std::streamsize>(file.tellg()) : -1;
  if (fsize <= 0) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  file.seekg(0, std::ios::beg);

  std::vector<char> buffer(static_cast<size_t>(fsize));
  if (!file.read(buffer.data(), fsize)) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  return buffer;
}
/*
 Thread function
 1. Makes device deviceNo current for the calling thread.
 2. Loads the module from the in-memory image using hipModuleLoadData.
 3. Initializes 2 device buffers and launches the kernel named by
    kernel_name with LEN threads in one block.
 4. Copies the result back and requires B == A element-wise.

 buffer:     code object image handed to hipModuleLoadData.
 deviceNo:   ordinal of the device this thread works on.
 testResult: set to true on entry; it is not updated by the comparison
             below. NOTE(review): callers in this file share one flag
             between all threads - confirm that is intended.
*/
static void run(const std::vector<char>& buffer, int deviceNo,
                                                   bool &testResult) {
  // A failed device switch would make the thread run on the wrong GPU.
  HIPCHECK(hipSetDevice(deviceNo));
  hipModule_t Module;
  hipFunction_t Function;

  float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr};
  testResult = true;
  HipTest::initArrays<float>(&Ad, &Bd, nullptr,
                             &A, &B, nullptr,
                             LEN, false);

  HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
  HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name));

  hipStream_t stream;
  HIPCHECK(hipStreamCreate(&stream));

  // Kernel arguments passed through the "extra" launch-parameter buffer.
  struct {
    void* _Ad;
    void* _Bd;
  } args;
  args._Ad = static_cast<void*>(Ad);
  args._Bd = static_cast<void*>(Bd);
  size_t size = sizeof(args);

  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
                    HIP_LAUNCH_PARAM_END};
  HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN,
                                 1, 1, 0, stream, NULL,
                                 reinterpret_cast<void**>(&config)));

  // The kernel must be complete before the stream and module go away.
  HIPCHECK(hipStreamSynchronize(stream));
  HIPCHECK(hipStreamDestroy(stream));
  HIPCHECK(hipModuleUnload(Module));
  HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  for (uint32_t i = 0; i < LEN; i++) {
    REQUIRE(A[i] == B[i]);
  }
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             A, B, nullptr,
                             false);
}
/*
 std::thread wrapper that joins in its destructor.

 Construction arguments are forwarded unchanged to std::thread; the type is
 movable (defaulted move operations). When the object is destroyed while
 still joinable, the destructor blocks until the thread has finished.
*/
struct joinable_thread : std::thread {
  // Forwards every constructor argument to the std::thread base.
  template <class... Args>
  joinable_thread(Args&&... args)  // NOLINT
      : std::thread(std::forward<Args>(args)...) {}

  joinable_thread(joinable_thread&& other) = default;
  joinable_thread& operator=(joinable_thread&& other) = default;

  ~joinable_thread() {
    if (joinable()) {
      join();
    }
  }
};
/*
 This function is triggered from the test case and
 1. Queries the number of devices.
 2. For every device starts n threads, each executing run() on that device.
 All threads are joined when the thread vector is destroyed at the end of
 the function.

 n:      number of threads to start per device.
 buffer: code object image handed to every thread.
*/
static void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
  int numDevices = 0;
  HIPCHECK(hipGetDeviceCount(&numDevices));
  // Declared before the threads so it outlives every thread that writes it.
  bool testResult = false;
  std::vector<joinable_thread> threads;

  for (int deviceNo = 0; deviceNo < numDevices; ++deviceNo) {
    for (uint32_t i = 0; i < n; i++) {
      // deviceNo is captured by value: the loop variable keeps changing
      // (and goes out of scope) while the threads are still running, so a
      // reference capture would let a thread pick up the wrong device.
      threads.emplace_back(std::thread{[&testResult, buffer, deviceNo] {
        run(buffer, deviceNo, std::ref(testResult));
      }});
    }
  }
}
/*
 The testcase verifies the multithreaded functionality on MGPU system
 1. Loads the kernel file by calling load_file API
 2. Gets the host thread count
 3. Creates multiple threads in parallel where in each thread initializes
    2 device variables and loads the kernel using hipModuleLoadData API.
    The kernel copies the data from one variable to another. Then the thread
    validates both the variables.
 The test fails when the computed thread count is zero, because no thread
 would run and nothing would be verified.
*/
TEST_CASE("Unit_hipModuleLoadData_MGpuMultiThread") {
  auto buffer = load_file();
  // Code object size in whole MiB; 10 is added before asking for the thread
  // count. NOTE(review): the unit expected by getHostThreadCount is not
  // visible here - confirm.
  auto file_size = buffer.size() / (1024 * 1024);
  auto thread_count = HipTest::getHostThreadCount(file_size + 10);
  if (thread_count == 0) {
    INFO("Thread Count is zero");
    REQUIRE(false);
  }
  run_multi_threads(thread_count, buffer);
}
+164
Просмотреть файл
@@ -0,0 +1,164 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the multithreaded scenario of hipModuleLoadData API
*/
#include <fstream>
#include <vector>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
// Number of float elements in each test buffer.
#define LEN 64
// LEN * 4 bytes. Parenthesized so the shift is not re-associated when the
// macro is expanded inside a larger expression.
#define SIZE (LEN << 2)
#define THREADS 8
#define MAX_THREADS 512
#define FILENAME "module_kernels.code"
#define kernel_name "hello_world"
/*
 This function reads the kernel code object file named by FILENAME into a
 buffer and returns it.

 The test fails (INFO + REQUIRE(false)) when the file cannot be opened, is
 empty, or cannot be read completely. The size is validated before the
 buffer is allocated: on a missing file tellg() reports -1, which must not
 be used as a vector size.
*/
std::vector<char> load_file() {
  std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
  const std::streamsize fsize =
      file.is_open() ? static_cast<std::streamsize>(file.tellg()) : -1;
  if (fsize <= 0) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  file.seekg(0, std::ios::beg);

  std::vector<char> buffer(static_cast<size_t>(fsize));
  if (!file.read(buffer.data(), fsize)) {
    INFO("could not open code object " << FILENAME);
    REQUIRE(false);
  }
  return buffer;
}
/*
Thread function
1. Loads the module using hipModuleLoadData API
2. Initializes 2 device variables.
3. Launches kernel which copies one data into another.
4. validates the result and returns it to the caller using
std::ref variable.
*/
void run(const std::vector<char>& buffer, bool &testResult) {
hipModule_t Module;
hipFunction_t Function;
float *A, *B, *Ad, *Bd;
testResult = true;
HipTest::initArrays<float>(&Ad, &Bd, nullptr,
&A, &B, nullptr,
LEN, false);
HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name));
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
struct {
void* _Ad;
void* _Bd;
} args;
args._Ad = static_cast<void*>(Ad);
args._Bd = static_cast<void*>(Bd);
size_t size = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1,
LEN, 1, 1, 0, stream,
NULL, reinterpret_cast<void**>(&config)));
HIPCHECK(hipStreamSynchronize(stream));
HIPCHECK(hipStreamDestroy(stream));
HIPCHECK(hipModuleUnload(Module));
HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
for (uint32_t i = 0; i < LEN; i++) {
REQUIRE(A[i] == B[i]);
}
HipTest::freeArrays<float>(Ad, Bd, nullptr,
A, B, nullptr,
false);
}
/*
 std::thread wrapper that joins in its destructor.

 Construction arguments are forwarded unchanged to std::thread; the type is
 movable (defaulted move operations). When the object is destroyed while
 still joinable, the destructor blocks until the thread has finished.
*/
struct joinable_thread : std::thread {
  // Forwards every constructor argument to the std::thread base.
  template <class... Args>
  joinable_thread(Args&&... args)  // NOLINT
      : std::thread(std::forward<Args>(args)...) {}

  joinable_thread(joinable_thread&& other) = default;
  joinable_thread& operator=(joinable_thread&& other) = default;

  ~joinable_thread() {
    if (joinable()) {
      join();
    }
  }
};
/*
This API is triggered form the test case where in
1. Creates the thread object.
2. Loops through the number of GPUs and launches multiple threads.
*/
void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
std::vector<joinable_thread> threads;
bool testResult = false;
for (uint32_t i = 0; i < n; i++) {
threads.emplace_back(std::thread{[&] {
run(buffer, std::ref(testResult));
}});
}
}
/*
 The testcase verifies the multithreaded functionality
 1. Initializes the runtime and loads the kernel file by calling load_file
 2. Gets the host thread count; the test fails when it is zero
 3. Starts that many threads through run_multi_threads. Each thread
    initializes 2 device variables, loads the kernel using
    hipModuleLoadData, launches it and validates both variables.
*/
TEST_CASE("Unit_hipModuleLoadData_MultiThreaded") {
  HIPCHECK(hipInit(0));

  const auto codeObject = load_file();
  // Code object size in whole MiB; 10 is added before asking for the
  // thread count.
  const auto codeObjectMiB = codeObject.size() / (1024 * 1024);
  const auto numThreads = HipTest::getHostThreadCount(codeObjectMiB + 10);

  if (numThreads == 0) {
    INFO("Thread Count is zero");
    REQUIRE(false);
  }

  run_multi_threads(numThreads, codeObject);
}
+121
Просмотреть файл
@@ -0,0 +1,121 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies hipModuleLoad API in multithreaded scenario
*/
#include <stdio.h>
#include "hip/hip_runtime.h"
#if HT_AMD
#include "hip/hip_ext.h"
#endif
#include <fstream>
#include <algorithm>
#include <atomic>
#include <functional>
#include <vector>
#include <future>
#define THREADS 8
#define MAX_NUM_THREADS 128
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
#define NUM_GROUPS 1
#define GROUP_SIZE 1
#define WARMUP_RUN_COUNT 10
#define TIMING_RUN_COUNT 100
// Sum of both run counts. Parenthesized so the addition is evaluated first
// when the macro is expanded inside a larger expression.
#define TOTAL_RUN_COUNT (WARMUP_RUN_COUNT + TIMING_RUN_COUNT)
#define FILENAME "module_kernels.code"
#define kernel_name "EmptyKernel"
/*
This thread function loads the kernel file , synchronizes the threads
and Launches the kernel .
*/
void hipModuleLaunchKernel_enqueue(std::atomic_int* shared, int max_threads) {
// resources necessary for this thread
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
hipModule_t module;
hipFunction_t function;
HIPCHECK(hipModuleLoad(&module, FILENAME));
HIPCHECK(hipModuleGetFunction(&function, module, kernel_name));
void* kernel_params = nullptr;
// synchronize all threads, before running
shared->fetch_add(1, std::memory_order_release);
while (max_threads != shared->load(std::memory_order_acquire)) {}
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
HIPCHECK(hipModuleLaunchKernel(function, 1, 1,
1, 1, 1, 1, 0, stream,
&kernel_params, nullptr));
}
HIPCHECK(hipModuleUnload(module));
HIPCHECK(hipStreamDestroy(stream));
}
/*
Helper that runs one callable on max_threads threads.
start() launches one std::async(std::launch::async) task per thread and hands
each task a pointer to the shared counter plus the thread count.
finish() waits for every launched task and resets the pool; an exception
thrown inside a task is rethrown by finish().
*/
struct thread_pool {
  explicit thread_pool(int total_threads) : max_threads(total_threads) {
  }
  // Launches max_threads asynchronous tasks, each calling f(&shared, max_threads)
  void start(std::function<void(std::atomic_int*, int)> f) {
    for (int i = 0; i < max_threads; ++i) {
      threads.push_back(std::async(std::launch::async, f,
                                   &shared, max_threads));
    }
  }
  // Waits for all outstanding tasks, then clears them and resets the counter
  void finish() {
    for (auto& task : threads) {
      // A future whose get() already ran (for example because it rethrew a
      // task exception during an earlier finish()) is no longer valid and
      // must not be queried a second time.
      if (task.valid()) {
        task.get();
      }
    }
    threads.clear();
    shared = 0;
  }
  ~thread_pool() {
    // A destructor must not let an exception escape: it may be running
    // while another exception is already unwinding the stack.
    try {
      finish();
    } catch (...) {
    }
  }

 private:
  std::atomic_int shared {0};                 // start-barrier counter
  std::vector<std::future<void>> threads;     // one future per launched task
  int max_threads = 1;
};
/*
This testcase verifies the multithreaded scenario of the hipModule API.
A thread_pool object is created and its start API launches multiple threads;
each thread loads the kernel object using hipModuleLoad and launches the
kernel in parallel. The thread count is THREADS per hardware thread, capped
at MAX_NUM_THREADS.
*/
TEST_CASE("Unit_hipModuleLoad_MultiThread") {
  // hardware_concurrency() may return 0 when the value cannot be determined;
  // fall back to 1 so the test never degenerates into a run with no threads.
  unsigned int cores = std::thread::hardware_concurrency();
  if (cores == 0) {
    cores = 1;
  }
  // Compare in a single unsigned type instead of mixing unsigned and int
  const unsigned int wanted = THREADS * cores;
  const int max_threads =
      static_cast<int>(std::min<unsigned int>(wanted, MAX_NUM_THREADS));
  thread_pool task(max_threads);
  task.start(hipModuleLaunchKernel_enqueue);
  task.finish();
}
+93
Просмотреть файл
@@ -0,0 +1,93 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <iostream>
#include <fstream>
#include <cstddef>
#include <vector>
#include "hip_test_common.hh"
#define TEST_ITERATIONS 1000
#define CODEOBJ_FILE "module_kernels.code"
/**
* Run Valgrind tool with these test cases to validate memory leakage.
* E.g. valgrind --leak-check=yes ./a.out
*/
/**
* Internal Function
*/
static std::vector<char> load_file() {
std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate);
std::streamsize fsize = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<char> buffer(fsize);
if (!file.read(buffer.data(), fsize)) {
WARN("could not open code object " << CODEOBJ_FILE);
}
file.close();
return buffer;
}
/**
 * Loads CODEOBJ_FILE with hipModuleLoad, looks up a kernel in it and unloads
 * the module again, TEST_ITERATIONS times in a row.
 */
TEST_CASE("Unit_hipModule_LoadUnloadStress") {
  CTX_CREATE()
  int remaining = TEST_ITERATIONS;
  while (remaining-- > 0) {
    hipModule_t loadedModule;
    hipFunction_t kernel;
    // load -> resolve kernel -> unload, all within one iteration
    HIP_CHECK(hipModuleLoad(&loadedModule, CODEOBJ_FILE));
    HIP_CHECK(hipModuleGetFunction(&kernel, loadedModule, "testWeightedCopy"));
    HIP_CHECK(hipModuleUnload(loadedModule));
  }
  CTX_DESTROY()
}
/**
 * Reads the code object into host memory once and then, TEST_ITERATIONS
 * times, creates a module from that image with hipModuleLoadData, looks up
 * a kernel in it and unloads the module again.
 */
TEST_CASE("Unit_hipModuleLoadData_LoadUnloadStress") {
  CTX_CREATE()
  auto image = load_file();
  int remaining = TEST_ITERATIONS;
  while (remaining-- > 0) {
    hipModule_t loadedModule;
    hipFunction_t kernel;
    // the same in-memory image is reused for every iteration
    HIP_CHECK(hipModuleLoadData(&loadedModule, image.data()));
    HIP_CHECK(hipModuleGetFunction(&kernel, loadedModule, "testWeightedCopy"));
    HIP_CHECK(hipModuleUnload(loadedModule));
  }
  CTX_DESTROY()
}
/**
 * Reads the code object into host memory once and then, TEST_ITERATIONS
 * times, creates a module from that image with hipModuleLoadDataEx (no
 * options), looks up a kernel in it and unloads the module again.
 */
TEST_CASE("Unit_hipModuleLoadDataEx_UnloadStress") {
  CTX_CREATE()
  auto image = load_file();
  int remaining = TEST_ITERATIONS;
  while (remaining-- > 0) {
    hipModule_t loadedModule;
    hipFunction_t kernel;
    // zero options: option and option-value arrays are both null
    HIP_CHECK(hipModuleLoadDataEx(&loadedModule, image.data(), 0,
                                  nullptr, nullptr));
    HIP_CHECK(hipModuleGetFunction(&kernel, loadedModule, "testWeightedCopy"));
    HIP_CHECK(hipModuleUnload(loadedModule));
  }
  CTX_DESTROY()
}
+274
Просмотреть файл
@@ -0,0 +1,274 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the negative scenarios of
1. hipModuleLoad API
2. hipModuleLoadData API
3. hipModuleGetFunction API
4. hipModuleGetGlobal API
*/
#include <ctime>
#include <fstream>
#include <cstddef>
#include <vector>
#include "hip_test_common.hh"
#define FILENAME_NONEXST "sample_nonexst.code"
#define FILENAME_EMPTY "emptyfile.code"
#define FILENAME_RAND "rand_file.code"
#define RANDOMFILE_LEN 2048
#define CODEOBJ_FILE "module_kernels.code"
#define KERNEL_NAME "hello_world"
#define KERNEL_NAME_NONEXST "xyz"
#define CODEOBJ_GLOBAL "module_kernels.code"
#define DEVGLOB_VAR_NONEXIST "xyz"
#define DEVGLOB_VAR "myDeviceGlobal"
/**
* Internal Function
* Loads the kernel file into buffer
*/
std::vector<char> load_file(const char* filename) {
std::ifstream file(filename, std::ios::binary | std::ios::ate);
std::streamsize fsize = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<char> buffer(fsize);
if (!file.read(buffer.data(), fsize)) {
INFO("could not open code object " << filename);
}
file.close();
return buffer;
}
/**
* Internal Function
Create Randome file
*/
void createRandomFile(const char* filename) {
std::ofstream outfile(filename, std::ios::binary);
char buf[RANDOMFILE_LEN];
unsigned int seed = 1;
for (int i = 0; i < RANDOMFILE_LEN; i++) {
buf[i] = rand_r(&seed) % 256;
}
outfile.write(buf, RANDOMFILE_LEN);
outfile.close();
}
/**
 * Validates negative scenarios for hipModuleLoad API.
 * Every section expects hipModuleLoad to return something other than
 * hipSuccess. Scratch files created by a section are deleted before the
 * assertion runs, so they are cleaned up even when the assertion fails.
 */
TEST_CASE("Unit_hipModuleLoad_Negative") {
  CTX_CREATE()
  hipModule_t Module;
  SECTION("Nullptr to module") {
    REQUIRE(hipModuleLoad(nullptr, CODEOBJ_FILE)
            != hipSuccess);
  }
  SECTION("Nullptr to Fname") {
    REQUIRE(hipModuleLoad(&Module, nullptr)
            != hipSuccess);
  }
  SECTION("Empty fname") {
    // Create a zero-length file
    std::fstream fs;
    fs.open(FILENAME_EMPTY, std::ios::out);
    fs.close();
    const hipError_t status = hipModuleLoad(&Module, FILENAME_EMPTY);
    remove(FILENAME_EMPTY);
    REQUIRE(status != hipSuccess);
  }
  SECTION("Binary file with random number") {
    createRandomFile(FILENAME_RAND);
    const hipError_t status = hipModuleLoad(&Module, FILENAME_RAND);
    remove(FILENAME_RAND);
    REQUIRE(status != hipSuccess);
  }
  SECTION("Non Existent file") {
    REQUIRE(hipModuleLoad(&Module, FILENAME_NONEXST)
            != hipSuccess);
  }
  SECTION("Empty string to file name") {
    REQUIRE(hipModuleLoad(&Module, "")
            != hipSuccess);
  }
  CTX_DESTROY()
}
/**
 * Validates negative scenarios for hipModuleLoadData and
 * hipModuleLoadDataEx APIs.
 * Every section expects the call to return something other than hipSuccess.
 * The hipModuleLoadDataEx sections carry a "LoadDataEx - " prefix so that no
 * two sections of this test case share a name.
 */
TEST_CASE("Unit_hipModuleLoadData_Negative") {
  CTX_CREATE()
  hipModule_t Module;
  SECTION("Nullptr to module") {
    auto buffer = load_file(CODEOBJ_FILE);
    REQUIRE(hipModuleLoadData(nullptr, buffer.data())
            != hipSuccess);
  }
  SECTION("Nullptr to image") {
    REQUIRE(hipModuleLoadData(&Module, nullptr)
            != hipSuccess);
  }
  SECTION("Random file to image") {
    createRandomFile(FILENAME_RAND);
    auto buffer = load_file(FILENAME_RAND);
    // The bytes are in memory now; delete the scratch file right away
    remove(FILENAME_RAND);
    REQUIRE(hipModuleLoadData(&Module, buffer.data())
            != hipSuccess);
  }
  SECTION("LoadDataEx - Nullptr to module") {
    auto buffer = load_file(CODEOBJ_FILE);
    REQUIRE(hipModuleLoadDataEx(nullptr, buffer.data(), 0, nullptr, nullptr)
            != hipSuccess);
  }
  SECTION("LoadDataEx - Nullptr to image") {
    REQUIRE(hipModuleLoadDataEx(&Module, nullptr, 0, nullptr, nullptr)
            != hipSuccess);
  }
  SECTION("LoadDataEx - Random image file") {
    // Create a binary file with random numbers
    createRandomFile(FILENAME_RAND);
    // Open the random file and copy it in a buffer
    auto buffer = load_file(FILENAME_RAND);
    // The bytes are in memory now; delete the scratch file right away
    remove(FILENAME_RAND);
    REQUIRE(hipModuleLoadDataEx(&Module, buffer.data(), 0, nullptr, nullptr)
            != hipSuccess);
  }
  CTX_DESTROY()
}
/**
 * Validates negative scenarios for hipModuleGetFunction API.
 * Every section expects hipModuleGetFunction to return something other than
 * hipSuccess.
 */
TEST_CASE("Unit_hipModuleGetFunction_Negative") {
  CTX_CREATE()
  // Both handles start out null: the "Uninitialized module" section passes
  // Module without loading anything, and reading an indeterminate handle
  // would be undefined behaviour with an unpredictable outcome.
  hipFunction_t Function = nullptr;
  hipModule_t Module = nullptr;
  // Despite its name, this section passes nullptr as the output function
  // handle; the kernel-name case is "Nullptr to kernel name" below.
  SECTION("Nullptr to function name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    REQUIRE(hipModuleGetFunction(nullptr, Module, KERNEL_NAME) != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
  SECTION("Uninitialized module") {
    REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess);
  }
  SECTION("Non existing function kernel name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME_NONEXST)
            != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
  SECTION("Nullptr to kernel name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    REQUIRE(hipModuleGetFunction(&Function, Module, nullptr) != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
#if HT_AMD
  // Uses the stale handle of a module that has already been unloaded
  SECTION("Unloaded module") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    HIP_CHECK(hipModuleUnload(Module));
    REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess);
  }
#endif
  SECTION("Empty string to kernel name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    REQUIRE(hipModuleGetFunction(&Function, Module, "") != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
  CTX_DESTROY()
}
/**
 * Validates negative scenarios for hipModuleGetGlobal API, plus (AMD only)
 * unloading a module that is not loaded.
 * Every section expects the call under test to return something other than
 * hipSuccess.
 */
TEST_CASE("Unit_hipModuleGetGlobal_Negative") {
  CTX_CREATE()
  // Starts out null: the "Uninitialized module" sections pass Module without
  // loading anything, and reading an indeterminate handle would be undefined
  // behaviour with an unpredictable outcome.
  hipModule_t Module = nullptr;
  hipDeviceptr_t deviceGlobal;
  size_t deviceGlobalSize;
  SECTION("Nullptr to varname") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
    REQUIRE(hipModuleGetGlobal(&deviceGlobal,
                               &deviceGlobalSize, Module, nullptr)
            != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
  SECTION("Wrong variable name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
    REQUIRE(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
                               Module, DEVGLOB_VAR_NONEXIST) != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
  // Despite its name, this section passes an empty variable name
  SECTION("Empty string to module name") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
    REQUIRE(hipModuleGetGlobal(&deviceGlobal,
                               &deviceGlobalSize, Module, "") != hipSuccess);
    HIP_CHECK(hipModuleUnload(Module));
  }
#if HT_AMD
  // Uses the stale handle of a module that has already been unloaded
  SECTION("Unloaded Module") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL));
    HIP_CHECK(hipModuleUnload(Module));
    REQUIRE(hipModuleGetGlobal(&deviceGlobal,
                               &deviceGlobalSize, Module,
                               DEVGLOB_VAR) != hipSuccess);
  }
  SECTION("Unload an Unloaded module") {
    HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
    HIP_CHECK(hipModuleUnload(Module));
    REQUIRE(hipModuleUnload(Module) != hipSuccess);
  }
  SECTION("Uninitialized module") {
    REQUIRE(hipModuleGetGlobal(&deviceGlobal,
                               &deviceGlobalSize, Module,
                               DEVGLOB_VAR) != hipSuccess);
  }
  SECTION("Unload Uninitialized module") {
    REQUIRE(hipModuleUnload(Module) != hipSuccess);
  }
#endif
  CTX_DESTROY()
}
+267
Просмотреть файл
@@ -0,0 +1,267 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <limits>
#define fileName "module_kernels.code"
#define kernel_name "hello_world"
/**
* hipModuleOccupancyMaxPotentialBlockSize and hipModuleOccupancyMaxPotentialBlockSizeWithFlags
* corner tests.
* Scenario1:
* Validates the value of gridSize, which should be always non zero +ve integer and blockSize
* range returned for dynSharedMemPerBlk = 0 and blockSizeLimit = 0.
* Scenario2:
* Validates the value of gridSize, which should be always non zero +ve integer and blockSize
* range returned for dynSharedMemPerBlk = devProp.sharedMemPerBlock and
* blockSizeLimit = devProp.maxThreadsPerBlock.
*/
TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_FuncTst") {
  // Outputs of the occupancy query; each section below fills them in
  int grid = 0;
  int block = 0;
  hipModule_t mod;
  CTX_CREATE()
  // Load the code object and resolve the kernel under test
  HIP_CHECK(hipModuleLoad(&mod, fileName));
  hipFunction_t kernel;
  HIP_CHECK(hipModuleGetFunction(&kernel, mod, kernel_name));
  // Properties of device 0 supply the "maximum" inputs and the upper bound
  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, 0));
  // Scenario1
  SECTION("without flag - gridSize when input params are 0") {
    HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&grid, &block,
                                                      kernel, 0, 0));
  }
  // Scenario2
  SECTION("without flag - gridSize when input params are maximum") {
    HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(
        &grid, &block, kernel,
        props.sharedMemPerBlock, props.maxThreadsPerBlock));
  }
  // Scenario1
  SECTION("with flag - gridSize when input params are 0") {
    HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
        &grid, &block, kernel, 0, 0, 0));
  }
  // Scenario2
  SECTION("with flag - gridSize when input params are maximum") {
    HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
        &grid, &block, kernel,
        props.sharedMemPerBlock, props.maxThreadsPerBlock, 0));
  }
  // Both sizes must be positive and the block size must not exceed
  // maxThreadsPerBlock
  REQUIRE_FALSE(grid <= 0);
  REQUIRE_FALSE(block <= 0);
  REQUIRE_FALSE(block > props.maxThreadsPerBlock);
  // Un-initialize
  HIP_CHECK(hipModuleUnload(mod));
  CTX_DESTROY()
}
/**
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessor and
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags Corner tests.
* Scenario1:
* Validates numBlock value range is within expected limit when sharedMemPerBlock
* is 0.
* Scenario2:
* Validates numBlock value range is within expected limit when
* dynSharedMemPerBlk = devProp.sharedMemPerBlock.
*/
TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_FuncTst") {
  int grid = 0;
  int block = 0;
  int activeBlocks = 0;
  hipModule_t mod;
  CTX_CREATE()
  // Load the code object and resolve the kernel under test
  HIP_CHECK(hipModuleLoad(&mod, fileName));
  hipFunction_t kernel;
  HIP_CHECK(hipModuleGetFunction(&kernel, mod, kernel_name));
  // The block size returned here is the block size passed to every
  // active-blocks query below
  HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&grid, &block,
                                                    kernel, 0, 0));
  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, 0));
  // Scenario1
  SECTION("without flag - gridSize when input params are 0") {
    HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
        &activeBlocks, kernel, block, 0));
  }
  // Scenario2
  SECTION("without flag - gridSize when input params are maximum") {
    HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
        &activeBlocks, kernel, block, props.sharedMemPerBlock));
  }
  // Scenario1
  SECTION("with flag - gridSize when input params are 0") {
    HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
        &activeBlocks, kernel, block, 0, 0));
  }
  // Scenario2
  SECTION("with flag - gridSize when input params are maximum") {
    HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
        &activeBlocks, kernel, block, props.sharedMemPerBlock, 0));
  }
  // The block count must be positive, and blocks * block size must not
  // exceed maxThreadsPerMultiProcessor
  REQUIRE_FALSE(activeBlocks <= 0);
  const int residentThreads = activeBlocks * block;
  REQUIRE_FALSE(residentThreads > props.maxThreadsPerMultiProcessor);
  // Un-initialize
  HIP_CHECK(hipModuleUnload(mod));
  CTX_DESTROY()
}
/**
* hipModuleOccupancyMaxPotentialBlockSize negative tests.
* Scenario1: gridSize is nullptr.
* Scenario2: blocksize is nullptr.
* Scenario3: blockSizeLimit < 0.
*/
TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_NegTst") {
  int grid = 0;
  int block = 0;
  hipFunction_t kernel;
  hipModule_t mod;
  CTX_CREATE()
  // Load the code object and resolve the kernel under test
  HIP_CHECK(hipModuleLoad(&mod, fileName));
  HIP_CHECK(hipModuleGetFunction(&kernel, mod, kernel_name));
  // Scenario1
  SECTION("without flag - gridSize is nullptr") {
    REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
                                    nullptr, &block, kernel, 0, 0));
  }
  // Scenario2
  SECTION("without flag - blocksize is nullptr") {
    REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
                                    &grid, nullptr, kernel, 0, 0));
  }
  // Scenario3
  SECTION("without flag - blockSizeLimit is less than 0") {
    hipDeviceProp_t props;
    HIP_CHECK(hipGetDeviceProperties(&props, 0));
#if HT_NVIDIA
    // A negative limit is expected to be rejected here
    REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize(
                                    &grid, &block, kernel, 0, -1));
#else
    // As discussed in SWDEV-269400 with developers, this difference in
    // behavior between NVIDIA and AMD is retained: the call is expected
    // to succeed here.
    REQUIRE_FALSE(hipSuccess != hipModuleOccupancyMaxPotentialBlockSize(
                                    &grid, &block, kernel, 0, -1));
#endif
  }
  // Scenario1
  SECTION("with flag - gridSize is nullptr") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
                      nullptr, &block, kernel, 0, 0, 0));
  }
  // Scenario2
  SECTION("with flag - blocksize is nullptr") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
                      &grid, nullptr, kernel, 0, 0, 0));
  }
  // Scenario3
  SECTION("with flag - blockSizeLimit is less than 0") {
#if HT_NVIDIA
    // A negative limit is expected to be rejected here
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
                      &grid, &block, kernel, 0, -1, 0));
#else
    // As discussed in SWDEV-269400 with developers, this difference in
    // behavior between NVIDIA and AMD is retained: the call is expected
    // to succeed here.
    REQUIRE_FALSE(hipSuccess !=
                  hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
                      &grid, &block, kernel, 0, -1, 0));
#endif
  }
  HIP_CHECK(hipModuleUnload(mod));
  CTX_DESTROY()
}
/**
* hipModuleOccupancyMaxActiveBlocksPerMultiprocessor negative tests.
* Scenario1: numBlocks is nullptr.
* Scenario2: Check the behavior for blockSize < 0.
* Scenario3: Check error code returned for dynSharedMemPerBlk = 0 and blockSize = 0.
* Scenario4: dynSharedMemPerBlk = size_t numeric limit.
*/
TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_NegTst") {
  int grid = 0;
  int block = 0;
  int activeBlocks = 0;
  hipFunction_t kernel;
  hipModule_t mod;
  CTX_CREATE()
  // Load the code object and resolve the kernel under test
  HIP_CHECK(hipModuleLoad(&mod, fileName));
  HIP_CHECK(hipModuleGetFunction(&kernel, mod, kernel_name));
  // Obtain a usable block size for the nullptr scenarios
  HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&grid, &block,
                                                    kernel, 0, 0));
  // Scenario1
  SECTION("without flag - numBlocks is nullptr") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
                      nullptr, kernel, block, 0));
  }
  // Scenario3
  SECTION("without flag - dynSharedMemPerBlk = 0 and blockSize = 0") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
                      &activeBlocks, kernel, 0, 0));
  }
  // Scenario2
  SECTION("without flag - blockSize is less than 0") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
                      &activeBlocks, kernel, -1, 0));
  }
  // Scenario4
  SECTION("without flag - dynSharedMemPerBlk = max_numerical_limit") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
                      &activeBlocks, kernel, 0,
                      std::numeric_limits<std::size_t>::max()));
  }
  // Scenario1
  SECTION("with flag - numBlocks is nullptr") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
                      nullptr, kernel, block, 0, 0));
  }
  // Scenario3
  SECTION("with flag - dynSharedMemPerBlk = 0 and blockSize = 0") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
                      &activeBlocks, kernel, 0, 0, 0));
  }
  // Scenario2
  SECTION("with flag - blockSize is less than 0") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
                      &activeBlocks, kernel, -1, 0, 0));
  }
  // Scenario4
  SECTION("with flag - dynSharedMemPerBlk = max_numerical_limit") {
    REQUIRE_FALSE(hipSuccess ==
                  hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
                      &activeBlocks, kernel, 0,
                      std::numeric_limits<std::size_t>::max(), 0));
  }
  HIP_CHECK(hipModuleUnload(mod));
  CTX_DESTROY()
}
Исполняемый файл
+561
Просмотреть файл
@@ -0,0 +1,561 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the following scenarios of hipModuleGetTexRef API
1. Negative
2. Basic functionality using different data types
3. Multiple streams
4. MultiThreaded - MultStreamMultGPU
5. MultiThreaded - SingleStreamMultGPU
*/
#include <fstream>
#include <vector>
#include <type_traits>
#include <limits>
#include <atomic>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
#define CODEOBJ_FILE "module_kernels.code"
#define NON_EXISTING_TEX_NAME "xyz"
#define EMPTY_TEX_NAME ""
#define GLOBAL_KERNEL_VAR "deviceGlobalFloat"
#define TEX_REF "ftex"
#define WIDTH 256
#define HEIGHT 256
#define MAX_STREAMS 4
#define GRIDDIMX 16
#define GRIDDIMY 16
#define GRIDDIMZ 1
#define BLOCKDIMZ 1
#define MAX_GPU 16
std::atomic<int> g_thTestPassed(1);
/**
 * Internal Functions
 * Reads the whole code object file CODEOBJ_FILE and returns its bytes.
 * The running test case is failed when the file cannot be opened, is empty
 * or cannot be read.
 */
static std::vector<char> load_file() {
  // std::ios::ate positions the stream at the end so tellg() yields the size
  std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate);
  const std::streamsize fsize = file.tellg();
  // tellg() reports -1 on a stream that failed to open; check before the
  // value is used to size the buffer, otherwise the allocation itself blows
  // up and the diagnostic below is never reached.
  if (!file.is_open() || fsize <= 0) {
    INFO("could not open code object " << CODEOBJ_FILE);
    REQUIRE(false);
  }
  file.seekg(0, std::ios::beg);
  std::vector<char> buffer(static_cast<size_t>(fsize));
  if (!file.read(buffer.data(), fsize)) {
    INFO("could not open code object " << CODEOBJ_FILE);
    REQUIRE(false);
  }
  return buffer;
}
/*
Initializes the array
*/
template<typename T>
void allocInitArray(unsigned int width,
unsigned int height,
hipArray_Format format,
HIP_ARRAY* array
) {
HIP_ARRAY_DESCRIPTOR desc;
desc.Format = format;
desc.NumChannels = 1;
desc.Width = width * sizeof(T);
desc.Height = height;
HIPCHECK(hipArrayCreate(array, &desc));
}
/*
Copies buffer to array using hipMemcpyParam2D API
*/
template <class T, class T1> void copyBuffer2Array(unsigned int width,
unsigned int height,
T* hData,
T1 array
) {
hip_Memcpy2D copyParam;
memset(&copyParam, 0, sizeof(copyParam));
#if HT_NVIDIA
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
copyParam.dstArray = *array;
#else
copyParam.dstMemoryType = hipMemoryTypeArray;
copyParam.srcMemoryType = hipMemoryTypeHost;
copyParam.dstArray = array;
#endif
copyParam.srcHost = hData;
copyParam.srcPitch = width * sizeof(T);
copyParam.WidthInBytes = width * sizeof(T);
copyParam.Height = height;
HIPCHECK(hipMemcpyParam2D(&copyParam));
}
/*
Looks up the texture reference texRefName in Module, configures it (wrap
addressing in dimensions 0 and 1, point filtering, read-as-integer flag,
single-channel `format`) and binds `array` to it. On NVIDIA builds `array`
is a pointer to the array handle, otherwise it is the handle itself.
*/
template <class T> void assignArray2TexRef(hipArray_Format format,
                                           const char* texRefName,
                                           hipModule_t Module,
                                           T array) {
  HIP_TEX_REFERENCE boundRef;
  // The lookup is the same call on both platforms
  HIPCHECK(hipModuleGetTexRef(&boundRef, Module, texRefName));
#if HT_NVIDIA
  HIPCHECK(hipTexRefSetAddressMode(boundRef, 0, CU_TR_ADDRESS_MODE_WRAP));
  HIPCHECK(hipTexRefSetAddressMode(boundRef, 1, CU_TR_ADDRESS_MODE_WRAP));
  HIPCHECK(hipTexRefSetFilterMode(boundRef, HIP_TR_FILTER_MODE_POINT));
  HIPCHECK(hipTexRefSetFlags(boundRef, CU_TRSF_READ_AS_INTEGER));
  HIPCHECK(hipTexRefSetFormat(boundRef, format, 1));
  HIPCHECK(hipTexRefSetArray(boundRef, *array, CU_TRSA_OVERRIDE_FORMAT));
#else
  HIPCHECK(hipTexRefSetAddressMode(boundRef, 0, hipAddressModeWrap));
  HIPCHECK(hipTexRefSetAddressMode(boundRef, 1, hipAddressModeWrap));
  HIPCHECK(hipTexRefSetFilterMode(boundRef, hipFilterModePoint));
  HIPCHECK(hipTexRefSetFlags(boundRef, HIP_TRSF_READ_AS_INTEGER));
  HIPCHECK(hipTexRefSetFormat(boundRef, format, 1));
  HIPCHECK(hipTexRefSetArray(boundRef, array, HIP_TRSA_OVERRIDE_FORMAT));
#endif
}
/*
Compares two row-major buffers of height x width elements.
Returns false at the first position where hData and hOutputData differ,
true when every element matches (including when there is nothing to compare).
*/
template <class T> bool validateOutput(unsigned int width,
                                       unsigned int height,
                                       T* hData,
                                       T* hOutputData) {
  unsigned int row = 0;
  while (row < height) {
    const unsigned int rowStart = row * width;
    for (unsigned int col = 0; col != width; ++col) {
      const unsigned int idx = rowStart + col;
      const bool mismatch = hData[idx] != hOutputData[idx];
      if (mismatch) {
        return false;
      }
    }
    ++row;
  }
  return true;
}
/**
* Validates texture functionality with multiple streams for hipModuleGetTexRef
*
*/
template <class T> bool testTexMultStream(const std::vector<char>& buffer,
hipArray_Format format,
const char* texRefName,
const char* kerFuncName,
unsigned int numOfStreams) {
bool TestPassed = true;
unsigned int width = WIDTH;
unsigned int height = HEIGHT;
unsigned int size = width * height * sizeof(T);
T* hData = reinterpret_cast<T*>(malloc(size));
CTX_CREATE()
HipTest::setDefaultData<T>(width * height, hData, nullptr, nullptr);
// Load Kernel File and create hipArray
hipModule_t Module;
HIPCHECK(hipModuleLoadData(&Module, &buffer[0]));
HIP_ARRAY array;
allocInitArray<T>(width, height, format, &array);
#if HT_NVIDIA
// Copy from hData to array using hipMemcpyParam2D
copyBuffer2Array<T, HIP_ARRAY*>(width, height, hData, &array);
// Get tex reference from the loaded kernel file
// Assign array to the tex reference
assignArray2TexRef<HIP_ARRAY*>(format, texRefName, Module, &array);
#else
// Copy from hData to array using hipMemcpyParam2D
copyBuffer2Array<T, HIP_ARRAY>(width, height, hData, array);
// Get tex reference from the loaded kernel file
// Assign array to the tex reference
assignArray2TexRef<HIP_ARRAY>(format, texRefName, Module, array);
#endif
hipFunction_t Function;
HIPCHECK(hipModuleGetFunction(&Function, Module, kerFuncName));
// Create Multiple Strings
hipStream_t streams[MAX_STREAMS]={0};
T* dData[MAX_STREAMS] = {NULL};
T* hOutputData[MAX_STREAMS] = {NULL};
if (numOfStreams > MAX_STREAMS) {
numOfStreams = MAX_STREAMS;
}
unsigned int totalStreamsCreated = 0;
for (unsigned int stream_num = 0; stream_num < numOfStreams; stream_num++) {
hOutputData[stream_num] = reinterpret_cast<T*>(malloc(size));
if (NULL == hOutputData[stream_num]) {
WARN("Failed to allocate using malloc in testTexMultStream");
TestPassed &= false;
}
HIPCHECK(hipStreamCreate(&streams[stream_num]));
HIPCHECK(hipMalloc(reinterpret_cast<void**>(&dData[stream_num]), size));
memset(hOutputData[stream_num], 0, size);
struct {
void* _Ad;
unsigned int _Bd;
unsigned int _Cd;
} args;
args._Ad = reinterpret_cast<void*>(dData[stream_num]);
args._Bd = width;
args._Cd = height;
size_t sizeTemp = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
&args,
HIP_LAUNCH_PARAM_BUFFER_SIZE,
&sizeTemp,
HIP_LAUNCH_PARAM_END};
int temp1 = width / GRIDDIMX;
int temp2 = height / GRIDDIMY;
HIPCHECK(hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ,
temp1, temp2, BLOCKDIMZ, 0,
streams[stream_num],
NULL, reinterpret_cast<void**>(&config)));
totalStreamsCreated++;
}
// Check the kernel results separately
for (unsigned int stream_num = 0; stream_num < totalStreamsCreated;
stream_num++) {
HIPCHECK(hipStreamSynchronize(streams[stream_num]));
HIPCHECK(hipMemcpy(hOutputData[stream_num], dData[stream_num], size,
hipMemcpyDeviceToHost));
TestPassed &= validateOutput<T>(width, height, hData,
hOutputData[stream_num]);
}
for (unsigned int i = 0; i < totalStreamsCreated; i++) {
HIPCHECK(hipFree(dData[i]));
HIPCHECK(hipStreamDestroy(streams[i]));
free(hOutputData[i]);
}
ARRAY_DESTROY(array)
HIPCHECK(hipModuleUnload(Module));
free(hData);
CTX_DESTROY()
return TestPassed;
}
/**
* Internal Thread Functions
*
*/
void launchSingleStreamMultGPU(int gpu, const std::vector<char>& buffer) {
bool TestPassed = true;
HIPCHECK(hipSetDevice(gpu));
TestPassed = testTexMultStream<float>(buffer,
HIP_AD_FORMAT_FLOAT,
"ftex",
"tex2dKernelFloat", 1);
g_thTestPassed &= static_cast<int>(TestPassed);
}
void launchMultStreamMultGPU(int gpu, const std::vector<char>& buffer) {
bool TestPassed = true;
HIPCHECK(hipSetDevice(gpu));
TestPassed = testTexMultStream<float>(buffer,
HIP_AD_FORMAT_FLOAT,
"ftex",
"tex2dKernelFloat", 3);
g_thTestPassed &= static_cast<int>(TestPassed);
}
/**
* Validates texture functionality with Multiple Streams on multuple GPU
* for hipModuleGetTexRef
*
*/
bool testTexMultStreamMultGPU(unsigned int numOfGPUs,
const std::vector<char>& buffer) {
bool TestPassed = true;
std::thread T[MAX_GPU];
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
T[gpu] = std::thread(launchMultStreamMultGPU, gpu, buffer);
}
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
T[gpu].join();
}
if (g_thTestPassed) {
TestPassed = true;
} else {
TestPassed = false;
}
return TestPassed;
}
/**
* Validates texture functionality with Single Stream on multuple GPU
* for hipModuleGetTexRef
*
*/
bool testTexSingleStreamMultGPU(unsigned int numOfGPUs,
const std::vector<char>& buffer) {
bool TestPassed = true;
std::thread T[MAX_GPU];
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
T[gpu] = std::thread(launchSingleStreamMultGPU, gpu, buffer);
}
for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) {
T[gpu].join();
}
if (g_thTestPassed) {
TestPassed = true;
} else {
TestPassed = false;
}
return TestPassed;
}
/*
This testcase verifies the negative scenarios of hipModuleGetTexRef API.
The code object is loaded once per section run; every section expects
hipModuleGetTexRef to return something other than hipSuccess. The module is
unloaded at the end unless the section already unloaded it.
*/
TEST_CASE("Unit_hipModuleGetTexRef_Negative") {
  hipModule_t Module;
  HIP_TEX_REFERENCE texref;
  CTX_CREATE()
  HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
  // Tracks whether Module still has to be unloaded at the end
  bool moduleLoaded = true;
  SECTION("TexRef as nullptr") {
    REQUIRE(hipModuleGetTexRef(nullptr, Module, "tex") != hipSuccess);
  }
  SECTION("Name as nullptr") {
    REQUIRE(hipModuleGetTexRef(&texref, Module, nullptr) != hipSuccess);
  }
  SECTION("Name as non existing TexName") {
    REQUIRE(hipModuleGetTexRef(&texref, Module,
                               NON_EXISTING_TEX_NAME) != hipSuccess);
  }
  SECTION("Empty tex name") {
    REQUIRE(hipModuleGetTexRef(&texref, Module, EMPTY_TEX_NAME) != hipSuccess);
  }
#if HT_NVIDIA
  SECTION("Name as Global kernel Var") {
    REQUIRE(hipModuleGetTexRef(&texref, Module,
                               GLOBAL_KERNEL_VAR) != hipSuccess);
  }
#endif
  // Uses the stale handle of a module that has already been unloaded
  SECTION("Unload Module") {
    HIP_CHECK(hipModuleUnload(Module));
    moduleLoaded = false;
    REQUIRE(hipModuleGetTexRef(&texref, Module, TEX_REF) != hipSuccess);
  }
  // Every other section leaves the module loaded; release it here
  if (moduleLoaded) {
    HIP_CHECK(hipModuleUnload(Module));
  }
  CTX_DESTROY()
}
/**
* Validates texture type data functionality for hipModuleGetTexRef
* 1. Loads the code object file.
* 2. Based on the template type, texRefName, kerFuncName and format are assigned.
* 3. Allocates an array based on format.
* 4. Assigns the array to the texRef.
* 5. Launches the kernel selected by the template type, which reads the
*    texture through tex2D and writes the values to the output buffer.
* 6. Validates the data.
*/
TEMPLATE_TEST_CASE("Unit_hipModuleGetTexRef_Basic", "", int,
char, uint16_t, float) {
bool TestPassed = true;
constexpr unsigned int width = WIDTH;
constexpr unsigned int height = HEIGHT;
// Size in bytes of one width x height image of TestType elements.
constexpr unsigned int size = width * height * sizeof(TestType);
const char *texRefName, *kerFuncName;
hipArray_Format format;
// Host-side reference data that is uploaded into the array.
TestType* hData = reinterpret_cast<TestType*>(malloc(size));
if (NULL == hData) {
INFO("Failed to allocate using malloc in testTexType.\n");
REQUIRE(false);
}
CTX_CREATE()
HipTest::setDefaultData<TestType>(width * height, hData, nullptr, nullptr);
// Load Kernel File and create hipArray
hipModule_t Module;
HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
HIP_ARRAY array;
// Pick the texture reference name, kernel name and array format that match
// TestType. texRefName/kerFuncName/format stay unassigned for any other type;
// the template list above only instantiates these four.
if (std::is_same<TestType, char>::value) {
texRefName = "ctex";
kerFuncName = "tex2dKernelInt8";
format = HIP_AD_FORMAT_SIGNED_INT8;
} else if (std::is_same<TestType, uint16_t>::value) {
texRefName = "stex";
kerFuncName = "tex2dKernelInt16";
// NOTE(review): an unsigned 16-bit host type is paired with a SIGNED_INT16
// array format here — confirm this is intended.
format = HIP_AD_FORMAT_SIGNED_INT16;
} else if (std::is_same<TestType, int>::value) {
texRefName = "itex";
kerFuncName = "tex2dKernelInt";
format = HIP_AD_FORMAT_SIGNED_INT32;
} else if (std::is_same<TestType, float>::value) {
texRefName = "ftex";
kerFuncName = "tex2dKernelFloat";
format = HIP_AD_FORMAT_FLOAT;
}
allocInitArray<TestType>(width, height, format, &array);
// The helpers take the array by pointer on NVIDIA and by value otherwise.
#if HT_NVIDIA
// Copy from hData to array using hipMemcpyParam2D
copyBuffer2Array<TestType, HIP_ARRAY*>(width, height, hData, &array);
// Get tex reference from the loaded kernel file
// Assign array to the tex reference
assignArray2TexRef<HIP_ARRAY*>(format, texRefName, Module, &array);
#else
// Copy from hData to array using hipMemcpyParam2D
copyBuffer2Array<TestType, HIP_ARRAY>(width, height, hData, array);
// Get tex reference from the loaded kernel file
// Assign array to the tex reference
assignArray2TexRef<HIP_ARRAY>(format, texRefName, Module, array);
#endif
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, kerFuncName));
// Device buffer the kernel writes the sampled texels into.
TestType* dData = NULL;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dData), size));
// Kernel arguments packed in the order of the kernel signature
// (output pointer, width, height) and passed through the "extra" launch
// parameter instead of kernelParams.
struct {
void* _Ad;
unsigned int _Bd;
unsigned int _Cd;
} args;
args._Ad = reinterpret_cast<void*>(dData);
args._Bd = width;
args._Cd = height;
size_t sizeTemp = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
&args,
HIP_LAUNCH_PARAM_BUFFER_SIZE,
&sizeTemp,
HIP_LAUNCH_PARAM_END};
// Block dimensions in x and y; the grid is GRIDDIMX x GRIDDIMY blocks.
// NOTE(review): the integer divisions only cover the full image when width
// and height are multiples of GRIDDIMX / GRIDDIMY.
int temp1 = width / GRIDDIMX;
int temp2 = height / GRIDDIMY;
HIP_CHECK(
hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ,
temp1, temp2, BLOCKDIMZ, 0, 0,
NULL, reinterpret_cast<void**>(&config)));
HIP_CHECK(hipDeviceSynchronize());
// Host buffer receiving the kernel output for comparison against hData.
TestType* hOutputData = reinterpret_cast<TestType*>(malloc(size));
if (NULL == hOutputData) {
INFO("Failed to allocate using malloc in testTexType");
REQUIRE(false);
} else {
memset(hOutputData, 0, size);
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
TestPassed = validateOutput<TestType>(width, height, hData, hOutputData);
REQUIRE(TestPassed);
}
// Release host/device buffers, the array, the module and the context.
free(hOutputData);
HIP_CHECK(hipFree(dData));
ARRAY_DESTROY(array)
HIP_CHECK(hipModuleUnload(Module));
free(hData);
CTX_DESTROY()
}
/*
This testcase verifies hipModuleGetTexRef on multiple streams.
The work is delegated to testTexMultStream<float>, which is given
 * the code-object image returned by load_file(),
 * the float array format,
 * the texture reference name "ftex",
 * the kernel name "tex2dKernelFloat" and
 * MAX_STREAMS.
The test passes when testTexMultStream reports success.
*/
TEST_CASE("Unit_hipModuleGetTexRef_TexMultStream") {
  const auto codeObject = load_file();
  const bool TestPassed = testTexMultStream<float>(
      codeObject, HIP_AD_FORMAT_FLOAT, "ftex", "tex2dKernelFloat",
      MAX_STREAMS);
  REQUIRE(TestPassed);
}
/*
This testcase verifies the hipModuleGetTexRef multithreaded scenario on a
single stream and a multi GPU machine.
1. Gets the device count.
2. Hands the device count and the code-object image to
   testTexSingleStreamMultGPU, which creates one thread per device.
3. The threads are executed in parallel and are joined later.
This testcase ensures that the multi thread execution on a single stream
in parallel is successful.
*/
TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexSingleStreamMultiGPU") {
#if HT_NVIDIA
  // Testcase skipped on nvidia with CUDA API version 11.2,
  // as hipModuleLoadData returning error code
  // 'a PTX JIT compilation failed'(218), which is invalid
  // behavior. Test passes with AMD and previous CUDA versions.
  INFO("Testcase skipped on CUDA version 11.2\n");
  REQUIRE(true);
#else
  int deviceCount = 0;
  const auto codeObject = load_file();
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  const bool TestPassed = testTexSingleStreamMultGPU(deviceCount, codeObject);
  REQUIRE(TestPassed);
#endif
}
/*
This testcase verifies the hipModuleGetTexRef multithreaded scenario on
multiple streams and a multi GPU machine.
1. Gets the device count.
2. Hands the device count and the code-object image to
   testTexMultStreamMultGPU, which creates one thread per device.
3. The threads are executed in parallel and are joined later.
This testcase ensures that the multi thread execution on multiple streams
in parallel is successful.
*/
TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexMultiStreamMultiGPU") {
#if HT_NVIDIA
  // Testcase skipped on nvidia with CUDA API version 11.2,
  // as hipModuleLoadData returning error code
  // 'a PTX JIT compilation failed'(218), which is invalid
  // behavior. Test passes with AMD and previous CUDA versions.
  INFO("Testcase skipped on CUDA version 11.2\n");
  REQUIRE(true);
#else
  int deviceCount = 0;
  const auto codeObject = load_file();
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  const bool TestPassed = testTexMultStreamMultGPU(deviceCount, codeObject);
  REQUIRE(TestPassed);
#endif
}
+34
Просмотреть файл
@@ -0,0 +1,34 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#define fileName "module_kernels.code"
/*
Basic positive test for hipModuleUnload: a module loaded from the code
object file named by fileName must unload without error.
*/
TEST_CASE("Unit_hipModuleUnload_Basic") {
  CTX_CREATE()

  hipModule_t loadedModule;
  // Load first so there is a valid handle to hand to hipModuleUnload.
  HIP_CHECK(hipModuleLoad(&loadedModule, fileName));
  HIP_CHECK(hipModuleUnload(loadedModule));

  CTX_DESTROY()
}
+229
Просмотреть файл
@@ -0,0 +1,229 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase reads the OpenCL kernel file and generates the code object
file which gets executed through the HIP interface.
This testcase verifies for the
1. Current GPU architecture
2. Code object version v3
*/
#ifdef __linux__
#include <unistd.h>
#include <experimental/filesystem>
#endif
#include <stdlib.h>
#include <string.h>
#include <string>
#include "hip_test_common.hh"
#include "hip_test_checkers.hh"
#define OPENCL_OBJ_FILE "opencl_add.cc"
#define HIP_CODEOBJ_FILE_DEFAULT "opencl_add.co"
#define HIP_CODEOBJ_FILE_V3 "opencl_add_v3.co"
#define COMMAND_LEN 256
#define BUFFER_LEN 256
#ifdef __linux__
/* Check if environment variable $ROCM_PATH is defined and non-empty.
 *
 * The environment is read directly with getenv() instead of spawning
 * `echo $ROCM_PATH` through popen(): the answer is the same, but no shell is
 * started and there is no pipe whose creation could fail.
 *
 * Returns true when ROCM_PATH is set to a non-empty value, false otherwise.
 */
static bool isRocmPathSet() {
  const char* rocmPath = getenv("ROCM_PATH");
  return (rocmPath != nullptr) && (rocmPath[0] != '\0');
}
/* Gets the sramecc/xnack settings from rocminfo and translates them into the
 * matching clang options.
 *
 * Runs `rocminfo | grep -m1 "sramecc.:xnack."` and inspects the first
 * matching line:
 *   sramecc+ -> "-msram-ecc"      sramecc- -> "-mno-sram-ecc"
 *   xnack+   -> " -mxnack"        xnack-   -> " -mno-xnack"
 *
 * feature     - output buffer receiving the two options joined together,
 *               NUL-terminated. Must hold at least
 *               sizeof("-mno-sram-ecc -mno-xnack") (25) bytes.
 * rocmPathSet - true to run $ROCM_PATH/bin/rocminfo, false to run
 *               /opt/rocm/bin/rocminfo.
 *
 * Returns 0 on success; -1 when feature is null, the command cannot be
 * started, no line is produced or the line lacks a sramecc/xnack marker.
 * feature is left untouched on failure.
 */
int getV3TargetIdFeature(char* feature, bool rocmPathSet) {
  if (feature == nullptr) {
    return -1;
  }
  // $ROCM_PATH covers STG2 testing where the /opt/rocm path is not present.
  const char* rocminfo = rocmPathSet ? "$ROCM_PATH/bin/rocminfo"
                                     : "/opt/rocm/bin/rocminfo";
  // Build the whole command with one bounded snprintf; strncat's size
  // argument is a count of characters to append, not the buffer size.
  char command[COMMAND_LEN] = "";
  snprintf(command, COMMAND_LEN, "%s | grep -m1 \"sramecc.:xnack.\"",
           rocminfo);
  FILE* fpipe = popen(command, "r");
  if (fpipe == nullptr) {
    WARN("Unable to create command file");
    return -1;
  }
  char command_op[BUFFER_LEN] = "";
  const bool gotLine = (fgets(command_op, BUFFER_LEN, fpipe) != nullptr);
  // Only the first line is needed, so the pipe can be closed right away.
  pclose(fpipe);
  if (!gotLine) {
    printf("No sramecc/xnack settings found.\n");
    return -1;
  }
  const char* sramEccOpt = nullptr;
  if (strstr(command_op, "sramecc+")) {
    sramEccOpt = "-msram-ecc";
  } else if (strstr(command_op, "sramecc-")) {
    sramEccOpt = "-mno-sram-ecc";
  } else {
    return -1;
  }
  const char* xnackOpt = nullptr;
  if (strstr(command_op, "xnack+")) {
    xnackOpt = " -mxnack";
  } else if (strstr(command_op, "xnack-")) {
    xnackOpt = " -mno-xnack";
  } else {
    return -1;
  }
  // strncpy(feature, opt, strlen(opt)) never wrote a terminating NUL, so the
  // result was only a valid string when the caller's buffer happened to be
  // zero-filled. snprintf always terminates and is bounded by the longest
  // possible result.
  snprintf(feature, sizeof("-mno-sram-ecc -mno-xnack"), "%s%s",
           sramEccOpt, xnackOpt);
  return 0;
}
#endif
/**
 * Validates OpenCL Static Lds Code Object where
 * 1. Tries to access the OpenCL kernel file and copies it to the current
 *    folder when it is not already there.
 * 2. Builds the clang command line (using $ROCM_PATH when it is set,
 *    /opt/rocm otherwise) and executes it to generate the code object file.
 *    The -mcode-object-version argument specifies the code object version;
 *    the test runs once with the default version and once with version 3.
 * 3. Launches the kernel which copies one buffer to another.
 * 4. Validates the result.
 * A run is skipped (returns early) when the V3 target options cannot be
 * determined or when the code object file was not generated.
 */
TEST_CASE("Unit_hipModuleLoad_OpenCLStaticCodeObjV3") {
#ifdef __linux__
  auto codeobj_type = GENERATE(0, 1);
  char v3option[32] = "";
  hipDeviceProp_t props;
  // props.gcnArchName feeds -mcpu below, so a failed query must not go
  // unnoticed.
  HIPCHECK(hipGetDeviceProperties(&props, 0));
  std::string path = std::experimental::filesystem::current_path();
  WARN("path is " << path.c_str());
  if (access("./opencl_add.cc", F_OK) == -1) {
    system("cp ./../../../../hip-on-rocclr/tests/catch/unit/module/opencl_add.cc .");
  }
  // Generate the command to translate the OpenCL code object to hip code object
  const char *pCodeObjVer = nullptr;
  const char *pCodeObjFile = nullptr;
  bool rocmPathSet = isRocmPathSet();
  if (codeobj_type == 0) {
    pCodeObjVer = "";
    pCodeObjFile = HIP_CODEOBJ_FILE_DEFAULT;
  } else {
    pCodeObjVer = "-mcode-object-version=3";
    if (-1 == getV3TargetIdFeature(v3option, rocmPathSet)) {
      // Actually leave the test: continuing would build with incomplete
      // options and fail later in hipModuleLoad.
      WARN("Error getting V3 Option. Skipping Test. \n");
      REQUIRE(true);
      return;
    }
    pCodeObjFile = HIP_CODEOBJ_FILE_V3;
  }
  INFO("v3option "<< v3option);
  // The command is assembled in a std::string so that its length is not
  // limited by a fixed buffer (appending with strncat(dst, src, COMMAND_LEN)
  // could write past a COMMAND_LEN-sized destination).
  // $ROCM_PATH covers STG2 testing where the /opt/rocm path is not present.
  std::string command = rocmPathSet ? "$ROCM_PATH/llvm/bin/clang"
                                    : "/opt/rocm/llvm/bin/clang";
  command += " -target amdgcn-amd-amdhsa -x cl ";
  command += "-include `find /opt/rocm* -name opencl-c.h` ";
  command += pCodeObjVer;
  command += " ";
  command += v3option;
  command += " -mcpu=";
  command += props.gcnArchName;
  command += " -o ";
  command += pCodeObjFile;
  command += " ";
  command += OPENCL_OBJ_FILE;
  INFO("command executed "<< command);
  system(command.c_str());
  // Check if the code object file is created
  const std::string codeObjPath = std::string("./") + pCodeObjFile;
  if (access(codeObjPath.c_str(), F_OK) == -1) {
    // Nothing to load: skip instead of failing in hipModuleLoad.
    WARN("Code Object File not found \n");
    REQUIRE(true);
    return;
  }
  hipDevice_t device;
  hipModule_t Module;
  hipFunction_t Function;
  HIPCHECK(hipDeviceGet(&device, 0));
  HIPCHECK(hipModuleLoad(&Module, pCodeObjFile));
  HIPCHECK(hipModuleGetFunction(&Function, Module, "add"));
  float *Ah, *Bh, *Ad, *Bd;
  HipTest::initArrays<float>(&Ad, &Bd, nullptr, &Ah, &Bh, nullptr,
                             BUFFER_LEN, false);
  HIPCHECK(hipMemcpy(Ad, Ah, sizeof(float) * BUFFER_LEN,
                     hipMemcpyHostToDevice));
  // Argument order follows the kernel signature add(output, input):
  // Bd is the output buffer, Ad the input buffer.
  struct {
    void* _Bd;
    void* _Ad;
  } args;
  args._Ad = static_cast<void*>(Ad);
  args._Bd = static_cast<void*>(Bd);
  size_t size = sizeof(args);
  void *config[] = {
    HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
    HIP_LAUNCH_PARAM_END
  };
  // One block of BUFFER_LEN threads, one element per thread.
  HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, BUFFER_LEN, 1, 1, 0, 0,
                                 NULL, reinterpret_cast<void**>(&config)));
  HIPCHECK(hipMemcpy(Bh, Bd, sizeof(float) * BUFFER_LEN,
                     hipMemcpyDeviceToHost));
  for (uint32_t i = 0; i < BUFFER_LEN; i++) {
    REQUIRE(Ah[i] == Bh[i]);
  }
  HipTest::freeArrays<float>(Ad, Bd, nullptr,
                             Ah, Bh, nullptr, false);
  // Release the module loaded above.
  HIPCHECK(hipModuleUnload(Module));
#else
  INFO("This test is skipped due to non linux environment.\n");
  REQUIRE(true);
#endif
}
+167
Просмотреть файл
@@ -0,0 +1,167 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include "hip/hip_runtime.h"
// NOTE(review): GLOBAL_BUF_SIZE is not referenced by the kernels below.
#define GLOBAL_BUF_SIZE 2048
// Modulus applied by test_globals when indexing myDeviceGlobalArray.
#define ARRAY_SIZE (16)
// 2D texture references sampled by the tex2dKernel* kernels below,
// one per element type.
texture<float, 2, hipReadModeElementType> ftex;
texture<int, 2, hipReadModeElementType> itex;
texture<uint16_t, 2, hipReadModeElementType> stex;
texture<char, 2, hipReadModeElementType> ctex;
// State word read and rewritten by TwoSecKernel and FourSecKernel.
__device__ int deviceGlobal = 1;
// Managed counter incremented by GPU_func.
__managed__ int x = 10;
// Values added to each element by test_globals.
__device__ float myDeviceGlobal;
__device__ float myDeviceGlobalArray[16];
// Inputs of testWeightedCopy (deviceGlobalFloat is read through
// getSquareOfGlobalFloat).
__device__ float deviceGlobalFloat;
__device__ int deviceGlobalInt1;
__device__ int deviceGlobalInt2;
__device__ uint16_t deviceGlobalShort;
__device__ char deviceGlobalChar;
/**
 * Copies the 2D float texture `ftex` into a row-major output buffer.
 * Each thread handles the texel at its global (x, y) thread index and writes
 * it to outputData[y * width + x]; threads outside width x height do nothing.
 */
extern "C" __global__ void tex2dKernelFloat(float* outputData,
                                            int width, int height) {
  int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height; comparing the row against width (as before)
  // reads and writes out of range whenever height < width.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(ftex, col, row);
  }
}
/**
 * Copies the 2D int texture `itex` into a row-major output buffer.
 * Each thread handles the texel at its global (x, y) thread index and writes
 * it to outputData[y * width + x]; threads outside width x height do nothing.
 */
extern "C" __global__ void tex2dKernelInt(int* outputData,
                                          int width, int height) {
  int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height; comparing the row against width (as before)
  // reads and writes out of range whenever height < width.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(itex, col, row);
  }
}
/**
 * Copies the 2D uint16_t texture `stex` into a row-major output buffer.
 * Each thread handles the texel at its global (x, y) thread index and writes
 * it to outputData[y * width + x]; threads outside width x height do nothing.
 */
extern "C" __global__ void tex2dKernelInt16(uint16_t* outputData,
                                            int width, int height) {
  int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height; comparing the row against width (as before)
  // reads and writes out of range whenever height < width.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(stex, col, row);
  }
}
/**
 * Copies the 2D char texture `ctex` into a row-major output buffer.
 * Each thread handles the texel at its global (x, y) thread index and writes
 * it to outputData[y * width + x]; threads outside width x height do nothing.
 */
extern "C" __global__ void tex2dKernelInt8(char* outputData,
                                           int width, int height) {
  int col = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
  int row = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
  // Rows are bounded by height; comparing the row against width (as before)
  // reads and writes out of range whenever height < width.
  if ((col < width) && (row < height)) {
    outputData[row * width + col] = tex2D(ctex, col, row);
  }
}
/**
 * Integer N x N matrix product C = A * B on row-major buffers.
 * Each thread computes the single element of C addressed by its global
 * (y, x) thread index; threads outside the N x N range do nothing.
 * The clockrate parameter is accepted but not used.
 */
extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C,
                                   int N) {
  const int row = blockIdx.y * blockDim.y + threadIdx.y;
  const int col = blockIdx.x * blockDim.x + threadIdx.x;
  if ((row >= N) || (col >= N)) {
    return;
  }
  // Dot product of row `row` of A with column `col` of B.
  int dot = 0;
  for (int k = 0; k < N; ++k) {
    dot += A[row * N + k] * B[k * N + col];
  }
  C[row * N + col] = dot;
}
/**
 * Integer N x N matrix product on row-major buffers, written to two outputs:
 * C = A * B and D = A * B.
 * Each thread computes the element addressed by its global (y, x) thread
 * index; threads outside the N x N range do nothing.
 */
extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C,
                                                int *D, int N) {
  int ROW = blockIdx.y*blockDim.y+threadIdx.y;
  int COL = blockIdx.x*blockDim.x+threadIdx.x;
  if (ROW < N && COL < N) {
    int tmpSum = 0;
    // each thread computes one element of the block sub-matrix
    for (int i = 0; i < N; i++) {
      tmpSum += A[ROW * N + i] * B[i * N + COL];
    }
    // The stores belong inside the guard (as in matmulK): outside it,
    // threads beyond the matrix edge overwrite other elements or write past
    // the end of C and D.
    C[ROW * N + COL] = tmpSum;
    D[ROW * N + COL] = tmpSum;
  }
}
/** Calls HipTest::waitKernel with a first argument of 16 and the given clockrate. */
extern "C" __global__ void SixteenSecKernel(int clockrate) {
  constexpr int kWaitArg = 16;
  HipTest::waitKernel(kWaitArg, clockrate);
}
/**
 * Updates deviceGlobal around a HipTest::waitKernel(2, clockrate) call:
 * 0x2222 becomes 0x3333 before the wait, and any value other than 0x3333
 * becomes 0x5555 after it.
 */
extern "C" __global__ void TwoSecKernel(int clockrate) {
  const bool sawMarkerBeforeWait = (deviceGlobal == 0x2222);
  if (sawMarkerBeforeWait) {
    deviceGlobal = 0x3333;
  }

  HipTest::waitKernel(2, clockrate);

  const bool stillOwnValue = (deviceGlobal == 0x3333);
  if (!stillOwnValue) {
    deviceGlobal = 0x5555;
  }
}
/**
 * Updates deviceGlobal around a HipTest::waitKernel(4, clockrate) call:
 * 1 becomes 0x2222 before the wait, and 0x2222 becomes 0x4444 after it.
 */
extern "C" __global__ void FourSecKernel(int clockrate) {
  const bool sawInitialValue = (deviceGlobal == 1);
  if (sawInitialValue) {
    deviceGlobal = 0x2222;
  }

  HipTest::waitKernel(4, clockrate);

  const bool markerUntouched = (deviceGlobal == 0x2222);
  if (markerUntouched) {
    deviceGlobal = 0x4444;
  }
}
/** Increments the managed variable x by one. */
extern "C" __global__ void GPU_func() {
  x += 1;
}
/** Returns deviceGlobalFloat squared, converted to int. */
__device__ int getSquareOfGlobalFloat() {
  const float value = deviceGlobalFloat;
  const float squared = value * value;
  return static_cast<int>(squared);
}
/**
 * For the element indexed by hipThreadIdx_x, stores into b
 *   deviceGlobalInt1 * a[i] + deviceGlobalInt2 + deviceGlobalShort
 *   + deviceGlobalChar + deviceGlobalFloat^2 (as int).
 */
extern "C" __global__ void testWeightedCopy(int* a, int* b) {
  const int idx = hipThreadIdx_x;
  // Accumulate the terms in the same left-to-right order as one expression.
  int weighted = deviceGlobalInt1 * a[idx];
  weighted += deviceGlobalInt2;
  weighted += static_cast<int>(deviceGlobalShort);
  weighted += static_cast<int>(deviceGlobalChar);
  weighted += getSquareOfGlobalFloat();
  b[idx] = weighted;
}
/** Copies a[i] to b[i] for the element indexed by hipThreadIdx_x. */
extern "C" __global__ void hello_world(const float* a, float* b) {
  const int idx = hipThreadIdx_x;
  const float value = a[idx];
  b[idx] = value;
}
/**
 * For the element indexed by hipThreadIdx_x, stores into b the sum of a[i],
 * myDeviceGlobal and myDeviceGlobalArray[i % ARRAY_SIZE].
 */
extern "C" __global__ void test_globals(const float* a, float* b) {
  const int idx = hipThreadIdx_x;
  // Same evaluation order as (a[i] + myDeviceGlobal) + array element.
  float sum = a[idx] + myDeviceGlobal;
  sum += myDeviceGlobalArray[idx % ARRAY_SIZE];
  b[idx] = sum;
}
// Kernel with an empty body: takes no arguments and performs no work.
extern "C" __global__ void EmptyKernel() {
}
+37
Просмотреть файл
@@ -0,0 +1,37 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
 * Copies input to output, routing the first 100 elements through a
 * statically sized local-memory array.
 * The work-item with global id 0 fills the local array from input[0..99];
 * after the barrier, work-items with id < 100 write their element from the
 * local array and all other work-items copy straight from input.
 */
kernel void add(global float* output, global float* input) {
  __local float lds[100];
  const int gid = get_global_id(0);
  if (gid == 0) {
    for (int k = 0; k < 100; ++k) {
      lds[k] = input[k];
    }
  }
  /* Reached by every work-item, outside the conditional above. */
  barrier(CLK_LOCAL_MEM_FENCE);
  output[gid] = (gid < 100) ? lds[gid] : input[gid];
}