diff --git a/catch/hipTestMain/CMakeLists.txt b/catch/hipTestMain/CMakeLists.txt index 2b3cb3f60c..5b62dbbb49 100644 --- a/catch/hipTestMain/CMakeLists.txt +++ b/catch/hipTestMain/CMakeLists.txt @@ -15,6 +15,7 @@ target_link_libraries(UnitTests PRIVATE UnitDeviceTests EventTest OccupancyTest DeviceTest + ModuleTest RTC stdc++fs) @@ -36,6 +37,7 @@ target_link_libraries(ABMTests PRIVATE ABMAddKernels stdc++fs) catch_discover_tests(ABMTests PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST") +add_dependencies(UnitTests module_kernels.code) add_dependencies(build_tests UnitTests ABMTests) @@ -63,7 +65,7 @@ else() target_compile_options(StressTest PUBLIC -std=c++17) endif() if(HIP_PLATFORM MATCHES "amd") -target_link_libraries(StressTest PRIVATE printf stream) +target_link_libraries(StressTest PRIVATE printf stream module) endif() target_link_libraries(StressTest PRIVATE memory stdc++fs) add_dependencies(build_stress_test StressTest) diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index 9107740333..9ffe839739 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -1,6 +1,5 @@ /* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. - +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -23,6 +22,13 @@ THE SOFTWARE. #pragma once #include "hip_test_context.hh" #include +#ifdef __linux__ +#include +#elif defined(_WIN32) +#include +#endif + + #define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__); @@ -72,6 +78,27 @@ THE SOFTWARE. } +#if HT_NVIDIA +#define CTX_CREATE() \ + hipCtx_t context;\ + initHipCtx(&context); +#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context)); +#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array)); +#define HIP_TEX_REFERENCE hipTexRef +#define HIP_ARRAY hiparray +static void initHipCtx(hipCtx_t *pcontext) { + HIPCHECK(hipInit(0)); + hipDevice_t device; + HIPCHECK(hipDeviceGet(&device, 0)); + HIPCHECK(hipCtxCreate(pcontext, 0, device)); +} +#else +#define CTX_CREATE() +#define CTX_DESTROY() +#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array)); +#define HIP_TEX_REFERENCE textureReference* +#define HIP_ARRAY hipArray* +#endif // Utility Functions namespace HipTest { @@ -104,4 +131,34 @@ static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlo return blocks; } +// Get Free Memory from the system +static size_t getMemoryAmount() { +#if __linux__ + struct sysinfo info; + sysinfo(&info); + return info.freeram / (1024 * 1024); // MB +#elif defined(_WIN32) + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + GlobalMemoryStatusEx(&statex); + return (statex.ullAvailPhys / (1024 * 1024)); // MB +#endif +} + +static inline size_t getHostThreadCount(const size_t memPerThread = 200, const size_t maxThreads = 0) { + if (memPerThread == 0) return 0; + auto memAmount = getMemoryAmount(); + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count == 0 || memAmount == 0) return 0; + size_t thread_count = 0; + if ((processor_count * memPerThread) < memAmount) + thread_count = processor_count; + else + thread_count = reinterpret_cast(memAmount / memPerThread); + if (maxThreads > 0) { + return (thread_count > maxThreads) ? maxThreads : thread_count; + } + return thread_count; +} + } diff --git a/catch/include/hip_test_helper.hh b/catch/include/hip_test_helper.hh index 1d0a57b776..08b6bec0c6 100644 --- a/catch/include/hip_test_helper.hh +++ b/catch/include/hip_test_helper.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/catch/include/hip_test_kernels.hh b/catch/include/hip_test_kernels.hh index 04b00b5ad3..3805b4cbbc 100644 --- a/catch/include/hip_test_kernels.hh +++ b/catch/include/hip_test_kernels.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -72,6 +72,35 @@ __global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) } } +template +__device__ void waitKernel(uint64_t wait_sec, T clockrate) { + uint64_t start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < (wait_sec*1000)); +} + +template +__global__ void TwoSecKernel_GlobalVar(int globalvar, int clockrate) { + if (globalvar == 0x2222) { + globalvar = 0x3333; + } + waitKernel(2, clockrate); + if (globalvar != 0x3333) { + globalvar = 0x5555; + } +} + +template +__global__ void FourSecKernel_GlobalVar(int globalvar, int clockrate) { + if (globalvar == 1) { + globalvar = 0x2222; + } + waitKernel(4, clockrate); + if (globalvar == 0x2222) { + globalvar = 0x4444; + } +} + + template __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); size_t stride = blockDim.x * gridDim.x; diff --git a/catch/stress/CMakeLists.txt b/catch/stress/CMakeLists.txt index 00e12f2c2c..04038e0b8a 100644 --- a/catch/stress/CMakeLists.txt +++ b/catch/stress/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(memory) +add_subdirectory(module) if(HIP_PLATFORM MATCHES "amd") add_subdirectory(printf) add_subdirectory(stream) diff --git a/catch/stress/memory/CMakeLists.txt b/catch/stress/memory/CMakeLists.txt index 5afd69ee11..4338216fad 100644 --- a/catch/stress/memory/CMakeLists.txt +++ b/catch/stress/memory/CMakeLists.txt @@ -2,6 +2,7 @@ set(TEST_SRC memcpy.cc hipMemcpyMThreadMSize.cc + hipMemcpyBoundaryOffsetCheck.cc ) # Create shared lib of all tests diff --git a/catch/stress/memory/hipMemcpyBoundaryOffsetCheck.cc b/catch/stress/memory/hipMemcpyBoundaryOffsetCheck.cc new file mode 100644 index 0000000000..b81e575bcf --- /dev/null +++ b/catch/stress/memory/hipMemcpyBoundaryOffsetCheck.cc @@ -0,0 +1,344 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* +This testcase verifies following scenarios +3. Boundary checks with different sizes +5. device offset scenario +*/ +#include +#include +#include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#else +#include "sys/types.h" +#include "sys/sysinfo.h" +#endif +static constexpr auto NUM_ELM{4*1024 * 1024}; +template +class DeviceMemory { + public: + explicit DeviceMemory(size_t numElements); + DeviceMemory() = delete; + ~DeviceMemory(); + T* A_d() const { return _A_d + _offset; } + T* B_d() const { return _B_d + _offset; } + T* C_d() const { return _C_d + _offset; } + T* C_dd() const { return _C_dd + _offset; } + size_t maxNumElements() const { return _maxNumElements; } + void offset(int offset) { _offset = offset; } + int offset() const { return _offset; } + private: + T* _A_d; + T* _B_d; + T* _C_d; + T* _C_dd; + size_t _maxNumElements; + int _offset; +}; +template +DeviceMemory::DeviceMemory(size_t numElements) : + _maxNumElements(numElements), _offset(0) { + T** np = nullptr; + HipTest::initArrays(&_A_d, &_B_d, &_C_d, np, np, np, numElements, 0); + size_t sizeElements = numElements * sizeof(T); + HIP_CHECK(hipMalloc(&_C_dd, sizeElements)); +} +template +DeviceMemory::~DeviceMemory() { + T* np = nullptr; + HipTest::freeArrays(_A_d, _B_d, _C_d, np, np, np, 0); + HIP_CHECK(hipFree(_C_dd)); + _C_dd = NULL; +} +template +class HostMemory { + public: + HostMemory(size_t numElements, bool usePinnedHost); + HostMemory() = delete; + void reset(size_t numElements, bool full = false); + ~HostMemory(); + T* A_h() const { return _A_h + _offset; } + T* B_h() const { return _B_h + _offset; } + T* C_h() const { return _C_h + _offset; } + size_t maxNumElements() const { return _maxNumElements; } + void offset(int offset) { _offset = offset; } + int offset() const { return _offset; } + // Host arrays, secondary copy + T* A_hh; + T* B_hh; + bool _usePinnedHost; + private: + size_t _maxNumElements; + int _offset; + // Host arrays + T* _A_h; + T* _B_h; + T* _C_h; +}; + template +HostMemory::HostMemory(size_t numElements, bool usePinnedHost) + : _usePinnedHost(usePinnedHost), _maxNumElements(numElements), _offset(0) { + T** np = nullptr; + HipTest::initArrays(np, np, np, &_A_h, &_B_h, &_C_h, + numElements, usePinnedHost); + A_hh = NULL; + B_hh = NULL; + size_t sizeElements = numElements * sizeof(T); + if (usePinnedHost) { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_hh), sizeElements, + hipHostMallocDefault)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&B_hh), sizeElements, + hipHostMallocDefault)); + } else { + A_hh = reinterpret_cast(malloc(sizeElements)); + B_hh = reinterpret_cast(malloc(sizeElements)); + } + } +template +void HostMemory::reset(size_t numElements, bool full) { + // Initialize the host data: + for (size_t i = 0; i < numElements; i++) { + (A_hh)[i] = 1097.0 + i; + (B_hh)[i] = 1492.0 + i; // Phi + if (full) { + (_A_h)[i] = 3.146f + i; // Pi + (_B_h)[i] = 1.618f + i; // Phi + } + } +} +template +HostMemory::~HostMemory() { + HipTest::freeArraysForHost(_A_h, _B_h, _C_h, _usePinnedHost); + if (_usePinnedHost) { + HIP_CHECK(hipHostFree(A_hh)); + HIP_CHECK(hipHostFree(B_hh)); + } else { + free(A_hh); + free(B_hh); + } +} +#ifdef _WIN32 +void memcpytest2_get_host_memory(size_t *free, size_t *total) { + MEMORYSTATUSEX status; + status.dwLength = sizeof(status); + GlobalMemoryStatusEx(&status); + // Windows doesn't allow allocating more than half of system memory to the gpu + // Since the runtime also needs space for its internal allocations, + // we should not try to allocate more than 40% of reported system memory, + // otherwise we can run into OOM issues. + *free = static_cast(0.4 * status.ullAvailPhys); + *total = static_cast(0.4 * status.ullTotalPhys); +} +#else +struct sysinfo memInfo; +void memcpytest2_get_host_memory(size_t *free, size_t *total) { + sysinfo(&memInfo); + uint64_t freePhysMem = memInfo.freeram; + freePhysMem *= memInfo.mem_unit; + *free = freePhysMem; + uint64_t totalPhysMem = memInfo.totalram; + totalPhysMem *= memInfo.mem_unit; + *total = totalPhysMem; +} +#endif +//--- +// Test many different kinds of memory copies. +// The subroutine allocates memory , copies to device, runs a vector +// add kernel, copies back, and +// checks the result. +// +// IN: numElements controls the number of elements used for allocations. +// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned +// else allocate host +// memory with malloc. IN: useHostToHost : If true, add an extra +// host-to-host copy. IN: +// useDeviceToDevice : If true, add an extra deviceto-device copy after +// result is produced. IN: +// useMemkindDefault : If true, use memkinddefault +// (runtime figures out direction). if false, use +// explicit memcpy direction. +// +template +void memcpytest2(DeviceMemory* dmem, HostMemory* hmem, + size_t numElements, bool useHostToHost, + bool useDeviceToDevice, bool useMemkindDefault) { + size_t sizeElements = numElements * sizeof(T); + hmem->reset(numElements); + assert(numElements <= dmem->maxNumElements()); + assert(numElements <= hmem->maxNumElements()); + if (useHostToHost) { + // Do some extra host-to-host copies here to mix things up: + HIP_CHECK(hipMemcpy(hmem->A_hh, hmem->A_h(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost)); + HIP_CHECK(hipMemcpy(hmem->B_hh, hmem->B_h(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost)); + HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_hh, sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_hh, sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } else { + HIP_CHECK(hipMemcpy(dmem->A_d(), hmem->A_h(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(dmem->B_d(), hmem->B_h(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } + hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, + static_cast(dmem->A_d()), static_cast(dmem->B_d()), + dmem->C_d(), numElements); + if (useDeviceToDevice) { + // Do an extra device-to-device copy here to mix things up: + HIP_CHECK(hipMemcpy(dmem->C_dd(), dmem->C_d(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToDevice)); + // Destroy the original dmem->C_d(): + HIP_CHECK(hipMemset(dmem->C_d(), 0x5A, sizeElements)); + HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_dd(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost)); + } else { + HIP_CHECK(hipMemcpy(hmem->C_h(), dmem->C_d(), sizeElements, + useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipDeviceSynchronize()); + HipTest::checkVectorADD(hmem->A_h(), hmem->B_h(), hmem->C_h(), numElements); +} +// Try all the 16 possible combinations to memcpytest2 - usePinnedHost, +// useHostToHost, +// useDeviceToDevice, useMemkindDefault +template +void memcpytest2_for_type(size_t numElements) { + DeviceMemory memD(numElements); + HostMemory memU(numElements, 0 /*usePinnedHost*/); + HostMemory memP(numElements, 1 /*usePinnedHost*/); + for (int usePinnedHost = 0; usePinnedHost <= 1; usePinnedHost++) { + for (int useHostToHost = 0; useHostToHost <= 1; useHostToHost++) { + for (int useDeviceToDevice = 0; useDeviceToDevice <= 1; + useDeviceToDevice++) { + for (int useMemkindDefault = 0; useMemkindDefault <= 1; + useMemkindDefault++) { + memcpytest2(&memD, usePinnedHost ? &memP : &memU, + numElements, useHostToHost, + useDeviceToDevice, useMemkindDefault); + } + } + } + } +} +// Try many different sizes to memory copy. +template +void memcpytest2_sizes(size_t maxElem = 0) { + int deviceId; + HIP_CHECK(hipGetDevice(&deviceId)); + size_t free, total, freeCPU, totalCPU; + HIP_CHECK(hipMemGetInfo(&free, &total)); + memcpytest2_get_host_memory(&freeCPU, &totalCPU); + if (maxElem == 0) { + // Use lesser maxElem if not enough host memory available + size_t maxElemGPU = free / sizeof(T) / 8; + size_t maxElemCPU = freeCPU / sizeof(T) / 8; + maxElem = maxElemGPU < maxElemCPU ? maxElemGPU : maxElemCPU; + } + HIP_CHECK(hipDeviceReset()); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0 /*usePinnedHost*/); + HostMemory memP(maxElem, 1 /*usePinnedHost*/); + for (size_t elem = 1; elem <= maxElem; elem *= 2) { + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host + } +} +// Try many different sizes to memory copy. +template +void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) { + int deviceId; + HIP_CHECK(hipGetDevice(&deviceId)); + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + HIP_CHECK(hipDeviceReset()); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0 /*usePinnedHost*/); + HostMemory memP(maxElem, 1 /*usePinnedHost*/); + size_t elem = maxElem / 2; + for (size_t offset = 0; offset < 512; offset++) { + assert(elem + offset < maxElem); + if (devOffsets) { + memD.offset(offset); + } + if (hostOffsets) { + memU.offset(offset); + memP.offset(offset); + } + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host + } + for (size_t offset = 512; offset < elem; offset *= 2) { + assert(elem + offset < maxElem); + if (devOffsets) { + memD.offset(offset); + } + if (hostOffsets) { + memU.offset(offset); + memP.offset(offset); + } + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host + } +} +// Create multiple threads to stress multi-thread locking behavior in the +// allocation/deallocation/tracking logic: +template +void multiThread_1(bool serialize, bool usePinnedHost) { + DeviceMemory memD(NUM_ELM); + HostMemory mem1(NUM_ELM, usePinnedHost); + HostMemory mem2(NUM_ELM, usePinnedHost); + std::thread t1(memcpytest2, &memD, &mem1, NUM_ELM, 0, 0, 0); + if (serialize) { + t1.join(); + } + std::thread t2(memcpytest2, &memD, &mem2, NUM_ELM, 0, 0, 0); + if (serialize) { + t2.join(); + } +} +/* +This testcase verfies the boundary checks of hipMemcpy API for different sizes +*/ +TEST_CASE("Unit_hipMemcpy_BoundaryCheck") { + size_t maxElem = 32 * 1024 * 1024; + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0 /*usePinnedHost*/); + HostMemory memP(maxElem, 0 /*usePinnedHost*/); + memcpytest2(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0); + auto sizes = GENERATE(15 * 1024 * 1024, 16 * 1024 * 1024, + 16 * 1024 * 1024 + 16 * 1024, + 16 * 1024 * 1024 + 512 * 1024, + 17 * 1024 * 1024 + 1024, + 32 * 1024 * 1024); + memcpytest2(&memD, &memP, sizes, 0, 0, 0); +} + +/* +This testcase verifies the device offsets +*/ +TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) { + HIP_CHECK(hipDeviceReset()); + size_t maxSize = 256 * 1024; + memcpytest2_offsets(maxSize, true, false); + memcpytest2_offsets(maxSize, false, true); +} diff --git a/catch/stress/module/CMakeLists.txt b/catch/stress/module/CMakeLists.txt new file mode 100644 index 0000000000..17ebb212e6 --- /dev/null +++ b/catch/stress/module/CMakeLists.txt @@ -0,0 +1,19 @@ +# Common Tests - Test independent of all platforms +if(HIP_PLATFORM MATCHES "amd") +set(TEST_SRC + hipExtModuleLaunchKernel_CornerTest.cc + hipModuleLaunchKernel_CornerTests.cc +) +else() +set(TEST_SRC + hipModuleLaunchKernel_CornerTests.cc +) +endif() + +add_custom_target(kernels.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${HIP_COMMON_DIR}/tests/catch/stress/module/kernels.cc -o ${HIP_PATH}/catch/hipTestMain/kernels.code -I${HIP_PATH}/include/ -I${HIP_COMMON_DIR}/tests/catch/include) + +# Create shared lib of all tests +add_library(module SHARED EXCLUDE_FROM_ALL ${TEST_SRC}) + +# Add dependency on build_tests to build it on this custom target +add_dependencies(build_stress_test module kernels.code) diff --git a/catch/stress/module/hipExtModuleLaunchKernel_CornerTest.cc b/catch/stress/module/hipExtModuleLaunchKernel_CornerTest.cc new file mode 100644 index 0000000000..05c17c7b9b --- /dev/null +++ b/catch/stress/module/hipExtModuleLaunchKernel_CornerTest.cc @@ -0,0 +1,86 @@ +/* + Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* +Test Scenario +hipExtModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions +*/ + +#include "hip_test_common.hh" +#include "hip_test_kernels.hh" +#include "hip/hip_ext.h" + +#define fileName "kernels.code" +#define dummyKernel "EmptyKernel" + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; + +/* +This testcase verifies hipExtModuleLaunchKernel API Corner +cases +*/ +TEST_CASE("Stress_hipExtModuleLaunchKernel_CornerCases") { + hipModule_t Module; + hipFunction_t DummyKernel; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + constexpr auto gridblocksize{6}; + struct { + } args; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + size_t size = sizeof(args); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + unsigned int maxblockX = deviceProp.maxThreadsDim[0]; + unsigned int maxblockY = deviceProp.maxThreadsDim[1]; + unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; + struct gridblockDim test[gridblocksize] = {{1, 1, 1, maxblockX, 1, 1}, + {1, 1, 1, 1, maxblockY, 1}, + {1, 1, 1, 1, 1, maxblockZ}, + {UINT32_MAX, 1, 1, 1, 1, 1}, + {1, UINT32_MAX, 1, 1, 1, 1}, + {1, 1, UINT32_MAX, 1, 1, 1}}; + + // Launching kernel with corner cases in grid and block dimensions + for (int i = 0; i < gridblocksize; i++) { + HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel, + test[i].gridX, + test[i].gridY, + test[i].gridZ, + test[i].blockX, + test[i].blockY, + test[i].blockZ, + 0, + stream, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0)); + } + HIP_CHECK(hipStreamDestroy(stream)); +} diff --git a/catch/stress/module/hipModuleLaunchKernel_CornerTests.cc b/catch/stress/module/hipModuleLaunchKernel_CornerTests.cc new file mode 100644 index 0000000000..f7bd866ceb --- /dev/null +++ b/catch/stress/module/hipModuleLaunchKernel_CornerTests.cc @@ -0,0 +1,90 @@ +/* + Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* +Test Scenario +hipModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions +*/ + +#include "hip_test_common.hh" +#include "hip_test_kernels.hh" +#include "hip/hip_ext.h" + +#define fileName "kernels.code" +#define dummyKernel "EmptyKernel" + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; + +/* +This testcase verifies hipModuleLaunchKernel API Corner +cases +*/ +TEST_CASE("Stress_hipModuleLaunchKernel_CornerCases") { + HIP_CHECK(hipSetDevice(0)); + hipStream_t stream1; + CTX_CREATE() + hipModule_t Module; + hipFunction_t DummyKernel; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipStreamCreate(&stream1)); + + // Passing Max int value to block dimensions + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + unsigned int maxblockX = deviceProp.maxThreadsDim[0]; + unsigned int maxblockY = deviceProp.maxThreadsDim[1]; + unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; +#if HT_NVIDIA + unsigned int maxgridX = deviceProp.maxGridSize[0]; + unsigned int maxgridY = deviceProp.maxGridSize[1]; + unsigned int maxgridZ = deviceProp.maxGridSize[2]; +#else + unsigned int maxgridX = UINT32_MAX; + unsigned int maxgridY = UINT32_MAX; + unsigned int maxgridZ = UINT32_MAX; +#endif + struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1}, + {1, 1, 1, 1, maxblockY, 1}, + {1, 1, 1, 1, 1, maxblockZ}, + {maxgridX, 1, 1, 1, 1, 1}, + {1, maxgridY, 1, 1, 1, 1}, + {1, 1, maxgridZ, 1, 1, 1}}; + for (int i = 0; i < 6; i++) { + HIP_CHECK(hipModuleLaunchKernel(DummyKernel, + test[i].gridX, + test[i].gridY, + test[i].gridZ, + test[i].blockX, + test[i].blockY, + test[i].blockZ, + 0, + stream1, NULL, NULL)); + } + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY(); +} diff --git a/catch/stress/module/kernels.cc b/catch/stress/module/kernels.cc new file mode 100644 index 0000000000..5a980c40e7 --- /dev/null +++ b/catch/stress/module/kernels.cc @@ -0,0 +1,28 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip/hip_runtime.h" + +extern "C" __global__ void EmptyKernel() { +} + diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index f0ae8b6b93..0d19d6d82b 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(module) add_subdirectory(memory) add_subdirectory(deviceLib) add_subdirectory(stream) diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt new file mode 100644 index 0000000000..7e5d6aeaf6 --- /dev/null +++ b/catch/unit/module/CMakeLists.txt @@ -0,0 +1,51 @@ +# Common Tests - Test independent of all platforms +if(HIP_PLATFORM MATCHES "amd") +set(TEST_SRC +hipExtLaunchKernelGGL.cc +hipExtModuleLaunchKernel.cc +hipExtLaunchMultiKernelMultiDevice.cc +hipModuleLaunchKernel.cc +hipFuncSetCacheConfig.cc +hipModuleUnload.cc +hipFuncSetAttribute.cc +hipModuleLoadData.cc +hipFuncSetSharedMemConfig.cc +hipManagedKeyword.cc +hipModuleGetGlobal.cc +hipFuncGetAttributes.cc +hipModule.cc +hipModuleLoadDataMultThreadOnMultGPU.cc +hipModuleLoadDataMultThreaded.cc +hipModuleLoadMultiThreaded.cc +hipModuleLoadUnloadStress.cc +hipModuleNegative.cc +hipModuleOccupancyMaxPotentialBlockSize.cc +hipModuleTexture2dDrv.cc +hipOpenCLCOTest.cc +) +else() +set(TEST_SRC +hipModuleLaunchKernel.cc +hipFuncSetCacheConfig.cc +hipModuleUnload.cc +hipFuncSetAttribute.cc +hipModuleLoadData.cc +hipFuncSetSharedMemConfig.cc +hipManagedKeyword.cc +hipModuleGetGlobal.cc +hipFuncGetAttributes.cc +hipModule.cc +hipModuleLoadDataMultThreadOnMultGPU.cc +hipModuleLoadDataMultThreaded.cc +hipModuleLoadMultiThreaded.cc +hipModuleLoadUnloadStress.cc +hipModuleNegative.cc +hipModuleOccupancyMaxPotentialBlockSize.cc +) +endif() + +add_custom_target(module_kernels.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${HIP_COMMON_DIR}/tests/catch/unit/module/module_kernels.cc -o ${HIP_PATH}/catch/hipTestMain/module_kernels.code -I${HIP_PATH}/include/ -I${HIP_COMMON_DIR}/tests/catch/include) +# Create shared lib of all tests +add_library(ModuleTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC}) + +add_dependencies(build_tests ModuleTest module_kernels.code) diff --git a/catch/unit/module/hipExtLaunchKernelGGL.cc b/catch/unit/module/hipExtLaunchKernelGGL.cc new file mode 100755 index 0000000000..9ab420679e --- /dev/null +++ b/catch/unit/module/hipExtLaunchKernelGGL.cc @@ -0,0 +1,129 @@ +/* + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +/* + * Test Scenarios + 1. Verify kernel execution time of the particular kernel + 2. Verify hipExtLaunchKernelGGL API by disabling time flag in event creation + */ + +#include +#include +#include "hip/hip_ext.h" + +#define FOURSEC_KERNEL 4999 +#define TWOSEC_KERNEL 2999 + +__device__ int globalvar = 1; +__global__ void TwoSecKernel_GlobalVar(int clockrate) { + if (globalvar == 0x2222) { + globalvar = 0x3333; + } + HipTest::waitKernel(2, clockrate); + if (globalvar != 0x3333) { + globalvar = 0x5555; + } +} + +__global__ void FourSecKernel_GlobalVar(int clockrate) { + if (globalvar == 1) { + globalvar = 0x2222; + } + HipTest::waitKernel(4, clockrate); + if (globalvar == 0x2222) { + globalvar = 0x4444; + } +} + + +/* + * In this Scenario, we create events by disabling the timing flag + * We then Launch the kernel using hipExtModuleLaunchKernel by passing + * disabled events and try to fetch kernel execution time using + * hipEventElapsedTime API which would fail as the flag is disabled. + */ +TEST_CASE("Unit_hipExtLaunchKernelGGL_TimeFlagDisabled") { + hipStream_t stream; + HIP_CHECK(hipSetDevice(0)); + float time_2sec; + hipEvent_t start_event, end_event; + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + + // Event Creation and Launching kernels + HIP_CHECK(hipEventCreateWithFlags(&start_event, + hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&end_event, + hipEventDisableTiming)); + HIP_CHECK(hipStreamCreate(&stream)); + + hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0, + stream, start_event, end_event, 0, clkRate); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(hipEventElapsedTime(&time_2sec, start_event, end_event) + != hipSuccess); + + // Destroying the events and streams + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipEventDestroy(start_event)); + HIP_CHECK(hipEventDestroy(end_event)); +} +/* + * Launching FourSecKernel and TwoSecKernel and then we try to + * get the event elapsed time of each kernel using the start and + * end events.The event elapsed time should return us the kernel + * execution time for that particular kernel +*/ +TEST_CASE("Unit_hipExtLaunchKernelGGL_KernelTimeExecution") { + hipStream_t stream; + HIP_CHECK(hipSetDevice(0)); + hipEvent_t start_event1, end_event1, start_event2, end_event2; + float time_4sec, time_2sec; + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + + // Creating streams and events + HIP_CHECK(hipEventCreate(&start_event1)); + HIP_CHECK(hipEventCreate(&end_event1)); + HIP_CHECK(hipEventCreate(&start_event2)); + HIP_CHECK(hipEventCreate(&end_event2)); + HIP_CHECK(hipStreamCreate(&stream)); + + // Launching 4sec and 2sec kernels + hipExtLaunchKernelGGL(FourSecKernel_GlobalVar, dim3(1), dim3(1), 0, + stream, start_event1, end_event1, 0, clkRate); + hipExtLaunchKernelGGL(TwoSecKernel_GlobalVar, dim3(1), dim3(1), 0, + stream, start_event2, end_event2, 0, clkRate); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1)); + HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2)); + + INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs" << time_4sec + << "Kernel2-<" << TWOSEC_KERNEL << "Vs" << time_2sec); + // Verifying the kernel execution time + REQUIRE(time_4sec < static_cast(FOURSEC_KERNEL)); + REQUIRE(time_2sec < static_cast(TWOSEC_KERNEL)); + + // Destroying streams and events + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipEventDestroy(start_event1)); + HIP_CHECK(hipEventDestroy(end_event1)); + HIP_CHECK(hipEventDestroy(start_event2)); + HIP_CHECK(hipEventDestroy(end_event2)); +} diff --git a/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc b/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc new file mode 100644 index 0000000000..d83c425b3d --- /dev/null +++ b/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc @@ -0,0 +1,128 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* This testfile verifies the basic functionality of + hipExtLaunchMultiKernelMultiDevice API. + It can be tested on single GPU or multi GPUs. +*/ + + +#include +#include +#include "hip/hip_runtime.h" + +#define MAX_GPUS 8 +#define NUM_KERNEL_ARGS 3 + +/* +This testcase verifies hipExtLaunchMultiKernelMultiDevice API for different +datatypes where +1. Intitialize device variables +2. Initializing hipLaunchParams structure to pass it to + hipExtLaunchMultiKernelMultiDevice API +3. Launches vector_square kernel which performs square of the variable +4. Validates the result with the square of variable. +*/ + +TEMPLATE_TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Basic", "", int + , float, double) { + TestType *A_d[MAX_GPUS], *C_d[MAX_GPUS]; + TestType *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(TestType); + int nGpu = 0; + + HIP_CHECK(hipGetDeviceCount(&nGpu)); + if (nGpu < 1) { + SUCCEED("info: didn't find any GPU! Skipping the testcase"); + } else { + if (nGpu > MAX_GPUS) { + nGpu = MAX_GPUS; + } + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, N, false); + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + + // Allocating and initializing device variables + hipStream_t stream[MAX_GPUS]; + for (int i = 0; i < nGpu; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreateWithFlags(&stream[i], hipStreamNonBlocking)); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, i/*deviceID*/)); + INFO("Running on bus 0x" << props.pciBusID << " " << props.name); + INFO("Allocate device mem " << 2*Nbytes/1024.0/1024.0); + HIP_CHECK(hipMalloc(&A_d[i], Nbytes)); + HIP_CHECK(hipMalloc(&C_d[i], Nbytes)); + HIP_CHECK(hipMemcpy(A_d[i], A_h, Nbytes, hipMemcpyHostToDevice)); + } + + hipLaunchParams *launchParamsList = reinterpret_cast( + malloc(sizeof(hipLaunchParams)*nGpu)); + void *args[MAX_GPUS * NUM_KERNEL_ARGS]; + + // Intializing the hipLaunchParams structure with device variables + // ,kernel and launching hipExtLaunchMultiKernelMultiDevice API + for (int i = 0; i < nGpu; i++) { + args[i * NUM_KERNEL_ARGS] = &A_d[i]; + args[i * NUM_KERNEL_ARGS + 1] = &C_d[i]; + args[i * NUM_KERNEL_ARGS + 2] = &N; + launchParamsList[i].func = + reinterpret_cast(HipTest::vector_square); + launchParamsList[i].gridDim = dim3(blocks); + launchParamsList[i].blockDim = dim3(threadsPerBlock); + launchParamsList[i].sharedMem = 0; + launchParamsList[i].stream = stream[i]; + launchParamsList[i].args = args + i * NUM_KERNEL_ARGS; + } + + hipExtLaunchMultiKernelMultiDevice(launchParamsList, nGpu, 0); + + // Validating the result + for (int j = 0; j < nGpu; j++) { + hipStreamSynchronize(stream[j]); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, j/*deviceID*/)); + INFO("Checking result on bus " << props.pciBusID << props.name); + + HIP_CHECK(hipSetDevice(j)); + HIP_CHECK(hipMemcpy(C_h, C_d[j], Nbytes, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + INFO("validation failed " << C_h[i] << A_h[i]*A_h[i]); + REQUIRE(false); + } + } + } + + // DeAllocating memory + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + for (int j = 0; j < nGpu; j++) { + HIP_CHECK(hipFree(A_d[j])); + HIP_CHECK(hipFree(C_d[j])); + HIP_CHECK(hipStreamDestroy(stream[j])); + } + } +} diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc new file mode 100755 index 0000000000..45db114135 --- /dev/null +++ b/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -0,0 +1,433 @@ +/* + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +/* Test Scenarios + 1. hipExtModuleLaunchKernel Negative Scenarios + 2. hipExtModuleLaunchKernel API verifying the kernel execution time of a particular kernel. + 3. hipExtModuleLaunchKernel API verifying the kernel execution time by disabling the time flag + 4. hipModuleLaunchKernel Work Group tests => + - (block.x * block.y * block.z) <= Work Group Size + where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + - (block.x * block.y * block.z) > Work Group Size + where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + */ + +#include +#include "hip_test_common.hh" +#include "hip_test_kernels.hh" +#include "hip/hip_ext.h" + +#define fileName "module_kernels.code" +#define matmulK "matmulK" +#define SixteenSec "SixteenSecKernel" +#define KernelandExtra "KernelandExtraParams" +#define FourSec "FourSecKernel" +#define TwoSec "TwoSecKernel" +#define globalDevVar "deviceGlobal" +#define dummyKernel "EmptyKernel" +#define FOURSEC_KERNEL 4999 +#define TWOSEC_KERNEL 2999 + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; + +class ModuleLaunchKernel { + int N = 64; + int SIZE = N*N; + int *A, *B, *C; + hipDeviceptr_t *Ad, *Bd; + hipStream_t stream1, stream2; + hipEvent_t start_event1, end_event1, start_event2, end_event2, + start_timingDisabled, end_timingDisabled; + hipModule_t Module; + hipDeviceptr_t deviceGlobal; + hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel, + TwoSecKernel, KernelandExtraParamKernel, DummyKernel; + struct { + int clockRate; + void* _Ad; + void* _Bd; + void* _Cd; + int _n; + } args1, args2; + struct { + } args3; + size_t size1; + size_t size2; + size_t size3; + size_t deviceGlobalSize; + public : + void AllocateMemory(); + void DeAllocateMemory(); + void ModuleLoad(); + void Module_Negative_tests(); + void ExtModule_Negative_tests(); + void Module_WorkGroup_Test(); + void ExtModule_KernelExecutionTime(); + void ExtModule_Disabled_Timingflag(); +}; + +void ModuleLaunchKernel::AllocateMemory() { + A = new int[N*N*sizeof(int)]; + B = new int[N*N*sizeof(int)]; + for (int i=0; i < N; i++) { + for (int j=0; j < N; j++) { + A[i*N +j] = 1; + B[i*N +j] = 1; + } + } + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), + SIZE*sizeof(int))); + HIP_CHECK(hipMalloc(reinterpret_cast(&Bd), + SIZE*sizeof(int))); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&C), SIZE*sizeof(int))); + HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice)); + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + args1._Ad = Ad; + args1._Bd = Bd; + args1._Cd = C; + args1._n = N; + args1.clockRate = clkRate; + args2._Ad = NULL; + args2._Bd = NULL; + args2._Cd = NULL; + args2._n = 0; + args2.clockRate = clkRate; + size1 = sizeof(args1); + size2 = sizeof(args2); + size3 = sizeof(args3); + HIP_CHECK(hipEventCreate(&start_event1)); + HIP_CHECK(hipEventCreate(&end_event1)); + HIP_CHECK(hipEventCreate(&start_event2)); + HIP_CHECK(hipEventCreate(&end_event2)); + HIP_CHECK(hipEventCreateWithFlags(&start_timingDisabled, + hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&end_timingDisabled, + hipEventDisableTiming)); +} + +void ModuleLaunchKernel::ModuleLoad() { + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); + HIP_CHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSec)); + HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel, + Module, KernelandExtra)); + HIP_CHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSec)); + HIP_CHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSec)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, + Module, globalDevVar)); +} + +void ModuleLaunchKernel::DeAllocateMemory() { + HIP_CHECK(hipEventDestroy(start_event1)); + HIP_CHECK(hipEventDestroy(end_event1)); + HIP_CHECK(hipEventDestroy(start_event2)); + HIP_CHECK(hipEventDestroy(end_event2)); + HIP_CHECK(hipEventDestroy(start_timingDisabled)); + HIP_CHECK(hipEventDestroy(end_timingDisabled)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + delete[] A; + delete[] B; + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipHostFree(C)); + HIP_CHECK(hipModuleUnload(Module)); +} +/* + * In this scenario,We launch the 4 sec kernel and 2 sec kernel + * and we fetch the event execution time of each kernel and it + * should not exceed the execution time of that particular kernel + */ +void ModuleLaunchKernel::ExtModule_KernelExecutionTime() { + HIP_CHECK(hipSetDevice(0)); + AllocateMemory(); + ModuleLoad(); + float time_4sec, time_2sec; + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + + // Launching kernels + HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, + NULL, reinterpret_cast(&config2), + start_event1, end_event1, 0)); + HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, + NULL, reinterpret_cast(&config2), + start_event2, end_event2, 0)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1)); + HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2)); + + INFO("Expected Vs Actual: Kernel1-<" << FOURSEC_KERNEL << "Vs" << time_4sec + << "Kernel2-<" << TWOSEC_KERNEL << "Vs" << time_2sec); + // Verifying the kernel execution time + REQUIRE(time_4sec < static_cast(FOURSEC_KERNEL)); + REQUIRE(time_2sec < static_cast(TWOSEC_KERNEL)); + + DeAllocateMemory(); +} +/* + * In this Scenario, we create events by disabling the timing flag + * We then Launch the kernel using hipExtModuleLaunchKernel by passing + * disabled events and try to fetch kernel execution time using + * hipEventElapsedTime API which would fail as the flag is disabled. + */ +void ModuleLaunchKernel::ExtModule_Disabled_Timingflag() { + // Allocating Memory and Loading kernel + AllocateMemory(); + ModuleLoad(); + float time_2sec; + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + + // Launching Kernel + HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, + NULL, reinterpret_cast(&config2), + start_timingDisabled, + end_timingDisabled, 0)); + HIP_CHECK(hipStreamSynchronize(stream1)); + + REQUIRE(hipEventElapsedTime(&time_2sec, start_timingDisabled, + end_timingDisabled) != hipSuccess); + + // DeAllocating the memory + DeAllocateMemory(); +} + +/* +This testcase verifies negative scenarios of hipExtModuleLaunchKernel API +*/ +void ModuleLaunchKernel::ExtModule_Negative_tests() { + HIP_CHECK(hipSetDevice(0)); + // Allocating memeory and loading kernel + AllocateMemory(); + ModuleLoad(); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + void *params[] = {Ad}; + + SECTION("Nullptr to kernel function") { + REQUIRE(hipExtModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Max int value to block dimensions") { + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Null values to all dimensions") { + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 0, 0, + 0, + 0, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing 0 for x dimension") { + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 0, 1, 1, + 0, + 1, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing 0 for y dimension") { + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 0, 1, + 1, + 0, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing 0 for Z dimension") { + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 0, + 1, + 1, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing both kernel and extra params") { + REQUIRE(hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, + 1, 1, 0, + stream1, + reinterpret_cast(¶ms), + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing both than maxthreadsperblock to block dimensions") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Block dimension x = Max alloweed + 1") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsDim[0]+1, + 1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Block dimension Y = Max alloweed + 1") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + deviceProp.maxThreadsDim[1]+1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Block dimension Z = Max alloweed + 1") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + 1, + deviceProp.maxThreadsDim[2]+1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + } + + SECTION("Passing invalid config data in extra params") { + void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + REQUIRE(hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config3), + nullptr, nullptr, 0) != hipSuccess); + } + + DeAllocateMemory(); +} + +void ModuleLaunchKernel::Module_WorkGroup_Test() { + // Allocate memory and load modules + AllocateMemory(); + ModuleLoad(); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3, + HIP_LAUNCH_PARAM_END}; + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + double cuberootVal = + cbrt(static_cast(deviceProp.maxThreadsPerBlock)); + uint32_t cuberoot_floor = floor(cuberootVal); + uint32_t cuberoot_ceil = ceil(cuberootVal); + + // Scenario: (block.x * block.y * block.z) <= Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + HIP_CHECK(hipExtModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_floor, cuberoot_floor, cuberoot_floor, + 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0)); + + // Scenario: (block.x * block.y * block.z) > Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + REQUIRE(hipExtModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_ceil, cuberoot_ceil, cuberoot_ceil+1, + 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0) != hipSuccess); + + // DeAllocating memory + DeAllocateMemory(); +} + +/* +This testcase verifies the negative scenarios of +hipExtModuleLaunchKernel API +*/ +TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative") { + ModuleLaunchKernel Ext_obj; + Ext_obj.ExtModule_Negative_tests(); +} + +/* +This testcase verifies hipExtModuleLaunchKernel API by +disabling the timing flag +*/ +TEST_CASE("Unit_hipExtModuleLaunchKernel_TimingflagDisabled") { + ModuleLaunchKernel Ext_obj; + Ext_obj.ExtModule_Disabled_Timingflag(); +} + +/* +This testcase verifies hipExtModuleLaunchKernel API kernel +execution time +*/ +TEST_CASE("Unit_hipExtModuleLaunchKernel_KernelExecutionTime") { + ModuleLaunchKernel Ext_obj; + Ext_obj.ExtModule_KernelExecutionTime(); +} + +/* +This testcase verifies workgroup of hipExtModuleLaunchKernel API +*/ +TEST_CASE("Unit_hipExtModuleLaunchKernel_WorkGroup") { + ModuleLaunchKernel Ext_obj; + Ext_obj.Module_WorkGroup_Test(); +} diff --git a/catch/unit/module/hipFuncGetAttributes.cc b/catch/unit/module/hipFuncGetAttributes.cc new file mode 100644 index 0000000000..5e21dc17c6 --- /dev/null +++ b/catch/unit/module/hipFuncGetAttributes.cc @@ -0,0 +1,163 @@ + +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +#define fileName "module_kernels.code" +#define kernel_name "hello_world" + +namespace testhipFuncGetAttributesApi { +__global__ +void fn(float* px, float* py) { + bool a[42]; + __shared__ double b[69]; + for (auto&& x : b) x = *py++; + for (auto&& x : a) x = *px++ > 0.0; + for (auto&& x : a) if (x) *--py = *--px; +} +template +__launch_bounds__(WGSIZE, 1) __global__ void kernelfn(int *x) { + __shared__ int lds[LDS]; + for (int i = 0; i < LDS; ++i) { + lds[i] = x[i]; + } + x[LDS - 1] = lds[0] / lds[LDS - 1]; +} +template bool test_Attributes_Values() { + bool TestPassed = true; + hipFuncAttributes attr{}; + hipFuncGetAttributes(&attr, + reinterpret_cast(kernelfn)); + if (attr.maxThreadsPerBlock != WGSIZE) { + TestPassed = false; + } + if (attr.sharedSizeBytes != LDS * sizeof(int)) { + TestPassed = false; + } + return TestPassed; +} +} // namespace testhipFuncGetAttributesApi +/** + * hipFuncGetAttributes and hipModuleGetFunction functional tests + * Scenario1: Validates the value of attribute "maxThreadsPerBlock" should be non zero. + * Scenario2: Validates the value of attribute + * "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK" should be non zero. + */ +// scenario 1 +TEST_CASE("Unit_hipFuncGetAttributes_FuncTst") { + hipFuncAttributes attr{}; + auto r = hipFuncGetAttributes(&attr, + reinterpret_cast(&testhipFuncGetAttributesApi::fn)); + REQUIRE_FALSE(r != hipSuccess); + REQUIRE_FALSE(attr.maxThreadsPerBlock == 0); +} +// scenario 2 +TEST_CASE("Unit_hipFuncGetAttribute_FuncTst") { + hipModule_t Module; + int attrib_val; + CTX_CREATE() + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + auto r = hipFuncGetAttribute(&attrib_val, + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function); + REQUIRE_FALSE(r != hipSuccess); + REQUIRE_FALSE(attrib_val == 0); + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} +/** + * hipFuncGetAttributes negative tests + * Scenario1: Validates returned error code for attr = nullptr + * Scenario2: Validates returned error code for function = nullptr + */ +TEST_CASE("Unit_hipFuncGetAttributes_NegTst") { + SECTION("attr is nullptr") { + REQUIRE_FALSE(hipSuccess == hipFuncGetAttributes(nullptr, + reinterpret_cast(&testhipFuncGetAttributesApi::fn))); + } + SECTION("function is nullptr") { + hipFuncAttributes attr{}; + REQUIRE_FALSE(hipSuccess == hipFuncGetAttributes(&attr, nullptr)); + } +} +/** + * hipFuncGetAttribute negative tests + * Scenario1: Validates returned error code for attrib_val = nullptr + * Scenario2: Validates returned error code for attrib = invalid = 0xff + */ +TEST_CASE("Unit_hipFuncGetAttribute_NegTst") { + hipModule_t Module; + CTX_CREATE() + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + SECTION("attr is nullptr") { + REQUIRE_FALSE(hipSuccess == hipFuncGetAttribute(nullptr, + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Function)); + } + SECTION("attr is invalid") { + int attrib_val; + REQUIRE_FALSE(hipSuccess == hipFuncGetAttribute(&attrib_val, + static_cast(0xff), Function)); + } + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} +/** + * hipFuncGetAttributes + * Scenario4: Validates the value of attribute "maxThreadsPerBlock". + * Scenario5: Validates the value of attribute "sharedSizeBytes". + */ +TEST_CASE("Unit_hipFuncGetAttributes_AttrTest") { + bool TestPassed = true; + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<64, 64>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<128, 64>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<256, 64>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<512, 64>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<1024, 64>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<64, 128>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<128, 128>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<256, 128>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<512, 128>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<1024, 128>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<64, 256>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<128, 256>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<256, 256>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<512, 256>(); + TestPassed &= testhipFuncGetAttributesApi:: + test_Attributes_Values<1024, 256>(); + REQUIRE(TestPassed); +} + diff --git a/catch/unit/module/hipFuncSetAttribute.cc b/catch/unit/module/hipFuncSetAttribute.cc new file mode 100644 index 0000000000..33602f9078 --- /dev/null +++ b/catch/unit/module/hipFuncSetAttribute.cc @@ -0,0 +1,46 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip_test_common.hh" + +__global__ void fn(float* px, float* py) { + bool a[42]; + __shared__ double b[69]; + + for (auto&& x : b) x = *py++; + for (auto&& x : a) x = *px++ > 0.0; + for (auto&& x : a) if (x) *--py = *--px; +} + +/* +This testcases verifies the basic func of hipFuncSetAttribute API where +we need to pass function that executes on device +hipFuncAttributeMaxDynamicSharedMemorySize --> +The sum of this value + sharedSizeBytes should not exceed device attribute +hipFuncAttributePreferredSharedMemoryCarveout --> +Carving out the shared memory. +*/ +TEST_CASE("Unit_hipFuncSetAttribute_Basic") { + HIP_CHECK(hipFuncSetAttribute(reinterpret_cast(&fn), + hipFuncAttributeMaxDynamicSharedMemorySize, + 0)); + HIP_CHECK(hipFuncSetAttribute(reinterpret_cast(&fn), + hipFuncAttributePreferredSharedMemoryCarveout, + 0)); +} diff --git a/catch/unit/module/hipFuncSetCacheConfig.cc b/catch/unit/module/hipFuncSetCacheConfig.cc new file mode 100644 index 0000000000..cd5d58a8f6 --- /dev/null +++ b/catch/unit/module/hipFuncSetCacheConfig.cc @@ -0,0 +1,36 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__global__ void Empty_Kernel() { +} + +/* +This testcase verifies the basic funct of hipFuncSetCacheConfig API +On GPU devices, where L1 and shared memory uses same resources +This sets the preferred cache configuration for the kernel function +In this testcases we are setting hipFuncCachePreferL1 where L1 is +preferred more than shared memory +*/ +TEST_CASE("Unit_hipFuncSetCacheConfig_Basic") { + hipFuncCache_t cacheConfig{hipFuncCachePreferL1}; + HIP_CHECK(hipFuncSetCacheConfig(reinterpret_cast(Empty_Kernel), + cacheConfig)); +} diff --git a/catch/unit/module/hipFuncSetSharedMemConfig.cc b/catch/unit/module/hipFuncSetSharedMemConfig.cc new file mode 100644 index 0000000000..c9d65275b5 --- /dev/null +++ b/catch/unit/module/hipFuncSetSharedMemConfig.cc @@ -0,0 +1,107 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// Test Description: +// This test case verifies the working of hipFuncSetSharedMemConfig() api and +// the flag parameter + +#include +#include + + +__global__ void ReverseSeq(int *A, int *B, int N) { + extern __shared__ int SMem[]; + int offset = threadIdx.x; + int MirrorVal = N - offset - 1; + SMem[offset] = A[offset]; + __syncthreads(); + B[offset] = SMem[MirrorVal]; +} +/* +This testcase verifies the basic functionality of hipFuncSetSharedMemConfig API +by setting shared memory bank size + +1. hipSharedMemBankSizeDefault +2. hipSharedMemBankSizeFourByte +3. hipSharedMemBankSizeEightByte + +*/ +TEST_CASE("Unit_hipFuncSetSharedMemConfig_Basic") { + int *Ah{nullptr}, *RAh{nullptr}, NumElms = 128; + int *Ad{nullptr}, *RAd{nullptr}; + + HipTest::initArrays(&Ad, &RAd, nullptr, + &Ah, &RAh, nullptr, NumElms, false); + for (int i = 0; i < NumElms; ++i) { + Ah[i] = i; + RAh[i] = NumElms - i - 1; + } + HIP_CHECK(hipMemcpy(Ad, Ah, NumElms * sizeof(int), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemset(RAd, 0, NumElms * sizeof(int))); + + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeDefault flag + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), + hipSharedMemBankSizeDefault)); + + // Kernel Launch with shared mem size of = NumElms * sizeof(int) + ReverseSeq<<<1, NumElms, NumElms * sizeof(int)>>>(Ad, RAd, NumElms); + memset(Ah, 0, NumElms * sizeof(int)); + + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NumElms * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NumElms; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeFourBytes flg + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), + hipSharedMemBankSizeFourByte)); + HIP_CHECK(hipMemset(RAd, 0, NumElms * sizeof(int))); + + // Kernel Launch with shared mem size of = NumElms * sizeof(int) + ReverseSeq<<<1, NumElms, NumElms * sizeof(int)>>>(Ad, RAd, NumElms); + memset(Ah, 0, NumElms * sizeof(int)); + + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NumElms * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NumElms; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeEightBytes flg + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), + hipSharedMemBankSizeEightByte)); + HIP_CHECK(hipMemset(RAd, 0, NumElms * sizeof(int))); + + // Kernel Launch with shared mem size of = NumElms * sizeof(int) + ReverseSeq<<<1, NumElms, NumElms * sizeof(int)>>>(Ad, RAd, NumElms); + memset(Ah, 0, NumElms * sizeof(int)); + + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NumElms * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NumElms; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + + HipTest::freeArrays(Ad, RAd, nullptr, + Ah, RAh, nullptr, false); +} diff --git a/catch/unit/module/hipManagedKeyword.cc b/catch/unit/module/hipManagedKeyword.cc new file mode 100644 index 0000000000..7ed9bbe630 --- /dev/null +++ b/catch/unit/module/hipManagedKeyword.cc @@ -0,0 +1,56 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* +hipManagedKeyword API Scenario +1. Test hipModuleLoad on multiple GPUs +*/ + +#include "hip_test_common.hh" +#include "hip_test_kernels.hh" +#include "hip_test_checkers.hh" + +#define MANAGED_VAR_INIT_VALUE 10 +#define fileName "module_kernels.code" + +TEST_CASE("Unit_hipMangedKeyword_ModuleLoadMultiGPU") { + int numDevices = 0, data; + hipDeviceptr_t x; + size_t xSize; + hipGetDeviceCount(&numDevices); + for (int i = 0; i < numDevices; i++) { + hipSetDevice(i); + CTX_CREATE() + hipModule_t Module; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "GPU_func")); + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, 1, 1, + 1, 0, 0, NULL, NULL)); + hipDeviceSynchronize(); + HIP_CHECK(hipModuleGetGlobal(reinterpret_cast(&x), + &xSize, Module, "x")); + HIP_CHECK(hipMemcpyDtoH(&data, hipDeviceptr_t(x), xSize)); + REQUIRE(data == (1 + MANAGED_VAR_INIT_VALUE)); + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() + } +} diff --git a/catch/unit/module/hipModule.cc b/catch/unit/module/hipModule.cc new file mode 100755 index 0000000000..9683eacee9 --- /dev/null +++ b/catch/unit/module/hipModule.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the hipModuleLoad API On +1. Single code object +2. Multi Target architecture code object +*/ +#include +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" +#ifdef __linux__ +#include +#endif +#define LEN 64 +#define SIZE (LEN << 2) +#define COMMAND_LEN 256 +#define CODE_OBJ_SINGLEARCH "module_kernels.code" +#define kernel_name "hello_world" +#define CODE_OBJ_MULTIARCH "vcpy_kernel_multarch.code" + +/* +This API loads the kernel function, Launches the kernel +which copies one variable to another and validates both +the device variables for the current GPU architecture +*/ +void testCodeObjFile(const char *codeObjFile) { + float *A, *B; + float *Ad, *Bd; + HipTest::initArrays(&Ad, &Bd, nullptr, + &A, &B, nullptr, LEN, false); + + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(Ad), A, SIZE)); + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(Bd), B, SIZE)); + + hipModule_t Module; + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, codeObjFile)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = reinterpret_cast(Ad); + args._Bd = reinterpret_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, + stream, NULL, + reinterpret_cast(&config))); + + HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipMemcpyDtoH(B, reinterpret_cast(Bd), SIZE)); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + + HipTest::freeArrays(Ad, Bd, nullptr, + A, B, nullptr, + false); + HIP_CHECK(hipModuleUnload(Module)); +} + +#ifdef __linux__ +/** + * Check if environment variable $ROCM_PATH is defined + * + */ +bool isRocmPathSet() { + FILE *fpipe; + char const *command = "echo $ROCM_PATH"; + fpipe = popen(command, "r"); + + if (fpipe == nullptr) { + WARN("Unable to create command"); + return false; + } + char command_op[COMMAND_LEN]; + if (fgets(command_op, COMMAND_LEN, fpipe)) { + size_t len = strlen(command_op); + if (len > 1) { // This is because fgets always adds newline character + pclose(fpipe); + return true; + } + } + pclose(fpipe); + return false; +} +#endif +/* +This testcase checks the hipModuleLoadData API for the +current GPU architecture. +*/ +TEST_CASE("Unit_hipModule_TestCodeObjFile") { + testCodeObjFile(CODE_OBJ_SINGLEARCH); +} + +/* +This testcases +1. Creates kernel file and copies to tmp folder +2. Checks for Rocm path and generates code file for + multiple target architectures. +*/ +TEST_CASE("Unit_hipModule_MultiTargArchCodeObj") { +#ifdef __linux__ + char command[COMMAND_LEN]; + hipDeviceProp_t props; + hipGetDeviceProperties(&props, 0); + // Hardcoding the codeobject lines in multiple string to avoid cpplint warning + std::string CodeObjL1 = "#include \"hip/hip_runtime.h\"\n"; + std::string CodeObjL2 = + "extern \"C\" __global__ void hello_world(float* a, float* b) {\n"; + std::string CodeObjL3 = " int tx = hipThreadIdx_x;\n"; + std::string CodeObjL4 = " b[tx] = a[tx];\n"; + std::string CodeObjL5 = "}"; + // Creating the full code object string + static std::string CodeObj = CodeObjL1 + CodeObjL2 + CodeObjL3 + + CodeObjL4 + CodeObjL5; + std::ofstream ofs("/tmp/vcpy_kernel.cpp", std::ofstream::out); + ofs << CodeObj; + ofs.close(); + // Copy the file into current working location if not available + if (access("/tmp/vcpy_kernel.cpp", F_OK) == -1) { + INFO("Code Object File: /tmp/vcpy_kernel.cpp not found"); + REQUIRE(true); + } + // Generate the command to generate multi architecture code object file + const char* hipcc_path = nullptr; + if (isRocmPathSet()) { + hipcc_path = "$ROCM_PATH/bin/hipcc"; + } else { + hipcc_path = "/opt/rocm/bin/hipcc"; + } + /* Putting these command parameters into a variable to shorten the string + literal length in order to avoid multiline string literal cpplint warning + */ + const char* genco_option = "--offload-arch"; + const char* input_codeobj = "/tmp/vcpy_kernel.cpp"; + snprintf(command, COMMAND_LEN, + "%s --genco %s=gfx801,gfx802,gfx803,gfx900,gfx908,gfx1030,gfx90a,%s %s -o %s", + hipcc_path, genco_option, props.gcnArchName, input_codeobj, + CODE_OBJ_MULTIARCH); + + system((const char*)command); + // Check if the code object file is created + snprintf(command, COMMAND_LEN, "./%s", + CODE_OBJ_MULTIARCH); + + if (access(command, F_OK) == -1) { + INFO("Code Object File not found"); + REQUIRE(true); + } else { + testCodeObjFile(CODE_OBJ_MULTIARCH); + } +#else + SUCCEED("This test is skipped due to non linux environment"); +#endif +} diff --git a/catch/unit/module/hipModuleGetGlobal.cc b/catch/unit/module/hipModuleGetGlobal.cc new file mode 100755 index 0000000000..e99e3a64f1 --- /dev/null +++ b/catch/unit/module/hipModuleGetGlobal.cc @@ -0,0 +1,120 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#include +#include +#include +#include + +#define LEN 64 +#define SIZE LEN * sizeof(float) +#define ARRAY_SIZE 16 +#define fileName "module_kernels.code" + +/* +This testcase verifies the basic functionality of hipModuleGetGlobal API +1. Simple kernel +2. Global variables +*/ +TEST_CASE("Unit_hipModuleGetGlobal_Basic") { + float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr}; + HipTest::initArrays(&Ad, &Bd, nullptr, &A, &B, nullptr, LEN, + false); + CTX_CREATE() + hipMemcpyHtoD(reinterpret_cast(Ad), A, SIZE); + hipMemcpyHtoD(reinterpret_cast(Bd), B, SIZE); + hipModule_t Module; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + + float myDeviceGlobal_h = 42.0; + hipDeviceptr_t deviceGlobal; + size_t deviceGlobalSize; + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, + Module, "myDeviceGlobal")); + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(deviceGlobal), + &myDeviceGlobal_h, deviceGlobalSize)); + float myDeviceGlobalArray_h[ARRAY_SIZE]; + hipDeviceptr_t myDeviceGlobalArray; + size_t myDeviceGlobalArraySize; + + HIP_CHECK(hipModuleGetGlobal(reinterpret_cast + (&myDeviceGlobalArray), + &myDeviceGlobalArraySize, Module, + "myDeviceGlobalArray")); + + for (int i = 0; i < ARRAY_SIZE; i++) { + myDeviceGlobalArray_h[i] = i * 1000.0f; + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast + (myDeviceGlobalArray), + &myDeviceGlobalArray_h, + myDeviceGlobalArraySize)); + } + + struct { + void* _Ad; + void* _Bd; + } args; + + args._Ad = reinterpret_cast(Ad); + args._Bd = reinterpret_cast(Bd); + size_t size = sizeof(args); + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + + SECTION("Testing with simple kernel") { + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world")); + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, + NULL, + reinterpret_cast(&config))); + + hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + } + + SECTION("Testing global variables") { + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals")); + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, + NULL, + reinterpret_cast(&config))); + + hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE); + + for (uint32_t i = 0; i < LEN; i++) { + float expected = A[i] + myDeviceGlobal_h + + myDeviceGlobalArray_h[i % 16]; + REQUIRE(expected == B[i]); + } + } + + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() + HipTest::freeArrays(Ad, Bd, nullptr, + A, B, nullptr, + false); +} diff --git a/catch/unit/module/hipModuleLaunchKernel.cc b/catch/unit/module/hipModuleLaunchKernel.cc new file mode 100644 index 0000000000..341378aec0 --- /dev/null +++ b/catch/unit/module/hipModuleLaunchKernel.cc @@ -0,0 +1,246 @@ +/* + Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +/* Test Scenarios + 1. hipModuleLaunchKernel Negative Scenarios + 2. hipModuleLaunchKernel Work Group tests => + - (block.x * block.y * block.z) <= Work Group Size + where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + - (block.x * block.y * block.z) > Work Group Size + where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + */ + +#include +#include + +#define fileName "module_kernels.code" +#define matmulK "matmulK" +#define SixteenSec "SixteenSecKernel" +#define KernelandExtra "KernelandExtraParams" +#define FourSec "FourSecKernel" +#define TwoSec "TwoSecKernel" +#define dummyKernel "EmptyKernel" + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; + +/* +This testcase verifies the negative scenarios of +hipModuleLaunchKernel API +*/ +TEST_CASE("Unit_hipModuleLaunchKernel_Negative") { + HIP_CHECK(hipSetDevice(0)); + struct { + void* _Ad; + void* _Bd; + void* _Cd; + int _n; + } args1; + args1._Ad = nullptr; + args1._Bd = nullptr; + args1._Cd = nullptr; + args1._n = 0; + hipFunction_t MultKernel, KernelandExtraParamKernel; + size_t size1; + size1 = sizeof(args1); + hipModule_t Module; + hipStream_t stream1; + hipDeviceptr_t *Ad{nullptr}; + CTX_CREATE() + + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); + HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel, + Module, KernelandExtra)); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + void *params[] = {Ad}; + HIP_CHECK(hipStreamCreate(&stream1)); + SECTION("Passing nullptr to kernel function") { + REQUIRE(hipModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Passing Max int value to block dim") { + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + 0, stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + + SECTION("Passing 0 to all value dim") { + REQUIRE(hipModuleLaunchKernel(MultKernel, 0, 0, 0, + 0, + 0, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Passing 0 for X dim") { + REQUIRE(hipModuleLaunchKernel(MultKernel, 0, 1, 1, + 0, + 1, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + + SECTION("Passing 0 for Y dim") { + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 0, 1, + 1, + 0, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Passing 0 for Z dim") { + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 0, + 1, + 1, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Passing both kernel and extra params") { + REQUIRE(hipModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, + 1, 1, 0, stream1, + reinterpret_cast(¶ms), + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Passing more than maxthreadsperblock to block dim") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, 0, + stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Block dim X is more than max allowed") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsDim[0]+1, + 1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Block dim Y is more than max allowed") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + deviceProp.maxThreadsDim[1]+1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Block dim Z is more than max allowed") { + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + 1, + deviceProp.maxThreadsDim[2]+1, + 0, stream1, NULL, + reinterpret_cast(&config1)) + != hipSuccess); + } + + SECTION("Block invalid config to extra params") { + void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + REQUIRE(hipModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, 1, 1, 0, stream1, + NULL, + reinterpret_cast(&config3)) + != hipSuccess); + } + + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} + +/* +This testcase verifies the work group scenarios of +hipModuleLaunchKernel API +*/ +TEST_CASE("Unit_hipModuleLaunchKernel_WorkGroup") { + HIP_CHECK(hipSetDevice(0)); + hipFunction_t DummyKernel; + hipModule_t Module; + hipStream_t stream1; + CTX_CREATE() + + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipStreamCreate(&stream1)); + // Passing Max int value to block dimensions + hipDeviceProp_t deviceProp; + hipGetDeviceProperties(&deviceProp, 0); + double cuberootVal = + cbrt(static_cast(deviceProp.maxThreadsPerBlock)); + uint32_t cuberoot_floor = floor(cuberootVal); + uint32_t cuberoot_ceil = ceil(cuberootVal); + // Scenario: (block.x * block.y * block.z) <= Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + HIP_CHECK(hipModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_floor, cuberoot_floor, cuberoot_floor, + 0, stream1, NULL, NULL)); + // Scenario: (block.x * block.y * block.z) > Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + REQUIRE(hipModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1, + 0, stream1, NULL, NULL) != hipSuccess); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} diff --git a/catch/unit/module/hipModuleLoadData.cc b/catch/unit/module/hipModuleLoadData.cc new file mode 100644 index 0000000000..2102a10f49 --- /dev/null +++ b/catch/unit/module/hipModuleLoadData.cc @@ -0,0 +1,91 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* +hipModuleLoadData scenarios + +1. Loads the kernel and the corresponding kernel function + which copies the data from one device variable to another. +*/ + +#include +#include +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define LEN 64 +#define SIZE LEN << 2 +#define FILENAME "module_kernels.code" +#define kernel_name "hello_world" + +static std::vector load_file() { + std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + INFO("could not open code object" << FILENAME); + REQUIRE(false); + } + return buffer; +} + + +TEST_CASE("Unit_hipModuleLoadData_Basic") { + auto buffer = load_file(); + float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr}; + HipTest::initArrays(&Ad, &Bd, nullptr, &A, &B, nullptr, + LEN, false); + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + hipModule_t Module; + hipFunction_t Function{nullptr}; + + HIP_CHECK(hipModuleLoadData(&Module, &buffer[0])); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = reinterpret_cast(Ad); + args._Bd = reinterpret_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, + stream, NULL, reinterpret_cast(&config))); + + HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + HipTest::freeArrays(Ad, Bd, nullptr, + A, B, + nullptr, false); +} diff --git a/catch/unit/module/hipModuleLoadDataMultThreadOnMultGPU.cc b/catch/unit/module/hipModuleLoadDataMultThreadOnMultGPU.cc new file mode 100644 index 0000000000..bfb780ca06 --- /dev/null +++ b/catch/unit/module/hipModuleLoadDataMultThreadOnMultGPU.cc @@ -0,0 +1,161 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the multithreaded scenario of +hipModuleLoadData API on MultiGPU system +*/ + +#include +#include + +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define LEN 64 +#define SIZE LEN << 2 +#define THREADS 8 + +#define FILENAME "module_kernels.code" +#define kernel_name "hello_world" + +/* +This function reads the kernel code object file into buffer +*/ +static std::vector load_file() { + std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + INFO("could not open code object " << FILENAME); + REQUIRE(false); + } + return buffer; +} + +/* +Thread function +1. Loads the module using hipModuleLoadData API +2. Initializes 2 device variables. +3. Launches kernel which copies one data into another. +4. validates the result and returns it to the caller using + std::ref variable. +*/ +static void run(const std::vector& buffer, int deviceNo, + bool &testResult) { + hipSetDevice(deviceNo); + hipModule_t Module; + hipFunction_t Function; + float *A{nullptr}, *B{nullptr}, *Ad{nullptr}, *Bd{nullptr}; + testResult = true; + HipTest::initArrays(&Ad, &Bd, nullptr, + &A, &B, nullptr, + LEN, false); + HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); + HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = static_cast(Ad); + args._Bd = static_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, + 1, 1, 0, stream, NULL, + reinterpret_cast(&config))); + + HIPCHECK(hipStreamSynchronize(stream)); + + HIPCHECK(hipStreamDestroy(stream)); + + HIPCHECK(hipModuleUnload(Module)); + + HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + HipTest::freeArrays(Ad, Bd, nullptr, + A, B, nullptr, + false); +} + +/* +Thread class inherited from std::thread +*/ +struct joinable_thread : std::thread { + template + joinable_thread(Xs&&... xs) : std::thread(std::forward(xs)...) {} // NOLINT + + joinable_thread& operator=(joinable_thread&& other) = default; + joinable_thread(joinable_thread&& other) = default; + + ~joinable_thread() { + if (this->joinable()) + this->join(); + } +}; + +/* +This API is triggered form the test case where in +1. Creates the thread object. +2. Loops through the number of GPUs and launches multiple threads. +*/ +static void run_multi_threads(uint32_t n, const std::vector& buffer) { + int numDevices = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + bool testResult = false; + std::vector threads; + + for (int deviceNo=0; deviceNo < numDevices; ++deviceNo) { + for (uint32_t i = 0; i < n; i++) { + threads.emplace_back(std::thread{[&, buffer] { + run(buffer, deviceNo, std::ref(testResult)); + }}); + } + } +} +/* +The testcase verifies the multithreaded funtionality on MGPU system +1. Loads the kernel file by calling load_file API +2. Gets the host thread count +3. Creates multiple threads in parallel where in each thread initializes + 2 device variables and loads the kernel using hipModuleLoadData API. + The kernel copies the data from one variable to another.Then the thread + validates both the variables. +*/ +TEST_CASE("Unit_hipModuleLoadData_MGpuMultiThread") { + auto buffer = load_file(); + auto file_size = buffer.size() / (1024 * 1024); + auto thread_count = HipTest::getHostThreadCount(file_size + 10); + run_multi_threads(thread_count, buffer); +} diff --git a/catch/unit/module/hipModuleLoadDataMultThreaded.cc b/catch/unit/module/hipModuleLoadDataMultThreaded.cc new file mode 100644 index 0000000000..183da4c007 --- /dev/null +++ b/catch/unit/module/hipModuleLoadDataMultThreaded.cc @@ -0,0 +1,164 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the multithreaded scenario of hipModuleLoadData API +*/ +#include +#include + +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define LEN 64 +#define SIZE LEN << 2 +#define THREADS 8 +#define MAX_THREADS 512 + +#define FILENAME "module_kernels.code" +#define kernel_name "hello_world" + +/* +This function reads the kernel code object file into buffer +*/ +std::vector load_file() { + std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + INFO("could not open code object" << FILENAME); + REQUIRE(false); + } + return buffer; +} + +/* +Thread function +1. Loads the module using hipModuleLoadData API +2. Initializes 2 device variables. +3. Launches kernel which copies one data into another. +4. validates the result and returns it to the caller using + std::ref variable. +*/ +void run(const std::vector& buffer, bool &testResult) { + hipModule_t Module; + hipFunction_t Function; + + float *A, *B, *Ad, *Bd; + testResult = true; + HipTest::initArrays(&Ad, &Bd, nullptr, + &A, &B, nullptr, + LEN, false); + + + HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); + HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = static_cast(Ad); + args._Bd = static_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, + LEN, 1, 1, 0, stream, + NULL, reinterpret_cast(&config))); + + HIPCHECK(hipStreamSynchronize(stream)); + + HIPCHECK(hipStreamDestroy(stream)); + + HIPCHECK(hipModuleUnload(Module)); + + HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + + HipTest::freeArrays(Ad, Bd, nullptr, + A, B, nullptr, + false); +} + +/* +Thread class inherited from std::thread +*/ +struct joinable_thread : std::thread { + template + joinable_thread(Xs&&... xs) : std::thread(std::forward(xs)...) {} // NOLINT + + joinable_thread& operator=(joinable_thread&& other) = default; + joinable_thread(joinable_thread&& other) = default; + + ~joinable_thread() { + if (this->joinable()) + this->join(); + } +}; + +/* +This API is triggered form the test case where in +1. Creates the thread object. +2. Loops through the number of GPUs and launches multiple threads. +*/ +void run_multi_threads(uint32_t n, const std::vector& buffer) { + std::vector threads; + bool testResult = false; + for (uint32_t i = 0; i < n; i++) { + threads.emplace_back(std::thread{[&] { + run(buffer, std::ref(testResult)); + }}); + } +} + +/* +The testcase verifies the multithreaded funtionality +1. Loads the kernel file by calling load_file API +2. Gets the host thread count +3. Creates multiple threads in parallel where in each thread initializes + 2 device variables and loads the kernel using hipModuleLoadData API. + The kernel copies the data from one variable to another.Then the thread + validates both the variables. +*/ +TEST_CASE("Unit_hipModuleLoadData_MultiThreaded") { + HIPCHECK(hipInit(0)); + auto buffer = load_file(); + auto file_size = buffer.size() / (1024 * 1024); + auto thread_count = HipTest::getHostThreadCount(file_size + 10); + if (thread_count == 0) { + INFO("Thread Count is zero"); + REQUIRE(false); + } + + run_multi_threads(thread_count, buffer); +} diff --git a/catch/unit/module/hipModuleLoadMultiThreaded.cc b/catch/unit/module/hipModuleLoadMultiThreaded.cc new file mode 100644 index 0000000000..401892915e --- /dev/null +++ b/catch/unit/module/hipModuleLoadMultiThreaded.cc @@ -0,0 +1,121 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies hipModuleLoad API in multithreaded scenario +*/ +#include +#include "hip/hip_runtime.h" +#if HT_AMD +#include "hip/hip_ext.h" +#endif +#include +#include +#include +#include +#include +#include +#define THREADS 8 +#define MAX_NUM_THREADS 128 + +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define NUM_GROUPS 1 +#define GROUP_SIZE 1 +#define WARMUP_RUN_COUNT 10 +#define TIMING_RUN_COUNT 100 +#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT +#define FILENAME "module_kernels.code" +#define kernel_name "EmptyKernel" + +/* +This thread function loads the kernel file , synchronizes the threads +and Launches the kernel . +*/ +void hipModuleLaunchKernel_enqueue(std::atomic_int* shared, int max_threads) { + // resources necessary for this thread + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + hipModule_t module; + hipFunction_t function; + + HIPCHECK(hipModuleLoad(&module, FILENAME)); + HIPCHECK(hipModuleGetFunction(&function, module, kernel_name)); + + void* kernel_params = nullptr; + + // synchronize all threads, before running + shared->fetch_add(1, std::memory_order_release); + while (max_threads != shared->load(std::memory_order_acquire)) {} + + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + HIPCHECK(hipModuleLaunchKernel(function, 1, 1, + 1, 1, 1, 1, 0, stream, + &kernel_params, nullptr)); + } + HIPCHECK(hipModuleUnload(module)); + HIPCHECK(hipStreamDestroy(stream)); +} + +/* +thread pool class contains launching the threads using std::async API +with future variable "threads". +The start API Launches the threads and finish API waits for the +thread execution to end. +*/ +struct thread_pool { + explicit thread_pool(int total_threads) : max_threads(total_threads) { + } + void start(std::function f) { + for (int i = 0; i < max_threads; ++i) { + threads.push_back(std::async(std::launch::async, f, + &shared, max_threads)); + } + } + void finish() { + for (auto&&thread : threads) { + thread.get(); + } + threads.clear(); + shared = 0; + } + ~thread_pool() { + finish(); + } + private: + std::atomic_int shared {0}; + std::vector buffer; + std::vector> threads; + int max_threads = 1; +}; + +/* +This testcase verifies the Multithreaded scenario of hipModule API +where in threadpool object is created and the object invokes start API +which launches multiple threads where each thread loads the kernel object +using hipModuleLoad API and launches the kernel in parallel. +*/ +TEST_CASE("Unit_hipModuleLoad_MultiThread") { + int max_threads = min(THREADS * std::thread::hardware_concurrency(), + MAX_NUM_THREADS); + thread_pool task(max_threads); + task.start(hipModuleLaunchKernel_enqueue); + task.finish(); +} diff --git a/catch/unit/module/hipModuleLoadUnloadStress.cc b/catch/unit/module/hipModuleLoadUnloadStress.cc new file mode 100644 index 0000000000..46acdae29b --- /dev/null +++ b/catch/unit/module/hipModuleLoadUnloadStress.cc @@ -0,0 +1,93 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include "hip_test_common.hh" + +#define TEST_ITERATIONS 1000 +#define CODEOBJ_FILE "module_kernels.code" +/** + * Run Valgrind tool with these test cases to validate memory leakage. + * E.g. valgrind --leak-check=yes ./a.out + */ + +/** + * Internal Function + */ +static std::vector load_file() { + std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + WARN("could not open code object " << CODEOBJ_FILE); + } + file.close(); + return buffer; +} +/** + * Validates no memory leakage for hipModuleLoad + */ +TEST_CASE("Unit_hipModule_LoadUnloadStress") { + CTX_CREATE() + for (int count = 0; count < TEST_ITERATIONS; count++) { + hipModule_t Module; + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); + HIP_CHECK(hipModuleUnload(Module)); + } + CTX_DESTROY() +} +/** + * Validates no memory leakage for hipModuleLoadData + */ +TEST_CASE("Unit_hipModuleLoadData_LoadUnloadStress") { + CTX_CREATE() + auto buffer = load_file(); + for (int count = 0; count < TEST_ITERATIONS; count++) { + hipModule_t Module; + HIP_CHECK(hipModuleLoadData(&Module, &buffer[0])); + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); + HIP_CHECK(hipModuleUnload(Module)); + } + CTX_DESTROY() +} +/** + * Validates no memory leakage for hipModuleLoadDataEx + */ +TEST_CASE("Unit_hipModuleLoadDataEx_UnloadStress") { + CTX_CREATE() + auto buffer = load_file(); + for (int count = 0; count < TEST_ITERATIONS; count++) { + hipModule_t Module; + HIP_CHECK(hipModuleLoadDataEx(&Module, &buffer[0], 0, + nullptr, nullptr)); + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); + HIP_CHECK(hipModuleUnload(Module)); + } + CTX_DESTROY() +} diff --git a/catch/unit/module/hipModuleNegative.cc b/catch/unit/module/hipModuleNegative.cc new file mode 100644 index 0000000000..c43b507c21 --- /dev/null +++ b/catch/unit/module/hipModuleNegative.cc @@ -0,0 +1,274 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* +This testcase verifies the negative scenarios of +1. hipModuleLoad API +2. hipModuleLoadData API +3. hipModuleGetFunction API +4. hipModuleGetGlobal API +*/ + +#include +#include +#include +#include +#include "hip_test_common.hh" + +#define FILENAME_NONEXST "sample_nonexst.code" +#define FILENAME_EMPTY "emptyfile.code" +#define FILENAME_RAND "rand_file.code" +#define RANDOMFILE_LEN 2048 +#define CODEOBJ_FILE "module_kernels.code" +#define KERNEL_NAME "hello_world" +#define KERNEL_NAME_NONEXST "xyz" +#define CODEOBJ_GLOBAL "module_kernels.code" +#define DEVGLOB_VAR_NONEXIST "xyz" +#define DEVGLOB_VAR "myDeviceGlobal" +/** + * Internal Function + * Loads the kernel file into buffer + */ +std::vector load_file(const char* filename) { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + INFO("could not open code object " << filename); + } + file.close(); + return buffer; +} + +/** + * Internal Function + Create Randome file + */ +void createRandomFile(const char* filename) { + std::ofstream outfile(filename, std::ios::binary); + char buf[RANDOMFILE_LEN]; + unsigned int seed = 1; + for (int i = 0; i < RANDOMFILE_LEN; i++) { + buf[i] = rand_r(&seed) % 256; + } + outfile.write(buf, RANDOMFILE_LEN); + outfile.close(); +} + +/** + * Validates negative scenarios for hipModuleLoad API + */ + +TEST_CASE("Unit_hipModuleLoad_Negative") { + CTX_CREATE() + hipModule_t Module; + + SECTION("Nullptr to module") { + REQUIRE(hipModuleLoad(nullptr, CODEOBJ_FILE) + != hipSuccess); + } + + SECTION("Nullptr to Fname") { + REQUIRE(hipModuleLoad(&Module, nullptr) + != hipSuccess); + } + + SECTION("Empty fname") { + std::fstream fs; + fs.open(FILENAME_EMPTY, std::ios::out); + fs.close(); + REQUIRE(hipModuleLoad(&Module, FILENAME_EMPTY) + != hipSuccess); + } + + SECTION("Binary file with random number") { + createRandomFile(FILENAME_RAND); + REQUIRE(hipModuleLoad(&Module, FILENAME_RAND) + != hipSuccess); + remove(FILENAME_RAND); + } + + SECTION("Non Existent file") { + REQUIRE(hipModuleLoad(&Module, FILENAME_NONEXST) + != hipSuccess); + } + + SECTION("Empty string to file name") { + REQUIRE(hipModuleLoad(&Module, "") + != hipSuccess); + } + + CTX_DESTROY() +} + +/** + * Validates negative scenarios for hipModuleLoadData API + */ +TEST_CASE("Unit_hipModuleLoadData_Negative") { + CTX_CREATE() + hipModule_t Module; + + SECTION("Nullptr to module") { + auto buffer = load_file(CODEOBJ_FILE); + REQUIRE(hipModuleLoadData(nullptr, &buffer[0]) + != hipSuccess); + } + + SECTION("Nullptr to image") { + REQUIRE(hipModuleLoadData(&Module, nullptr) + != hipSuccess); + } + + SECTION("Random file to image") { + createRandomFile(FILENAME_RAND); + auto buffer = load_file(FILENAME_RAND); + REQUIRE(hipModuleLoadData(&Module, &buffer[0]) + != hipSuccess); + } + + SECTION("Nullptr to Module") { + auto buffer = load_file(CODEOBJ_FILE); + REQUIRE(hipModuleLoadDataEx(nullptr, &buffer[0], 0, nullptr, nullptr) + != hipSuccess); + } + + SECTION("Nullptr to image") { + REQUIRE(hipModuleLoadDataEx(&Module, nullptr, 0, nullptr, nullptr) + != hipSuccess); + } + + SECTION("Random image file") { + // Create a binary file with random numbers + createRandomFile(FILENAME_RAND); + // Open the code object file and copy it in a buffer + auto buffer = load_file(FILENAME_RAND); + REQUIRE(hipModuleLoadDataEx(&Module, &buffer[0], 0, nullptr, nullptr) + != hipSuccess); + } + + CTX_DESTROY() +} + +/** + * Validates negative scenarios for hipModuleGetFunction API + */ +TEST_CASE("Unit_hipModuleGetFunction_Negative") { + CTX_CREATE() + hipFunction_t Function; + hipModule_t Module; + + SECTION("Nullptr to function name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + REQUIRE(hipModuleGetFunction(nullptr, Module, KERNEL_NAME) != hipSuccess); + HIP_CHECK(hipModuleUnload(Module)); + } + + SECTION("Uninitialized module") { + REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess); + } + + SECTION("Non existing function kernel name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME_NONEXST) + != hipSuccess); + HIP_CHECK(hipModuleUnload(Module)); + } + + SECTION("Nullptr to kernel name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + REQUIRE(hipModuleGetFunction(&Function, Module, nullptr) != hipSuccess); + HIP_CHECK(hipModuleUnload(Module)); + } +#if HT_AMD + SECTION("Unloaded module") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIP_CHECK(hipModuleUnload(Module)); + REQUIRE(hipModuleGetFunction(&Function, Module, KERNEL_NAME) != hipSuccess); + } +#endif + + SECTION("Empty string to kernel name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + REQUIRE(hipModuleGetFunction(&Function, Module, "") != hipSuccess); + HIP_CHECK(hipModuleUnload(Module)); + } + + CTX_DESTROY() +} + +/** + * Validates negative scenarios for hipModuleGetGlobal API + */ +TEST_CASE("Unit_hipModuleGetGlobal_Negative") { + CTX_CREATE() + hipModule_t Module; + hipDeviceptr_t deviceGlobal; + size_t deviceGlobalSize; + + SECTION("Nullptr to varname") { + HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); + REQUIRE(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, Module, nullptr) + != hipSuccess); + HIPCHECK(hipModuleUnload(Module)); + } + + SECTION("Wrong variable name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); + REQUIRE(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, + Module, DEVGLOB_VAR_NONEXIST) != hipSuccess); + HIPCHECK(hipModuleUnload(Module)); + } + + SECTION("Empty string to module name") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); + REQUIRE(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, Module, "") != hipSuccess); + HIPCHECK(hipModuleUnload(Module)); + } + +#if HT_AMD + SECTION("Unloaded Module") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); + HIP_CHECK(hipModuleUnload(Module)); + REQUIRE(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, Module, + DEVGLOB_VAR) != hipSuccess); + } + + SECTION("Unload an Unloaded module") { + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIP_CHECK(hipModuleUnload(Module)); + REQUIRE(hipModuleUnload(Module) != hipSuccess); + } + + SECTION("Uninitialized module") { + REQUIRE(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, Module, + DEVGLOB_VAR) != hipSuccess); + } + SECTION("Unload Uninitialized module") { + REQUIRE(hipModuleUnload(Module) != hipSuccess); + } +#endif + + CTX_DESTROY() +} diff --git a/catch/unit/module/hipModuleOccupancyMaxPotentialBlockSize.cc b/catch/unit/module/hipModuleOccupancyMaxPotentialBlockSize.cc new file mode 100644 index 0000000000..c58b3978e6 --- /dev/null +++ b/catch/unit/module/hipModuleOccupancyMaxPotentialBlockSize.cc @@ -0,0 +1,267 @@ + +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +#define fileName "module_kernels.code" +#define kernel_name "hello_world" +/** + * hipModuleOccupancyMaxPotentialBlockSize and hipModuleOccupancyMaxPotentialBlockSizeWithFlags + * corner tests. + * Scenario1: + * Validates the value of gridSize, which should be always non zero +ve integer and blockSize + * range returned for dynSharedMemPerBlk = 0 and blockSizeLimit = 0. + * Scenario2: + * Validates the value of gridSize, which should be always non zero +ve integer and blockSize + * range returned for dynSharedMemPerBlk = devProp.sharedMemPerBlock and + * blockSizeLimit = devProp.maxThreadsPerBlock. + */ +TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_FuncTst") { + // Initialize + hipDeviceProp_t devProp; + int gridSize = 0; + int blockSize = 0; + hipModule_t Module; + CTX_CREATE() + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); + // Scenario1 + SECTION("without flag - gridSize when input params are 0") { + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, + &blockSize, Function, 0, 0)); + } + // Scenario2 + SECTION("without flag - gridSize when input params are maximum") { + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, + &blockSize, Function, + devProp.sharedMemPerBlock, devProp.maxThreadsPerBlock)); + } + // Scenario1 + SECTION("with flag - gridSize when input params are 0") { + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize, + &blockSize, Function, 0, 0, 0)); + } + // Scenario2 + SECTION("with flag - gridSize when input params are maximum") { + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize, + &blockSize, Function, devProp.sharedMemPerBlock, + devProp.maxThreadsPerBlock, 0)); + } + // Check if blockSize doen't exceed maxThreadsPerBlock + REQUIRE_FALSE(gridSize <= 0); + REQUIRE_FALSE(blockSize <= 0); + REQUIRE_FALSE(blockSize > devProp.maxThreadsPerBlock); + // Un-initialize + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} +/** + * hipModuleOccupancyMaxActiveBlocksPerMultiprocessor and + * hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags Corner tests. + * Scenario1: + * Validates numBlock value range is within expected limit when sharedMemPerBlock + * is 0. + * Scenario2: + * Validates numBlock value range is within expected limit when + * dynSharedMemPerBlk = devProp.sharedMemPerBlock. + */ +TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_FuncTst") { + // Initialize + hipDeviceProp_t devProp; + int gridSize = 0; + int blockSize = 0; + int numBlock = 0; + hipModule_t Module; + CTX_CREATE() + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, + &blockSize, Function, 0, 0)); + HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); + // Scenario1 + SECTION("without flag - gridSize when input params are 0") { + HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, + Function, blockSize, 0)); + } + // Scenario2 + SECTION("without flag - gridSize when input params are maximum") { + HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, + Function, blockSize, devProp.sharedMemPerBlock)); + } + // Scenario1 + SECTION("with flag - gridSize when input params are 0") { + HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + &numBlock, Function, blockSize, 0, 0)); + } + // Scenario2 + SECTION("with flag - gridSize when input params are maximum") { + HIP_CHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + &numBlock, Function, blockSize, devProp.sharedMemPerBlock, 0)) + } + // Check if numBlocks are within limits + int temp_val = (numBlock * blockSize); + REQUIRE_FALSE(numBlock <= 0); + REQUIRE_FALSE(temp_val > devProp.maxThreadsPerMultiProcessor); + // Un-initialize + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} +/** + * hipModuleOccupancyMaxPotentialBlockSize negative tests. + * Scenario1: gridSize is nullptr. + * Scenario2: blocksize is nullptr. + * Scenario3: blockSizeLimit < 0. + */ +TEST_CASE("Unit_hipModuleOccupancyMaxPotentialBlockSize_NegTst") { + int gridSize = 0; + int blockSize = 0; + hipModule_t Module; + hipFunction_t Function; + CTX_CREATE() + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + // Scenario1 + SECTION("without flag - gridSize is nullptr") { + REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize( + nullptr, &blockSize, Function, 0, 0)); + } + // Scenario2 + SECTION("without flag - blocksize is nullptr") { + REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize( + &gridSize, nullptr, Function, 0, 0)); + } + // Scenario3 + SECTION("without flag - blockSizeLimit is less than 0") { + hipDeviceProp_t devProp; + HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); +#if HT_NVIDIA + REQUIRE_FALSE(hipSuccess == hipModuleOccupancyMaxPotentialBlockSize( + &gridSize, &blockSize, Function, 0, -1)); +#else + // As discussed in SWDEV-269400 + // with developers this difference in behavior between NVIDIA and AMD + // is retained. + REQUIRE_FALSE(hipSuccess != hipModuleOccupancyMaxPotentialBlockSize( + &gridSize, &blockSize, Function, 0, -1)); +#endif + } + // Scenario1 + SECTION("with flag - gridSize is nullptr") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxPotentialBlockSizeWithFlags(nullptr, + &blockSize, Function, 0, 0, 0)); + } + // Scenario2 + SECTION("with flag - blocksize is nullptr") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize, + nullptr, Function, 0, 0, 0)); + } + // Scenario3 + SECTION("with flag - blockSizeLimit is less than 0") { +#if HT_NVIDIA + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize, + &blockSize, Function, 0, -1, 0)); +#else + // As discussed in SWDEV-269400 + // with developers this difference in behavior between NVIDIA and AMD + // is retained. + REQUIRE_FALSE(hipSuccess != + hipModuleOccupancyMaxPotentialBlockSizeWithFlags(&gridSize, + &blockSize, Function, 0, -1, 0)); +#endif + } + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} +/** + * hipModuleOccupancyMaxActiveBlocksPerMultiprocessor negative tests. + * Scenario1: numBlocks is nullptr. + * Scenario2: Check the behavior for blockSize < 0. + * Scenario3: Check error code returned for dynSharedMemPerBlk = 0 and blockSize = 0. + * Scenario4: dynSharedMemPerBlk = size_t numeric limit. + */ +TEST_CASE("Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_NegTst") { + int gridSize = 0; + int blockSize = 0; + int numBlocks = 0; + hipModule_t Module; + hipFunction_t Function; + CTX_CREATE() + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + HIP_CHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, + Function, 0, 0)); + // Scenario1 + SECTION("without flag - numBlocks is nullptr") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(nullptr, + Function, blockSize, 0)); + } + // Scenario3 + SECTION("without flag - dynSharedMemPerBlk = 0 and blockSize = 0") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, + Function, 0, 0)); + } + // Scenario2 + SECTION("without flag - blockSize is less than 0") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, + Function, -1, 0)); + } + // Scenario4 + SECTION("without flag - dynSharedMemPerBlk = max_numerical_limit") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, + Function, 0, std::numeric_limits::max())); + } + // Scenario1 + SECTION("with flag - numBlocks is nullptr") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(nullptr, + Function, blockSize, 0, 0)); + } + // Scenario3 + SECTION("with flag - dynSharedMemPerBlk = 0 and blockSize = 0") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, + Function, 0, 0, 0)); + } + // Scenario2 + SECTION("with flag - blockSize is less than 0") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, + Function, -1, 0, 0)); + } + // Scenario4 + SECTION("with flag - dynSharedMemPerBlk = max_numerical_limit") { + REQUIRE_FALSE(hipSuccess == + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, + Function, 0, std::numeric_limits::max(), 0)); + } + HIP_CHECK(hipModuleUnload(Module)); + CTX_DESTROY() +} + diff --git a/catch/unit/module/hipModuleTexture2dDrv.cc b/catch/unit/module/hipModuleTexture2dDrv.cc new file mode 100755 index 0000000000..2afa3e06fc --- /dev/null +++ b/catch/unit/module/hipModuleTexture2dDrv.cc @@ -0,0 +1,561 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* +This testcase verifies the following scenarios of hipModuleGetTexRef API +1. Negative +2. Basic functionality using different data types +3. Multiple streams +4. MultiThreaded - MultStreamMultGPU +5. MultiThreaded - SingleStreamMultGPU +*/ + +#include +#include +#include +#include +#include +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define CODEOBJ_FILE "module_kernels.code" +#define NON_EXISTING_TEX_NAME "xyz" +#define EMPTY_TEX_NAME "" +#define GLOBAL_KERNEL_VAR "deviceGlobalFloat" +#define TEX_REF "ftex" +#define WIDTH 256 +#define HEIGHT 256 +#define MAX_STREAMS 4 +#define GRIDDIMX 16 +#define GRIDDIMY 16 +#define GRIDDIMZ 1 +#define BLOCKDIMZ 1 +#define MAX_GPU 16 + +std::atomic g_thTestPassed(1); + + +/** + * Internal Functions + * Loads the kernel file + */ +static std::vector load_file() { + std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + INFO("could not open code object " << CODEOBJ_FILE); + REQUIRE(false); + } + return buffer; +} + +/* +Initializes the array +*/ +template +void allocInitArray(unsigned int width, + unsigned int height, + hipArray_Format format, + HIP_ARRAY* array + ) { + HIP_ARRAY_DESCRIPTOR desc; + desc.Format = format; + desc.NumChannels = 1; + desc.Width = width * sizeof(T); + desc.Height = height; + HIPCHECK(hipArrayCreate(array, &desc)); +} + +/* +Copies buffer to array using hipMemcpyParam2D API +*/ +template void copyBuffer2Array(unsigned int width, + unsigned int height, + T* hData, + T1 array + ) { + hip_Memcpy2D copyParam; + memset(©Param, 0, sizeof(copyParam)); +#if HT_NVIDIA + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.dstArray = *array; +#else + copyParam.dstMemoryType = hipMemoryTypeArray; + copyParam.srcMemoryType = hipMemoryTypeHost; + copyParam.dstArray = array; +#endif + copyParam.srcHost = hData; + copyParam.srcPitch = width * sizeof(T); + copyParam.WidthInBytes = width * sizeof(T); + copyParam.Height = height; + HIPCHECK(hipMemcpyParam2D(©Param)); +} + +/* +Assigns array to texture ref +*/ +template void assignArray2TexRef(hipArray_Format format, + const char* texRefName, + hipModule_t Module, + T array + ) { + HIP_TEX_REFERENCE texref; +#if HT_NVIDIA + HIPCHECK(hipModuleGetTexRef(&texref, Module, texRefName)); + HIPCHECK(hipTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)); + HIPCHECK(hipTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)); + HIPCHECK(hipTexRefSetFilterMode(texref, HIP_TR_FILTER_MODE_POINT)); + HIPCHECK(hipTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); + HIPCHECK(hipTexRefSetFormat(texref, format, 1)); + HIPCHECK(hipTexRefSetArray(texref, *array, CU_TRSA_OVERRIDE_FORMAT)); +#else + HIPCHECK(hipModuleGetTexRef(&texref, Module, texRefName)); + HIPCHECK(hipTexRefSetAddressMode(texref, 0, hipAddressModeWrap)); + HIPCHECK(hipTexRefSetAddressMode(texref, 1, hipAddressModeWrap)); + HIPCHECK(hipTexRefSetFilterMode(texref, hipFilterModePoint)); + HIPCHECK(hipTexRefSetFlags(texref, HIP_TRSF_READ_AS_INTEGER)); + HIPCHECK(hipTexRefSetFormat(texref, format, 1)); + HIPCHECK(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT)); +#endif +} + +template bool validateOutput(unsigned int width, + unsigned int height, + T* hData, + T* hOutputData) { + for (unsigned int i = 0; i < height; i++) { + for (unsigned int j = 0; j < width; j++) { + if (hData[i * width + j] != hOutputData[i * width + j]) { + return false; + } + } + } + return true; +} + +/** + * Validates texture functionality with multiple streams for hipModuleGetTexRef + * + */ +template bool testTexMultStream(const std::vector& buffer, + hipArray_Format format, + const char* texRefName, + const char* kerFuncName, + unsigned int numOfStreams) { + bool TestPassed = true; + unsigned int width = WIDTH; + unsigned int height = HEIGHT; + unsigned int size = width * height * sizeof(T); + T* hData = reinterpret_cast(malloc(size)); + CTX_CREATE() + HipTest::setDefaultData(width * height, hData, nullptr, nullptr); + + // Load Kernel File and create hipArray + hipModule_t Module; + HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); + HIP_ARRAY array; + allocInitArray(width, height, format, &array); +#if HT_NVIDIA + // Copy from hData to array using hipMemcpyParam2D + copyBuffer2Array(width, height, hData, &array); + // Get tex reference from the loaded kernel file + // Assign array to the tex reference + assignArray2TexRef(format, texRefName, Module, &array); +#else + // Copy from hData to array using hipMemcpyParam2D + copyBuffer2Array(width, height, hData, array); + // Get tex reference from the loaded kernel file + // Assign array to the tex reference + assignArray2TexRef(format, texRefName, Module, array); +#endif + hipFunction_t Function; + HIPCHECK(hipModuleGetFunction(&Function, Module, kerFuncName)); + + // Create Multiple Strings + hipStream_t streams[MAX_STREAMS]={0}; + T* dData[MAX_STREAMS] = {NULL}; + T* hOutputData[MAX_STREAMS] = {NULL}; + if (numOfStreams > MAX_STREAMS) { + numOfStreams = MAX_STREAMS; + } + unsigned int totalStreamsCreated = 0; + for (unsigned int stream_num = 0; stream_num < numOfStreams; stream_num++) { + hOutputData[stream_num] = reinterpret_cast(malloc(size)); + if (NULL == hOutputData[stream_num]) { + WARN("Failed to allocate using malloc in testTexMultStream"); + TestPassed &= false; + } + HIPCHECK(hipStreamCreate(&streams[stream_num])); + HIPCHECK(hipMalloc(reinterpret_cast(&dData[stream_num]), size)); + memset(hOutputData[stream_num], 0, size); + struct { + void* _Ad; + unsigned int _Bd; + unsigned int _Cd; + } args; + args._Ad = reinterpret_cast(dData[stream_num]); + args._Bd = width; + args._Cd = height; + + size_t sizeTemp = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, + &sizeTemp, + HIP_LAUNCH_PARAM_END}; + + int temp1 = width / GRIDDIMX; + int temp2 = height / GRIDDIMY; + HIPCHECK(hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ, + temp1, temp2, BLOCKDIMZ, 0, + streams[stream_num], + NULL, reinterpret_cast(&config))); + totalStreamsCreated++; + } + // Check the kernel results separately + for (unsigned int stream_num = 0; stream_num < totalStreamsCreated; + stream_num++) { + HIPCHECK(hipStreamSynchronize(streams[stream_num])); + HIPCHECK(hipMemcpy(hOutputData[stream_num], dData[stream_num], size, + hipMemcpyDeviceToHost)); + TestPassed &= validateOutput(width, height, hData, + hOutputData[stream_num]); + } + for (unsigned int i = 0; i < totalStreamsCreated; i++) { + HIPCHECK(hipFree(dData[i])); + HIPCHECK(hipStreamDestroy(streams[i])); + free(hOutputData[i]); + } + ARRAY_DESTROY(array) + HIPCHECK(hipModuleUnload(Module)); + free(hData); + CTX_DESTROY() + return TestPassed; +} + +/** + * Internal Thread Functions + * + */ +void launchSingleStreamMultGPU(int gpu, const std::vector& buffer) { + bool TestPassed = true; + HIPCHECK(hipSetDevice(gpu)); + TestPassed = testTexMultStream(buffer, + HIP_AD_FORMAT_FLOAT, + "ftex", + "tex2dKernelFloat", 1); + g_thTestPassed &= static_cast(TestPassed); +} + +void launchMultStreamMultGPU(int gpu, const std::vector& buffer) { + bool TestPassed = true; + HIPCHECK(hipSetDevice(gpu)); + TestPassed = testTexMultStream(buffer, + HIP_AD_FORMAT_FLOAT, + "ftex", + "tex2dKernelFloat", 3); + g_thTestPassed &= static_cast(TestPassed); +} +/** + * Validates texture functionality with Multiple Streams on multuple GPU + * for hipModuleGetTexRef + * + */ +bool testTexMultStreamMultGPU(unsigned int numOfGPUs, + const std::vector& buffer) { + bool TestPassed = true; + std::thread T[MAX_GPU]; + + for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) { + T[gpu] = std::thread(launchMultStreamMultGPU, gpu, buffer); + } + for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) { + T[gpu].join(); + } + + if (g_thTestPassed) { + TestPassed = true; + } else { + TestPassed = false; + } + return TestPassed; +} + +/** + * Validates texture functionality with Single Stream on multuple GPU + * for hipModuleGetTexRef + * + */ +bool testTexSingleStreamMultGPU(unsigned int numOfGPUs, + const std::vector& buffer) { + bool TestPassed = true; + std::thread T[MAX_GPU]; + + for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) { + T[gpu] = std::thread(launchSingleStreamMultGPU, gpu, buffer); + } + for (unsigned int gpu = 0; gpu < numOfGPUs; gpu++) { + T[gpu].join(); + } + + if (g_thTestPassed) { + TestPassed = true; + } else { + TestPassed = false; + } + return TestPassed; +} + +/* +This testcase verifies the negative scenarios of hipModuleGetTexRef API +*/ +TEST_CASE("Unit_hipModuleGetTexRef_Negative") { + hipModule_t Module; + HIP_TEX_REFERENCE texref; + CTX_CREATE() + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + + SECTION("TexRef as nullptr") { + REQUIRE(hipModuleGetTexRef(nullptr, Module, "tex") != hipSuccess); + } + + SECTION("Name as nullptr") { + REQUIRE(hipModuleGetTexRef(&texref, Module, nullptr) != hipSuccess); + } + + SECTION("Name as non existing TexName") { + REQUIRE(hipModuleGetTexRef(&texref, Module, + NON_EXISTING_TEX_NAME) != hipSuccess); + } + + SECTION("Empty tex name") { + REQUIRE(hipModuleGetTexRef(&texref, Module, EMPTY_TEX_NAME) != hipSuccess); + } +#if HT_NVIDIA + SECTION("Name as Global kernel Var") { + REQUIRE(hipModuleGetTexRef(&texref, Module, + GLOBAL_KERNEL_VAR) != hipSuccess); + } +#endif + + SECTION("Unload Module") { + HIP_CHECK(hipModuleUnload(Module)); + REQUIRE(hipModuleGetTexRef(&texref, Module, TEX_REF) != hipSuccess); + } + + CTX_DESTROY() +} +/** + * Validates texture type data functionality for hipModuleGetTexRef + * 1.Loads the code object file + * 2.Based on the template type texRefName,KernelFuncName and format are assigned. + * 3.Allocate array based on format. + * 4.Assigns array to texRef + * 5.Launches the kernel based on the template type which invokes text2D API + and copies the data to output variable. + * 6.Validates the data. + */ +TEMPLATE_TEST_CASE("Unit_hipModuleGetTexRef_Basic", "", int, + char, uint16_t, float) { + bool TestPassed = true; + constexpr unsigned int width = WIDTH; + constexpr unsigned int height = HEIGHT; + constexpr unsigned int size = width * height * sizeof(TestType); + const char *texRefName, *kerFuncName; + hipArray_Format format; + + TestType* hData = reinterpret_cast(malloc(size)); + if (NULL == hData) { + INFO("Failed to allocate using malloc in testTexType.\n"); + REQUIRE(false); + } + CTX_CREATE() + HipTest::setDefaultData(width * height, hData, nullptr, nullptr); + // Load Kernel File and create hipArray + hipModule_t Module; + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIP_ARRAY array; + + if (std::is_same::value) { + texRefName = "ctex"; + kerFuncName = "tex2dKernelInt8"; + format = HIP_AD_FORMAT_SIGNED_INT8; + } else if (std::is_same::value) { + texRefName = "stex"; + kerFuncName = "tex2dKernelInt16"; + format = HIP_AD_FORMAT_SIGNED_INT16; + } else if (std::is_same::value) { + texRefName = "itex"; + kerFuncName = "tex2dKernelInt"; + format = HIP_AD_FORMAT_SIGNED_INT32; + } else if (std::is_same::value) { + texRefName = "ftex"; + kerFuncName = "tex2dKernelFloat"; + format = HIP_AD_FORMAT_FLOAT; + } + allocInitArray(width, height, format, &array); + +#if HT_NVIDIA + // Copy from hData to array using hipMemcpyParam2D + copyBuffer2Array(width, height, hData, &array); + // Get tex reference from the loaded kernel file + // Assign array to the tex reference + assignArray2TexRef(format, texRefName, Module, &array); +#else + // Copy from hData to array using hipMemcpyParam2D + copyBuffer2Array(width, height, hData, array); + // Get tex reference from the loaded kernel file + // Assign array to the tex reference + assignArray2TexRef(format, texRefName, Module, array); +#endif + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, kerFuncName)); + + TestType* dData = NULL; + HIP_CHECK(hipMalloc(reinterpret_cast(&dData), size)); + + struct { + void* _Ad; + unsigned int _Bd; + unsigned int _Cd; + } args; + args._Ad = reinterpret_cast(dData); + args._Bd = width; + args._Cd = height; + + size_t sizeTemp = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, + &sizeTemp, + HIP_LAUNCH_PARAM_END}; + + int temp1 = width / GRIDDIMX; + int temp2 = height / GRIDDIMY; + HIP_CHECK( + hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ, + temp1, temp2, BLOCKDIMZ, 0, 0, + NULL, reinterpret_cast(&config))); + HIP_CHECK(hipDeviceSynchronize()); + TestType* hOutputData = reinterpret_cast(malloc(size)); + if (NULL == hOutputData) { + INFO("Failed to allocate using malloc in testTexType"); + REQUIRE(false); + } else { + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); + TestPassed = validateOutput(width, height, hData, hOutputData); + REQUIRE(TestPassed); + } + free(hOutputData); + HIP_CHECK(hipFree(dData)); + ARRAY_DESTROY(array) + HIP_CHECK(hipModuleUnload(Module)); + free(hData); + CTX_DESTROY() +} + +/* +This testcase verifies hipModuleGetTexRef on multiple streams +where + * 1..Loads the code object file + * 2.Allocate array and initializes it with hData + * 3.Assigns array to texRef + 4.Creates multiple streams + * 4.Launches the kernel on each stream which invokes text2D API + and copies the data to output variable + * 5.Validates the hData with output data in each stream. +*/ +TEST_CASE("Unit_hipModuleGetTexRef_TexMultStream") { + bool TestPassed = true; + auto buffer = load_file(); + TestPassed = testTexMultStream(buffer, + HIP_AD_FORMAT_FLOAT, + "ftex", + "tex2dKernelFloat", + MAX_STREAMS); + REQUIRE(TestPassed); +} +/* +This testcase verifies hipModuleGetTexRef Multithreaded scenario on +single stream and multi GPU machine. +1. Gets the device count. +2. Create the threads based on device count. +3. Each thread calls the testTexMultStream which performs the same + above funtionality on single Stream +4. The threads are executed in parallel and are joined later. + +This testcase ensures that the multi thread execution on single stream +in parallel is successful +*/ +TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexSingleStreamMultiGPU") { + bool TestPassed = true; + // Testcase skipped on nvidia with CUDA API version 11.2, + // as hipModuleLoadData returning error code + // 'a PTX JIT compilation failed'(218), which is invalid + // behavior. Test passes with AMD and previous CUDA versions. +#if HT_NVIDIA + INFO("Testcase skipped on CUDA version 11.2\n"); + REQUIRE(true); +#else + int gpu_cnt = 0; + auto buffer = load_file(); + HIP_CHECK(hipGetDeviceCount(&gpu_cnt)); + TestPassed = testTexSingleStreamMultGPU(gpu_cnt, buffer); + REQUIRE(TestPassed); +#endif +} + + +/* +This testcase verifies hipModuleGetTexRef Multithreaded scenario on +single stream and multi GPU machine. +1. Gets the device count. +2. Create the threads based on device count. +3. Each thread calls the testTexMultStream which performs the same + above funtionality on multiple Stream +4. The threads are executed in parallel and are joined later. + +This testcase ensures that the multi thread execution on multiple streams +in parallel is successful +*/ +TEST_CASE("Unit_hipModuleGetTexRef_MultiThreadTexMultiStreamMultiGPU") { + bool TestPassed = true; + // Testcase skipped on nvidia with CUDA API version 11.2, + // as hipModuleLoadData returning error code + // 'a PTX JIT compilation failed'(218), which is invalid + // behavior. Test passes with AMD and previous CUDA versions. +#if HT_NVIDIA + INFO("Testcase skipped on CUDA version 11.2\n"); + REQUIRE(true); +#else + int gpu_cnt = 0; + auto buffer = load_file(); + HIP_CHECK(hipGetDeviceCount(&gpu_cnt)); + TestPassed = testTexMultStreamMultGPU(gpu_cnt, buffer); + REQUIRE(TestPassed); +#endif +} diff --git a/catch/unit/module/hipModuleUnload.cc b/catch/unit/module/hipModuleUnload.cc new file mode 100644 index 0000000000..24c6575d1b --- /dev/null +++ b/catch/unit/module/hipModuleUnload.cc @@ -0,0 +1,34 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + + +#include + +#define fileName "module_kernels.code" +/* +This testcase verifies the basic functionality of hipModuleUnload API +*/ +TEST_CASE("Unit_hipModuleUnload_Basic") { + CTX_CREATE() + hipModule_t module; + HIP_CHECK(hipModuleLoad(&module, fileName)); + HIP_CHECK(hipModuleUnload(module)); + CTX_DESTROY() +} diff --git a/catch/unit/module/hipOpenCLCOTest.cc b/catch/unit/module/hipOpenCLCOTest.cc new file mode 100644 index 0000000000..91cfae767a --- /dev/null +++ b/catch/unit/module/hipOpenCLCOTest.cc @@ -0,0 +1,229 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase reads the openCL kernel file and generate the the code object +file which gets executed in HIP interface. +This testcase verifies for the +1. Current GPU architecture +2. Code object version v3 +*/ + +#ifdef __linux__ +#include + #include +#endif +#include +#include "hip_test_common.hh" +#include "hip_test_checkers.hh" + +#define OPENCL_OBJ_FILE "opencl_add.cc" +#define HIP_CODEOBJ_FILE_DEFAULT "opencl_add.co" +#define HIP_CODEOBJ_FILE_V3 "opencl_add_v3.co" +#define COMMAND_LEN 256 +#define BUFFER_LEN 256 + + +#ifdef __linux__ + +/* Check if environment variable $ROCM_PATH is defined */ +static bool isRocmPathSet() { + FILE *fpipe; + char const *command = "echo $ROCM_PATH"; + fpipe = popen(command, "r"); + + if (fpipe == nullptr) { + WARN("Unable to create command"); + return false; + } + char command_op[BUFFER_LEN]; + if (fgets(command_op, BUFFER_LEN, fpipe)) { + size_t len = strlen(command_op); + if (len > 1) { // This is because fgets always adds newline character + pclose(fpipe); + return true; + } + } + pclose(fpipe); + return false; +} + +/* Gets the sramecc/xnack settings from rocm info */ + +int getV3TargetIdFeature(char* feature, bool rocmPathSet) { + FILE *fpipe; + char command[COMMAND_LEN] = ""; + const char *rocmpath = nullptr; + if (rocmPathSet) { + // For STG2 testing where /opt/rocm path is not present + rocmpath = "$ROCM_PATH/bin/rocminfo"; + } else { + // Check if the rocminfo tool exists + rocmpath = "/opt/rocm/bin/rocminfo"; + } + snprintf(command, COMMAND_LEN, "%s", rocmpath); + strncat(command, " | grep -m1 \"sramecc.:xnack.\"", COMMAND_LEN); + fpipe = popen(command, "r"); + + if (fpipe == nullptr) { + WARN("Unable to create command file"); + return -1; + } + char command_op[BUFFER_LEN]; + const char* pOpt1 = nullptr; + const char *pOpt2 = nullptr; + if (fgets(command_op, BUFFER_LEN, fpipe)) { + if (strstr(command_op, "sramecc+")) { + pOpt1 = "-msram-ecc"; + } else if (strstr(command_op, "sramecc-")) { + pOpt1 = "-mno-sram-ecc"; + } else { + pclose(fpipe); + return -1; + } + if (strstr(command_op, "xnack+")) { + pOpt2 = " -mxnack"; + } else if (strstr(command_op, "xnack-")) { + pOpt2 = " -mno-xnack"; + } else { + pclose(fpipe); + return -1; + } + } else { + printf("No sramecc/xnack settings found.\n"); + pclose(fpipe); + return -1; + } + strncpy(feature, pOpt1, strlen(pOpt1)); + strncat(feature, pOpt2, strlen(pOpt2)); + pclose(fpipe); + return 0; +} +#endif +/** + * Validates OpenCL Static Lds Code Object where + * 1. Tries to access opencl kernel file + * 2. Copies it to current folder + * 3. Tries to get RocmPath and execute the kernel file to + generate the code object file.code-object-version argument + specifies the code object version + * 4. Launch the kernel which copies one variable to another + * 5. Validates the result. + */ +TEST_CASE("Unit_hipModuleLoad_OpenCLStaticCodeObjV3") { +#ifdef __linux__ + auto codeobj_type = GENERATE(0, 1); + char command[COMMAND_LEN] = ""; + char v3option[32] = ""; + hipDeviceProp_t props; + hipGetDeviceProperties(&props, 0); + std::string path = std::experimental::filesystem::current_path(); + WARN("path is " << path.c_str()); + if (access("./opencl_add.cc", F_OK) == -1) { + system("cp ./../../../../hip-on-rocclr/tests/catch/unit/module/opencl_add.cc ."); + } + // Generate the command to translate the OpenCL code object to hip code object + const char *pCodeObjVer = nullptr; + const char *pCodeObjFile = nullptr; + bool rocmPathSet = isRocmPathSet(); + if (codeobj_type == 0) { + pCodeObjVer = ""; + pCodeObjFile = HIP_CODEOBJ_FILE_DEFAULT; + } else { + pCodeObjVer = "-mcode-object-version=3"; + if (-1 == getV3TargetIdFeature(v3option, rocmPathSet)) { + INFO("Error getting V3 Option. Skipping Test. \n"); + REQUIRE(true); + } + pCodeObjFile = HIP_CODEOBJ_FILE_V3; + } + INFO("v3option "<< v3option); + /* The command string is created using multiple concatenation instead of one go + to avoid the following cpplint error: + " Multi-line string ("...") found. This lint script doesn't do well with such strings, + and may give bogus warnings. Use C++11 raw strings or concatenation instead." + */ + if (rocmPathSet) { + // For STG2 testing where /opt/rocm path is not present + snprintf(command, COMMAND_LEN, + "$ROCM_PATH/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl "); + } else { + snprintf(command, COMMAND_LEN, + "/opt/rocm/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl "); + } + char command_temp[COMMAND_LEN] = ""; + snprintf(command_temp, COMMAND_LEN, + "-include `find /opt/rocm* -name opencl-c.h` %s %s -mcpu=%s -o %s %s", + pCodeObjVer, v3option, props.gcnArchName, pCodeObjFile, OPENCL_OBJ_FILE); + + strncat(command, command_temp, COMMAND_LEN); + INFO("command executed "<< command); + + system((const char*)command); + // Check if the code object file is created + snprintf(command, COMMAND_LEN, "./%s", + pCodeObjFile); + + if (access(command, F_OK) == -1) { + INFO("Code Object File not found \n"); + REQUIRE(true); + } + + hipDevice_t device; + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipDeviceGet(&device, 0)); + HIPCHECK(hipModuleLoad(&Module, pCodeObjFile)); + HIPCHECK(hipModuleGetFunction(&Function, Module, "add")); + float *Ah, *Bh, *Ad, *Bd; + HipTest::initArrays(&Ad, &Bd, nullptr, &Ah, &Bh, nullptr, + BUFFER_LEN, false); + + HIPCHECK(hipMemcpy(Ad, Ah, sizeof(float) * BUFFER_LEN, + hipMemcpyHostToDevice)); + + struct { + void* _Bd; + void* _Ad; + } args; + args._Ad = static_cast(Ad); + args._Bd = static_cast(Bd); + size_t size = sizeof(args); + + void *config[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, BUFFER_LEN, 1, 1, 0, 0, + NULL, reinterpret_cast(&config))); + HIPCHECK(hipMemcpy(Bh, Bd, sizeof(float) * BUFFER_LEN, + hipMemcpyDeviceToHost)); + + for (uint32_t i = 0; i < BUFFER_LEN; i++) { + REQUIRE(Ah[i] == Bh[i]); + } + HipTest::freeArrays(Ad, Bd, nullptr, + Ah, Bh, nullptr, false); +#else + INFO("This test is skipped due to non linux environment.\n"); + REQUIRE(true); +#endif +} diff --git a/catch/unit/module/module_kernels.cc b/catch/unit/module/module_kernels.cc new file mode 100644 index 0000000000..ccb9c204ea --- /dev/null +++ b/catch/unit/module/module_kernels.cc @@ -0,0 +1,167 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip/hip_runtime.h" + +#define GLOBAL_BUF_SIZE 2048 +#define ARRAY_SIZE (16) + +texture ftex; +texture itex; +texture stex; +texture ctex; + +__device__ int deviceGlobal = 1; +__managed__ int x = 10; +__device__ float myDeviceGlobal; +__device__ float myDeviceGlobalArray[16]; + + +__device__ float deviceGlobalFloat; +__device__ int deviceGlobalInt1; +__device__ int deviceGlobalInt2; +__device__ uint16_t deviceGlobalShort; +__device__ char deviceGlobalChar; + +extern "C" __global__ void tex2dKernelFloat(float* outputData, + int width, int height) { + int x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + if ((x < width) && (y < width)) { + outputData[y * width + x] = tex2D(ftex, x, y); + } +} + +extern "C" __global__ void tex2dKernelInt(int* outputData, + int width, int height) { + int x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + if ((x < width) && (y < width)) { + outputData[y * width + x] = tex2D(itex, x, y); + } +} + +extern "C" __global__ void tex2dKernelInt16(uint16_t* outputData, + int width, int height) { + int x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + if ((x < width) && (y < width)) { + outputData[y * width + x] = tex2D(stex, x, y); + } +} + +extern "C" __global__ void tex2dKernelInt8(char* outputData, + int width, int height) { + int x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + if ((x < width) && (y < width)) { + outputData[y * width + x] = tex2D(ctex, x, y); + } +} + +extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C, + int N) { + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + int tmpSum = 0; + if ((ROW < N) && (COL < N)) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + C[ROW * N + COL] = tmpSum; + } +} + +extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C, + int *D, int N) { + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + int tmpSum = 0; + if (ROW < N && COL < N) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + } + C[ROW * N + COL] = tmpSum; + D[ROW * N + COL] = tmpSum; +} + +extern "C" __global__ void SixteenSecKernel(int clockrate) { + HipTest::waitKernel(16, clockrate); +} + +extern "C" __global__ void TwoSecKernel(int clockrate) { + if (deviceGlobal == 0x2222) { + deviceGlobal = 0x3333; + } + + HipTest::waitKernel(2, clockrate); + + if (deviceGlobal != 0x3333) { + deviceGlobal = 0x5555; + } +} + +extern "C" __global__ void FourSecKernel(int clockrate) { + if (deviceGlobal == 1) { + deviceGlobal = 0x2222; + } + + HipTest::waitKernel(4, clockrate); + + if (deviceGlobal == 0x2222) { + deviceGlobal = 0x4444; + } +} + +extern "C" __global__ void GPU_func() { + x++; +} + + +__device__ int getSquareOfGlobalFloat() { + return static_cast(deviceGlobalFloat*deviceGlobalFloat); +} + +extern "C" __global__ void testWeightedCopy(int* a, int* b) { + int tx = hipThreadIdx_x; + b[tx] = deviceGlobalInt1*a[tx] + deviceGlobalInt2 + + static_cast(deviceGlobalShort) + static_cast(deviceGlobalChar) + + getSquareOfGlobalFloat(); +} + + +extern "C" __global__ void hello_world(const float* a, float* b) { + int tx = hipThreadIdx_x; + b[tx] = a[tx]; +} + +extern "C" __global__ void test_globals(const float* a, float* b) { + int tx = hipThreadIdx_x; + b[tx] = a[tx] + myDeviceGlobal + myDeviceGlobalArray[tx % ARRAY_SIZE]; +} + +extern "C" __global__ void EmptyKernel() { +} diff --git a/catch/unit/module/opencl_add.cc b/catch/unit/module/opencl_add.cc new file mode 100644 index 0000000000..267ffc7f3b --- /dev/null +++ b/catch/unit/module/opencl_add.cc @@ -0,0 +1,37 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +kernel void add(global float* output, global float* input) { + __local float lds[100]; + int id = get_global_id(0); + + if (id == 0) { + for (int i = 0; i < 100; i++) { + lds[i] = input[i]; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (id < 100) { + output[id] = lds[id]; + } else { + output[id] = input[id]; + } +}