diff --git a/projects/hip-tests/catch/include/hip_test_defgroups.hh b/projects/hip-tests/catch/include/hip_test_defgroups.hh index 090cd10a20..a55a12a207 100644 --- a/projects/hip-tests/catch/include/hip_test_defgroups.hh +++ b/projects/hip-tests/catch/include/hip_test_defgroups.hh @@ -230,3 +230,10 @@ THE SOFTWARE. * This section describes the virtual memory management types & functions of HIP runtime API. * @} */ + +/** + * @defgroup ModuleTest Module Functions Management + * @{ + * This section describes the loading of modules from code object files and invocation of different kernels. + * @} + */ diff --git a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index 1a1024f164..afd4136a5d 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -191,3 +191,55 @@ endif() add_executable(hipGetFuncBySymbol_exe EXCLUDE_FROM_ALL hipGetFuncBySymbol_exe.cc) add_dependencies(build_tests hipGetFuncBySymbol_exe) +# Common Tests - Test independent of all platforms +set(TEST_SRC + hipFuncSetAttribute.cc + hipFuncGetAttributes.cc + hipFuncSetSharedMemConfig.cc + hipManagedKeyword.cc + hipModule.cc + hipModuleLoadMultProcessOnMultGPU.cc +) +set(AMD_TEST_SRC + hipExtLaunchKernelGGL.cc + hipExtLaunchMultiKernelMultiDevice.cc +) + +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC ${TEST_SRC} ${AMD_TEST_SRC}) +endif() + +hip_add_exe_to_target(NAME module + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) + +add_custom_target(managed_kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/managed_kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../module/managed_kernel.code -I${HIP_PATH}/include -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + +hip_add_exe_to_target(NAME managedKernel + TEST_SRC ${LINUX_TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS ${CMAKE_DL_LIBS}) + +add_custom_target(vcpy_kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/vcpy_kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../module/vcpy_kernel.code -I${HIP_PATH}/include -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + +hip_add_exe_to_target(NAME VcpyKernel + TEST_SRC ${LINUX_TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS ${CMAKE_DL_LIBS}) + +add_custom_target(matmul.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/matmul.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../module/matmul.code -I${HIP_PATH}/include -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + +hip_add_exe_to_target(NAME matmul + TEST_SRC ${LINUX_TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS ${CMAKE_DL_LIBS}) + +add_custom_target(kernel_composite_test.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/kernel_composite_test.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../module/kernel_composite_test.code -I${HIP_PATH}/include -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + +add_executable(testhipModuleLoadUnloadFunc_exe EXCLUDE_FROM_ALL testhipModuleLoadUnloadFunc_exe.cc) + +hip_add_exe_to_target(NAME compositeKernel + TEST_SRC ${LINUX_TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS ${CMAKE_DL_LIBS}) +add_dependencies(module managed_kernel.code vcpy_kernel.code matmul.code kernel_composite_test.code testhipModuleLoadUnloadFunc_exe) diff --git a/projects/hip-tests/catch/unit/module/hipExtLaunchKernelGGL.cc b/projects/hip-tests/catch/unit/module/hipExtLaunchKernelGGL.cc new file mode 100644 index 0000000000..2f865dad55 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipExtLaunchKernelGGL.cc @@ -0,0 +1,212 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include "hip/hip_ext.h" + +/** +* @addtogroup hipExtLaunchKernelGGL +* @{ +* @ingroup ModuleTest +* `void hipExtLaunchKernelGGL (F kernel, const dim3 &numBlocks, const dim3 &dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, std::uint32_t flags, Args... args)` - +* Launches kernel with dimention parameters and shared memory on stream with +* templated kernel and arguments. +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify kernel execution time of the particular kernel. + * - Test case to verify hipExtLaunchKernelGGL API by disabling time flag in event creation. + + * Test source + * ------------------------ + * - catch/unit/module/hipExtLaunchKernelGGL.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ + +__device__ int globalvar = 1; +__global__ void TwoSecKernel(int clockrate) { + if (globalvar == 0x2222) { + globalvar = 0x3333; + } + uint64_t wait_t = 2000, + start = clock64()/clockrate, cur; + do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); + if (globalvar != 0x3333) { + globalvar = 0x5555; + } +} +__global__ void FourSecKernel_Navi3xGpu(int clockrate) { + if (globalvar == 1) { + globalvar = 0x2222; + } + uint64_t wait_t = 4000, + start = wall_clock64()/clockrate, cur; + do { cur = (wall_clock64()/clockrate)-start;}while (cur < wait_t); + if (globalvar == 0x2222) { + globalvar = 0x4444; + } +} +__global__ void FourSecKernel(int clockrate) { + if (globalvar == 1) { + globalvar = 0x2222; + } + uint64_t wait_t = 4000, + start = clock64()/clockrate, cur; + do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); + if (globalvar == 0x2222) { + globalvar = 0x4444; + } +} + +bool DisableTimeFlag() { + bool testStatus = true; + hipStream_t stream1; + HIP_CHECK(hipSetDevice(0)); + hipError_t e; + float time_2sec; + hipEvent_t start_event1, end_event1; + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + HIP_CHECK(hipEventCreateWithFlags(&start_event1, + hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&end_event1, + hipEventDisableTiming)); + HIP_CHECK(hipStreamCreate(&stream1)); + hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, + stream1, start_event1, end_event1, 0, clkRate); + HIP_CHECK(hipStreamSynchronize(stream1)); + e = hipEventElapsedTime(&time_2sec, start_event1, end_event1); + if (e == hipErrorInvalidHandle) { + testStatus = true; + } else { + testStatus = false; + } + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipEventDestroy(start_event1)); + HIP_CHECK(hipEventDestroy(end_event1)); + return testStatus; +} + +bool ConcurencyCheck_GlobalVar(int conc_flag) { + bool testStatus = true; + hipStream_t stream1; + int deviceGlobal_h = 0; + HIP_CHECK(hipSetDevice(0)); + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + HIP_CHECK(hipStreamCreate(&stream1)); + hipDeviceProp_t props{}; + int device; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&props, device)); + if ((std::string(props.gcnArchName).find("gfx1101") != std::string::npos) || + (std::string(props.gcnArchName).find("gfx1100") != std::string::npos)) { + hipExtLaunchKernelGGL((FourSecKernel_Navi3xGpu), dim3(1), dim3(1), 0, + stream1, nullptr, nullptr, conc_flag, clkRate); + } else { + hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, + stream1, nullptr, nullptr, conc_flag, clkRate); + } + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipMemcpyFromSymbol(&deviceGlobal_h, globalvar, + sizeof(int))); + + if (conc_flag && deviceGlobal_h != 0x5555) { + testStatus = true; + } else if (!conc_flag && deviceGlobal_h == 0x5555) { + testStatus = true; + } else { + testStatus = false; + } + HIP_CHECK(hipStreamDestroy(stream1)); + return testStatus; +} + +bool KernelTimeExecution() { + constexpr int FIVESEC_KERNEL = 4999; + constexpr int THREESEC_KERNEL = 2999; + bool testStatus = true; + hipStream_t stream1; + HIP_CHECK(hipSetDevice(0)); + hipEvent_t start_event1, end_event1, start_event2, end_event2; + float time_4sec, time_2sec; + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + + HIP_CHECK(hipEventCreate(&start_event1)); + HIP_CHECK(hipEventCreate(&end_event1)); + HIP_CHECK(hipEventCreate(&start_event2)); + HIP_CHECK(hipEventCreate(&end_event2)); + HIP_CHECK(hipStreamCreate(&stream1)); + hipDeviceProp_t props{}; + int device; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&props, device)); + if ((std::string(props.gcnArchName).find("gfx1101") != std::string::npos) || + (std::string(props.gcnArchName).find("gfx1100") != std::string::npos)) { + hipExtLaunchKernelGGL((FourSecKernel_Navi3xGpu), dim3(1), dim3(1), 0, + stream1, start_event1, end_event1, 0, clkRate); + } else { + hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0, + stream1, start_event1, end_event1, 0, clkRate); + } + hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, + stream1, start_event2, end_event2, 0, clkRate); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1)); + HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2)); + + if ( (time_4sec < static_cast(FIVESEC_KERNEL)) && + (time_2sec < static_cast(THREESEC_KERNEL))) { + testStatus = true; + } else { + testStatus = false; + } + + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipEventDestroy(start_event1)); + HIP_CHECK(hipEventDestroy(end_event1)); + HIP_CHECK(hipEventDestroy(start_event2)); + HIP_CHECK(hipEventDestroy(end_event2)); + + return testStatus; +} + +TEST_CASE("Unit_hipExtLaunchKernelGGL_Functional") { + bool testStatus = true; + // Disabled the concurency test as the firmware does not support concurrency + // in the same stream + #if 0 + testStatus &= ConcurencyCheck_GlobalVar(0); + #endif + SECTION("Kernel Execution Time") { + testStatus &= KernelTimeExecution(); + REQUIRE(testStatus == true); + } + SECTION("Time flag Diabale") { + testStatus &= DisableTimeFlag(); + REQUIRE(testStatus == true); + } +} diff --git a/projects/hip-tests/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc b/projects/hip-tests/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc new file mode 100644 index 0000000000..b6bae2677e --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +/** +* @addtogroup hipExtLaunchMultiKernelMultiDevice +* @{ +* @ingroup ModuleTest +* `hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, +* int numDevices, unsigned int flags)` - +* Launches kernels on multiple devices and guarantees all specified kernels are dispatched +* on respective streams before enqueuing any other work on the specified streams from any +* other threads +*/ + +/** + * Test Description + * ------------------------ + * - Test case to Launche Multiple kernels on single device or multiple devices. + * Test source + * ------------------------ + * - catch/unit/module/hipExtLaunchMultiKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +// Square each element in the array A and write to array C. +#define NUM_KERNEL_ARGS 3 +__global__ void +vector_square(float *C_d, float *A_d, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } +} + +TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Functional") { + constexpr int MAX_GPUS = 8; + float *A_d[MAX_GPUS], *C_d[MAX_GPUS]; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + + int nGpu = 0; + HIP_CHECK(hipGetDeviceCount(&nGpu)); + if (nGpu < 1) { + INFO("info: didn't find any GPU!\n"); + REQUIRE(false); + } + if (nGpu > MAX_GPUS) { + nGpu = MAX_GPUS; + } + A_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + // Fill with Phi + i + for (size_t i = 0; i < N; i++) { + A_h[i] = 1.618f + i; + } + + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + + hipStream_t stream[MAX_GPUS]; + for (int i = 0; i < nGpu; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreateWithFlags(&stream[i], hipStreamNonBlocking)); + + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, i)); + HIP_CHECK(hipMalloc(&A_d[i], Nbytes)); + HIP_CHECK(hipMalloc(&C_d[i], Nbytes)); + + + INFO("info: copy Host2Device\n"); + HIP_CHECK(hipMemcpy(A_d[i], A_h, Nbytes, hipMemcpyHostToDevice)); + } + + hipLaunchParams *launchParamsList = reinterpret_cast( + malloc(sizeof(hipLaunchParams)*nGpu)); + + void *args[MAX_GPUS * NUM_KERNEL_ARGS]; + + for (int i = 0; i < nGpu; i++) { + args[i * NUM_KERNEL_ARGS] = &C_d[i]; + args[i * NUM_KERNEL_ARGS + 1] = &A_d[i]; + args[i * NUM_KERNEL_ARGS + 2] = &N; + launchParamsList[i].func = + reinterpret_cast(vector_square); + launchParamsList[i].gridDim = dim3(blocks); + launchParamsList[i].blockDim = dim3(threadsPerBlock); + launchParamsList[i].sharedMem = 0; + launchParamsList[i].stream = stream[i]; + launchParamsList[i].args = args + i * NUM_KERNEL_ARGS; + } + + INFO("info: launch vector_square kernel with") + INFO("hipExtLaunchMultiKernelMultiDevice API\n"); + HIP_CHECK(hipExtLaunchMultiKernelMultiDevice(launchParamsList, nGpu, 0)); + + for (int j = 0; j < nGpu; j++) { + HIP_CHECK(hipStreamSynchronize(stream[j])); + + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, j)); + INFO("info: copy Device2Host\n"); + HIP_CHECK(hipSetDevice(j)); + HIP_CHECK(hipMemcpy(C_h, C_d[j], Nbytes, hipMemcpyDeviceToHost)); + + INFO("info: check result\n"); + for (size_t i = 0; i < N; i++) { + REQUIRE(fabs(C_h[i] - (A_h[i] * A_h[i])) < 0.00000000001); + } + } +} diff --git a/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc b/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc index 3896ff8388..4a050a31d9 100644 --- a/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -1,22 +1,19 @@ /* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - +Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -42,24 +39,23 @@ THE SOFTWARE. * Launches kernel with parameters and shared memory on stream with arguments * passed to kernel params or extra arguments. */ - #include - +#include +#include #include #include -#include "hip/hip_ext.h" #include // NOLINT +#include +#include "hip_module_launch_kernel_common.hh" // NOLINT +#include "hip/hip_ext.h" -#include "hip_module_launch_kernel_common.hh" - +constexpr auto fileName = "copyKernel.code"; +constexpr auto kernel_name = "copy_ker"; static constexpr auto totalWorkGroups{1024}; static constexpr auto localWorkSize{512}; static constexpr auto lastWorkSizeEven{256}; static constexpr auto lastWorkSizeOdd{257}; -#define fileName "copyKernel.code" -#define kernel_name "copy_ker" - /** Local Function to search a string in file. */ @@ -156,12 +152,14 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup") { args.buffersize = arraylength; size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; // Memcpy from A to Ad HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault)); REQUIRE(hipErrorInvalidValue == - hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL, + hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, + 1, 1, 0, 0, NULL, reinterpret_cast(&config), 0)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(Ad)); @@ -216,11 +214,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { args.buffersize = arraylength; size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; // Memcpy from A to Ad HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault)); - HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL, + HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, + 1, 1, 0, 0, NULL, reinterpret_cast(&config), 0)); // Memcpy results back to host HIP_CHECK(hipMemcpy(B, Bd, sizeBytes, hipMemcpyDefault)); @@ -243,7 +243,8 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") { hipEvent_t start_event = nullptr; HIP_CHECK(hipEventCreate(&start_event)); const auto kernel = GetKernel(mg.module(), "NOPKernel"); - HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, + HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, + nullptr, nullptr, start_event, nullptr)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipEventQuery(start_event)); @@ -253,7 +254,8 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") { hipEvent_t stop_event = nullptr; HIP_CHECK(hipEventCreate(&stop_event)); const auto kernel = GetKernel(mg.module(), "NOPKernel"); - HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, + HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, + nullptr, nullptr, nullptr, stop_event)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipEventQuery(stop_event)); @@ -263,7 +265,569 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") { TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative_Parameters") { ModuleLaunchKernelNegativeParameters(); } +/** + * Test Description + * ------------------------ + * - Test case to verify Negative tests of hipExtModuleLaunchKernel API. + * - Test case to verify kernel execution time of the particular kernel by using hipExtModuleLaunchKernel. + * - Test case to verify hipExtModuleLaunchKernel API by disabling time flag in event creation. + * - Test case to verify hipExtModuleLaunchKernel API's Corner Scenarios for Grid and Block dimensions. + * - Test case to verify different work groups of hipExtModuleLaunchKernel API. + * Test source + * ------------------------ + * - catch/unit/module/hipExtModuleLaunchKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; +class ModuleLaunchKernel { + int N = 64; + int SIZE = N*N; + int *A, *B, *C; + hipDeviceptr_t *Ad, *Bd; + hipStream_t stream1, stream2; + hipEvent_t start_event1, end_event1, start_event2, end_event2, + start_timingDisabled, end_timingDisabled; + hipModule_t Module; + hipDeviceptr_t deviceGlobal; + hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel, + TwoSecKernel, KernelandExtraParamKernel, DummyKernel; + struct { + int clockRate; + void* _Ad; + void* _Bd; + void* _Cd; + int _n; + } args1, args2; + struct { + } args3; + size_t size1; + size_t size2; + size_t size3; + size_t deviceGlobalSize; + public : + void AllocateMemory(); + void DeAllocateMemory(); + void ModuleLoad(); + bool Module_Negative_tests(); + bool ExtModule_Negative_tests(); + bool ExtModule_Corner_tests(); + bool Module_WorkGroup_Test(); + bool ExtModule_KernelExecutionTime(); + bool ExtModule_ConcurencyCheck_GlobalVar(int conc_flag); + bool ExtModule_ConcurrencyCheck_TimeVer(); + bool ExtModule_Disabled_Timingflag(); +}; + +void ModuleLaunchKernel::AllocateMemory() { + A = new int[N*N*sizeof(int)]; + B = new int[N*N*sizeof(int)]; + for (int i=0; i < N; i++) { + for (int j=0; j < N; j++) { + A[i*N +j] = 1; + B[i*N +j] = 1; + } + } + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipMalloc(&Ad, SIZE*sizeof(int))); + HIP_CHECK(hipMalloc(&Bd, SIZE*sizeof(int))); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&C), SIZE*sizeof(int))); + HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice)); + int clkRate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + args1._Ad = Ad; + args1._Bd = Bd; + args1._Cd = C; + args1._n = N; + args1.clockRate = clkRate; + args2._Ad = NULL; + args2._Bd = NULL; + args2._Cd = NULL; + args2._n = 0; + args2.clockRate = clkRate; + size1 = sizeof(args1); + size2 = sizeof(args2); + size3 = sizeof(args3); + HIP_CHECK(hipEventCreate(&start_event1)); + HIP_CHECK(hipEventCreate(&end_event1)); + HIP_CHECK(hipEventCreate(&start_event2)); + HIP_CHECK(hipEventCreate(&end_event2)); + HIP_CHECK(hipEventCreateWithFlags(&start_timingDisabled, + hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&end_timingDisabled, + hipEventDisableTiming)); +} + +void ModuleLaunchKernel::ModuleLoad() { + constexpr auto matmulName = "matmul.code"; + constexpr auto matmulK = "matmulK"; + constexpr auto SixteenSec = "SixteenSecKernel"; + constexpr auto KernelandExtra = "KernelandExtraParams"; + constexpr auto FourSec = "FourSecKernel"; + constexpr auto TwoSec = "TwoSecKernel"; + constexpr auto globalDevVar = "deviceGlobal"; + constexpr auto dummyKernel = "dummyKernel"; + + HIP_CHECK(hipModuleLoad(&Module, matmulName)); + HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); + HIP_CHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSec)); + HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel, + Module, KernelandExtra)); + HIP_CHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSec)); + HIP_CHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSec)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, + Module, globalDevVar)); +} + +void ModuleLaunchKernel::DeAllocateMemory() { + HIP_CHECK(hipEventDestroy(start_event1)); + HIP_CHECK(hipEventDestroy(end_event1)); + HIP_CHECK(hipEventDestroy(start_event2)); + HIP_CHECK(hipEventDestroy(end_event2)); + HIP_CHECK(hipEventDestroy(start_timingDisabled)); + HIP_CHECK(hipEventDestroy(end_timingDisabled)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + delete[] A; + delete[] B; + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipHostFree(C)); + HIP_CHECK(hipModuleUnload(Module)); +} +/* + * In this scenario,We launch the 4 sec kernel and 2 sec kernel + * and we fetch the event execution time of each kernel and it + * should not exceed the execution time of that particular kernel + */ +bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() { + constexpr auto FOURSEC_KERNEL{4999}; + constexpr auto TWOSEC_KERNEL{2999}; + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + AllocateMemory(); + ModuleLoad(); + float time_4sec, time_2sec; + + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, reinterpret_cast(&config2), + start_event1, end_event1, 0)); + HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, + NULL, reinterpret_cast(&config2), + start_event2, end_event2, 0)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1)); + HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2)); + if (time_4sec < FOURSEC_KERNEL && time_2sec < TWOSEC_KERNEL) { + testStatus = true; + } else { + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} +/* + * In this Scenario, we create events by disabling the timing flag + * We then Launch the kernel using hipExtModuleLaunchKernel by passing + * disabled events and try to fetch kernel execution time using + * hipEventElapsedTime API which would fail as the flag is disabled. + */ +bool ModuleLaunchKernel::ExtModule_Disabled_Timingflag() { + bool testStatus = true; + AllocateMemory(); + ModuleLoad(); + hipError_t e; + float time_2sec; + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, + NULL, reinterpret_cast(&config2), + start_timingDisabled, end_timingDisabled, 0)); + HIP_CHECK(hipStreamSynchronize(stream1)); + e = hipEventElapsedTime(&time_2sec, start_timingDisabled, end_timingDisabled); + if (e == hipErrorInvalidHandle) { + testStatus = true; + } else { + INFO("Event elapsed time is success when time flag is disabled \n"); + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} +/* + * In this scenario , we initially create a global device variable in matmul.cpp + * with initial value as 1 We then launch the four sec and two sec kernels and + * try to modify the variable. + * In case of concurrency,the variable gets updated in four sec kernel to 0x2222 + * and then the two sec kernel would be launched parallely which would again + * modify the global variable to 0x3333 + * In case of non concurrency,the variale gets updated in four sec kernel + * and then in two sec kernel and the value of global variable would be 0x5555 + */ +bool ModuleLaunchKernel::ExtModule_ConcurencyCheck_GlobalVar(int conc_flag) { + bool testStatus = true; + int deviceGlobal_h = 0; + AllocateMemory(); + ModuleLoad(); + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, reinterpret_cast(&config2), + start_event1, end_event1, conc_flag)); + HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, + NULL, reinterpret_cast(&config2), + start_event2, end_event2, conc_flag)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipMemcpyDtoH(&deviceGlobal_h, hipDeviceptr_t(deviceGlobal), + deviceGlobalSize)); + if (conc_flag && deviceGlobal_h != 0x5555) { + testStatus = true; + } else if (!conc_flag && deviceGlobal_h == 0x5555) { + testStatus = true; + } else { + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} +/* In this scenario,we initially launch 2 kernels,one is sixteen sec kernel + * and other is matrix multiplication with non-concurrency (flag 0) + * and we launch the same 2 kernels with concurrency flag 1. We then compare + * the time difference between the concurrency and non currency kernels. + * The concurrency kernel duration should be less than the non concurrency + * duration kernels + */ +bool ModuleLaunchKernel::ExtModule_ConcurrencyCheck_TimeVer() { + bool testStatus = true; + AllocateMemory(); + ModuleLoad(); + int mismatch = 0; + void* config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + void* config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, + HIP_LAUNCH_PARAM_END}; + auto start = std::chrono::high_resolution_clock::now(); + HIP_CHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config2), + NULL, NULL, 0)); + HIP_CHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32 , 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + NULL, NULL, 0)); + HIP_CHECK(hipStreamSynchronize(stream1)); + auto stop = std::chrono::high_resolution_clock::now(); + auto duration1 = std::chrono::duration_cast + (stop-start); + start = std::chrono::high_resolution_clock::now(); + HIP_CHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config2), + NULL, NULL, 1)); + HIP_CHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32, 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + NULL, NULL, 1)); + HIP_CHECK(hipStreamSynchronize(stream1)); + stop = std::chrono::high_resolution_clock::now(); + auto duration2 = std::chrono::duration_cast + (stop-start); + if (!(duration2.count() < duration1.count())) { + testStatus = false; + } + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + if (C[i*N + j] != N) + mismatch++; + } + } + if (mismatch) { + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} +bool ModuleLaunchKernel::ExtModule_Negative_tests() { + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + AllocateMemory(); + ModuleLoad(); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + void *params[] = {Ad}; + // Passing nullptr to kernel function in hipExtModuleLaunchKernel API + err = hipExtModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed nullptr to kernel function"); + testStatus = false; + } + // Passing Max int value to block dimensions + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for max values to block dimension"); + testStatus = false; + } + // Passing 0 as value for all dimensions + err = hipExtModuleLaunchKernel(MultKernel, 0, 0, 0, + 0, + 0, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for 0 as value for all dimensions"); + testStatus = false; + } + // Passing 0 as value for x dimension + err = hipExtModuleLaunchKernel(MultKernel, 0, 1, 1, + 0, + 1, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for 0 as value for x dimension"); + testStatus = false; + } + // Passing 0 as value for y dimension + err = hipExtModuleLaunchKernel(MultKernel, 1, 0, 1, + 1, + 0, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for 0 as value for y dimension"); + testStatus = false; + } + // Passing 0 as value for z dimension + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 0, + 1, + 1, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for 0 as value for z dimension"); + testStatus = false; + } + // Passing both kernel and extra params + err = hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, 1, 1, 0, + stream1, reinterpret_cast(¶ms), + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel fail when we pass both kernel,extra args"); + testStatus = false; + } + // Passing more than maxthreadsperblock to block dimensions + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0)); + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for max group size"); + testStatus = false; + } + // Block dimension X = Max Allowed + 1 + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsDim[0]+1, + 1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimX + 1)"); + testStatus = false; + } + // Block dimension Y = Max Allowed + 1 + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + deviceProp.maxThreadsDim[1]+1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimY + 1)"); + testStatus = false; + } + // Block dimension Z = Max Allowed + 1 + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + 1, + deviceProp.maxThreadsDim[2]+1, 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimZ + 1)"); + testStatus = false; + } + + // Passing invalid config data in extra params + void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, stream1, NULL, + reinterpret_cast(&config3), + nullptr, nullptr, 0); + if (err == hipSuccess) { + INFO("hipExtModuleLaunchKernel failed for invalid conf"); + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} + +bool ModuleLaunchKernel::ExtModule_Corner_tests() { + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + AllocateMemory(); + ModuleLoad(); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3, + HIP_LAUNCH_PARAM_END}; + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0)); + unsigned int maxblockX = deviceProp.maxThreadsDim[0]; + unsigned int maxblockY = deviceProp.maxThreadsDim[1]; + unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; + struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1}, + {1, 1, 1, 1, maxblockY, 1}, + {1, 1, 1, 1, 1, maxblockZ}, + {UINT32_MAX, 1, 1, 1, 1, 1}, + {1, UINT32_MAX, 1, 1, 1, 1}, + {1, 1, UINT32_MAX, 1, 1, 1}}; + + for (int i = 0; i < 6; i++) { + err = hipExtModuleLaunchKernel(DummyKernel, + test[i].gridX, + test[i].gridY, + test[i].gridZ, + test[i].blockX, + test[i].blockY, + test[i].blockZ, + 0, + stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err != hipSuccess) { + testStatus = false; + } + } + DeAllocateMemory(); + return testStatus; +} + +bool ModuleLaunchKernel::Module_WorkGroup_Test() { + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + AllocateMemory(); + ModuleLoad(); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3, + HIP_LAUNCH_PARAM_END}; + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0)); + double cuberootVal = + cbrt(static_cast(deviceProp.maxThreadsPerBlock)); + uint32_t cuberoot_floor = floor(cuberootVal); + uint32_t cuberoot_ceil = ceil(cuberootVal); + // Scenario: (block.x * block.y * block.z) <= Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + err = hipExtModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_floor, cuberoot_floor, cuberoot_floor, + 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err != hipSuccess) { + testStatus = false; + } + // Scenario: (block.x * block.y * block.z) > Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + err = hipExtModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1, + 0, stream1, NULL, + reinterpret_cast(&config1), + nullptr, nullptr, 0); + if (err == hipSuccess) { + testStatus = false; + } + DeAllocateMemory(); + return testStatus; +} + +TEST_CASE("Unit_hipExtModuleLaunchKernel_Functional") { + bool testStatus = true; + ModuleLaunchKernel kernelLaunch; + testStatus &= kernelLaunch.ExtModule_Negative_tests(); +// Disabled below test cases as firmware currently does not support the +// concurrency in the same stream based on the flag +#if 0 + testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(1); + testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(0); + testStatus &= kernelLaunch.ExtModule_ConcurrencyCheck_TimeVer(); +#endif + SECTION("Kernel Execution Time") { + testStatus &= kernelLaunch.ExtModule_KernelExecutionTime(); + REQUIRE(testStatus == true); + } + SECTION("Disable Time Flag") { + testStatus &= kernelLaunch.ExtModule_Disabled_Timingflag(); + REQUIRE(testStatus == true); + } + SECTION("Corner Tests") { + testStatus &= kernelLaunch.ExtModule_Corner_tests(); + REQUIRE(testStatus == true); + } + SECTION("WorkGroup Test") { + testStatus &= kernelLaunch.Module_WorkGroup_Test(); + REQUIRE(testStatus == true); + } +} /** * End doxygen group KernelTest. * @} diff --git a/projects/hip-tests/catch/unit/module/hipFuncGetAttributes.cc b/projects/hip-tests/catch/unit/module/hipFuncGetAttributes.cc new file mode 100644 index 0000000000..f107066780 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipFuncGetAttributes.cc @@ -0,0 +1,55 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +/** +* @addtogroup hipFuncGetAttributes +* @{ +* @ingroup ModuleTest +* `hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func)` - +* Find out attributes for a given function +*/ + +/** + * Test Description + * ------------------------ + * - Test case to Find out attributes for a given function. + + * Test source + * ------------------------ + * - catch/unit/module/hipFuncGetAttributes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +__global__ void getAttrFn(float* px, float* py) { + *px = *px + 1.0f; + *py = *py + *px; +} + +TEST_CASE("Unit_hipFuncGetAttributes_basic") { + hipFuncAttributes attr{}; + + auto r = hipFuncGetAttributes(&attr, + reinterpret_cast(&getAttrFn)); + REQUIRE(r == hipSuccess); + REQUIRE(attr.maxThreadsPerBlock != 0); +} diff --git a/projects/hip-tests/catch/unit/module/hipFuncSetAttribute.cc b/projects/hip-tests/catch/unit/module/hipFuncSetAttribute.cc new file mode 100644 index 0000000000..3ab779c28e --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipFuncSetAttribute.cc @@ -0,0 +1,55 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +/** +* @addtogroup hipFuncSetAttribute +* @{ +* @ingroup ModuleTest +* `hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value)` - +* Set attributes for a specific function +*/ + +/** + * Test Description + * ------------------------ + * - Test case to set attributes for a specific function + + * Test source + * ------------------------ + * - catch/unit/module/hipFuncSetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ + +__global__ void fn(float* px, float* py) { + *px = *px + 1.0f; + *py = *py + *px; +} + +TEST_CASE("Unit_hipFuncSetAttribute_Basic") { + HIP_CHECK(hipFuncSetAttribute(reinterpret_cast(&fn), + hipFuncAttributeMaxDynamicSharedMemorySize, + 0)); + HIP_CHECK(hipFuncSetAttribute(reinterpret_cast(&fn), + hipFuncAttributePreferredSharedMemoryCarveout, + 0)); +} diff --git a/projects/hip-tests/catch/unit/module/hipFuncSetSharedMemConfig.cc b/projects/hip-tests/catch/unit/module/hipFuncSetSharedMemConfig.cc new file mode 100644 index 0000000000..0fc266f84b --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipFuncSetSharedMemConfig.cc @@ -0,0 +1,111 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +__global__ void ReverseSeq(int *A, int *B, int N) { + extern __shared__ int SMem[]; + int offset = threadIdx.x; + int MirrorVal = N - offset - 1; + SMem[offset] = A[offset]; + __syncthreads(); + B[offset] = SMem[MirrorVal]; +} +/** +* @addtogroup hipFuncSetSharedMemConfig +* @{ +* @ingroup ModuleTest +* `hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config)` - +* Sets shared memory configuation for a specific function +*/ + +/** + * Test Description + * ------------------------ + * - Test case to set shared memory configuations for a specific function for different flags. + + * Test source + * ------------------------ + * - catch/unit/module/hipFuncSetSharedMemConfig.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ +TEST_CASE("Unit_hipFuncSetSharedMemConfig_functional") { + int *Ah = NULL, *RAh = NULL, NELMTS = 128; + int *Ad = NULL, *RAd = NULL; + Ah = reinterpret_cast(malloc(NELMTS * sizeof(int))); + RAh = reinterpret_cast(malloc(NELMTS * sizeof(int))); + HIP_CHECK(hipMalloc(&Ad, NELMTS * sizeof(int))); + HIP_CHECK(hipMalloc(&RAd, NELMTS * sizeof(int))); + for (int i = 0; i < NELMTS; ++i) { + Ah[i] = i; + RAh[i] = NELMTS - i - 1; + } + HIP_CHECK(hipMemcpy(Ad, Ah, NELMTS * sizeof(int), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); + + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeDefault flag + SECTION("Flag: hipSharedMemBankSizeDefault") { + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), hipSharedMemBankSizeDefault)); + // Kernel Launch with shared mem size of = NELMTS * sizeof(int) + ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); + memset(Ah, 0, NELMTS * sizeof(int)); + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NELMTS; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + } + + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeFourBytes flag + SECTION("Flag: hipSharedMemBankSizeFourBytes") { + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), hipSharedMemBankSizeFourByte)); + HIP_CHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); + // Kernel Launch with shared mem size of = NELMTS * sizeof(int) + ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); + memset(Ah, 0, NELMTS * sizeof(int)); + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NELMTS; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + } + // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeEightBytes flg + SECTION("Flag: hipSharedMemBankSizeEightByte") { + HIP_CHECK(hipFuncSetSharedMemConfig(reinterpret_cast + (&ReverseSeq), hipSharedMemBankSizeEightByte)); + HIP_CHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); + // Kernel Launch with shared mem size of = NELMTS * sizeof(int) + ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); + memset(Ah, 0, NELMTS * sizeof(int)); + // Verifying the results + HIP_CHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); + for (int i = 0; i < NELMTS; ++i) { + REQUIRE(Ah[i] == RAh[i]); + } + } + + free(Ah); + free(RAh); + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(RAd)); +} diff --git a/projects/hip-tests/catch/unit/module/hipManagedKeyword.cc b/projects/hip-tests/catch/unit/module/hipManagedKeyword.cc new file mode 100644 index 0000000000..dc4fd53d22 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipManagedKeyword.cc @@ -0,0 +1,78 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include + +constexpr int MANAGED_VAR_INIT_VALUE = 10; +constexpr auto fileName = "managed_kernel.code"; + +/** +* @addtogroup hipModuleGetGlobal +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name)` - +* Returns a global pointer from a module +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify global pointer from a module for multiGPU's. + + * Test source + * ------------------------ + * - catch/unit/module/hipManagedKeyword.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ + +TEST_CASE("Unit_hipModuleGetGlobal_Functional") { + bool testStatus = true; + int numDevices = 0; + hipDeviceptr_t x; + size_t xSize; + int data; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int i = 0; i < numDevices; i++) { + HIP_CHECK(hipSetDevice(i)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, i)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + hipModule_t Module; + HIP_CHECK(hipModuleLoad(&Module, fileName)); + hipFunction_t Function; + HIP_CHECK(hipModuleGetFunction(&Function, Module, "GPU_func")); + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, 1, 1, 1, 0, 0, + NULL, NULL)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleGetGlobal(reinterpret_cast(&x), + &xSize, Module, "x")); + HIP_CHECK(hipMemcpyDtoH(&data, hipDeviceptr_t(x), xSize)); + if (data != (1 + MANAGED_VAR_INIT_VALUE)) { + HIP_CHECK(hipModuleUnload(Module)); + HIP_CHECK(hipCtxDestroy(context)); + testStatus = false; + } + HIP_CHECK(hipModuleUnload(Module)); + HIP_CHECK(hipCtxDestroy(context)); + } + REQUIRE(testStatus == true); +} diff --git a/projects/hip-tests/catch/unit/module/hipModule.cc b/projects/hip-tests/catch/unit/module/hipModule.cc new file mode 100644 index 0000000000..26549592d8 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModule.cc @@ -0,0 +1,206 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#ifdef __linux__ +#include +#endif + +constexpr int LEN = 64; +constexpr auto SIZE = (LEN << 2); +constexpr auto CODE_OBJ_SINGLEARCH = "vcpy_kernel.code"; +constexpr auto kernel_name = "hello_world"; +#ifdef __linux__ +constexpr int COMMAND_LEN = 256; +constexpr auto CODE_OBJ_MULTIARCH = "vcpy_kernel_multarch.code"; +#endif + +/** +* @addtogroup hipModuleLoad +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleLoad(hipModule_t* module, const char* fname)` - +* Loads code object from file into a module +*/ + +/** + * Test Description + * ------------------------ + * - Test case to load and execute a code object file for the current GPU architecture. + * - Test case to load and execute a code object file for the multiple GPU architectures including the current + + * Test source + * ------------------------ + * - catch/unit/module/hipModuleLoad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ + +bool testCodeObjFile(const char *codeObjFile) { + float *A, *B, *Ad, *Bd; + A = new float[LEN]; + B = new float[LEN]; + + for (uint32_t i = 0; i < LEN; i++) { + A[i] = i * 1.0f; + B[i] = 0.0f; + } + + HIP_CHECK(hipMalloc(&Ad, SIZE)); + HIP_CHECK(hipMalloc(&Bd, SIZE)); + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + hipModule_t Module; + hipFunction_t Function; + HIP_CHECK(hipModuleLoad(&Module, codeObjFile)); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = reinterpret_cast(Ad); + args._Bd = reinterpret_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, + stream, NULL, + reinterpret_cast(&config))); + + HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + + bool btestPassed = true; + for (uint32_t i = 0; i < LEN; i++) { + if (A[i] != B[i]) { + btestPassed = false; + break; + } + } + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipFree(Ad)); + delete[] B; + delete[] A; + HIP_CHECK(hipModuleUnload(Module)); + return btestPassed; +} + +#ifdef __linux__ +// Check if environment variable $ROCM_PATH is defined +bool isRocmPathSet() { + FILE *fpipe; + char const *command = "echo $ROCM_PATH"; + fpipe = popen(command, "r"); + + if (fpipe == nullptr) { + INFO("Unable to create command\n"); + return false; + } + char command_op[COMMAND_LEN]; + if (fgets(command_op, COMMAND_LEN, fpipe)) { + size_t len = strlen(command_op); + if (len > 1) { // This is because fgets always adds newline character + pclose(fpipe); + return true; + } + } + pclose(fpipe); + return false; +} +#endif + +bool testMultiTargArchCodeObj() { + bool btestPassed = true; +#ifdef __linux__ + char command[COMMAND_LEN]; + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, 0)); + // Hardcoding the codeobject lines in multiple string to avoid cpplint warning + std::string CodeObjL1 = "#include \"hip/hip_runtime.h\"\n"; + std::string CodeObjL2 = + "extern \"C\" __global__ void hello_world(float* a, float* b) {\n"; + std::string CodeObjL3 = " int tx = threadIdx.x;\n"; + std::string CodeObjL4 = " b[tx] = a[tx];\n"; + std::string CodeObjL5 = "}"; + // Creating the full code object string + static std::string CodeObj = CodeObjL1 + CodeObjL2 + CodeObjL3 + + CodeObjL4 + CodeObjL5; + std::ofstream ofs("/tmp/vcpy_kernel.cpp", std::ofstream::out); + ofs << CodeObj; + ofs.close(); + // Copy the file into current working location if not available + if (access("/tmp/vcpy_kernel.cpp", F_OK) == -1) { + INFO("Code Object File: /tmp/vcpy_kernel.cpp not found \n"); + return true; + } + // Generate the command to generate multi architecture code object file + const char* hipcc_path = nullptr; + if (isRocmPathSet()) { + hipcc_path = "$ROCM_PATH/bin/hipcc"; + } else { + hipcc_path = "/opt/rocm/bin/hipcc"; + } + /* Putting these command parameters into a variable to shorten the string + literal length in order to avoid multiline string literal cpplint warning + */ + const char* genco_option = "--offload-arch"; + const char* input_codeobj = "/tmp/vcpy_kernel.cpp"; + const char* rocm_enumerator = "${ROCM_PATH}/bin/rocm_agent_enumerator"; + snprintf(command, COMMAND_LEN, + rocm_enumerator, + hipcc_path, genco_option, props.gcnArchName, input_codeobj, + CODE_OBJ_MULTIARCH); + + system((const char*)command); + // Check if the code object file is created + snprintf(command, COMMAND_LEN, "./%s", + CODE_OBJ_MULTIARCH); + + if (access(command, F_OK) == -1) { + INFO("Code Object File not found \n"); + return true; + } + btestPassed = testCodeObjFile(CODE_OBJ_MULTIARCH); +#else + INFO("This test is skipped due to non linux environment.\n"); +#endif + return btestPassed; +} + +TEST_CASE("Unit_hipModule_Functional") { + bool TestPassed = true; + SECTION("Code object file test on current GPU") { + TestPassed &= testCodeObjFile(CODE_OBJ_SINGLEARCH); + REQUIRE(TestPassed == true); + } + SECTION("Code object file test on multiple GPUs") { + TestPassed &= testMultiTargArchCodeObj(); + REQUIRE(TestPassed == true); + } +} diff --git a/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc b/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc index f440e8c013..5daee12bc7 100644 --- a/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc +++ b/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc @@ -1,28 +1,27 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include "hip_module_launch_kernel_common.hh" #include #include +#include +#include static hipError_t hipModuleLaunchKernelWrapper(hipFunction_t f, uint32_t gridX, uint32_t gridY, uint32_t gridZ, uint32_t blockX, uint32_t blockY, @@ -46,4 +45,324 @@ TEST_CASE("Unit_hipModuleLaunchKernel_Positive_Parameters") { TEST_CASE("Unit_hipModuleLaunchKernel_Negative_Parameters") { HIP_CHECK(hipFree(nullptr)); ModuleLaunchKernelNegativeParameters(); -} \ No newline at end of file +} +constexpr auto fileName = "matmul.code"; +constexpr auto dummyKernel = "dummyKernel"; + +/** +* @addtogroup hipModuleLaunchKernel +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra)` - +* launches kernel f with launch parameters and shared memory on stream with arguments passed +* to kernelparams +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify Negative tests of hipModuleLaunchKernel API. + * - Test case to verify hipModuleLaunchKernel API's Corner Scenarios for Grid and Block dimensions. + * - Test case to verify different work groups of hipModuleLaunchKernel API. + + * Test source + * ------------------------ + * - catch/unit/module/hipModuleLaunchKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; + +bool Module_Negative_tests() { + bool testStatus = true; + constexpr auto matmulK = "matmulK"; + constexpr auto KernelandExtra = "KernelandExtraParams"; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + struct { + void* _Ad; + void* _Bd; + void* _Cd; + int _n; + } args1; + args1._Ad = nullptr; + args1._Bd = nullptr; + args1._Cd = nullptr; + args1._n = 0; + hipFunction_t MultKernel, KernelandExtraParamKernel; + size_t size1; + size1 = sizeof(args1); + hipModule_t Module; + hipStream_t stream1; + hipDeviceptr_t *Ad = nullptr; +#ifdef HT_NVIDIA + HIP_CHECK(hipInit(0)); + hipCtx_t context; + HIP_CHECK(hipCtxCreate(&context, 0, 0)); +#endif + + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); + HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel, + Module, KernelandExtra)); + void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + void *params[] = {Ad}; + HIP_CHECK(hipStreamCreate(&stream1)); + // Passing nullptr to kernel function + err = hipModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, + stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing Max int value to block dimensions + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + 0, stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing 0 as value for all dimensions + err = hipModuleLaunchKernel(MultKernel, 0, 0, 0, + 0, + 0, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing 0 as value for x dimension + err = hipModuleLaunchKernel(MultKernel, 0, 1, 1, + 0, + 1, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing 0 as value for y dimension + err = hipModuleLaunchKernel(MultKernel, 1, 0, 1, + 1, + 0, + 1, 0, + stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing 0 as value for z dimension + err = hipModuleLaunchKernel(MultKernel, 1, 1, 0, + 1, + 1, + 0, 0, + stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing both kernel and extra params + err = hipModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, + 1, 1, 0, stream1, + reinterpret_cast(¶ms), + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing more than maxthreadsperblock to block dimensions + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0)); + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, + deviceProp.maxThreadsPerBlock+1, 0, stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Block dimension X = Max Allowed + 1 + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, + deviceProp.maxThreadsDim[0]+1, + 1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Block dimension Y = Max Allowed + 1 + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + deviceProp.maxThreadsDim[1]+1, + 1, 0, stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Block dimension Z = Max Allowed + 1 + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, + 1, + 1, + deviceProp.maxThreadsDim[2]+1, 0, stream1, NULL, + reinterpret_cast(&config1)); + if (err == hipSuccess) { + testStatus = false; + } + // Passing invalid config data to extra params + void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, stream1, NULL, + reinterpret_cast(&config3)); + if (err == hipSuccess) { + testStatus = false; + } + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); +#ifdef HT_NVIDIA + HIP_CHECK(hipCtxDestroy(context)); +#endif + return testStatus; +} + +bool Module_GridBlock_Corner_Tests() { + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + hipFunction_t DummyKernel; + hipModule_t Module; + hipStream_t stream1; + hipDevice_t device; +#ifdef HT_NVIDIA + HIP_CHECK(hipInit(0)); + hipCtx_t context; + HIP_CHECK(hipCtxCreate(&context, 0, 0)); +#endif + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipStreamCreate(&stream1)); + // Passing Max int value to block dimensions + hipDeviceProp_t deviceProp; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipGetDeviceProperties(&deviceProp, device)); + unsigned int maxblockX = deviceProp.maxThreadsDim[0]; + unsigned int maxblockY = deviceProp.maxThreadsDim[1]; + unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; +#ifdef HT_NVIDIA + unsigned int maxgridX = deviceProp.maxGridSize[0]; + unsigned int maxgridY = deviceProp.maxGridSize[1]; + unsigned int maxgridZ = deviceProp.maxGridSize[2]; +#else + unsigned int maxgridX = deviceProp.maxGridSize[0]; + unsigned int maxgridY = deviceProp.maxGridSize[1]; + unsigned int maxgridZ = deviceProp.maxGridSize[2]; +#endif + struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1}, + {1, 1, 1, 1, maxblockY, 1}, + {1, 1, 1, 1, 1, maxblockZ}, + {maxgridX, 1, 1, 1, 1, 1}, + {1, maxgridY, 1, 1, 1, 1}, + {1, 1, maxgridZ, 1, 1, 1}}; + for (int i = 0; i < 6; i++) { + err = hipModuleLaunchKernel(DummyKernel, + test[i].gridX, + test[i].gridY, + test[i].gridZ, + test[i].blockX, + test[i].blockY, + test[i].blockZ, + 0, + stream1, NULL, NULL); + if (err != hipSuccess) { + testStatus = false; + } + } + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); +#ifdef HT_NVIDIA + HIP_CHECK(hipCtxDestroy(context)); +#endif + return testStatus; +} + +bool Module_WorkGroup_Test() { + bool testStatus = true; + HIP_CHECK(hipSetDevice(0)); + hipError_t err; + hipFunction_t DummyKernel; + hipModule_t Module; + hipStream_t stream1; +#ifdef HT_NVIDIA + HIP_CHECK(hipInit(0)); + hipCtx_t context; + HIP_CHECK(hipCtxCreate(&context, 0, 0)); +#endif + HIP_CHECK(hipModuleLoad(&Module, fileName)); + HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); + HIP_CHECK(hipStreamCreate(&stream1)); + // Passing Max int value to block dimensions + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0)); + double cuberootVal = + cbrt(static_cast(deviceProp.maxThreadsPerBlock)); + uint32_t cuberoot_floor = floor(cuberootVal); + uint32_t cuberoot_ceil = ceil(cuberootVal); + // Scenario: (block.x * block.y * block.z) <= Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + err = hipModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_floor, cuberoot_floor, cuberoot_floor, + 0, stream1, NULL, NULL); + if (err != hipSuccess) { + testStatus = false; + } + // Scenario: (block.x * block.y * block.z) > Work Group Size where + // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ + err = hipModuleLaunchKernel(DummyKernel, + 1, 1, 1, + cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1, + 0, stream1, NULL, NULL); + if (err == hipSuccess) { + testStatus = false; + } + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipModuleUnload(Module)); +#ifdef HT_NVIDIA + HIP_CHECK(hipCtxDestroy(context)); +#endif + return testStatus; +} + +TEST_CASE("Unit_hipModuleLaunchKernel_Fntl") { + bool testStatus = false; + SECTION("Negative test scenarios") { + testStatus = Module_Negative_tests(); + REQUIRE(testStatus == true); + } + SECTION("Grid Block corner test") { + testStatus = Module_GridBlock_Corner_Tests(); + REQUIRE(testStatus == true); + } + SECTION("Work Group Test") { + testStatus = Module_WorkGroup_Test(); + REQUIRE(testStatus == true); + } +} diff --git a/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc b/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc index 4d364f1d33..1f112bd09a 100644 --- a/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc +++ b/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc @@ -1,21 +1,19 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -23,7 +21,9 @@ THE SOFTWARE. #include #include - +#include +#include +#include TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") { HIP_CHECK(hipFree(nullptr)); @@ -64,4 +64,90 @@ TEST_CASE("Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String") { hipModule_t module; HIP_CHECK_ERROR(hipModuleLoadData(&module, ""), hipErrorInvalidImage); -} \ No newline at end of file +} +/** +* @addtogroup hipModuleLoad hipModuleGetFunction +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleLoad(hipModule_t* module, const char* fname)` - +* Loads code object from file into a module +* `hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname)` - +* Function with kname will be extracted if present in module +*/ + +/** + * Test Description + * ------------------------ + * - Test case to load data from a code object file through hipModuleLoad and hipModuleGetFunction. + + * Test source + * ------------------------ + * - catch/unit/module/hipModuleLoadData.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ +#if HT_AMD +// Below test disabled for NVIDIA due to the defect SWDEV-472385 +TEST_CASE("Unit_hipModuleLoadData_Functional") { + constexpr int LEN = 64; + constexpr int SIZE = LEN << 2; + constexpr auto FILENAME = "vcpy_kernel.code"; + constexpr auto kernel_name = "hello_world"; + float *A, *B, *Ad, *Bd; + A = new float[LEN]; + B = new float[LEN]; + + for (uint32_t i = 0; i < LEN; i++) { + A[i] = i * 1.0f; + B[i] = 0.0f; + } + + HIP_CHECK(hipMalloc(&Ad, SIZE)); + HIP_CHECK(hipMalloc(&Bd, SIZE)); + + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + hipModule_t Module; + hipFunction_t Function = nullptr; + std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fsize); + if (file.read(buffer.data(), fsize)) { + HIP_CHECK(hipModuleLoadData(&Module, &buffer[0])); + HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + } + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = reinterpret_cast(Ad); + args._Bd = reinterpret_cast(Bd); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, + stream, NULL, reinterpret_cast(&config))); + + HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + + for (uint32_t i = 0; i < LEN; i++) { + REQUIRE(A[i] == B[i]); + } + delete [] A; + delete [] B; + HIP_CHECK(hipModuleUnload(Module)); + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); +} +#endif diff --git a/projects/hip-tests/catch/unit/module/hipModuleLoadMultProcessOnMultGPU.cc b/projects/hip-tests/catch/unit/module/hipModuleLoadMultProcessOnMultGPU.cc new file mode 100644 index 0000000000..0f6d6257bc --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleLoadMultProcessOnMultGPU.cc @@ -0,0 +1,60 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +/** +* @addtogroup hipModuleLoad hipModuleLoadData hipModuleLoadDataEx +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleLoad(hipModule_t* module, const char* fname)` - +* Loads code object from file into a module +* `hipError_t hipModuleLoadData (hipModule_t *module, const void *image)` - +* Builds module from code object which resides in host memory. Image is pointer to that location. +* `hipError_t hipModuleLoadDataEx (hipModule_t *module, const void *image, +* unsigned int numOptions, hipJitOption *options, void **optionValues)` - +* Builds module from code object which resides in host memory. Image is pointer to that +* location. Options are not used. +*/ + +/** + * Test Description + * ------------------------ + * - Test case to load and execute a code object file for multiprocess and multiGPU. + * Test source + * ------------------------ + * - catch/unit/module/hipModuleLoadMultProcessOnMultGPU.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ +TEST_CASE("Unit_hipModuleLoad_MultProcess_MultGPU") { + int deviceCount{0}; + HIP_CHECK(hipGetDeviceCount(&deviceCount)); + REQUIRE(deviceCount != 0); + // Spawn 1 Process for each device + for (int deviceNo = 0; deviceNo < deviceCount; deviceNo++) { + // set the device id for the current process + HIP_CHECK(hipSetDevice(deviceNo)); + hip::SpawnProc proc("testhipModuleLoadUnloadFunc_exe", true); + REQUIRE(proc.run("1") == true); + REQUIRE(proc.run("2") == true); + REQUIRE(proc.run("3") == true); + } +} diff --git a/projects/hip-tests/catch/unit/module/hipModuleUnload.cc b/projects/hip-tests/catch/unit/module/hipModuleUnload.cc index 914b66c89f..54c1a46cdf 100644 --- a/projects/hip-tests/catch/unit/module/hipModuleUnload.cc +++ b/projects/hip-tests/catch/unit/module/hipModuleUnload.cc @@ -1,25 +1,23 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include +#include #include TEST_CASE("Unit_hipModuleUnload_Negative_Module_Is_Nullptr") { @@ -36,3 +34,29 @@ TEST_CASE("Unit_hipModuleUnload_Negative_Double_Unload") { HIP_CHECK(hipModuleUnload(module)); HIP_CHECK_ERROR(hipModuleUnload(module), hipErrorNotFound); } +/** +* @addtogroup hipModuleUnload +* @{ +* @ingroup ModuleTest +* `hipError_t hipModuleUnload(hipModule_t module)` - +* Frees the module +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify the module release. + * Test source + * ------------------------ + * - catch/unit/module/hipModuleUnload.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 +*/ +TEST_CASE("Unit_hipModuleLoad_basic") { + constexpr auto fileName = "vcpy_kernel.code"; + hipModule_t module; + HIP_CHECK(hipModuleLoad(&module, fileName)); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); +} diff --git a/projects/hip-tests/catch/unit/module/kernel_composite_test.cpp b/projects/hip-tests/catch/unit/module/kernel_composite_test.cpp new file mode 100644 index 0000000000..9ba270838f --- /dev/null +++ b/projects/hip-tests/catch/unit/module/kernel_composite_test.cpp @@ -0,0 +1,37 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip/hip_runtime.h" +constexpr int GLOBAL_BUF_SIZE = 2048; + +__device__ float deviceGlobalFloat; +__device__ int deviceGlobalInt1; +__device__ int deviceGlobalInt2; +__device__ short deviceGlobalShort; //NOLINT +__device__ char deviceGlobalChar; + +__device__ int getSquareOfGlobalFloat() { + return static_cast(deviceGlobalFloat*deviceGlobalFloat); +} + +extern "C" __global__ void testWeightedCopy(int* a, int* b) { + int tx = threadIdx.x; + b[tx] = deviceGlobalInt1 * a[tx] + deviceGlobalInt2 + + static_cast(deviceGlobalShort) + static_cast(deviceGlobalChar) + + getSquareOfGlobalFloat(); +} diff --git a/projects/hip-tests/catch/unit/module/managed_kernel.cpp b/projects/hip-tests/catch/unit/module/managed_kernel.cpp new file mode 100644 index 0000000000..2302fe4c96 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/managed_kernel.cpp @@ -0,0 +1,24 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip/hip_runtime.h" +__managed__ int x = 10; + +extern "C" __global__ void GPU_func() { + x++; +} diff --git a/projects/hip-tests/catch/unit/module/matmul.cpp b/projects/hip-tests/catch/unit/module/matmul.cpp new file mode 100644 index 0000000000..e2931549ad --- /dev/null +++ b/projects/hip-tests/catch/unit/module/matmul.cpp @@ -0,0 +1,82 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include"hip/hip_runtime.h" +__device__ int deviceGlobal = 1; + +extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C, + int N) { + int ROW = blockIdx.y * blockDim.y + threadIdx.y; + int COL = blockIdx.x * blockDim.x + threadIdx.x; + int tmpSum = 0; + if ((ROW < N) && (COL < N)) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + C[ROW * N + COL] = tmpSum; + } +} + +extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C, + int *D, int N) { + int ROW = blockIdx.y * blockDim.y + threadIdx.y; + int COL = blockIdx.x * blockDim.x + threadIdx.x; + int tmpSum = 0; + if (ROW < N && COL < N) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + } + C[ROW * N + COL] = tmpSum; + D[ROW * N + COL] = tmpSum; +} + +extern "C" __global__ void SixteenSecKernel(int clockrate) { + uint64_t wait_t = 16000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); +} + +extern "C" __global__ void TwoSecKernel(int clockrate) { + if (deviceGlobal == 0x2222) { + deviceGlobal = 0x3333; + } + uint64_t wait_t = 2000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); + if (deviceGlobal != 0x3333) { + deviceGlobal = 0x5555; + } +} + +extern "C" __global__ void FourSecKernel(int clockrate) { + if (deviceGlobal == 1) { + deviceGlobal = 0x2222; + } + uint64_t wait_t = 4000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); + if (deviceGlobal == 0x2222) { + deviceGlobal = 0x4444; + } +} + +extern "C" __global__ void dummyKernel() { +} diff --git a/projects/hip-tests/catch/unit/module/testhipModuleLoadUnloadFunc_exe.cc b/projects/hip-tests/catch/unit/module/testhipModuleLoadUnloadFunc_exe.cc new file mode 100644 index 0000000000..bbf91abc2d --- /dev/null +++ b/projects/hip-tests/catch/unit/module/testhipModuleLoadUnloadFunc_exe.cc @@ -0,0 +1,170 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#define HIP_CHECK(error)\ +{\ + hipError_t localError = error;\ + if ((localError != hipSuccess) && \ + (localError != hipErrorPeerAccessAlreadyEnabled)) {\ + printf("error: '%s'(%d) from %s at %s:%d\n", \ + hipGetErrorString(localError), \ + localError, #error, __FUNCTION__, __LINE__);\ + exit(0);\ + }\ +} +constexpr auto CODEOBJ_FILE = "kernel_composite_test.code"; + +bool testhipModuleLoadUnloadFunc(const std::vector& buffer, + char* globTestID) { + constexpr auto CODEOBJ_GLOB_KERNEL1 = "testWeightedCopy"; + size_t N = 16*16; + size_t Nbytes = N * sizeof(int); + int *A_d, *B_d; + int *A_h, *B_h; + int deviceid; + HIP_CHECK(hipGetDevice(&deviceid)); + // allocate host and device buffer + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&B_d, Nbytes)); + + A_h = reinterpret_cast(malloc(Nbytes)); + B_h = reinterpret_cast(malloc(Nbytes)); + // set host buffers + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = deviceid; + } + // Copy buffer from host to device + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + hipModule_t Module; + hipFunction_t Function; + int check = atoi(globTestID); +/** + * Validates hipModuleLoadUnload if globTestID = 1 + * Validates hipModuleLoadDataUnload if globTestID = 2 + * Validates hipModuleLoadDataExUnload if globTestID = 3 +*/ + switch (check) { + case 1: + HIP_CHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + case 2: + HIP_CHECK(hipModuleLoadData(&Module, &buffer[0])); + case 3: + HIP_CHECK(hipModuleLoadDataEx(&Module, + &buffer[0], 0, nullptr, nullptr)); + } + HIP_CHECK(hipModuleGetFunction(&Function, Module, + CODEOBJ_GLOB_KERNEL1)); + float deviceGlobalFloatH = 3.14; + int deviceGlobalInt1H = 100*deviceid; + int deviceGlobalInt2H = 50*deviceid; + uint32_t deviceGlobalShortH = 25*deviceid; + char deviceGlobalCharH = 13*deviceid; + hipDeviceptr_t deviceGlobal; + size_t deviceGlobalSize; + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, + Module, "deviceGlobalFloat")); + HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), + &deviceGlobalFloatH, + deviceGlobalSize)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, + Module, "deviceGlobalInt1")); + HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), + &deviceGlobalInt1H, + deviceGlobalSize)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, + Module, + "deviceGlobalInt2")); + HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), + &deviceGlobalInt2H, deviceGlobalSize)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, + Module, "deviceGlobalShort")); + HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), + &deviceGlobalShortH, deviceGlobalSize)); + HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, + &deviceGlobalSize, Module, "deviceGlobalChar")); + HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), + &deviceGlobalCharH, deviceGlobalSize)); + // Launch Function kernel function + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + struct { + void* _Ad; + void* _Bd; + } args; + args._Ad = reinterpret_cast(A_d); + args._Bd = reinterpret_cast(B_d); + size_t size = sizeof(args); + + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, + N, 1, 1, 0, stream, NULL, + reinterpret_cast(&config))); + // Copy buffer from decice to host + HIP_CHECK(hipMemcpyAsync(B_h, B_d, Nbytes, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipStreamDestroy(stream)); + + // Check the results + for (size_t idx = 0; idx < N; idx++) { + if (B_h[idx] != (deviceGlobalInt1H*A_h[idx] + + deviceGlobalInt2H + + static_cast(deviceGlobalShortH) + + + static_cast(deviceGlobalCharH) + + static_cast(deviceGlobalFloatH*deviceGlobalFloatH))) { + // exit the current process with failure + return false; + } + } + HIP_CHECK(hipModuleUnload(Module)); + // free memory + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(A_d)); + free(B_h); + free(A_h); + + return true; +} +int main(int argc, char* argv[]) { + if(argc > 0) { + bool value = false; + std::ifstream file(CODEOBJ_FILE, + std::ios::binary | std::ios::ate); + std::streamsize fsize = file.tellg(); + file.seekg(0, std::ios::beg); + std::vector buffer(fsize); + if (!file.read(buffer.data(), fsize)) { + value = false; + } + file.close(); + value = testhipModuleLoadUnloadFunc(buffer, argv[1]); + return value; + } +} diff --git a/projects/hip-tests/catch/unit/module/vcpy_kernel.cpp b/projects/hip-tests/catch/unit/module/vcpy_kernel.cpp new file mode 100644 index 0000000000..af362ae58a --- /dev/null +++ b/projects/hip-tests/catch/unit/module/vcpy_kernel.cpp @@ -0,0 +1,24 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip/hip_runtime.h" + +extern "C" __global__ void hello_world(float* a, float* b) { + int tx = threadIdx.x; + b[tx] = a[tx]; +}