diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index e660b3bcfb..6d60d24de8 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_SRC hipModuleLaunchCooperativeKernel.cc hipModuleLaunchCooperativeKernelMultiDevice.cc hipFuncGetAttribute.cc + hipGetFuncBySymbol.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code @@ -178,3 +179,6 @@ add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiou copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code) endif() endif() + +add_executable(hipGetFuncBySymbol_exe EXCLUDE_FROM_ALL hipGetFuncBySymbol_exe.cc) +add_dependencies(build_tests hipGetFuncBySymbol_exe) diff --git a/catch/unit/module/hipGetFuncBySymbol.cc b/catch/unit/module/hipGetFuncBySymbol.cc index 1c36e5b3c3..ca85a97f7f 100644 --- a/catch/unit/module/hipGetFuncBySymbol.cc +++ b/catch/unit/module/hipGetFuncBySymbol.cc @@ -16,12 +16,28 @@ LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @addtogroup hipGetFuncBySymbol hipGetFuncBySymbol + * @{ + * @ingroup KernelTest + * `hipError_t hipGetFuncBySymbol (hipFunction_t* functionPtr, + * const void* symbolPtr + * )` - + * Gets pointer to device entry function that matches entry function symbolPtr. + */ + #include #include +#include +#include #define LEN 64 #define SIZE LEN * sizeof(float) +#define ARR_SIZE (32*32) +#define SIZE_BYTES (ARR_SIZE*sizeof(int)) + extern "C" __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); @@ -36,16 +52,35 @@ extern "C" __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* } /** -* @addtogroup hipGetFuncBySymbol hipModuleLaunchKernel -* @{ -* @ingroup KernelTest -* `hipError_t hipGetFuncBySymbol(hipFunction_t*, const void*)` - -* function with kernelname will be fetched when pointer to the kernel function is passed. -* `hipError_t hipModuleLaunchKernel(hipFunction_t, unsigned int, -* unsigned int, unsigned int, unsigned int, unsigned int, -* unsigned int, unsigned int, hipStream_t, void**, void**)` - -* launches Kernel with launch parameters and shared memory on stream with arguments passed. -*/ + * Host Function to check for negative case. + */ +__host__ void hostFunction() { + printf("hostFunction\n"); +} + +/** + * Sample Kernel to be used for functional test cases + */ +__global__ void hipKernel(int *a) { + int offset = blockDim.x * blockIdx.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + for (int i = offset; i < ARR_SIZE; i+= stride) { + a[i] += a[i]; + } +} + +/** + * Local Function to validate the result + */ +bool verifyResult(int *a, int *output_ref, int arrSize) { + for (int i = 0; i < arrSize; i++) { + if (a[i] != output_ref[i]) { + return false; + } + } + return true; +} /** * Test Description @@ -61,7 +96,7 @@ extern "C" __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* * - HIP_VERSION >= 6.2 */ -TEST_CASE("Unit_hipGetFuncBySymbol") { +TEST_CASE("Unit_hipGetFuncBySymbol_PositiveTest") { uint32_t *A_d, *C_d; uint32_t *A_h, *C_h; size_t N = 1000000; @@ -100,7 +135,8 @@ TEST_CASE("Unit_hipGetFuncBySymbol") { HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; hipFunction_t Function; - HIPCHECK(hipGetFuncBySymbol(&Function, reinterpret_cast(bit_extract_kernel))); + HIPCHECK(hipGetFuncBySymbol(&Function, + reinterpret_cast(bit_extract_kernel))); HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, reinterpret_cast(&config))); @@ -118,3 +154,202 @@ TEST_CASE("Unit_hipGetFuncBySymbol") { free(A_h); free(C_h); } + +/** + * Test Description + * ------------------------ + * - Pass the NULL as a symbolPtr it should return + * hipErrorInvalidDeviceFunction + * - Pass a host function as a symbolPtr it should return + * hipErrorInvalidDeviceFunction + * Test source + * ------------------------ + * - catch/unit/module/hipGetFuncBySymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ + +TEST_CASE("Unit_hipGetFuncBySymbol_NegativeTests") { + hipFunction_t funcPointer; + + // Passing NULL as second parameter + REQUIRE(hipGetFuncBySymbol(&funcPointer, NULL) != hipSuccess); + + // Passing hostFunction as second parameter + REQUIRE(hipGetFuncBySymbol(&funcPointer, + reinterpret_cast(hostFunction))); +} + +/** + * Test Description + * ------------------------ + * - Create a child process and pass the __global__ function as a symbolPtr + * it should return hipSuccess, and kernel launch and execution with + * functionPtr should success. + * Test source + * ------------------------ + * - catch/unit/module/hipGetFuncBySymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipGetFuncBySymbol_InChildProcess") { + hip::SpawnProc proc("hipGetFuncBySymbol_exe", true); + REQUIRE(proc.run() == 0); +} + +/** + * Test Description + * ------------------------ + * - For all the GPU devices in the system and pass the __global__ function + * as a symbolPtr it should return hipSuccess, and kernel launch and + * execution with functionPtr should success. + * Test source + * ------------------------ + * - catch/unit/module/hipGetFuncBySymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipGetFuncBySymbol_MultiDev") { + int deviceCount = 0; + HIP_CHECK(hipGetDeviceCount(&deviceCount)); + if (deviceCount < 2) { + HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2"); + return; + } + + hipFunction_t funcPointer; + + for (int deviceId = 0; deviceId < deviceCount; deviceId++) { + HIP_CHECK(hipSetDevice(deviceId)); + + REQUIRE(hipGetFuncBySymbol(&funcPointer, + reinterpret_cast(hipKernel))== hipSuccess); + + int *h_a = reinterpret_cast(malloc(SIZE_BYTES)); + REQUIRE(h_a != nullptr); + int *output_ref = reinterpret_cast(malloc(SIZE_BYTES)); + REQUIRE(output_ref != nullptr); + + for (int i = 0; i < ARR_SIZE; i++) { + h_a[i] = 2; + output_ref[i] = 4; + } + + int *d_a = nullptr; + HIP_CHECK(hipMalloc(&d_a, SIZE_BYTES)); + REQUIRE(d_a != nullptr); + HIP_CHECK(hipMemcpy(d_a, h_a, SIZE_BYTES, hipMemcpyHostToDevice)); + + dim3 blocksPerGrid(1, 1, 1); + dim3 threadsPerBlock(1, 1, 64); + + void* kernelParam[] = {d_a}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + + REQUIRE(hipModuleLaunchKernel(funcPointer, + blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z, + threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z, + 0, 0, nullptr, kernel_parameter) == hipSuccess); + + HIP_CHECK(hipMemcpy(h_a, d_a, SIZE_BYTES, hipMemcpyDeviceToHost)); + + REQUIRE(verifyResult(h_a, output_ref, ARR_SIZE) == true); + + free(h_a); + free(output_ref); + HIP_CHECK(hipFree(d_a)); + } +} + +/** + * Local function useful to create stream and memory copy and launch kernel + */ +void MultiThreadMultiDevFunc(int DevId) { + HIP_CHECK(hipSetDevice(DevId)); + + int *h_a = reinterpret_cast(malloc(SIZE_BYTES)); + REQUIRE(h_a != nullptr); + int *output_ref = reinterpret_cast(malloc(SIZE_BYTES)); + REQUIRE(output_ref != nullptr); + + for (int i = 0; i < ARR_SIZE; i++) { + h_a[i] = 2; + output_ref[i] = 4; + } + + hipStream_t stream; + HIP_CHECK(hipSetDevice(DevId)); + HIP_CHECK(hipStreamCreate(&stream)); + + int *d_a = nullptr; + HIP_CHECK(hipMalloc(&d_a, SIZE_BYTES)); + REQUIRE(d_a != nullptr); + HIP_CHECK(hipMemcpyAsync(d_a, h_a, SIZE_BYTES, + hipMemcpyHostToDevice, stream)); + + dim3 blocksPerGrid(1, 1, 1); + dim3 threadsPerBlock(1, 1, 64); + + hipFunction_t funcPointer; + REQUIRE(hipGetFuncBySymbol(&funcPointer, + reinterpret_cast(hipKernel))== hipSuccess); + + void* kernelParam[] = {d_a}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + + REQUIRE(hipModuleLaunchKernel(funcPointer, + blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z, + threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z, + 0, stream, nullptr, kernel_parameter) == hipSuccess); + + HIP_CHECK(hipMemcpyAsync(h_a, d_a, SIZE_BYTES, + hipMemcpyDeviceToHost, stream)); + + REQUIRE(verifyResult(h_a, output_ref, ARR_SIZE) == true); + + free(h_a); + free(output_ref); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(d_a)); +} + +/** + * Test Description + * ------------------------ + * - Create number of threads equals to number of devices and in each devices + * pass the __global__ function as a symbolPtr it should return hipSuccess, + * and kernel launch and execution with functionPtr should success. + * Test source + * ------------------------ + * - catch/unit/module/hipGetFuncBySymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipGetFuncBySymbol_MultiDevMultiThread") { + int deviceCount = 0; + HIP_CHECK(hipGetDeviceCount(&deviceCount)); + if (deviceCount < 2) { + HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2"); + return; + } + + ::std::vector< ::std::thread> threads; + + for (int dev = 0; dev < deviceCount; dev++) { + threads.push_back(::std::thread(MultiThreadMultiDevFunc, dev)); + } + + for (int dev = 0; (dev < deviceCount) && (dev < threads.size()); dev++) { + threads[dev].join(); + } +} diff --git a/catch/unit/module/hipGetFuncBySymbol_exe.cc b/catch/unit/module/hipGetFuncBySymbol_exe.cc new file mode 100644 index 0000000000..cb7771dc89 --- /dev/null +++ b/catch/unit/module/hipGetFuncBySymbol_exe.cc @@ -0,0 +1,112 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" + +#define ARR_SIZE (32*32) +#define SIZE (ARR_SIZE*sizeof(int)) + +#define HIP_CHECK(error) \ +{ \ + hipError_t localError = error; \ + if ((localError != hipSuccess) && \ + (localError != hipErrorPeerAccessAlreadyEnabled)) { \ + printf("error: '%s'(%d) from %s at %s:%d\n", \ + hipGetErrorString(localError), \ + localError, #error, __FUNCTION__, __LINE__);\ + exit(0); \ + } \ +} + +/** + * Sample Kernel to be used for functional test cases + */ +__global__ void hipKernel(int *a) { + int offset = blockDim.x * blockIdx.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + for (int i = offset; i < ARR_SIZE; i+= stride) { + a[i] += a[i]; + } +} + +/** + * This function gets the function pointer and launches the kernel and + * verifies the reult + */ +int main() { + hipFunction_t funcPointer; + + if (hipGetFuncBySymbol(&funcPointer, + reinterpret_cast(hipKernel)) != hipSuccess) { + return -1; + } + + int *h_a = reinterpret_cast(malloc(SIZE)); + if (h_a == nullptr) { + return -1; + } + + int *output_ref = reinterpret_cast(malloc(SIZE)); + if (output_ref == nullptr) { + return -1; + } + + for (int i = 0; i < ARR_SIZE; i++) { + h_a[i] = 2; + output_ref[i] = 4; + } + + int *d_a = nullptr; + HIP_CHECK(hipMalloc(&d_a, SIZE)); + if (d_a == nullptr) { + return -1; + } + HIP_CHECK(hipMemcpy(d_a, h_a, SIZE, hipMemcpyHostToDevice)); + + dim3 blocksPerGrid(1, 1, 1); + dim3 threadsPerBlock(1, 1, 64); + + void* kernelParam[] = {d_a}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + + if (hipModuleLaunchKernel(funcPointer, + blocksPerGrid.x, blocksPerGrid.y, blocksPerGrid.z, + threadsPerBlock.x, threadsPerBlock.y, threadsPerBlock.z, + 0, 0, nullptr, kernel_parameter) != hipSuccess) { + return -1; + } + + HIP_CHECK(hipMemcpy(h_a, d_a, SIZE, hipMemcpyDeviceToHost)); + + for (int i = 0; i < ARR_SIZE; i++) { + if (h_a[i] != output_ref[i]) { + return -1; + } + } + + free(h_a); + free(output_ref); + HIP_CHECK(hipFree(d_a)); + + return 0; +}