From fd758200e5fb7f0beb9a82c13135d5fd30176af7 Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Wed, 21 Jun 2023 15:56:36 +0530 Subject: [PATCH] SWDEV-368280 - Add Unit_hipStreamCaptureExtModuleLaunchKernel (#105) For stream capture test of hipExtModuleLaunchKernel. SWDEV-372824 - Add Unit_hipStreamCaptureRtc For stream capture test based on RTC Change-Id: I96ef1395f75189ad751ca637f5c3273d280e849a --- catch/unit/graph/CMakeLists.txt | 12 + catch/unit/graph/hipMatMul.cc | 58 +++++ .../hipStreamCaptureExtModuleLaunchKernel.cc | 215 ++++++++++++++++++ catch/unit/rtc/CMakeLists.txt | 1 + catch/unit/rtc/hipStreamCaptureRtc.cc | 136 +++++++++++ 5 files changed, 422 insertions(+) create mode 100644 catch/unit/graph/hipMatMul.cc create mode 100644 catch/unit/graph/hipStreamCaptureExtModuleLaunchKernel.cc create mode 100644 catch/unit/rtc/hipStreamCaptureRtc.cc diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index ff9d578753..a1c7074691 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -111,6 +111,18 @@ set(TEST_SRC hipGraphKernelNodeCopyAttributes.cc ) +if(HIP_PLATFORM MATCHES "amd") + set(AMD_SRC + hipStreamCaptureExtModuleLaunchKernel.cc + ) + set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) +endif() + hip_add_exe_to_target(NAME GraphsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) + +if(HIP_PLATFORM MATCHES "amd") + add_custom_target(hipMatMul COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/hipMatMul.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/graph/hipMatMul.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + add_dependencies(build_tests hipMatMul) +endif() diff --git a/catch/unit/graph/hipMatMul.cc b/catch/unit/graph/hipMatMul.cc new file mode 100644 index 0000000000..10a6d79ea3 --- /dev/null +++ b/catch/unit/graph/hipMatMul.cc @@ -0,0 +1,58 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + This code object should be automatically built via "make build_tests". + In case it's missing, please type the following to generate it, + /opt/rocm/hip/bin/hipcc --genco hipMatMul.cc -o hipMatMul.code +*/ +#include"hip/hip_runtime.h" +__device__ int deviceGlobal = 1; + +extern "C" __global__ void matmulK(int* A, int* B, int* C, + int N) { + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + int tmpSum = 0; + if ((ROW < N) && (COL < N)) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + C[ROW * N + COL] = tmpSum; + } +} + +extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C, + int *D, int N) { + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + int tmpSum = 0; + if (ROW < N && COL < N) { + // each thread computes one element of the block sub-matrix + for (int i = 0; i < N; i++) { + tmpSum += A[ROW * N + i] * B[i * N + COL]; + } + } + C[ROW * N + COL] = tmpSum; + D[ROW * N + COL] = tmpSum; +} + +extern "C" __global__ void dummyKernel() { +} diff --git a/catch/unit/graph/hipStreamCaptureExtModuleLaunchKernel.cc b/catch/unit/graph/hipStreamCaptureExtModuleLaunchKernel.cc new file mode 100644 index 0000000000..b1f0880f74 --- /dev/null +++ b/catch/unit/graph/hipStreamCaptureExtModuleLaunchKernel.cc @@ -0,0 +1,215 @@ +/* + Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +#include +#include "hip/hip_ext.h" +#include +#include +#include +#include +#if !defined(S_IFREG) && defined(_S_IFREG) +#define S_IFREG _S_IFREG +#endif + +struct gridblockDim { + unsigned int gridX; + unsigned int gridY; + unsigned int gridZ; + unsigned int blockX; + unsigned int blockY; + unsigned int blockZ; +}; +class GraphModuleLaunchKernel { + int N = 64; + int SIZE = N*N; + int *A, *B, *C; + hipDeviceptr_t *Ad, *Bd; + hipStream_t stream1, stream2; + hipModule_t module; + hipFunction_t multKernel; + struct { + void* _Ad; + void* _Bd; + void* _Cd; + int _n; + } args1, args2; + size_t size1, size2; + + static constexpr char matmulK[] = "matmulK"; + + public : + GraphModuleLaunchKernel() { + allocateMemory(); + moduleLoad(); + } + + ~GraphModuleLaunchKernel() { + deAllocateMemory(); + } + + void allocateMemory(); + void deAllocateMemory(); + void moduleLoad(); + bool extModuleKernelExecutionMatmul(); + bool extModuleKernelExecutionMatmulwithStreamCapture(bool LaunchByDifferentStream = false); + static constexpr char fileName[] = "hipMatMul.code"; +}; + +void GraphModuleLaunchKernel::allocateMemory() { + A = new int[N*N*sizeof(int)]; + B = new int[N*N*sizeof(int)]; + for (int i=0; i < N; i++) { + for (int j=0; j < N; j++) { + A[i*N +j] = 1; + B[i*N +j] = 1; + } + } + HIPCHECK(hipStreamCreate(&stream1)); + HIPCHECK(hipStreamCreate(&stream2)); + HIPCHECK(hipMalloc(reinterpret_cast(&Ad), + SIZE*sizeof(int))); + HIPCHECK(hipMalloc(reinterpret_cast(&Bd), + SIZE*sizeof(int))); + HIPCHECK(hipHostMalloc(reinterpret_cast(&C), SIZE*sizeof(int))); + HIPCHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice)); + args1._Ad = Ad; + args1._Bd = Bd; + args1._Cd = C; + args1._n = N; + args2._Ad = NULL; + args2._Bd = NULL; + args2._Cd = NULL; + args2._n = 0; + size1 = sizeof(args1); + size2 = sizeof(args2); +} + +void GraphModuleLaunchKernel::moduleLoad() { + HIPCHECK(hipModuleLoad(&module, fileName)); + HIPCHECK(hipModuleGetFunction(&multKernel, module, matmulK)); +} + +void GraphModuleLaunchKernel::deAllocateMemory() { + HIPCHECK(hipStreamDestroy(stream1)); + HIPCHECK(hipStreamDestroy(stream2)); + delete[] A; + delete[] B; + HIPCHECK(hipFree(Ad)); + HIPCHECK(hipFree(Bd)); + HIPCHECK(hipHostFree(C)); + HIPCHECK(hipModuleUnload(module)); +} + +bool GraphModuleLaunchKernel::extModuleKernelExecutionMatmul() { + bool testStatus = true; + int mismatch = 0; + void* config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + HIPCHECK(hipExtModuleLaunchKernel(multKernel, N, N, 1, 32, 32 , 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + NULL, NULL, 0)); + HIPCHECK(hipStreamSynchronize(stream1)); + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + if (C[i*N + j] != N) + mismatch++; + } + } + if (mismatch) { + printf("Test failed: the result of matrix multiplications incorrect.\n"); + testStatus = false; + } + return testStatus; +} + +bool GraphModuleLaunchKernel::extModuleKernelExecutionMatmulwithStreamCapture(bool LaunchByDifferentStream) { + bool testStatus = true; + int mismatch = 0; + + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + + HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); + + void* config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, + HIP_LAUNCH_PARAM_END}; + + HIPCHECK(hipExtModuleLaunchKernel(multKernel, N, N, 1, 32, 32 , 1, 0, + stream1, NULL, + reinterpret_cast(&config1), + NULL, NULL, 0)); + + HIP_CHECK(hipStreamEndCapture(stream1, &graph)); + + // Validate end capture is successful + REQUIRE(graph != nullptr); + + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + REQUIRE(graphExec != nullptr); + + // Replay the recorded sequence + HIP_CHECK(hipGraphLaunch(graphExec, LaunchByDifferentStream ? stream2 : stream1)); + + HIP_CHECK(hipStreamSynchronize(LaunchByDifferentStream ? stream2 : stream1)); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + if (C[i*N + j] != N) + mismatch++; + } + } + if (mismatch) { + printf("Test failed: the result of matrix multiplications incorrect.\n"); + testStatus = false; + } + return testStatus; +} + +TEST_CASE("Unit_hipStreamCapture_ExtModuleLaunchKernel") { + struct stat fileStat; + if (stat(GraphModuleLaunchKernel::fileName, &fileStat) + || !(fileStat.st_mode & S_IFREG)) { + FAIL("module file " << GraphModuleLaunchKernel::fileName + << " doesn't exist! aborted! \n" + << "To generate the file, type\n" + << "/opt/rocm/hip/bin/hipcc --genco hipMatMul.cc -o hipMatMul.code"); + return; + } + HIPCHECK(hipSetDevice(0)); + GraphModuleLaunchKernel kernelLaunch; + + SECTION("extModuleKernelExecutionMatmul") { + REQUIRE(kernelLaunch.extModuleKernelExecutionMatmul()); + } + + SECTION("extModuleKernelExecutionMatmul_withStreamCapture") { + REQUIRE(kernelLaunch.extModuleKernelExecutionMatmulwithStreamCapture()); + } + + SECTION("extModuleKernelExecutionMatmul_withStreamCapture_launchByDifferentStream") { + REQUIRE(kernelLaunch.extModuleKernelExecutionMatmulwithStreamCapture(true)); + } +} diff --git a/catch/unit/rtc/CMakeLists.txt b/catch/unit/rtc/CMakeLists.txt index 7d4b4893ac..42d5ec5d9a 100644 --- a/catch/unit/rtc/CMakeLists.txt +++ b/catch/unit/rtc/CMakeLists.txt @@ -3,6 +3,7 @@ set(TEST_SRC saxpy.cc warpsize.cc hipRtcFunctional.cc + hipStreamCaptureRtc.cc ) # AMD only tests diff --git a/catch/unit/rtc/hipStreamCaptureRtc.cc b/catch/unit/rtc/hipStreamCaptureRtc.cc new file mode 100644 index 0000000000..ab5296c4d5 --- /dev/null +++ b/catch/unit/rtc/hipStreamCaptureRtc.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +static constexpr auto kernel_src { + R"_KERN_EMBED_( + extern "C" __global__ void kernel_func(float* f) + { + f[0] = 1.0; + } + )_KERN_EMBED_" +}; + + +TEST_CASE("Unit_hipStreamCaptureRtc") { + hipStream_t stream = nullptr; + hipGraph_t graph = nullptr; + hipGraphExec_t graph_exec = nullptr; + + float data_h = 0.0; + float* data_d = nullptr; + + // Init data + HIPCHECK(hipMalloc(&data_d, sizeof(float))); + HIPCHECK(hipMemcpy(data_d, &data_h, sizeof(float), hipMemcpyHostToDevice)); + + // Compile kernel + std::vector code; + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, kernel_src, "hipStreamCaptureRtc.cu", 0, nullptr, nullptr)); + + hipDeviceProp_t props; + int device = 0; + HIP_CHECK(hipSetDevice(device)); + HIP_CHECK(hipGetDeviceProperties(&props, device)); +#ifdef __HIP_PLATFORM_AMD__ + std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; +#else + std::string sarg = std::string("--fmad=false"); +#endif + + std::vector options = { sarg.c_str() }; + + auto compileResult = hiprtcCompileProgram(prog, options.size(), options.data()); + if (compileResult != HIPRTC_SUCCESS) { + size_t logSize = 0; + hiprtcGetProgramLogSize(prog, &logSize); + if (logSize) { + std::vector log(logSize, '\0'); + if (hiprtcGetProgramLog(prog, log.data()) == HIPRTC_SUCCESS) { + FAIL("hiprtcCompileProgram failed with log" << log.data()); + return; + } + } + FAIL("hiprtcCompileProgram failed without log"); + return; + } + + size_t codeSize = 0; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + + code.resize(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + + hipModule_t module = nullptr; + hipFunction_t kernel = nullptr; +#if HT_NVIDIA + HIPCHECK(hipInit(0)); + hipCtx_t ctx; + HIPCHECK(hipCtxCreate(&ctx, 0, device)); +#endif + + HIPCHECK(hipModuleLoadData(&module, code.data())); + + HIPCHECK(hipModuleGetFunction(&kernel, module, "kernel_func")); + + // Start capture + HIPCHECK(hipStreamCreate(&stream)); + HIPCHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + + // Launch kernel + auto size = sizeof(float*); + void *config[] = { HIP_LAUNCH_PARAM_BUFFER_POINTER, &data_d, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END }; + HIPCHECK(hipModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, stream, nullptr, config)); + HIPCHECK(hipStreamEndCapture(stream, &graph)); + + size_t numNodes = 0; + HIPCHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + INFO("Num of nodes returned by GetNodes : " << numNodes); + REQUIRE(numNodes == 1); + + // Ensure that no actual work has been done for the captured + // stream before graph execution + float tmp = 2.0; + HIPCHECK(hipMemcpy(&tmp, data_d, sizeof(float), hipMemcpyDeviceToHost)); + REQUIRE(tmp == 0.0); + + HIPCHECK(hipGraphInstantiate(&graph_exec, graph, NULL, NULL, 0)); + HIPCHECK(hipGraphDestroy(graph)); + + HIPCHECK(hipGraphLaunch(graph_exec, stream)); + + HIPCHECK(hipStreamSynchronize(stream)); + HIPCHECK(hipStreamDestroy(stream)); + + // Check that the work was done + HIPCHECK(hipMemcpy(&tmp, data_d, sizeof(float), hipMemcpyDeviceToHost)); + HIPCHECK(hipFree(data_d)); + + REQUIRE(tmp == 1.0); +#if HT_NVIDIA + HIPCHECK(hipCtxDestroy(ctx)); +#endif +}