From 6e1c4c26555682bf54a9f16e90ecb4ffaf75a2ed Mon Sep 17 00:00:00 2001 From: taosang2 Date: Mon, 29 Apr 2024 14:06:42 -0400 Subject: [PATCH] SWDEV-459479 - Add compressed codeobj test Add simple tests to verify compressed code objects. Change-Id: Iae148c3c928e18247624937512918dbb3cbc462d --- catch/unit/compiler/CMakeLists.txt | 13 +++ catch/unit/compiler/hipSquare.cc | 83 +++++++++++++++++++ catch/unit/module/CMakeLists.txt | 10 +++ catch/unit/module/hipExtModuleLaunchKernel.cc | 8 +- catch/unit/module/hipModuleLoadData.cc | 13 +++ 5 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 catch/unit/compiler/hipSquare.cc diff --git a/catch/unit/compiler/CMakeLists.txt b/catch/unit/compiler/CMakeLists.txt index 67eb7afcf5..c8aa25c64b 100644 --- a/catch/unit/compiler/CMakeLists.txt +++ b/catch/unit/compiler/CMakeLists.txt @@ -6,3 +6,16 @@ set(TEST_SRC hip_add_exe_to_target(NAME CompilerTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) + +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + hipSquare.cc + ) + + set_source_files_properties(hipSquare.cc PROPERTIES COMPILE_FLAGS "--offload-compress") + + hip_add_exe_to_target(NAME SimpleCompressedCodeObjectTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) +endif() + diff --git a/catch/unit/compiler/hipSquare.cc b/catch/unit/compiler/hipSquare.cc new file mode 100644 index 0000000000..693f2b2e5b --- /dev/null +++ b/catch/unit/compiler/hipSquare.cc @@ -0,0 +1,83 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include + +/* + * Square each element in the array A and write to array C. + */ +template +__global__ void vector_square(T* C_d, const T* A_d, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } +} + +TEST_CASE("Unit_test_compressed_codeobject") { + float *A_d, *C_d; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + static int device = 0; + HIP_CHECK(hipSetDevice(device)); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, device /*deviceID*/)); + printf("info: running on device %s\n", props.name); + #ifdef __HIP_PLATFORM_AMD__ + printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName); + #endif + printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + A_h = (float*)malloc(Nbytes); + HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIP_CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + // Fill with Phi + i + for (size_t i = 0; i < N; i++) { + A_h[i] = 1.618f + i; + } + printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + + printf("info: copy Host2Device\n"); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + + printf("info: launch 'vector_square' kernel\n"); + hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); + + printf("info: copy Device2Host\n"); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + printf("info: check result\n"); + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + HIP_CHECK(hipErrorUnknown); + } + } + printf("PASSED!\n"); + REQUIRE(true); +} diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index 89bd3091db..ddf94d4ae6 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -106,12 +106,21 @@ add_custom_target(addKernel.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) +add_custom_target(copyKernelCompressed.code + COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=5 --offload-compress --genco ${OFFLOAD_ARCH_STR} + ${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc + -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/copyKernelCompressed.code + -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} + -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/empty_module.code ${CMAKE_CURRENT_BINARY_DIR}/copyKernel.code ${CMAKE_CURRENT_BINARY_DIR}/copyKernel.s ${CMAKE_CURRENT_BINARY_DIR}/addKernel.code + ${CMAKE_CURRENT_BINARY_DIR}/copyKernelCompressed.code ) + if(UNIX) set(TEST_SRC ${TEST_SRC} @@ -204,6 +213,7 @@ if(HIP_PLATFORM MATCHES "amd") add_dependencies(build_tests empty_module.code) add_dependencies(build_tests copyKernel.code copyKernel.s) add_dependencies(build_tests addKernel.code) +add_dependencies(build_tests copyKernelCompressed.code) if(UNIX) add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code) diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc index d5c2f88a4f..c1d64b7388 100644 --- a/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -51,6 +51,7 @@ THE SOFTWARE. constexpr auto fileName = "copyKernel.code"; constexpr auto kernel_name = "copy_ker"; +constexpr auto fileNameCompressed = "copyKernelCompressed.code"; static constexpr auto totalWorkGroups{1024}; static constexpr auto localWorkSize{512}; static constexpr auto lastWorkSizeEven{256}; @@ -189,7 +190,12 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { // Get module and function from module hipModule_t Module; hipFunction_t Function; - HIP_CHECK(hipModuleLoad(&Module, fileName)); + SECTION("uncompressed codeobjects") { + HIP_CHECK(hipModuleLoad(&Module, fileName)); + } + SECTION("compressed codeobjects") { + HIP_CHECK(hipModuleLoad(&Module, fileNameCompressed)); + } HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); // Allocate resources int* A = new int[arraylength]; diff --git a/catch/unit/module/hipModuleLoadData.cc b/catch/unit/module/hipModuleLoadData.cc index 1f112bd09a..5c6489b413 100644 --- a/catch/unit/module/hipModuleLoadData.cc +++ b/catch/unit/module/hipModuleLoadData.cc @@ -36,6 +36,18 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") { HIP_CHECK(hipModuleUnload(module)); } +#if defined(__HIP_PLATFORM_AMD__) + SECTION("Load compiled module from file with compressed code objects") { + const auto loaded_module = LoadModuleIntoBuffer("copyKernelCompressed.code"); + HIP_CHECK(hipModuleLoadData(&module, loaded_module.data())); + REQUIRE(module != nullptr); + hipFunction_t kernel = nullptr; + HIP_CHECK(hipModuleGetFunction(&kernel, module, "copy_ker")); + REQUIRE(kernel != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } +#endif + SECTION("Load RTCd module") { const auto rtc = CreateRTCCharArray(R"(extern "C" __global__ void kernel() {})"); HIP_CHECK(hipModuleLoadData(&module, rtc.data())); @@ -65,6 +77,7 @@ TEST_CASE("Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String") { HIP_CHECK_ERROR(hipModuleLoadData(&module, ""), hipErrorInvalidImage); } + /** * @addtogroup hipModuleLoad hipModuleGetFunction * @{