From f5815181038cbf30973bb4e3074a671554b89e84 Mon Sep 17 00:00:00 2001 From: taosang2 Date: Fri, 28 Jun 2024 17:17:31 -0400 Subject: [PATCH] SWDEV-447973 - Add generic target codeobj test Add simple tests to verify generic target code objects. Change-Id: Iae148c3c938b18247624938512918dbb3cbc462e --- catch/hipTestMain/config/config_amd_linux | 2 + catch/hipTestMain/hip_test_features.cc | 95 ++++++++- catch/include/hip_test_common.hh | 1 + catch/include/hip_test_features.hh | 2 + catch/unit/compiler/CMakeLists.txt | 38 +++- catch/unit/compiler/hipSquareGenericTarget.cc | 108 ++++++++++ catch/unit/module/CMakeLists.txt | 11 + catch/unit/module/hipExtModuleLaunchKernel.cc | 9 + catch/unit/module/hipModuleLoadData.cc | 16 +- samples/0_Intro/CMakeLists.txt | 3 +- samples/0_Intro/generic_target/CMakeLists.txt | 88 ++++++++ samples/0_Intro/generic_target/README.md | 61 ++++++ samples/0_Intro/generic_target/saxpy.cpp | 193 ++++++++++++++++++ samples/0_Intro/generic_target/square.cpp | 97 +++++++++ 14 files changed, 720 insertions(+), 4 deletions(-) create mode 100644 catch/unit/compiler/hipSquareGenericTarget.cc create mode 100644 samples/0_Intro/generic_target/CMakeLists.txt create mode 100644 samples/0_Intro/generic_target/README.md create mode 100644 samples/0_Intro/generic_target/saxpy.cpp create mode 100644 samples/0_Intro/generic_target/square.cpp diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 9ebbb88d5c..9780f08097 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -870,6 +870,8 @@ "Unit_safeAtomicMin_Positive_SameAddress - float", "=== SWDEV-454220 : Below test hanged in stress test on 22/03/24 ===", "Unit_hipExtLaunchKernel_Positive_Basic", + "=== Temporarily disable the test that failed in mi300 ===", + "Unit_test_generic_target_only_codeobject", #endif #if defined gfx1030 "=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===", diff --git a/catch/hipTestMain/hip_test_features.cc b/catch/hipTestMain/hip_test_features.cc index f276004d99..2d0422f341 100644 --- a/catch/hipTestMain/hip_test_features.cc +++ b/catch/hipTestMain/hip_test_features.cc @@ -2,7 +2,7 @@ #include #include - +#include #include "hip_test_context.hh" std::vector> GCNArchFeatMap = { @@ -43,3 +43,96 @@ bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch) { assert(false); #endif } + +// Return true if agentTarget has corresponding generic target which will be returned in +// genericTarget; +// false, otherwise. +// Note: it will naturely return false on Nvidia device +bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget) { + // The map is subject to change per removing policy + static std::map genericTargetMap{ + // "gfx9-generic" + {"gfx900", "gfx9-generic"}, + {"gfx902", "gfx9-generic"}, + {"gfx904", "gfx9-generic"}, + {"gfx906", "gfx9-generic"}, + {"gfx909", "gfx9-generic"}, + {"gfx90c", "gfx9-generic"}, + // "gfx9-4-generic + {"gfx940", "gfx9-4-generic"}, + {"gfx941", "gfx9-4-generic"}, + {"gfx942", "gfx9-4-generic"}, + {"gfx950", "gfx9-4-generic"}, + // "gfx10-1-generic" + {"gfx1010", "gfx10-1-generic"}, + {"gfx1011", "gfx10-1-generic"}, + {"gfx1012", "gfx10-1-generic"}, + {"gfx1013", "gfx10-1-generic"}, + // "gfx10-3-generic" + {"gfx1030", "gfx10-3-generic"}, + {"gfx1031", "gfx10-3-generic"}, + {"gfx1032", "gfx10-3-generic"}, + {"gfx1033", "gfx10-3-generic"}, + {"gfx1034", "gfx10-3-generic"}, + {"gfx1035", "gfx10-3-generic"}, + {"gfx1036", "gfx10-3-generic"}, + // "gfx11-generic" + {"gfx1100", "gfx11-generic"}, + {"gfx1101", "gfx11-generic"}, + {"gfx1102", "gfx11-generic"}, + {"gfx1103", "gfx11-generic"}, + {"gfx1150", "gfx11-generic"}, + {"gfx1151", "gfx11-generic"}, + // "gfx12-generic" + {"gfx1200", "gfx12-generic"}, + {"gfx1201", "gfx12-generic"}, + }; + auto search = genericTargetMap.find(agentTarget); + if (search == genericTargetMap.end()) return false; + genericTarget = search->second; + return true; +} + +/* +Return true, if gcnArchName has corresponding generic target; + false, otherwise. +If gcnArchName is nullptr, it will be queried from deviceId; + otherwise, deviceId will be ignored. + +The specific arches have the following mapping to generic targets, + +Generic GFX11 + +--offload-arch=gfx11-generic - includes [gfx1100-gfx1103], gfx1150, gfx1151 + +Generic GFX10.3 + +--offload-arch=gfx10.3-generic - includes [gfx1030-gfx1036] + +Generic GFX10.1 + +--offload-arch=gfx10.1-generic - includes [gfx1010-gfx1013] + +Generic GFX9 / Consumer + +--offload-arch=gfx9-generic - includes gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c + +Generic GFX9.4 / Data center + +--offload-arch=gfx9-4-generic - includes gfx940, gfx941, gfx942, gfx950 +*/ +bool isGenericTargetSupported(char* gcnArchName, int deviceId) { + hipDeviceProp_t props{}; + if (gcnArchName == nullptr) { + if (hipGetDeviceProperties(&props, deviceId) != hipSuccess) return false; + gcnArchName = props.gcnArchName; + } + std::string target{gcnArchName}; + std::string genericTarget{}; + auto pos = target.find(':'); + if (pos != std::string::npos) { + target[pos] = 0; + target.resize(pos); + } + return getGenericTarget(target, genericTarget); +} diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index c801d0f1c1..86fc4d8836 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -34,6 +34,7 @@ THE SOFTWARE. #include #include #include +#include "hip_test_features.hh" #ifdef TEST_CLOCK_CYCLE #define clock_function() clock64() diff --git a/catch/include/hip_test_features.hh b/catch/include/hip_test_features.hh index c1df46e5d9..0534e9b954 100644 --- a/catch/include/hip_test_features.hh +++ b/catch/include/hip_test_features.hh @@ -36,3 +36,5 @@ typedef enum CTFeatures { } CTFeatures; bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch); +bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget); +bool isGenericTargetSupported(char* gcnArchName = nullptr, int deviceId = 0); diff --git a/catch/unit/compiler/CMakeLists.txt b/catch/unit/compiler/CMakeLists.txt index c8aa25c64b..4837ebfc73 100644 --- a/catch/unit/compiler/CMakeLists.txt +++ b/catch/unit/compiler/CMakeLists.txt @@ -17,5 +17,41 @@ if(HIP_PLATFORM MATCHES "amd") hip_add_exe_to_target(NAME SimpleCompressedCodeObjectTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) -endif() + set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic") + + # Build hipSquareGenericTargetOnly to cover generic targets only + # Because default catch2 build will reference CMAKE_CXX_FLAGS that contains specific targets which will hijack generic + # target in hip-rt, we have to use custom build to contain generic targets only. + set(GENERIC_TARGET_ONLY_EXE hipSquareGenericTargetOnly) + set(LIBFS) + if(WIN32) + set(GENERIC_TARGET_ONLY_EXE ${GENERIC_TARGET_ONLY_EXE}.exe) + else() + set(LIBFS -lstdc++fs) + endif() + + add_custom_target(hipSquareGenericTargetOnly ALL + COMMAND ${CMAKE_CXX_COMPILER} -DNO_GENERIC_TARGET_ONLY_TEST --std=c++17 -mcode-object-version=6 -w "${OFFLOAD_ARCH_GENERIC_STR}" + ${CMAKE_CURRENT_SOURCE_DIR}/hipSquareGenericTarget.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_context.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_features.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/main.cc + -o ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE} + -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} + -I${CMAKE_CURRENT_SOURCE_DIR}/../../include + -I${CMAKE_CURRENT_SOURCE_DIR}/../../external/Catch2 + -I${CMAKE_CURRENT_SOURCE_DIR}/../../external/picojson ${LIBFS}) + set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE}) + + # Build hipSquareGenericTarget to cover generic targets and the specific target + set(TEST_SRC + hipSquareGenericTarget.cc + ) + hip_add_exe_to_target(NAME hipSquareGenericTarget + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) + set_source_files_properties(hipSquareGenericTarget.cc + PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}") + add_dependencies(hipSquareGenericTarget hipSquareGenericTargetOnly) +endif() diff --git a/catch/unit/compiler/hipSquareGenericTarget.cc b/catch/unit/compiler/hipSquareGenericTarget.cc new file mode 100644 index 0000000000..01971f63f6 --- /dev/null +++ b/catch/unit/compiler/hipSquareGenericTarget.cc @@ -0,0 +1,108 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include + +/* + * Square each element in the array A and write to array C. + */ +template +static __global__ void vector_square_generic(T* C_d, const T* A_d, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } +} + + +TEST_CASE("Unit_test_generic_target_codeobject") { + if (!isGenericTargetSupported()) { + fprintf(stderr, "Generic target test is skipped\n"); + return; + } + float *A_d, *C_d; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + static int device = 0; + HIP_CHECK(hipSetDevice(device)); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, device /*deviceID*/)); + printf("info: running on device %s\n", props.name); + #ifdef __HIP_PLATFORM_AMD__ + printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName); + // check the scope of supportted types + #endif + printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + A_h = (float*)malloc(Nbytes); + HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIP_CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + // Fill with Phi + i + for (size_t i = 0; i < N; i++) { + A_h[i] = 1.618f + i; + } + printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + + printf("info: copy Host2Device\n"); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + + printf("info: launch 'vector_square' kernel\n"); + hipLaunchKernelGGL(vector_square_generic, dim3(blocks), dim3(threadsPerBlock), + 0, 0, C_d, A_d, N); + + printf("info: copy Device2Host\n"); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + printf("info: check result\n"); + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + HIP_CHECK(hipErrorUnknown); + } + } + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(C_d)); + free(A_h); + free(C_h); + printf("PASSED!\n"); + REQUIRE(true); +} + +#ifndef NO_GENERIC_TARGET_ONLY_TEST +TEST_CASE("Unit_test_generic_target_only_codeobject") { +#ifdef __linux__ + char *cmd = "chmod u+x ./hipSquareGenericTargetOnly && ./hipSquareGenericTargetOnly"; +#else + char *cmd = "hipSquareGenericTargetOnly.exe"; +#endif + printf("Run %s\n", cmd); + REQUIRE(std::system(cmd) == 0); + printf("PASSED!\n"); +} +#endif diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index 97a7fd59ec..778a89aaf7 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -113,12 +113,21 @@ add_custom_target(copyKernelCompressed.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) +set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic") +add_custom_target(copyKernelGenericTarget.code + COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=6 --genco ${OFFLOAD_ARCH_GENERIC_STR} + ${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc + -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/copyKernelGenericTarget.code + -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} + -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/empty_module.code ${CMAKE_CURRENT_BINARY_DIR}/copyKernel.code ${CMAKE_CURRENT_BINARY_DIR}/copyKernel.s ${CMAKE_CURRENT_BINARY_DIR}/addKernel.code ${CMAKE_CURRENT_BINARY_DIR}/copyKernelCompressed.code + ${CMAKE_CURRENT_BINARY_DIR}/copyKernelGenericTarget.code ) if(UNIX) @@ -214,6 +223,8 @@ add_dependencies(build_tests empty_module.code) add_dependencies(build_tests copyKernel.code copyKernel.s) add_dependencies(build_tests addKernel.code) add_dependencies(build_tests copyKernelCompressed.code) +add_dependencies(build_tests copyKernelGenericTarget.code) + if(UNIX) add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code) diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc index 1a2766dca0..faf4fa3708 100644 --- a/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -52,6 +52,8 @@ THE SOFTWARE. constexpr auto fileName = "copyKernel.code"; constexpr auto kernel_name = "copy_ker"; constexpr auto fileNameCompressed = "copyKernelCompressed.code"; +constexpr auto fileNameGenericTarget = "copyKernelGenericTarget.code"; + static constexpr auto totalWorkGroups{1024}; static constexpr auto localWorkSize{512}; static constexpr auto lastWorkSizeEven{256}; @@ -196,6 +198,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { SECTION("compressed codeobjects") { HIP_CHECK(hipModuleLoad(&Module, fileNameCompressed)); } + SECTION("generic target codeobjects") { + if (!isGenericTargetSupported()) { + fprintf(stderr, "Generic target test is skipped\n"); + return; + } + HIP_CHECK(hipModuleLoad(&Module, fileNameGenericTarget)); + } HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); // Allocate resources int* A = new int[arraylength]; diff --git a/catch/unit/module/hipModuleLoadData.cc b/catch/unit/module/hipModuleLoadData.cc index 5c6489b413..3210dd90c4 100644 --- a/catch/unit/module/hipModuleLoadData.cc +++ b/catch/unit/module/hipModuleLoadData.cc @@ -36,7 +36,7 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") { HIP_CHECK(hipModuleUnload(module)); } -#if defined(__HIP_PLATFORM_AMD__) +#if HT_AMD SECTION("Load compiled module from file with compressed code objects") { const auto loaded_module = LoadModuleIntoBuffer("copyKernelCompressed.code"); HIP_CHECK(hipModuleLoadData(&module, loaded_module.data())); @@ -46,6 +46,20 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") { REQUIRE(kernel != nullptr); HIP_CHECK(hipModuleUnload(module)); } + + SECTION("Load compiled module from file with generic target code objects") { + if (!isGenericTargetSupported()) { + fprintf(stderr, "Generic target test is skipped\n"); + return; + } + const auto loaded_module = LoadModuleIntoBuffer("copyKernelGenericTarget.code"); + HIP_CHECK(hipModuleLoadData(&module, loaded_module.data())); + REQUIRE(module != nullptr); + hipFunction_t kernel = nullptr; + HIP_CHECK(hipModuleGetFunction(&kernel, module, "copy_ker")); + REQUIRE(kernel != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } #endif SECTION("Load RTCd module") { diff --git a/samples/0_Intro/CMakeLists.txt b/samples/0_Intro/CMakeLists.txt index 6704c1b5c1..ed73f09e3d 100644 --- a/samples/0_Intro/CMakeLists.txt +++ b/samples/0_Intro/CMakeLists.txt @@ -23,4 +23,5 @@ add_custom_target(build_intro) add_subdirectory(bit_extract) add_subdirectory(module_api) add_subdirectory(module_api_global) -add_subdirectory(square) \ No newline at end of file +add_subdirectory(square) +add_subdirectory(generic_target) \ No newline at end of file diff --git a/samples/0_Intro/generic_target/CMakeLists.txt b/samples/0_Intro/generic_target/CMakeLists.txt new file mode 100644 index 0000000000..9c94f8d72b --- /dev/null +++ b/samples/0_Intro/generic_target/CMakeLists.txt @@ -0,0 +1,88 @@ +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.10) + +project(generic_target) + +if(UNIX) + if(NOT DEFINED ROCM_PATH) + set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.") + endif() + # Search for rocm in common locations + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) +endif() + +# Find hiprtc +find_package(hiprtc) +# Find hip +find_package(hip) + +if(NOT HIP_PLATFORM MATCHES "amd") +message("Generic target is only supporte on AMD GPU") +return() +endif() + +# Set compiler and linker +set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) +set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) + +# Create the excutable +if(TARGET build_intro) +set(EXCLUDE_OPTION EXCLUDE_FROM_ALL) +else() +set(EXCLUDE_OPTION ) +endif() + +# Test generic target without feature +set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic") + +add_executable(squareGenericTarget ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc) +target_include_directories(squareGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/) +set_target_properties(squareGenericTarget PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}") +target_link_libraries(squareGenericTarget hip::host) + +# Test generic target with features +set(OFFLOAD_ARCH_GENERIC_FEATURE_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic") + +add_executable(squareGenericTargetWithFeatures ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc) +target_include_directories(squareGenericTargetWithFeatures PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/) +set_target_properties(squareGenericTargetWithFeatures PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_FEATURE_STR}") +target_link_libraries(squareGenericTargetWithFeatures hip::host) + +# Create the excutable +add_executable(saxpyGenericTarget ${EXCLUDE_OPTION} saxpy.cpp ../../../catch/hipTestMain/hip_test_features.cc) + +# Link with HIPRTC +target_link_libraries(saxpyGenericTarget hiprtc) +# Link with HIP +target_link_libraries(saxpyGenericTarget hip::device) + +if(NOT BUILD_SHARED_LIBS) + target_link_libraries(saxpyGenericTarget hiprtc-builtins) +endif() + +target_include_directories(saxpyGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/) + +if(TARGET build_intro) +add_dependencies(build_intro saxpyGenericTarget) +add_dependencies(build_intro squareGenericTarget) +add_dependencies(build_intro squareGenericTargetWithFeatures) +endif() diff --git a/samples/0_Intro/generic_target/README.md b/samples/0_Intro/generic_target/README.md new file mode 100644 index 0000000000..5c0688ea0f --- /dev/null +++ b/samples/0_Intro/generic_target/README.md @@ -0,0 +1,61 @@ +# GenericTarget.md + +- Add hip/bin path to the PATH +``` +$ export PATH=$PATH:[MYHIP]/bin +``` + +- Define environment variable +``` +$ export HIP_PATH=[MYHIP] +``` + +- Create build folder +``` +$ cd ~/hip/samples/0_Intro/genericTarget + mkdir -p build && cd build +``` + +- Build with cmake +``` + cmake .. + make +``` + +- Build without cmake +``` + +/opt/rocm/bin/hipcc ../square.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTarget + +/opt/rocm/bin/hipcc ../square1.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTargetWithFeatures + +/opt/rocm/bin/hipcc ../saxpy.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include -o saxpyGenericTarget +``` +- Execute tests +``` + +$ ./squareGenericTarget +info: running on device AMD Radeon RX 6900 XT +info: allocate host mem ( 7.63 MB) +info: allocate device mem ( 7.63 MB) +info: copy Host2Device +info: launch 'vector_square' kernel +info: copy Device2Host +info: check result +PASSED: generic target! + +Best to run on Mi3XX to verify features +$ ./squareGenericTargetWithFeatures +info: running on device AMD Radeon RX 6900 XT +info: allocate host mem ( 7.63 MB) +info: allocate device mem ( 7.63 MB) +info: copy Host2Device +info: launch 'vector_square' kernel +info: copy Device2Host +info: check result +PASSED: generic targets! + +$./saxyGenericTarget +Find generic target gfx11-generic +SAXPY test passed +``` \ No newline at end of file diff --git a/samples/0_Intro/generic_target/saxpy.cpp b/samples/0_Intro/generic_target/saxpy.cpp new file mode 100644 index 0000000000..43052ef3a2 --- /dev/null +++ b/samples/0_Intro/generic_target/saxpy.cpp @@ -0,0 +1,193 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "hip_test_features.hh" +static constexpr auto NUM_THREADS{128}; +static constexpr auto NUM_BLOCKS{32}; +using namespace std; + +static constexpr auto saxpy{ +R"( +#include "test_header.h" +#include "test_header1.h" +extern "C" +__global__ +void saxpy(real a, realptr x, realptr y, realptr out, size_t n) +{ + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < n) { + out[tid] = a * x[tid] + y[tid] ; + } +} +)"}; + +int main() +{ + hipDeviceProp_t props; + int device = 0; + checkHipErrors(hipSetDevice(device)); + checkHipErrors(hipGetDeviceProperties(&props, device)); + string agentTarget{props.gcnArchName}; + string genericTarget{}; + string postFix{}; + auto pos = agentTarget.find(':'); + + if (pos != std::string::npos) { + postFix = agentTarget.substr(pos); // Features + agentTarget.resize(pos); + } + if (!getGenericTarget(agentTarget, genericTarget)) { + cout << props.gcnArchName <<" has no generic target support. Skipped!" << endl; + return 0; + } + if (agentTarget.find("gfx906") != std::string::npos) { + if (postFix.find(":sramecc") != std::string::npos) { + // gfx906's generic target gfx9-generic doesn't support sramecc + postFix = postFix.substr(strlen(":sramecc") + 1); + } + } + if (!postFix.empty()) { + genericTarget += postFix; + } + + cout << "Find generic target " << genericTarget << endl; + hiprtcProgram prog; + int num_headers = 2; + vector header_names; + vector header_sources; + header_names.push_back("test_header.h"); + header_names.push_back("test_header1.h"); + header_sources.push_back("#ifndef HIPRTC_TEST_HEADER_H\n#define HIPRTC_TEST_HEADER_H\ntypedef float real;\n#endif //HIPRTC_TEST_HEADER_H\n"); + header_sources.push_back("#ifndef HIPRTC_TEST_HEADER1_H\n#define HIPRTC_TEST_HEADER1_H\ntypedef float* realptr;\n#endif //HIPRTC_TEST_HEADER1_H\n"); + hiprtcCreateProgram(&prog, // prog + saxpy, // buffer + "saxpy.cu", // name + num_headers, // numHeaders + &header_sources[0], // headers + &header_names[0]); // includeNames + + string offload {"--offload-arch="}; + offload += genericTarget.c_str(); + /* + * offload must be one of following: + * "--offload-arch=gfx9-generic", + * "--offload-arch=gfx9-4-generic", + * "--offload-arch=gfx10-1-generic", + * "--offload-arch=gfx10-3-generic", + * "--offload-arch=gfx11-generic", + * "--offload-arch=gfx12-generic" + * + * */ + const char* options[] = {offload.c_str(), "-mcode-object-version=6", "-w"}; + hiprtcResult compileResult { + hiprtcCompileProgram(prog, sizeof(options) / sizeof(options[0]), options) }; + + size_t logSize; + hiprtcGetProgramLogSize(prog, &logSize); + + if (logSize) { + string log(logSize, '\0'); + hiprtcGetProgramLog(prog, &log[0]); + + cout << log << '\n'; + } + + if (compileResult != HIPRTC_SUCCESS) { + cout << "Compilation failed." << endl; + } + + size_t codeSize; + hiprtcGetCodeSize(prog, &codeSize); + + vector code(codeSize); + hiprtcGetCode(prog, code.data()); + + hiprtcDestroyProgram(&prog); + + hipModule_t module; + hipFunction_t kernel; + + checkHipErrors(hipModuleLoadData(&module, code.data())); + checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy")); + + size_t n = NUM_THREADS * NUM_BLOCKS; + size_t bufferSize = n * sizeof(float); + + float a = 5.1f; + unique_ptr hX{new float[n]}; + unique_ptr hY{new float[n]}; + unique_ptr hOut{new float[n]}; + + for (size_t i = 0; i < n; ++i) { + hX[i] = static_cast(i); + hY[i] = static_cast(i * 2); + } + + hipDeviceptr_t dX, dY, dOut; + checkHipErrors(hipMalloc((void **)&dX, bufferSize)); + checkHipErrors(hipMalloc((void **)&dY, bufferSize)); + checkHipErrors(hipMalloc((void **)&dOut, bufferSize)); + checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize)); + checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize)); + + struct { + float a_; + hipDeviceptr_t b_; + hipDeviceptr_t c_; + hipDeviceptr_t d_; + size_t e_; + } args{a, dX, dY, dOut, n}; + + auto size = sizeof(args); + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + + checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, + 0, nullptr, nullptr, config)); + checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize)); + + for (size_t i = 0; i < n; ++i) { + if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { + cout << "Validation failed." << endl; + } + } + + checkHipErrors(hipFree((void *)dX)); + checkHipErrors(hipFree((void *)dY)); + checkHipErrors(hipFree((void *)dOut)); + + checkHipErrors(hipModuleUnload(module)); + + cout << "SAXPY test passed" << endl; + return 0; +} diff --git a/samples/0_Intro/generic_target/square.cpp b/samples/0_Intro/generic_target/square.cpp new file mode 100644 index 0000000000..a1962c5fd5 --- /dev/null +++ b/samples/0_Intro/generic_target/square.cpp @@ -0,0 +1,97 @@ +/* +Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip/hip_runtime.h" +#include +#include "hip_test_features.hh" + +/* + * Square each element in the array A and write to array C. + */ +template +__global__ void vector_square(T* C_d, const T* A_d, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } +} + +int main(int argc, char* argv[]) { + float *A_d, *C_d; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + static int device = 0; + checkHipErrors(hipSetDevice(device)); + hipDeviceProp_t props; + checkHipErrors(hipGetDeviceProperties(&props, device /*deviceID*/)); + printf("info: running on device %s\n", props.name); +#ifdef __HIP_PLATFORM_AMD__ + printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName); +#endif + if (!isGenericTargetSupported(props.gcnArchName)) { + printf("%s has no generic target support. Skipped\n", props.gcnArchName); + return 0; + } + printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + A_h = (float*)malloc(Nbytes); + checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + // Fill with Phi + i + for (size_t i = 0; i < N; i++) { + A_h[i] = 1.618f + i; + } + + printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); + checkHipErrors(hipMalloc(&A_d, Nbytes)); + checkHipErrors(hipMalloc(&C_d, Nbytes)); + + printf("info: copy Host2Device\n"); + checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + + const unsigned blocks = 512; + const unsigned threadsPerBlock = 256; + + printf("info: launch 'vector_square' kernel\n"); + hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); + + printf("info: copy Device2Host\n"); + checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + printf("info: check result\n"); + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + checkHipErrors(hipErrorUnknown); + } + } + + checkHipErrors(hipFree(A_d)); + checkHipErrors(hipFree(C_d)); + free(A_h); + free(C_h); + + printf("PASSED: generic target!\n"); +}