SWDEV-447973 - Add generic target codeobj test
Add simple tests to verify generic target code objects. Change-Id: Iae148c3c938b18247624938512918dbb3cbc462e
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -870,6 +870,8 @@
|
||||
"Unit_safeAtomicMin_Positive_SameAddress - float",
|
||||
"=== SWDEV-454220 : Below test hanged in stress test on 22/03/24 ===",
|
||||
"Unit_hipExtLaunchKernel_Positive_Basic",
|
||||
"=== Temporarily disable the test that failed in mi300 ===",
|
||||
"Unit_test_generic_target_only_codeobject",
|
||||
#endif
|
||||
#if defined gfx1030
|
||||
"=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <assert.h>
|
||||
|
||||
#include <map>
|
||||
#include "hip_test_context.hh"
|
||||
|
||||
std::vector<std::unordered_set<std::string>> GCNArchFeatMap = {
|
||||
@@ -43,3 +43,96 @@ bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch) {
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return true if agentTarget has corresponding generic target which will be returned in
|
||||
// genericTarget;
|
||||
// false, otherwise.
|
||||
// Note: it will naturely return false on Nvidia device
|
||||
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget) {
|
||||
// The map is subject to change per removing policy
|
||||
static std::map<std::string, std::string> genericTargetMap{
|
||||
// "gfx9-generic"
|
||||
{"gfx900", "gfx9-generic"},
|
||||
{"gfx902", "gfx9-generic"},
|
||||
{"gfx904", "gfx9-generic"},
|
||||
{"gfx906", "gfx9-generic"},
|
||||
{"gfx909", "gfx9-generic"},
|
||||
{"gfx90c", "gfx9-generic"},
|
||||
// "gfx9-4-generic
|
||||
{"gfx940", "gfx9-4-generic"},
|
||||
{"gfx941", "gfx9-4-generic"},
|
||||
{"gfx942", "gfx9-4-generic"},
|
||||
{"gfx950", "gfx9-4-generic"},
|
||||
// "gfx10-1-generic"
|
||||
{"gfx1010", "gfx10-1-generic"},
|
||||
{"gfx1011", "gfx10-1-generic"},
|
||||
{"gfx1012", "gfx10-1-generic"},
|
||||
{"gfx1013", "gfx10-1-generic"},
|
||||
// "gfx10-3-generic"
|
||||
{"gfx1030", "gfx10-3-generic"},
|
||||
{"gfx1031", "gfx10-3-generic"},
|
||||
{"gfx1032", "gfx10-3-generic"},
|
||||
{"gfx1033", "gfx10-3-generic"},
|
||||
{"gfx1034", "gfx10-3-generic"},
|
||||
{"gfx1035", "gfx10-3-generic"},
|
||||
{"gfx1036", "gfx10-3-generic"},
|
||||
// "gfx11-generic"
|
||||
{"gfx1100", "gfx11-generic"},
|
||||
{"gfx1101", "gfx11-generic"},
|
||||
{"gfx1102", "gfx11-generic"},
|
||||
{"gfx1103", "gfx11-generic"},
|
||||
{"gfx1150", "gfx11-generic"},
|
||||
{"gfx1151", "gfx11-generic"},
|
||||
// "gfx12-generic"
|
||||
{"gfx1200", "gfx12-generic"},
|
||||
{"gfx1201", "gfx12-generic"},
|
||||
};
|
||||
auto search = genericTargetMap.find(agentTarget);
|
||||
if (search == genericTargetMap.end()) return false;
|
||||
genericTarget = search->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
Return true, if gcnArchName has corresponding generic target;
|
||||
false, otherwise.
|
||||
If gcnArchName is nullptr, it will be queried from deviceId;
|
||||
otherwise, deviceId will be ignored.
|
||||
|
||||
The specific arches have the following mapping to generic targets,
|
||||
|
||||
Generic GFX11
|
||||
|
||||
--offload-arch=gfx11-generic - includes [gfx1100-gfx1103], gfx1150, gfx1151
|
||||
|
||||
Generic GFX10.3
|
||||
|
||||
--offload-arch=gfx10.3-generic - includes [gfx1030-gfx1036]
|
||||
|
||||
Generic GFX10.1
|
||||
|
||||
--offload-arch=gfx10.1-generic - includes [gfx1010-gfx1013]
|
||||
|
||||
Generic GFX9 / Consumer
|
||||
|
||||
--offload-arch=gfx9-generic - includes gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c
|
||||
|
||||
Generic GFX9.4 / Data center
|
||||
|
||||
--offload-arch=gfx9-4-generic - includes gfx940, gfx941, gfx942, gfx950
|
||||
*/
|
||||
bool isGenericTargetSupported(char* gcnArchName, int deviceId) {
|
||||
hipDeviceProp_t props{};
|
||||
if (gcnArchName == nullptr) {
|
||||
if (hipGetDeviceProperties(&props, deviceId) != hipSuccess) return false;
|
||||
gcnArchName = props.gcnArchName;
|
||||
}
|
||||
std::string target{gcnArchName};
|
||||
std::string genericTarget{};
|
||||
auto pos = target.find(':');
|
||||
if (pos != std::string::npos) {
|
||||
target[pos] = 0;
|
||||
target.resize(pos);
|
||||
}
|
||||
return getGenericTarget(target, genericTarget);
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ THE SOFTWARE.
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include <thread>
|
||||
#include "hip_test_features.hh"
|
||||
|
||||
#ifdef TEST_CLOCK_CYCLE
|
||||
#define clock_function() clock64()
|
||||
|
||||
@@ -36,3 +36,5 @@ typedef enum CTFeatures {
|
||||
} CTFeatures;
|
||||
|
||||
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch);
|
||||
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget);
|
||||
bool isGenericTargetSupported(char* gcnArchName = nullptr, int deviceId = 0);
|
||||
|
||||
@@ -17,5 +17,41 @@ if(HIP_PLATFORM MATCHES "amd")
|
||||
hip_add_exe_to_target(NAME SimpleCompressedCodeObjectTest
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests)
|
||||
endif()
|
||||
|
||||
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
|
||||
|
||||
# Build hipSquareGenericTargetOnly to cover generic targets only
|
||||
# Because default catch2 build will reference CMAKE_CXX_FLAGS that contains specific targets which will hijack generic
|
||||
# target in hip-rt, we have to use custom build to contain generic targets only.
|
||||
set(GENERIC_TARGET_ONLY_EXE hipSquareGenericTargetOnly)
|
||||
set(LIBFS)
|
||||
if(WIN32)
|
||||
set(GENERIC_TARGET_ONLY_EXE ${GENERIC_TARGET_ONLY_EXE}.exe)
|
||||
else()
|
||||
set(LIBFS -lstdc++fs)
|
||||
endif()
|
||||
|
||||
add_custom_target(hipSquareGenericTargetOnly ALL
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -DNO_GENERIC_TARGET_ONLY_TEST --std=c++17 -mcode-object-version=6 -w "${OFFLOAD_ARCH_GENERIC_STR}"
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hipSquareGenericTarget.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_context.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_features.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/main.cc
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE}
|
||||
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../external/Catch2
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../external/picojson ${LIBFS})
|
||||
set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE})
|
||||
|
||||
# Build hipSquareGenericTarget to cover generic targets and the specific target
|
||||
set(TEST_SRC
|
||||
hipSquareGenericTarget.cc
|
||||
)
|
||||
hip_add_exe_to_target(NAME hipSquareGenericTarget
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests)
|
||||
set_source_files_properties(hipSquareGenericTarget.cc
|
||||
PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}")
|
||||
add_dependencies(hipSquareGenericTarget hipSquareGenericTargetOnly)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
*/
|
||||
template <typename T>
|
||||
static __global__ void vector_square_generic(T* C_d, const T* A_d, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
C_d[i] = A_d[i] * A_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("Unit_test_generic_target_codeobject") {
|
||||
if (!isGenericTargetSupported()) {
|
||||
fprintf(stderr, "Generic target test is skipped\n");
|
||||
return;
|
||||
}
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
static int device = 0;
|
||||
HIP_CHECK(hipSetDevice(device));
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, device /*deviceID*/));
|
||||
printf("info: running on device %s\n", props.name);
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName);
|
||||
// check the scope of supportted types
|
||||
#endif
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
HIP_CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
printf("info: copy Host2Device\n");
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf("info: launch 'vector_square' kernel\n");
|
||||
hipLaunchKernelGGL(vector_square_generic, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, C_d, A_d, N);
|
||||
|
||||
printf("info: copy Device2Host\n");
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
printf("info: check result\n");
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
HIP_CHECK(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
printf("PASSED!\n");
|
||||
REQUIRE(true);
|
||||
}
|
||||
|
||||
#ifndef NO_GENERIC_TARGET_ONLY_TEST
|
||||
TEST_CASE("Unit_test_generic_target_only_codeobject") {
|
||||
#ifdef __linux__
|
||||
char *cmd = "chmod u+x ./hipSquareGenericTargetOnly && ./hipSquareGenericTargetOnly";
|
||||
#else
|
||||
char *cmd = "hipSquareGenericTargetOnly.exe";
|
||||
#endif
|
||||
printf("Run %s\n", cmd);
|
||||
REQUIRE(std::system(cmd) == 0);
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
#endif
|
||||
@@ -113,12 +113,21 @@ add_custom_target(copyKernelCompressed.code
|
||||
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
|
||||
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
|
||||
add_custom_target(copyKernelGenericTarget.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=6 --genco ${OFFLOAD_ARCH_GENERIC_STR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/copyKernelGenericTarget.code
|
||||
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
|
||||
set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS
|
||||
${CMAKE_CURRENT_BINARY_DIR}/empty_module.code
|
||||
${CMAKE_CURRENT_BINARY_DIR}/copyKernel.code
|
||||
${CMAKE_CURRENT_BINARY_DIR}/copyKernel.s
|
||||
${CMAKE_CURRENT_BINARY_DIR}/addKernel.code
|
||||
${CMAKE_CURRENT_BINARY_DIR}/copyKernelCompressed.code
|
||||
${CMAKE_CURRENT_BINARY_DIR}/copyKernelGenericTarget.code
|
||||
)
|
||||
|
||||
if(UNIX)
|
||||
@@ -214,6 +223,8 @@ add_dependencies(build_tests empty_module.code)
|
||||
add_dependencies(build_tests copyKernel.code copyKernel.s)
|
||||
add_dependencies(build_tests addKernel.code)
|
||||
add_dependencies(build_tests copyKernelCompressed.code)
|
||||
add_dependencies(build_tests copyKernelGenericTarget.code)
|
||||
|
||||
if(UNIX)
|
||||
add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code
|
||||
copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code)
|
||||
|
||||
@@ -52,6 +52,8 @@ THE SOFTWARE.
|
||||
constexpr auto fileName = "copyKernel.code";
|
||||
constexpr auto kernel_name = "copy_ker";
|
||||
constexpr auto fileNameCompressed = "copyKernelCompressed.code";
|
||||
constexpr auto fileNameGenericTarget = "copyKernelGenericTarget.code";
|
||||
|
||||
static constexpr auto totalWorkGroups{1024};
|
||||
static constexpr auto localWorkSize{512};
|
||||
static constexpr auto lastWorkSizeEven{256};
|
||||
@@ -196,6 +198,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") {
|
||||
SECTION("compressed codeobjects") {
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileNameCompressed));
|
||||
}
|
||||
SECTION("generic target codeobjects") {
|
||||
if (!isGenericTargetSupported()) {
|
||||
fprintf(stderr, "Generic target test is skipped\n");
|
||||
return;
|
||||
}
|
||||
HIP_CHECK(hipModuleLoad(&Module, fileNameGenericTarget));
|
||||
}
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
|
||||
// Allocate resources
|
||||
int* A = new int[arraylength];
|
||||
|
||||
@@ -36,7 +36,7 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") {
|
||||
HIP_CHECK(hipModuleUnload(module));
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
#if HT_AMD
|
||||
SECTION("Load compiled module from file with compressed code objects") {
|
||||
const auto loaded_module = LoadModuleIntoBuffer("copyKernelCompressed.code");
|
||||
HIP_CHECK(hipModuleLoadData(&module, loaded_module.data()));
|
||||
@@ -46,6 +46,20 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") {
|
||||
REQUIRE(kernel != nullptr);
|
||||
HIP_CHECK(hipModuleUnload(module));
|
||||
}
|
||||
|
||||
SECTION("Load compiled module from file with generic target code objects") {
|
||||
if (!isGenericTargetSupported()) {
|
||||
fprintf(stderr, "Generic target test is skipped\n");
|
||||
return;
|
||||
}
|
||||
const auto loaded_module = LoadModuleIntoBuffer("copyKernelGenericTarget.code");
|
||||
HIP_CHECK(hipModuleLoadData(&module, loaded_module.data()));
|
||||
REQUIRE(module != nullptr);
|
||||
hipFunction_t kernel = nullptr;
|
||||
HIP_CHECK(hipModuleGetFunction(&kernel, module, "copy_ker"));
|
||||
REQUIRE(kernel != nullptr);
|
||||
HIP_CHECK(hipModuleUnload(module));
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Load RTCd module") {
|
||||
|
||||
@@ -23,4 +23,5 @@ add_custom_target(build_intro)
|
||||
add_subdirectory(bit_extract)
|
||||
add_subdirectory(module_api)
|
||||
add_subdirectory(module_api_global)
|
||||
add_subdirectory(square)
|
||||
add_subdirectory(square)
|
||||
add_subdirectory(generic_target)
|
||||
@@ -0,0 +1,88 @@
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
project(generic_target)
|
||||
|
||||
if(UNIX)
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
|
||||
endif()
|
||||
# Search for rocm in common locations
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
|
||||
endif()
|
||||
|
||||
# Find hiprtc
|
||||
find_package(hiprtc)
|
||||
# Find hip
|
||||
find_package(hip)
|
||||
|
||||
if(NOT HIP_PLATFORM MATCHES "amd")
|
||||
message("Generic target is only supporte on AMD GPU")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Set compiler and linker
|
||||
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
|
||||
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
|
||||
|
||||
# Create the excutable
|
||||
if(TARGET build_intro)
|
||||
set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)
|
||||
else()
|
||||
set(EXCLUDE_OPTION )
|
||||
endif()
|
||||
|
||||
# Test generic target without feature
|
||||
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
|
||||
|
||||
add_executable(squareGenericTarget ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc)
|
||||
target_include_directories(squareGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
|
||||
set_target_properties(squareGenericTarget PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}")
|
||||
target_link_libraries(squareGenericTarget hip::host)
|
||||
|
||||
# Test generic target with features
|
||||
set(OFFLOAD_ARCH_GENERIC_FEATURE_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
|
||||
|
||||
add_executable(squareGenericTargetWithFeatures ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc)
|
||||
target_include_directories(squareGenericTargetWithFeatures PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
|
||||
set_target_properties(squareGenericTargetWithFeatures PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_FEATURE_STR}")
|
||||
target_link_libraries(squareGenericTargetWithFeatures hip::host)
|
||||
|
||||
# Create the excutable
|
||||
add_executable(saxpyGenericTarget ${EXCLUDE_OPTION} saxpy.cpp ../../../catch/hipTestMain/hip_test_features.cc)
|
||||
|
||||
# Link with HIPRTC
|
||||
target_link_libraries(saxpyGenericTarget hiprtc)
|
||||
# Link with HIP
|
||||
target_link_libraries(saxpyGenericTarget hip::device)
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
target_link_libraries(saxpyGenericTarget hiprtc-builtins)
|
||||
endif()
|
||||
|
||||
target_include_directories(saxpyGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
|
||||
|
||||
if(TARGET build_intro)
|
||||
add_dependencies(build_intro saxpyGenericTarget)
|
||||
add_dependencies(build_intro squareGenericTarget)
|
||||
add_dependencies(build_intro squareGenericTargetWithFeatures)
|
||||
endif()
|
||||
@@ -0,0 +1,61 @@
|
||||
# GenericTarget.md
|
||||
|
||||
- Add hip/bin path to the PATH
|
||||
```
|
||||
$ export PATH=$PATH:[MYHIP]/bin
|
||||
```
|
||||
|
||||
- Define environment variable
|
||||
```
|
||||
$ export HIP_PATH=[MYHIP]
|
||||
```
|
||||
|
||||
- Create build folder
|
||||
```
|
||||
$ cd ~/hip/samples/0_Intro/genericTarget
|
||||
mkdir -p build && cd build
|
||||
```
|
||||
|
||||
- Build with cmake
|
||||
```
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
- Build without cmake
|
||||
```
|
||||
|
||||
/opt/rocm/bin/hipcc ../square.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTarget
|
||||
|
||||
/opt/rocm/bin/hipcc ../square1.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTargetWithFeatures
|
||||
|
||||
/opt/rocm/bin/hipcc ../saxpy.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include -o saxpyGenericTarget
|
||||
```
|
||||
- Execute tests
|
||||
```
|
||||
|
||||
$ ./squareGenericTarget
|
||||
info: running on device AMD Radeon RX 6900 XT
|
||||
info: allocate host mem ( 7.63 MB)
|
||||
info: allocate device mem ( 7.63 MB)
|
||||
info: copy Host2Device
|
||||
info: launch 'vector_square' kernel
|
||||
info: copy Device2Host
|
||||
info: check result
|
||||
PASSED: generic target!
|
||||
|
||||
Best to run on Mi3XX to verify features
|
||||
$ ./squareGenericTargetWithFeatures
|
||||
info: running on device AMD Radeon RX 6900 XT
|
||||
info: allocate host mem ( 7.63 MB)
|
||||
info: allocate device mem ( 7.63 MB)
|
||||
info: copy Host2Device
|
||||
info: launch 'vector_square' kernel
|
||||
info: copy Device2Host
|
||||
info: check result
|
||||
PASSED: generic targets!
|
||||
|
||||
$./saxyGenericTarget
|
||||
Find generic target gfx11-generic
|
||||
SAXPY test passed
|
||||
```
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hiprtc.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip_helper.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
#include "hip_test_features.hh"
|
||||
static constexpr auto NUM_THREADS{128};
|
||||
static constexpr auto NUM_BLOCKS{32};
|
||||
using namespace std;
|
||||
|
||||
static constexpr auto saxpy{
|
||||
R"(
|
||||
#include "test_header.h"
|
||||
#include "test_header1.h"
|
||||
extern "C"
|
||||
__global__
|
||||
void saxpy(real a, realptr x, realptr y, realptr out, size_t n)
|
||||
{
|
||||
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid < n) {
|
||||
out[tid] = a * x[tid] + y[tid] ;
|
||||
}
|
||||
}
|
||||
)"};
|
||||
|
||||
int main()
|
||||
{
|
||||
hipDeviceProp_t props;
|
||||
int device = 0;
|
||||
checkHipErrors(hipSetDevice(device));
|
||||
checkHipErrors(hipGetDeviceProperties(&props, device));
|
||||
string agentTarget{props.gcnArchName};
|
||||
string genericTarget{};
|
||||
string postFix{};
|
||||
auto pos = agentTarget.find(':');
|
||||
|
||||
if (pos != std::string::npos) {
|
||||
postFix = agentTarget.substr(pos); // Features
|
||||
agentTarget.resize(pos);
|
||||
}
|
||||
if (!getGenericTarget(agentTarget, genericTarget)) {
|
||||
cout << props.gcnArchName <<" has no generic target support. Skipped!" << endl;
|
||||
return 0;
|
||||
}
|
||||
if (agentTarget.find("gfx906") != std::string::npos) {
|
||||
if (postFix.find(":sramecc") != std::string::npos) {
|
||||
// gfx906's generic target gfx9-generic doesn't support sramecc
|
||||
postFix = postFix.substr(strlen(":sramecc") + 1);
|
||||
}
|
||||
}
|
||||
if (!postFix.empty()) {
|
||||
genericTarget += postFix;
|
||||
}
|
||||
|
||||
cout << "Find generic target " << genericTarget << endl;
|
||||
hiprtcProgram prog;
|
||||
int num_headers = 2;
|
||||
vector<const char*> header_names;
|
||||
vector<const char*> header_sources;
|
||||
header_names.push_back("test_header.h");
|
||||
header_names.push_back("test_header1.h");
|
||||
header_sources.push_back("#ifndef HIPRTC_TEST_HEADER_H\n#define HIPRTC_TEST_HEADER_H\ntypedef float real;\n#endif //HIPRTC_TEST_HEADER_H\n");
|
||||
header_sources.push_back("#ifndef HIPRTC_TEST_HEADER1_H\n#define HIPRTC_TEST_HEADER1_H\ntypedef float* realptr;\n#endif //HIPRTC_TEST_HEADER1_H\n");
|
||||
hiprtcCreateProgram(&prog, // prog
|
||||
saxpy, // buffer
|
||||
"saxpy.cu", // name
|
||||
num_headers, // numHeaders
|
||||
&header_sources[0], // headers
|
||||
&header_names[0]); // includeNames
|
||||
|
||||
string offload {"--offload-arch="};
|
||||
offload += genericTarget.c_str();
|
||||
/*
|
||||
* offload must be one of following:
|
||||
* "--offload-arch=gfx9-generic",
|
||||
* "--offload-arch=gfx9-4-generic",
|
||||
* "--offload-arch=gfx10-1-generic",
|
||||
* "--offload-arch=gfx10-3-generic",
|
||||
* "--offload-arch=gfx11-generic",
|
||||
* "--offload-arch=gfx12-generic"
|
||||
*
|
||||
* */
|
||||
const char* options[] = {offload.c_str(), "-mcode-object-version=6", "-w"};
|
||||
hiprtcResult compileResult {
|
||||
hiprtcCompileProgram(prog, sizeof(options) / sizeof(options[0]), options) };
|
||||
|
||||
size_t logSize;
|
||||
hiprtcGetProgramLogSize(prog, &logSize);
|
||||
|
||||
if (logSize) {
|
||||
string log(logSize, '\0');
|
||||
hiprtcGetProgramLog(prog, &log[0]);
|
||||
|
||||
cout << log << '\n';
|
||||
}
|
||||
|
||||
if (compileResult != HIPRTC_SUCCESS) {
|
||||
cout << "Compilation failed." << endl;
|
||||
}
|
||||
|
||||
size_t codeSize;
|
||||
hiprtcGetCodeSize(prog, &codeSize);
|
||||
|
||||
vector<char> code(codeSize);
|
||||
hiprtcGetCode(prog, code.data());
|
||||
|
||||
hiprtcDestroyProgram(&prog);
|
||||
|
||||
hipModule_t module;
|
||||
hipFunction_t kernel;
|
||||
|
||||
checkHipErrors(hipModuleLoadData(&module, code.data()));
|
||||
checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy"));
|
||||
|
||||
size_t n = NUM_THREADS * NUM_BLOCKS;
|
||||
size_t bufferSize = n * sizeof(float);
|
||||
|
||||
float a = 5.1f;
|
||||
unique_ptr<float[]> hX{new float[n]};
|
||||
unique_ptr<float[]> hY{new float[n]};
|
||||
unique_ptr<float[]> hOut{new float[n]};
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
hX[i] = static_cast<float>(i);
|
||||
hY[i] = static_cast<float>(i * 2);
|
||||
}
|
||||
|
||||
hipDeviceptr_t dX, dY, dOut;
|
||||
checkHipErrors(hipMalloc((void **)&dX, bufferSize));
|
||||
checkHipErrors(hipMalloc((void **)&dY, bufferSize));
|
||||
checkHipErrors(hipMalloc((void **)&dOut, bufferSize));
|
||||
checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize));
|
||||
checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize));
|
||||
|
||||
struct {
|
||||
float a_;
|
||||
hipDeviceptr_t b_;
|
||||
hipDeviceptr_t c_;
|
||||
hipDeviceptr_t d_;
|
||||
size_t e_;
|
||||
} args{a, dX, dY, dOut, n};
|
||||
|
||||
auto size = sizeof(args);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
|
||||
0, nullptr, nullptr, config));
|
||||
checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize));
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) {
|
||||
cout << "Validation failed." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
checkHipErrors(hipFree((void *)dX));
|
||||
checkHipErrors(hipFree((void *)dY));
|
||||
checkHipErrors(hipFree((void *)dOut));
|
||||
|
||||
checkHipErrors(hipModuleUnload(module));
|
||||
|
||||
cout << "SAXPY test passed" << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip_helper.h>
|
||||
#include "hip_test_features.hh"
|
||||
|
||||
/*
|
||||
* Square each element in the array A and write to array C.
|
||||
*/
|
||||
template <typename T>
|
||||
__global__ void vector_square(T* C_d, const T* A_d, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
C_d[i] = A_d[i] * A_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
static int device = 0;
|
||||
checkHipErrors(hipSetDevice(device));
|
||||
hipDeviceProp_t props;
|
||||
checkHipErrors(hipGetDeviceProperties(&props, device /*deviceID*/));
|
||||
printf("info: running on device %s\n", props.name);
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName);
|
||||
#endif
|
||||
if (!isGenericTargetSupported(props.gcnArchName)) {
|
||||
printf("%s has no generic target support. Skipped\n", props.gcnArchName);
|
||||
return 0;
|
||||
}
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
A_h = (float*)malloc(Nbytes);
|
||||
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
C_h = (float*)malloc(Nbytes);
|
||||
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
|
||||
checkHipErrors(hipMalloc(&A_d, Nbytes));
|
||||
checkHipErrors(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
printf("info: copy Host2Device\n");
|
||||
checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf("info: launch 'vector_square' kernel\n");
|
||||
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
|
||||
printf("info: copy Device2Host\n");
|
||||
checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
printf("info: check result\n");
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
checkHipErrors(hipErrorUnknown);
|
||||
}
|
||||
}
|
||||
|
||||
checkHipErrors(hipFree(A_d));
|
||||
checkHipErrors(hipFree(C_d));
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
|
||||
printf("PASSED: generic target!\n");
|
||||
}
|
||||
Αναφορά σε νέο ζήτημα
Block a user