SWDEV-447973 - Add generic target codeobj test

Add simple tests to verify generic target code
objects.

Change-Id: Iae148c3c938b18247624938512918dbb3cbc462e
Αυτή η υποβολή περιλαμβάνεται σε:
taosang2
2024-06-28 17:17:31 -04:00
υποβλήθηκε από Tao Sang
γονέας 168312b97f
υποβολή f581518103
14 αρχεία άλλαξαν με 720 προσθήκες και 4 διαγραφές
@@ -870,6 +870,8 @@
"Unit_safeAtomicMin_Positive_SameAddress - float",
"=== SWDEV-454220 : Below test hanged in stress test on 22/03/24 ===",
"Unit_hipExtLaunchKernel_Positive_Basic",
"=== Temporarily disable the test that failed in mi300 ===",
"Unit_test_generic_target_only_codeobject",
#endif
#if defined gfx1030
"=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===",
@@ -2,7 +2,7 @@
#include <iostream>
#include <assert.h>
#include <map>
#include "hip_test_context.hh"
std::vector<std::unordered_set<std::string>> GCNArchFeatMap = {
@@ -43,3 +43,96 @@ bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch) {
assert(false);
#endif
}
// Return true if agentTarget has corresponding generic target which will be returned in
// genericTarget;
// false, otherwise.
// Note: it will naturely return false on Nvidia device
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget) {
// The map is subject to change per removing policy
static std::map<std::string, std::string> genericTargetMap{
// "gfx9-generic"
{"gfx900", "gfx9-generic"},
{"gfx902", "gfx9-generic"},
{"gfx904", "gfx9-generic"},
{"gfx906", "gfx9-generic"},
{"gfx909", "gfx9-generic"},
{"gfx90c", "gfx9-generic"},
// "gfx9-4-generic
{"gfx940", "gfx9-4-generic"},
{"gfx941", "gfx9-4-generic"},
{"gfx942", "gfx9-4-generic"},
{"gfx950", "gfx9-4-generic"},
// "gfx10-1-generic"
{"gfx1010", "gfx10-1-generic"},
{"gfx1011", "gfx10-1-generic"},
{"gfx1012", "gfx10-1-generic"},
{"gfx1013", "gfx10-1-generic"},
// "gfx10-3-generic"
{"gfx1030", "gfx10-3-generic"},
{"gfx1031", "gfx10-3-generic"},
{"gfx1032", "gfx10-3-generic"},
{"gfx1033", "gfx10-3-generic"},
{"gfx1034", "gfx10-3-generic"},
{"gfx1035", "gfx10-3-generic"},
{"gfx1036", "gfx10-3-generic"},
// "gfx11-generic"
{"gfx1100", "gfx11-generic"},
{"gfx1101", "gfx11-generic"},
{"gfx1102", "gfx11-generic"},
{"gfx1103", "gfx11-generic"},
{"gfx1150", "gfx11-generic"},
{"gfx1151", "gfx11-generic"},
// "gfx12-generic"
{"gfx1200", "gfx12-generic"},
{"gfx1201", "gfx12-generic"},
};
auto search = genericTargetMap.find(agentTarget);
if (search == genericTargetMap.end()) return false;
genericTarget = search->second;
return true;
}
/*
Return true, if gcnArchName has corresponding generic target;
false, otherwise.
If gcnArchName is nullptr, it will be queried from deviceId;
otherwise, deviceId will be ignored.
The specific arches have the following mapping to generic targets,
Generic GFX11
--offload-arch=gfx11-generic - includes [gfx1100-gfx1103], gfx1150, gfx1151
Generic GFX10.3
--offload-arch=gfx10.3-generic - includes [gfx1030-gfx1036]
Generic GFX10.1
--offload-arch=gfx10.1-generic - includes [gfx1010-gfx1013]
Generic GFX9 / Consumer
--offload-arch=gfx9-generic - includes gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c
Generic GFX9.4 / Data center
--offload-arch=gfx9-4-generic - includes gfx940, gfx941, gfx942, gfx950
*/
bool isGenericTargetSupported(char* gcnArchName, int deviceId) {
hipDeviceProp_t props{};
if (gcnArchName == nullptr) {
if (hipGetDeviceProperties(&props, deviceId) != hipSuccess) return false;
gcnArchName = props.gcnArchName;
}
std::string target{gcnArchName};
std::string genericTarget{};
auto pos = target.find(':');
if (pos != std::string::npos) {
target[pos] = 0;
target.resize(pos);
}
return getGenericTarget(target, genericTarget);
}
@@ -34,6 +34,7 @@ THE SOFTWARE.
#include <mutex>
#include <cstdlib>
#include <thread>
#include "hip_test_features.hh"
#ifdef TEST_CLOCK_CYCLE
#define clock_function() clock64()
@@ -36,3 +36,5 @@ typedef enum CTFeatures {
} CTFeatures;
bool CheckIfFeatSupported(enum CTFeatures test_feat, std::string gcn_arch);
bool getGenericTarget(const std::string& agentTarget, std::string& genericTarget);
bool isGenericTargetSupported(char* gcnArchName = nullptr, int deviceId = 0);
@@ -17,5 +17,41 @@ if(HIP_PLATFORM MATCHES "amd")
hip_add_exe_to_target(NAME SimpleCompressedCodeObjectTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
endif()
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
# Build hipSquareGenericTargetOnly to cover generic targets only
# Because default catch2 build will reference CMAKE_CXX_FLAGS that contains specific targets which will hijack generic
# target in hip-rt, we have to use custom build to contain generic targets only.
set(GENERIC_TARGET_ONLY_EXE hipSquareGenericTargetOnly)
set(LIBFS)
if(WIN32)
set(GENERIC_TARGET_ONLY_EXE ${GENERIC_TARGET_ONLY_EXE}.exe)
else()
set(LIBFS -lstdc++fs)
endif()
add_custom_target(hipSquareGenericTargetOnly ALL
COMMAND ${CMAKE_CXX_COMPILER} -DNO_GENERIC_TARGET_ONLY_TEST --std=c++17 -mcode-object-version=6 -w "${OFFLOAD_ARCH_GENERIC_STR}"
${CMAKE_CURRENT_SOURCE_DIR}/hipSquareGenericTarget.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_context.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/hip_test_features.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../hipTestMain/main.cc
-o ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE}
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include
-I${CMAKE_CURRENT_SOURCE_DIR}/../../external/Catch2
-I${CMAKE_CURRENT_SOURCE_DIR}/../../external/picojson ${LIBFS})
set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/${GENERIC_TARGET_ONLY_EXE})
# Build hipSquareGenericTarget to cover generic targets and the specific target
set(TEST_SRC
hipSquareGenericTarget.cc
)
hip_add_exe_to_target(NAME hipSquareGenericTarget
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
set_source_files_properties(hipSquareGenericTarget.cc
PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}")
add_dependencies(hipSquareGenericTarget hipSquareGenericTargetOnly)
endif()
@@ -0,0 +1,108 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/*
* Square each element in the array A and write to array C.
*/
template <typename T>
static __global__ void vector_square_generic(T* C_d, const T* A_d, size_t N) {
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x;
for (size_t i = offset; i < N; i += stride) {
C_d[i] = A_d[i] * A_d[i];
}
}
TEST_CASE("Unit_test_generic_target_codeobject") {
if (!isGenericTargetSupported()) {
fprintf(stderr, "Generic target test is skipped\n");
return;
}
float *A_d, *C_d;
float *A_h, *C_h;
size_t N = 1000000;
size_t Nbytes = N * sizeof(float);
static int device = 0;
HIP_CHECK(hipSetDevice(device));
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, device /*deviceID*/));
printf("info: running on device %s\n", props.name);
#ifdef __HIP_PLATFORM_AMD__
printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName);
// check the scope of supportted types
#endif
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
A_h = (float*)malloc(Nbytes);
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
C_h = (float*)malloc(Nbytes);
HIP_CHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
printf("info: copy Host2Device\n");
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
printf("info: launch 'vector_square' kernel\n");
hipLaunchKernelGGL(vector_square_generic, dim3(blocks), dim3(threadsPerBlock),
0, 0, C_d, A_d, N);
printf("info: copy Device2Host\n");
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
printf("info: check result\n");
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
HIP_CHECK(hipErrorUnknown);
}
}
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A_h);
free(C_h);
printf("PASSED!\n");
REQUIRE(true);
}
#ifndef NO_GENERIC_TARGET_ONLY_TEST
TEST_CASE("Unit_test_generic_target_only_codeobject") {
#ifdef __linux__
char *cmd = "chmod u+x ./hipSquareGenericTargetOnly && ./hipSquareGenericTargetOnly";
#else
char *cmd = "hipSquareGenericTargetOnly.exe";
#endif
printf("Run %s\n", cmd);
REQUIRE(std::system(cmd) == 0);
printf("PASSED!\n");
}
#endif
@@ -113,12 +113,21 @@ add_custom_target(copyKernelCompressed.code
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
add_custom_target(copyKernelGenericTarget.code
COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=6 --genco ${OFFLOAD_ARCH_GENERIC_STR}
${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc
-o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/copyKernelGenericTarget.code
-I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH}
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
set_property(GLOBAL APPEND PROPERTY G_INSTALL_CUSTOM_TARGETS
${CMAKE_CURRENT_BINARY_DIR}/empty_module.code
${CMAKE_CURRENT_BINARY_DIR}/copyKernel.code
${CMAKE_CURRENT_BINARY_DIR}/copyKernel.s
${CMAKE_CURRENT_BINARY_DIR}/addKernel.code
${CMAKE_CURRENT_BINARY_DIR}/copyKernelCompressed.code
${CMAKE_CURRENT_BINARY_DIR}/copyKernelGenericTarget.code
)
if(UNIX)
@@ -214,6 +223,8 @@ add_dependencies(build_tests empty_module.code)
add_dependencies(build_tests copyKernel.code copyKernel.s)
add_dependencies(build_tests addKernel.code)
add_dependencies(build_tests copyKernelCompressed.code)
add_dependencies(build_tests copyKernelGenericTarget.code)
if(UNIX)
add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code
copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code)
@@ -52,6 +52,8 @@ THE SOFTWARE.
constexpr auto fileName = "copyKernel.code";
constexpr auto kernel_name = "copy_ker";
constexpr auto fileNameCompressed = "copyKernelCompressed.code";
constexpr auto fileNameGenericTarget = "copyKernelGenericTarget.code";
static constexpr auto totalWorkGroups{1024};
static constexpr auto localWorkSize{512};
static constexpr auto lastWorkSizeEven{256};
@@ -196,6 +198,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") {
SECTION("compressed codeobjects") {
HIP_CHECK(hipModuleLoad(&Module, fileNameCompressed));
}
SECTION("generic target codeobjects") {
if (!isGenericTargetSupported()) {
fprintf(stderr, "Generic target test is skipped\n");
return;
}
HIP_CHECK(hipModuleLoad(&Module, fileNameGenericTarget));
}
HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name));
// Allocate resources
int* A = new int[arraylength];
@@ -36,7 +36,7 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") {
HIP_CHECK(hipModuleUnload(module));
}
#if defined(__HIP_PLATFORM_AMD__)
#if HT_AMD
SECTION("Load compiled module from file with compressed code objects") {
const auto loaded_module = LoadModuleIntoBuffer("copyKernelCompressed.code");
HIP_CHECK(hipModuleLoadData(&module, loaded_module.data()));
@@ -46,6 +46,20 @@ TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") {
REQUIRE(kernel != nullptr);
HIP_CHECK(hipModuleUnload(module));
}
SECTION("Load compiled module from file with generic target code objects") {
if (!isGenericTargetSupported()) {
fprintf(stderr, "Generic target test is skipped\n");
return;
}
const auto loaded_module = LoadModuleIntoBuffer("copyKernelGenericTarget.code");
HIP_CHECK(hipModuleLoadData(&module, loaded_module.data()));
REQUIRE(module != nullptr);
hipFunction_t kernel = nullptr;
HIP_CHECK(hipModuleGetFunction(&kernel, module, "copy_ker"));
REQUIRE(kernel != nullptr);
HIP_CHECK(hipModuleUnload(module));
}
#endif
SECTION("Load RTCd module") {
@@ -23,4 +23,5 @@ add_custom_target(build_intro)
add_subdirectory(bit_extract)
add_subdirectory(module_api)
add_subdirectory(module_api_global)
add_subdirectory(square)
add_subdirectory(square)
add_subdirectory(generic_target)
@@ -0,0 +1,88 @@
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
cmake_minimum_required(VERSION 3.10)
project(generic_target)
if(UNIX)
if(NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
endif()
# Find hiprtc
find_package(hiprtc)
# Find hip
find_package(hip)
if(NOT HIP_PLATFORM MATCHES "amd")
message("Generic target is only supporte on AMD GPU")
return()
endif()
# Set compiler and linker
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
# Create the excutable
if(TARGET build_intro)
set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)
else()
set(EXCLUDE_OPTION )
endif()
# Test generic target without feature
set(OFFLOAD_ARCH_GENERIC_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
add_executable(squareGenericTarget ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc)
target_include_directories(squareGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
set_target_properties(squareGenericTarget PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_STR}")
target_link_libraries(squareGenericTarget hip::host)
# Test generic target with features
set(OFFLOAD_ARCH_GENERIC_FEATURE_STR "--offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic")
add_executable(squareGenericTargetWithFeatures ${EXCLUDE_OPTION} square.cpp ../../../catch/hipTestMain/hip_test_features.cc)
target_include_directories(squareGenericTargetWithFeatures PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
set_target_properties(squareGenericTargetWithFeatures PROPERTIES COMPILE_FLAGS "-mcode-object-version=6 -w ${OFFLOAD_ARCH_GENERIC_FEATURE_STR}")
target_link_libraries(squareGenericTargetWithFeatures hip::host)
# Create the excutable
add_executable(saxpyGenericTarget ${EXCLUDE_OPTION} saxpy.cpp ../../../catch/hipTestMain/hip_test_features.cc)
# Link with HIPRTC
target_link_libraries(saxpyGenericTarget hiprtc)
# Link with HIP
target_link_libraries(saxpyGenericTarget hip::device)
if(NOT BUILD_SHARED_LIBS)
target_link_libraries(saxpyGenericTarget hiprtc-builtins)
endif()
target_include_directories(saxpyGenericTarget PRIVATE ../../common ${CMAKE_CURRENT_SOURCE_DIR}/../../../catch/include/)
if(TARGET build_intro)
add_dependencies(build_intro saxpyGenericTarget)
add_dependencies(build_intro squareGenericTarget)
add_dependencies(build_intro squareGenericTargetWithFeatures)
endif()
@@ -0,0 +1,61 @@
# GenericTarget.md
- Add hip/bin path to the PATH
```
$ export PATH=$PATH:[MYHIP]/bin
```
- Define environment variable
```
$ export HIP_PATH=[MYHIP]
```
- Create build folder
```
$ cd ~/hip/samples/0_Intro/genericTarget
mkdir -p build && cd build
```
- Build with cmake
```
cmake ..
make
```
- Build without cmake
```
/opt/rocm/bin/hipcc ../square.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTarget
/opt/rocm/bin/hipcc ../square1.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include --offload-arch=gfx9-generic --offload-arch=gfx9-4-generic:sramecc+:xnack- --offload-arch=gfx9-4-generic:sramecc-:xnack- --offload-arch=gfx9-4-generic:xnack+ --offload-arch=gfx10-1-generic --offload-arch=gfx10-3-generic --offload-arch=gfx11-generic --offload-arch=gfx12-generic -mcode-object-version=6 -w -o squareGenericTargetWithFeatures
/opt/rocm/bin/hipcc ../saxpy.cpp ../../../../catch/hipTestMain/hip_test_features.cc -I../../../common -I../../../../catch/include -o saxpyGenericTarget
```
- Execute tests
```
$ ./squareGenericTarget
info: running on device AMD Radeon RX 6900 XT
info: allocate host mem ( 7.63 MB)
info: allocate device mem ( 7.63 MB)
info: copy Host2Device
info: launch 'vector_square' kernel
info: copy Device2Host
info: check result
PASSED: generic target!
Best to run on Mi3XX to verify features
$ ./squareGenericTargetWithFeatures
info: running on device AMD Radeon RX 6900 XT
info: allocate host mem ( 7.63 MB)
info: allocate device mem ( 7.63 MB)
info: copy Host2Device
info: launch 'vector_square' kernel
info: copy Device2Host
info: check result
PASSED: generic targets!
$./saxyGenericTarget
Find generic target gfx11-generic
SAXPY test passed
```
@@ -0,0 +1,193 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hiprtc.h>
#include <hip/hip_runtime.h>
#include <hip_helper.h>
#include <cassert>
#include <cstddef>
#include <memory>
#include <iostream>
#include <iterator>
#include <vector>
#include "hip_test_features.hh"
static constexpr auto NUM_THREADS{128};
static constexpr auto NUM_BLOCKS{32};
using namespace std;
static constexpr auto saxpy{
R"(
#include "test_header.h"
#include "test_header1.h"
extern "C"
__global__
void saxpy(real a, realptr x, realptr y, realptr out, size_t n)
{
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < n) {
out[tid] = a * x[tid] + y[tid] ;
}
}
)"};
int main()
{
hipDeviceProp_t props;
int device = 0;
checkHipErrors(hipSetDevice(device));
checkHipErrors(hipGetDeviceProperties(&props, device));
string agentTarget{props.gcnArchName};
string genericTarget{};
string postFix{};
auto pos = agentTarget.find(':');
if (pos != std::string::npos) {
postFix = agentTarget.substr(pos); // Features
agentTarget.resize(pos);
}
if (!getGenericTarget(agentTarget, genericTarget)) {
cout << props.gcnArchName <<" has no generic target support. Skipped!" << endl;
return 0;
}
if (agentTarget.find("gfx906") != std::string::npos) {
if (postFix.find(":sramecc") != std::string::npos) {
// gfx906's generic target gfx9-generic doesn't support sramecc
postFix = postFix.substr(strlen(":sramecc") + 1);
}
}
if (!postFix.empty()) {
genericTarget += postFix;
}
cout << "Find generic target " << genericTarget << endl;
hiprtcProgram prog;
int num_headers = 2;
vector<const char*> header_names;
vector<const char*> header_sources;
header_names.push_back("test_header.h");
header_names.push_back("test_header1.h");
header_sources.push_back("#ifndef HIPRTC_TEST_HEADER_H\n#define HIPRTC_TEST_HEADER_H\ntypedef float real;\n#endif //HIPRTC_TEST_HEADER_H\n");
header_sources.push_back("#ifndef HIPRTC_TEST_HEADER1_H\n#define HIPRTC_TEST_HEADER1_H\ntypedef float* realptr;\n#endif //HIPRTC_TEST_HEADER1_H\n");
hiprtcCreateProgram(&prog, // prog
saxpy, // buffer
"saxpy.cu", // name
num_headers, // numHeaders
&header_sources[0], // headers
&header_names[0]); // includeNames
string offload {"--offload-arch="};
offload += genericTarget.c_str();
/*
* offload must be one of following:
* "--offload-arch=gfx9-generic",
* "--offload-arch=gfx9-4-generic",
* "--offload-arch=gfx10-1-generic",
* "--offload-arch=gfx10-3-generic",
* "--offload-arch=gfx11-generic",
* "--offload-arch=gfx12-generic"
*
* */
const char* options[] = {offload.c_str(), "-mcode-object-version=6", "-w"};
hiprtcResult compileResult {
hiprtcCompileProgram(prog, sizeof(options) / sizeof(options[0]), options) };
size_t logSize;
hiprtcGetProgramLogSize(prog, &logSize);
if (logSize) {
string log(logSize, '\0');
hiprtcGetProgramLog(prog, &log[0]);
cout << log << '\n';
}
if (compileResult != HIPRTC_SUCCESS) {
cout << "Compilation failed." << endl;
}
size_t codeSize;
hiprtcGetCodeSize(prog, &codeSize);
vector<char> code(codeSize);
hiprtcGetCode(prog, code.data());
hiprtcDestroyProgram(&prog);
hipModule_t module;
hipFunction_t kernel;
checkHipErrors(hipModuleLoadData(&module, code.data()));
checkHipErrors(hipModuleGetFunction(&kernel, module, "saxpy"));
size_t n = NUM_THREADS * NUM_BLOCKS;
size_t bufferSize = n * sizeof(float);
float a = 5.1f;
unique_ptr<float[]> hX{new float[n]};
unique_ptr<float[]> hY{new float[n]};
unique_ptr<float[]> hOut{new float[n]};
for (size_t i = 0; i < n; ++i) {
hX[i] = static_cast<float>(i);
hY[i] = static_cast<float>(i * 2);
}
hipDeviceptr_t dX, dY, dOut;
checkHipErrors(hipMalloc((void **)&dX, bufferSize));
checkHipErrors(hipMalloc((void **)&dY, bufferSize));
checkHipErrors(hipMalloc((void **)&dOut, bufferSize));
checkHipErrors(hipMemcpyHtoD(dX, hX.get(), bufferSize));
checkHipErrors(hipMemcpyHtoD(dY, hY.get(), bufferSize));
struct {
float a_;
hipDeviceptr_t b_;
hipDeviceptr_t c_;
hipDeviceptr_t d_;
size_t e_;
} args{a, dX, dY, dOut, n};
auto size = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END};
checkHipErrors(hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
0, nullptr, nullptr, config));
checkHipErrors(hipMemcpyDtoH(hOut.get(), dOut, bufferSize));
for (size_t i = 0; i < n; ++i) {
if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) {
cout << "Validation failed." << endl;
}
}
checkHipErrors(hipFree((void *)dX));
checkHipErrors(hipFree((void *)dY));
checkHipErrors(hipFree((void *)dOut));
checkHipErrors(hipModuleUnload(module));
cout << "SAXPY test passed" << endl;
return 0;
}
@@ -0,0 +1,97 @@
/*
Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdio.h>
#include "hip/hip_runtime.h"
#include <hip_helper.h>
#include "hip_test_features.hh"
/*
* Square each element in the array A and write to array C.
*/
template <typename T>
__global__ void vector_square(T* C_d, const T* A_d, size_t N) {
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x;
for (size_t i = offset; i < N; i += stride) {
C_d[i] = A_d[i] * A_d[i];
}
}
int main(int argc, char* argv[]) {
float *A_d, *C_d;
float *A_h, *C_h;
size_t N = 1000000;
size_t Nbytes = N * sizeof(float);
static int device = 0;
checkHipErrors(hipSetDevice(device));
hipDeviceProp_t props;
checkHipErrors(hipGetDeviceProperties(&props, device /*deviceID*/));
printf("info: running on device %s\n", props.name);
#ifdef __HIP_PLATFORM_AMD__
printf("info: architecture on AMD GPU device is: %s\n", props.gcnArchName);
#endif
if (!isGenericTargetSupported(props.gcnArchName)) {
printf("%s has no generic target support. Skipped\n", props.gcnArchName);
return 0;
}
printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
A_h = (float*)malloc(Nbytes);
checkHipErrors(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
C_h = (float*)malloc(Nbytes);
checkHipErrors(C_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
checkHipErrors(hipMalloc(&A_d, Nbytes));
checkHipErrors(hipMalloc(&C_d, Nbytes));
printf("info: copy Host2Device\n");
checkHipErrors(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
printf("info: launch 'vector_square' kernel\n");
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
printf("info: copy Device2Host\n");
checkHipErrors(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf("info: check result\n");
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
checkHipErrors(hipErrorUnknown);
}
}
checkHipErrors(hipFree(A_d));
checkHipErrors(hipFree(C_d));
free(A_h);
free(C_h);
printf("PASSED: generic target!\n");
}