[rocprofiler-systems] Add build option for "examples" to specify gfx-arch (#2626)

## Motivation
 - Added a `check_rocminfo` function that returns true if the provided regex is found in the `rocminfo` output, and false otherwise. With `GET_OUTPUT`, it instead returns the regex matches (or the raw `rocminfo` output when the regex is empty).
 - Moved `rocprofiler_systems_get_gfx_archs()` to `MacroUtilities.cmake` 
 - Added `rocprofiler_systems_lookup_gfx()`, which detects whether a given `gfx` is from the `instinct`, `radeon` or `apu` family.
 - Added `ROCPROFSYS_GFX_TARGETS` as a build argument. Used to specify the offloading architectures that GPU examples should compile for. If empty, defaults to whatever your system has.
 - GPU examples now check if the given `gfx` targets (from `ROCPROFSYS_GFX_TARGETS`) are supported.
 - OMPVV offload tests now only compile if `amdflang` version is `>= 20`
 - Improve link time by reducing the number of GFX targets that binaries need to support.
   - RCCL is now passed a `GPU_TARGETS` var specifying the architectures to build/link against.
This commit is contained in:
Kian Cossettini
2026-01-20 12:13:21 -05:00
zatwierdzone przez GitHub
rodzic 4a5cbbfba5
commit 698ac6b8bc
13 zmienionych plików z 352 dodań i 162 usunięć
@@ -12,6 +12,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
- Added dependency to `spdlog` library.
- Added environment variable `ROCPROFSYS_LOG_LEVEL`, which controls the logging level.
- Available log levels: `critical`, `error`, `warning`, `info`(default), `debug`, `trace` and `off`.
- Added cmake option `ROCPROFSYS_GFX_TARGETS` which controls GFX targets used to build example binaries.
### Changed
@@ -581,6 +581,148 @@ function(ROCPROFILER_SYSTEMS_PRINT_FEATURES)
rocprofiler_systems_print_disabled_features()
endfunction()
# ----------------------------------------------------------------------------
# function check_rocminfo()
# Runs rocminfo and searches its output for a regular expression.
# By default the result is a boolean; if GET_OUTPUT is present, the result is text.
#
# ARGS:
#   _REGEX:           The regex to search for (string(REGEX MATCHALL) syntax).
#                     May be empty only when GET_OUTPUT is given.
#   _RESULT_VARIABLE: Name of the variable, set in the caller's scope, that receives
#                     the result
#   GET_OUTPUT:       Option. If present, return text instead of a boolean
#
# Returns (in _RESULT_VARIABLE):
#   Default:    TRUE if the regex matched at least once, FALSE otherwise. Also FALSE
#               if rocminfo was not found or exited with a non-zero status.
#   GET_OUTPUT: the list of regex matches, or the unfiltered rocminfo output when
#               _REGEX is empty. Empty string if rocminfo was not found or exited
#               with a non-zero status.
#
function(CHECK_ROCMINFO _REGEX _RESULT_VARIABLE)
    cmake_parse_arguments(ARG "GET_OUTPUT" "" "" ${ARGN})

    # cmake_parse_arguments() always defines option variables (to TRUE or FALSE), so
    # the option has to be tested by value. A DEFINED test would always succeed.
    if(NOT ARG_GET_OUTPUT AND _REGEX STREQUAL "")
        message(FATAL_ERROR "Regex is empty, but GET_OUTPUT is not defined")
    endif()

    find_program(
        rocminfo_EXECUTABLE
        NAMES rocminfo
        HINTS ${ROCM_PATH} ${ROCmVersion_DIR} /opt/rocm
        PATHS ${ROCM_PATH} ${ROCmVersion_DIR} /opt/rocm
        PATH_SUFFIXES bin
    )

    set(_result FALSE)
    set(_failure FALSE)

    if(rocminfo_EXECUTABLE)
        execute_process(
            COMMAND ${rocminfo_EXECUTABLE}
            RESULT_VARIABLE rocminfo_RET
            OUTPUT_VARIABLE rocminfo_OUTPUT
            ERROR_VARIABLE rocminfo_ERROR
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_STRIP_TRAILING_WHITESPACE
        )

        if(rocminfo_RET EQUAL 0)
            if(NOT _REGEX STREQUAL "")
                # From here on rocminfo_OUTPUT holds the list of matches, not the raw text
                string(REGEX MATCHALL "${_REGEX}" rocminfo_OUTPUT "${rocminfo_OUTPUT}")
                # Compare against the empty string instead of relying on truthiness, so
                # that a match such as "0" is still reported as found
                if(NOT rocminfo_OUTPUT STREQUAL "")
                    set(_result TRUE)
                endif()
            endif()
        else()
            message(
                AUTHOR_WARNING
                "${rocminfo_EXECUTABLE} failed with error code ${rocminfo_RET}\nstderr:\n${rocminfo_ERROR}\nstdout:\n${rocminfo_OUTPUT}"
            )
            set(_failure TRUE)
        endif()
    else()
        message(AUTHOR_WARNING "rocminfo not found")
        set(_failure TRUE)
    endif()

    if(ARG_GET_OUTPUT)
        # Text mode: hand back the matches / raw output, or nothing on failure
        if(NOT _failure)
            set(${_RESULT_VARIABLE} "${rocminfo_OUTPUT}" PARENT_SCOPE)
        else()
            set(${_RESULT_VARIABLE} "" PARENT_SCOPE)
        endif()
        return()
    endif()

    # Boolean mode
    set(${_RESULT_VARIABLE} ${_result} PARENT_SCOPE)
endfunction()
# ----------------------------------------------------------------------------------------#
# function rocprofiler_systems_get_gfx_archs()
# Detects the GPU architectures reported by rocminfo (via check_rocminfo()).
# If a regex is provided, it will be used to filter the architectures.
# Otherwise, all architectures will be returned.
#
# Arguments:
#   _VAR      - Output variable to store detected architectures. Always assigned:
#               it is set to an empty string when nothing could be detected.
#   ECHO      - If present, print detected architectures to console
#   PREFIX    - Prefix for echo message (default: [${PROJECT_NAME}])
#   DELIM     - Delimiter between architectures (default: ", ")
#   GFX_MATCH - Regex to filter architectures
#
function(ROCPROFILER_SYSTEMS_GET_GFX_ARCHS _VAR)
    cmake_parse_arguments(ARG "ECHO" "PREFIX;DELIM;GFX_MATCH" "" ${ARGN})

    if(NOT DEFINED ARG_DELIM)
        set(ARG_DELIM ", ")
    endif()

    if(NOT DEFINED ARG_PREFIX)
        set(ARG_PREFIX "[${PROJECT_NAME}] ")
    endif()

    # Match only "Name:" lines to avoid matching gfx in marketing names/descriptions.
    check_rocminfo("Name:[ \t]+gfx[0-9A-Fa-f][0-9A-Fa-f]+" _RAW_GFXINFO GET_OUTPUT)

    if(NOT _RAW_GFXINFO)
        message(AUTHOR_WARNING "Could not get system architectures")
        # Assign the output explicitly so the caller never keeps a stale value
        # (for example a "default" placeholder) that would be mistaken for a target.
        set(${_VAR} "" PARENT_SCOPE)
        return()
    endif()

    # Extract just the gfx architecture from each "Name: gfxXXXX" match
    set(_GFXINFO "")
    foreach(_match IN LISTS _RAW_GFXINFO)
        string(REGEX MATCH "gfx[0-9A-Fa-f]+" _arch "${_match}")
        if(_arch)
            list(APPEND _GFXINFO "${_arch}")
        endif()
    endforeach()

    # gfx000 is dropped from the result; each remaining architecture is listed once
    list(REMOVE_ITEM _GFXINFO "gfx000")
    list(REMOVE_DUPLICATES _GFXINFO)

    # Filter architectures if GFX_MATCH regex is provided
    if(DEFINED ARG_GFX_MATCH)
        set(_FILTERED_GFXINFO "")
        foreach(_arch IN LISTS _GFXINFO)
            if(_arch MATCHES "${ARG_GFX_MATCH}")
                list(APPEND _FILTERED_GFXINFO "${_arch}")
            endif()
        endforeach()
        set(_GFXINFO "${_FILTERED_GFXINFO}")
    endif()

    # Echo detected architectures if requested
    if(ARG_ECHO)
        string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${_GFXINFO}")
        if(DEFINED ARG_GFX_MATCH)
            message(
                STATUS
                "${ARG_PREFIX}System architectures (filtered: ${ARG_GFX_MATCH}): ${_GFXINFO_ECHO}"
            )
        else()
            message(STATUS "${ARG_PREFIX}System architectures: ${_GFXINFO_ECHO}")
        endif()
    endif()

    set(${_VAR} "${_GFXINFO}" PARENT_SCOPE)
endfunction()
# ----------------------------------------------------------------------------------------#
# this function is provided to easily select which files use alternative compiler:
#
@@ -1003,4 +1145,79 @@ function(COMPUTE_POW2_CEIL _OUTPUT _VALUE)
endif()
endfunction()
# ----------------------------------------------------------------------------
# function rocprofiler_systems_lookup_gfx()
# Maps an AMD GPU architecture (gfx ID) onto the product families it belongs to:
# instinct, radeon and/or apu.
#
# ARGS:
#   _TARGET:      The gfx ID to classify
#   _OUTPUT_LIST: Name of the variable, set in the caller's scope, that receives the
#                 list of categories the target belongs to (instinct, radeon, apu)
#
# Note: If architecture is unknown, a warning is emitted and it defaults to instinct
#
function(ROCPROFILER_SYSTEMS_LOOKUP_GFX _TARGET _OUTPUT_LIST)
    # gfx906 covers MI50/MI60
    set(_instinct_ids "gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx950")

    # Also includes PRO GPUs. Radeon VII (gfx906) is deliberately left out.
    set(_radeon_ids
        "gfx1012" "gfx1011" "gfx1010"
        "gfx1032" "gfx1031" "gfx1030"
        "gfx1102" "gfx1101" "gfx1100"
        "gfx1200" "gfx1201" "gfx1202"
    )

    set(_apu_ids "gfx1035" "gfx1036" "gfx1103" "gfx1151" "gfx1152" "gfx1153")

    set(_families "")

    if(_TARGET IN_LIST _instinct_ids)
        list(APPEND _families "instinct")
        # Some instinct GPUs may also be an APU (ex: MI300A); this is decided by
        # looking for "APU" in the rocminfo output of the machine running CMake
        check_rocminfo("APU" _has_apu)
        if(_has_apu)
            list(APPEND _families "apu")
        endif()
    endif()

    if(_TARGET IN_LIST _radeon_ids)
        list(APPEND _families "radeon")
    endif()

    if(_TARGET IN_LIST _apu_ids)
        list(APPEND _families "apu")
    endif()

    # Nothing matched: warn and fall back to instinct
    list(LENGTH _families _family_count)
    if(_family_count EQUAL 0)
        rocprofiler_systems_message(
            AUTHOR_WARNING
            "Unknown GFX target: ${_TARGET}. Defaulting to instinct"
        )
        list(APPEND _families "instinct")
    endif()

    set(${_OUTPUT_LIST} "${_families}" PARENT_SCOPE)
endfunction()
cmake_policy(POP)
@@ -58,6 +58,35 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
include(GNUInstallDirs)
endif()
# GPU architectures to compile for
# If not set (undefined, empty, or the literal "default"), auto-detects from the
# system GPU (or empty if no GPU present)
if(
    NOT DEFINED ROCPROFSYS_GFX_TARGETS
    OR ROCPROFSYS_GFX_TARGETS STREQUAL ""
    OR ROCPROFSYS_GFX_TARGETS STREQUAL "default"
)
    # Clear the placeholder first so that "default" can never be treated as an
    # architecture name, even if detection leaves the variable untouched.
    set(ROCPROFSYS_GFX_TARGETS "")
    rocprofiler_systems_get_gfx_archs(ROCPROFSYS_GFX_TARGETS)
endif()

# Store the resolved list (user-provided or detected) in the cache
set(ROCPROFSYS_GFX_TARGETS
    "${ROCPROFSYS_GFX_TARGETS}"
    CACHE STRING
    "GPU architectures to compile for (semicolon-separated)"
    FORCE
)

if(ROCPROFSYS_GFX_TARGETS)
    # Report every target together with the categories it was classified into
    message(STATUS "")
    rocprofiler_systems_message(STATUS "Detected targets:")
    foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
        rocprofiler_systems_lookup_gfx(${arch} _GFX_TYPE)
        message(STATUS " ${arch} (categories: ${_GFX_TYPE})")
    endforeach()
    message(STATUS "")
else()
    rocprofiler_systems_message(STATUS "No GPU targets detected/set")
endif()
set(ROCPROFSYS_EXAMPLE_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "")
# defines function for creating causal profiling exes
include(${CMAKE_CURRENT_LIST_DIR}/causal-helpers.cmake)
@@ -20,14 +20,12 @@ target_link_libraries(
)
target_compile_options(fork-example PRIVATE ${_FLAGS})
if(ROCPROFSYS_INSTALL_EXAMPLES)
if(TARGET fork-example)
install(
TARGETS fork-example
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
COMPONENT rocprofiler-systems-examples
)
endif()
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET fork-example)
install(
TARGETS fork-example
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
COMPONENT rocprofiler-systems-examples
)
endif()
# HIP fork example (multi-process concurrency test)
@@ -64,6 +62,18 @@ if(HIPCC_EXECUTABLE)
)
add_executable(hipMallocConcurrencyMproc hipMallocConcurrencyMproc.cpp)
target_link_libraries(hipMallocConcurrencyMproc PRIVATE Threads::Threads)
set_target_properties(
hipMallocConcurrencyMproc
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/fork"
)
# Specify GPU architectures to compile for
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
target_compile_options(
hipMallocConcurrencyMproc
PRIVATE --offload-arch=${arch}
)
target_link_options(hipMallocConcurrencyMproc PUBLIC --offload-arch=${arch})
endforeach()
if(
CMAKE_CXX_COMPILER_ID MATCHES "Clang"
@@ -101,22 +101,6 @@ foreach(OPENMP_EXAMPLE IN LISTS OPENMP_EXAMPLES)
endif()
endforeach()
set(DEFAULT_GPU_TARGETS
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx950"
"gfx1030"
"gfx1010"
"gfx1100"
"gfx1101"
"gfx1102"
)
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "GPU targets to compile for")
if(ROCPROFSYS_USE_ROCM)
add_subdirectory(external)
else()
@@ -25,6 +25,11 @@ endif()
rocprofiler_systems_message(STATUS "Configuring OMPVV...")
if(NOT ROCPROFSYS_GFX_TARGETS)
rocprofiler_systems_message(WARNING "No GPU targets detected/set. Disabling OMPVV offload tests...")
set(OMPVV_USE_OFFLOAD_TESTS FALSE)
endif()
# Master branch contains stable releases
rocprofiler_systems_checkout_git_submodule(
RELATIVE_PATH
@@ -63,9 +68,6 @@ function(configure_ompvv_tests TEST_TYPE TEST_LIST_VAR)
elseif(TEST_TYPE STREQUAL "OFFLOAD")
set(TARGET_PREFIX "openmp-vv-offload")
set(FOFFLOADING_FLAGS "-fopenmp")
foreach(arch IN LISTS DEFAULT_GPU_TARGETS)
set(FOFFLOADING_FLAGS "${FOFFLOADING_FLAGS} --offload-arch=${arch}")
endforeach()
else()
rocprofiler_systems_message(FATAL_ERROR "Unknown TEST_TYPE - Only HOST and OFFLOAD supported")
endif()
@@ -77,6 +79,12 @@ function(configure_ompvv_tests TEST_TYPE TEST_LIST_VAR)
string(REPLACE "_" "-" TARGET_NAME ${TEST_NAME})
set(TARGET_NAME "${TARGET_PREFIX}-${TARGET_NAME}")
if(TARGET_PREFIX STREQUAL "openmp-vv-offload")
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
set(FOFFLOADING_FLAGS "${FOFFLOADING_FLAGS} --offload-arch=${arch}")
endforeach()
endif()
set(CUSTOM_FFLAGS "-lm ${FOFFLOADING_FLAGS}")
set(CUSTOM_FLINKFLAGS "-lm ${FOFFLOADING_FLAGS}")
@@ -184,7 +192,7 @@ set(OMPVV_COMPILER
CACHE FILEPATH
"Fortran compiler used for OMPVV tests"
)
set(OMPVV_FC "amdflang")
set(OMPVV_FC "${amdflang_EXECUTABLE}")
execute_process(
COMMAND ${amdflang_EXECUTABLE} --version
@@ -237,7 +245,7 @@ set(OMPVV_HOST_TESTS_TO_COMPILE
# Set of offloading tests to be compiled (excluding reduction and simd_atomic
# tests due to OMPVV test failure)
set(OMPVV_OFFLOAD_TESTS_TO_COMPILE "")
if(OMPVV_USE_OFFLOAD_TESTS)
if(OMPVV_USE_OFFLOAD_TESTS AND AMDFLANG_VERSION_MAJOR GREATER_EQUAL 20)
set(OMPVV_OFFLOAD_TESTS_TO_COMPILE
"${OMPVV_TDIR}/target_simd/test_target_simd_if.F90"
"${OMPVV_TDIR}/target_teams_distribute_parallel_for/test_target_teams_distribute_parallel_for_collapse.F90"
@@ -3,6 +3,11 @@
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
if(NOT ROCPROFSYS_GFX_TARGETS)
rocprofiler_systems_message(WARNING "No GPU targets detected/set. Disabling OpenMP target example...")
return()
endif()
if(NOT OMP_TARGET_COMPILER)
find_program(
amdclangpp_EXECUTABLE
@@ -36,7 +41,7 @@ set(CMAKE_BUILD_TYPE "RelWithDebInfo")
find_package(Threads REQUIRED)
function(add_offload_flags tgt)
foreach(arch IN LISTS GPU_TARGETS)
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
target_compile_options(${tgt} PRIVATE --offload-arch=${arch})
target_link_options(${tgt} PUBLIC --offload-arch=${arch})
endforeach()
@@ -1,24 +1,5 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Copyright (c) Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT "rccl-tests")
@@ -68,6 +49,33 @@ if(hip_FOUND AND rccl_FOUND)
return()
endif()
# Work out which of the requested GPU targets rccl-tests can be built for.
#
# Parse comment describing supported gfx targets from rccl src/Makefile: grep for
# the line containing "Currently, supports" and capture it in _supported_line.
execute_process(
COMMAND grep "Currently, supports" ${rccl-tests_SOURCE_DIR}/src/Makefile
OUTPUT_VARIABLE _supported_line
OUTPUT_STRIP_TRAILING_WHITESPACE
RESULT_VARIABLE _rccl_grep_ret
)
# Any non-zero grep status aborts the configure step
if(NOT _rccl_grep_ret EQUAL 0)
message(
FATAL_ERROR
"This grep should never fail unless rccl-tests/src/Makefile was modified in an incompatible way"
)
endif()
# Extract just the gfx architectures (every "gfx" followed by digits/lowercase letters)
string(REGEX MATCHALL "gfx[0-9a-z]+" RCCL_SUPPORTED_TARGETS "${_supported_line}")
# Keep only the requested targets that also appear in the supported list
# NOTE(review): RCCL_GPU_TARGETS is a CMake list. Where it is expanded unquoted on a
# command line (see the GPU_TARGETS=${RCCL_GPU_TARGETS} make argument), every element
# after the first becomes a separate argument - confirm multi-target builds work.
set(RCCL_GPU_TARGETS "")
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
if(arch IN_LIST RCCL_SUPPORTED_TARGETS)
list(APPEND RCCL_GPU_TARGETS ${arch})
endif()
endforeach()
# Nothing buildable: warn and stop processing this file
if(RCCL_GPU_TARGETS STREQUAL "")
message(AUTHOR_WARNING "rccl-tests skipped. No supported GPU targets found.")
return()
endif()
# Copy source to build directory
file(COPY ${rccl-tests_SOURCE_DIR}/ DESTINATION ${rccl-tests_BUILD_DIR})
@@ -78,7 +86,9 @@ if(hip_FOUND AND rccl_FOUND)
add_custom_target(
build-rccl-tests
ALL
COMMAND make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR} -j
COMMAND
make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR}
GPU_TARGETS=${RCCL_GPU_TARGETS} -j
WORKING_DIRECTORY ${rccl-tests_BUILD_DIR}
COMMENT
"Building rccl-tests with HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR} ..."
@@ -89,6 +89,12 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
target_compile_options(roctx PRIVATE -g1)
endif()
# Specify GPU architectures to compile for
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
target_compile_options(roctx PRIVATE --offload-arch=${arch})
target_link_options(roctx PUBLIC --offload-arch=${arch})
endforeach()
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
# defined in MacroUtilities.cmake
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx)
@@ -118,6 +118,16 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
target_compile_options(transferBench PRIVATE -g1)
endif()
# Specify GPU architectures to compile for.
# A target is used unless it is classified as an APU without also being classified
# as instinct.
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
    rocprofiler_systems_lookup_gfx(${arch} GPU_CATEGORIES)
    if(NOT "apu" IN_LIST GPU_CATEGORIES OR "instinct" IN_LIST GPU_CATEGORIES)
        target_compile_options(transferBench PRIVATE --offload-arch=${arch})
        target_link_options(transferBench PUBLIC --offload-arch=${arch})
    endif()
endforeach()
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
# defined in MacroUtilities.cmake
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transferBench)
@@ -66,6 +66,12 @@ endif()
add_executable(transpose transpose.cpp)
target_link_libraries(transpose PRIVATE Threads::Threads)
# Specify GPU architectures to compile for
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
target_compile_options(transpose PRIVATE --offload-arch=${arch})
target_link_options(transpose PUBLIC --offload-arch=${arch})
endforeach()
if(
CMAKE_CXX_COMPILER_ID MATCHES "Clang"
AND NOT CMAKE_CXX_COMPILER_IS_HIPCC
@@ -1,24 +1,5 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Copyright (c) Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
# -------------------------------------------------------------------------------------- #
#
@@ -101,8 +82,16 @@ rocprofiler_systems_add_test(
# -------------------------------------------------------------------------------------- #
if(ROCPROFSYS_USE_ROCM)
set(NAVI_REGEX "gfx(10|11|12)[A-Fa-f0-9][A-Fa-f0-9]")
rocprofiler_systems_get_gfx_archs(NAVI_DETECTED GFX_MATCH ${NAVI_REGEX} ECHO)
if(ROCPROFSYS_GFX_TARGETS)
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
rocprofiler_systems_lookup_gfx(${arch} GPU_CATEGORY)
if("instinct" IN_LIST GPU_CATEGORY)
continue()
endif()
set(NAVI_DETECTED TRUE)
break()
endforeach()
endif()
if(NAVI_DETECTED)
set(ROCPROFSYS_ROCM_EVENTS_TEST "SQ_WAVES")
@@ -1,24 +1,5 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Copyright (c) Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
#
# configuration and functions for testing
@@ -422,72 +403,6 @@ ${_FILE_CONTENTS}
set(${_ENV} "${_ENV_CONTENTS}" PARENT_SCOPE)
endfunction()
# -------------------------------------------------------------------------------------- #
# Check GPU architectures on the system. If a regex is provided, it will be used to filter
# the architectures. Otherwise, all architectures will be returned. Uses rocminfo to get
# the architectures.
#
# Arguments:
#   _VAR      - Output variable; receives the de-duplicated list of architectures, or
#               the de-duplicated list of GFX_MATCH matches when a filter is given.
#               Left unassigned if rocminfo is not found or exits with a non-zero status.
#   ECHO      - If present, print the architectures (and the filtered result) to console
#   PREFIX    - Prefix for echo message (default: [${PROJECT_NAME}])
#   DELIM     - Delimiter between architectures when echoing (default: ", ")
#   GFX_MATCH - Regex used to filter the architectures
function(ROCPROFILER_SYSTEMS_GET_GFX_ARCHS _VAR)
    cmake_parse_arguments(ARG "ECHO" "PREFIX;DELIM;GFX_MATCH" "" ${ARGN})

    if(NOT DEFINED ARG_DELIM)
        set(ARG_DELIM ", ")
    endif()

    if(NOT DEFINED ARG_PREFIX)
        set(ARG_PREFIX "[${PROJECT_NAME}] ")
    endif()

    find_program(
        rocminfo_EXECUTABLE
        NAMES rocminfo
        HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
        PATHS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
        PATH_SUFFIXES bin
    )

    if(rocminfo_EXECUTABLE)
        execute_process(
            COMMAND ${rocminfo_EXECUTABLE}
            RESULT_VARIABLE rocminfo_RET
            OUTPUT_VARIABLE rocminfo_OUT
            ERROR_VARIABLE rocminfo_ERR
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_STRIP_TRAILING_WHITESPACE
        )

        if(rocminfo_RET EQUAL 0)
            # Collect every gfx identifier that appears anywhere in the output
            string(REGEX MATCHALL "gfx([0-9A-Fa-f]+)" rocminfo_GFXINFO "${rocminfo_OUT}")
            list(REMOVE_DUPLICATES rocminfo_GFXINFO)
            set(${_VAR} "${rocminfo_GFXINFO}" PARENT_SCOPE)

            if(ARG_ECHO)
                string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${rocminfo_GFXINFO}")
                message(STATUS "${ARG_PREFIX}System architectures: ${_GFXINFO_ECHO}")
            endif()

            # Filter the architectures if a regex is provided
            if(ARG_GFX_MATCH)
                # MATCHALL (not MATCH) so that every matching architecture is kept
                # rather than only the first one
                string(
                    REGEX MATCHALL
                    "${ARG_GFX_MATCH}"
                    _GFX_MATCH
                    "${rocminfo_GFXINFO}"
                )
                list(REMOVE_DUPLICATES _GFX_MATCH)
                set(${_VAR} "${_GFX_MATCH}" PARENT_SCOPE)

                if(ARG_ECHO)
                    string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${_GFX_MATCH}")
                    message(
                        STATUS
                        "${ARG_PREFIX}System architectures (filtered: ${ARG_GFX_MATCH}): ${_GFXINFO_ECHO}"
                    )
                endif()
            endif()
        else()
            message(
                AUTHOR_WARNING
                "${rocminfo_EXECUTABLE} failed with error code ${rocminfo_RET}\nstderr:\n${rocminfo_ERR}\nstdout:\n${rocminfo_OUT}"
            )
        endif()
    endif()
endfunction()
# -------------------------------------------------------------------------------------- #
# extends the timeout when sanitizers are used due to slowdown
function(ROCPROFILER_SYSTEMS_ADJUST_TIMEOUT_FOR_SANITIZER _VAR)