[rocprofiler-systems] Add build option for "examples" to specify gfx-arch (#2626)
## Motivation - Added `check_rocminfo` function that returns true if the provided regex was found, false otherwise. Can also use `GET_OUTPUT` to get the raw output filtered with or without a regex. - Moved `rocprofiler_systems_get_gfx_archs()` to `MacroUtilities.cmake` - Added `rocprofiler_systems_lookup_gfx()`, which detects whether a given `gfx` is from the `instinct`, `radeon` or `apu` family. - Added `ROCPROFSYS_GFX_TARGETS` as a build argument. Used to specify the offloading architectures that GPU examples should compile for. If empty, defaults to whatever your system has. - GPU examples now check if the given `gfx` targets (from `ROCPROFSYS_GFX_TARGETS`) are supported. - OMPVV offload tests now only compile if `amdflang` version is `>= 20` - Improve link time by reducing the number of GFX targets that binaries need to support. - RCCL is now passed a `GPU_TARGETS` var specifying the architectures to build/link against.
This commit is contained in:
@@ -12,6 +12,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
|
||||
- Added dependency to `spdlog` library.
|
||||
- Added environment variable `ROCPROFSYS_LOG_LEVEL` which controls the logging level.
|
||||
- Available log levels: `critical`, `error`, `warning`, `info`(default), `debug`, `trace` and `off`.
|
||||
- Added cmake option `ROCPROFSYS_GFX_TARGETS` which controls GFX targets used to build example binaries.
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@@ -581,6 +581,148 @@ function(ROCPROFILER_SYSTEMS_PRINT_FEATURES)
|
||||
rocprofiler_systems_print_disabled_features()
|
||||
endfunction()
|
||||
|
||||
# ----------------------------------------------------------------------------
# function check_rocminfo(<regex> <result-var> [GET_OUTPUT])
#
# Runs rocminfo and searches its standard output for a regular expression.
#
# ARGS:
#   _REGEX:           The regex to search for. May be empty only together with
#                     GET_OUTPUT, in which case no filtering is applied.
#   _RESULT_VARIABLE: Name of the caller-scope variable that receives the result
#   GET_OUTPUT:       If present, return text instead of a boolean
#
# Returns (in _RESULT_VARIABLE):
#   Default:    TRUE if the regex matched, FALSE otherwise (also FALSE when
#               rocminfo is not found or exits with a non-zero code)
#   GET_OUTPUT: list of all regex matches, or the unfiltered rocminfo output if
#               the regex is empty; empty string if rocminfo is missing or fails
#
function(CHECK_ROCMINFO _REGEX _RESULT_VARIABLE)
    cmake_parse_arguments(ARG "GET_OUTPUT" "" "" ${ARGN})

    find_program(
        rocminfo_EXECUTABLE
        NAMES rocminfo
        HINTS ${ROCM_PATH} ${ROCmVersion_DIR} /opt/rocm
        PATHS ${ROCM_PATH} ${ROCmVersion_DIR} /opt/rocm
        PATH_SUFFIXES bin
    )

    # cmake_parse_arguments() always defines option variables (as TRUE or FALSE),
    # so the option has to be tested by value; a DEFINED test is always true.
    if(NOT ARG_GET_OUTPUT AND _REGEX STREQUAL "")
        message(FATAL_ERROR "Regex is empty, but GET_OUTPUT is not defined")
    endif()

    set(_result FALSE)
    set(_failure FALSE)

    if(NOT rocminfo_EXECUTABLE)
        message(AUTHOR_WARNING "rocminfo not found")
        set(_failure TRUE)
    else()
        execute_process(
            COMMAND ${rocminfo_EXECUTABLE}
            RESULT_VARIABLE rocminfo_RET
            OUTPUT_VARIABLE rocminfo_OUTPUT
            ERROR_VARIABLE rocminfo_ERROR
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_STRIP_TRAILING_WHITESPACE
        )

        if(rocminfo_RET EQUAL 0)
            if(NOT _REGEX STREQUAL "")
                # From here on rocminfo_OUTPUT holds the list of matches, which
                # is what GET_OUTPUT hands back to the caller.
                string(REGEX MATCHALL "${_REGEX}" rocminfo_OUTPUT "${rocminfo_OUTPUT}")
                # Compare against the empty string rather than relying on
                # truthiness, so a match such as "0" or "OFF" still counts.
                if(NOT "${rocminfo_OUTPUT}" STREQUAL "")
                    set(_result TRUE)
                endif()
            endif()
        else()
            message(
                AUTHOR_WARNING
                "${rocminfo_EXECUTABLE} failed with error code ${rocminfo_RET}\nstderr:\n${rocminfo_ERROR}\nstdout:\n${rocminfo_OUTPUT}"
            )
            set(_failure TRUE)
        endif()
    endif()

    if(ARG_GET_OUTPUT)
        if(_failure)
            set(${_RESULT_VARIABLE} "" PARENT_SCOPE)
        else()
            set(${_RESULT_VARIABLE} "${rocminfo_OUTPUT}" PARENT_SCOPE)
        endif()
        return()
    endif()

    set(${_RESULT_VARIABLE} ${_result} PARENT_SCOPE)
endfunction()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
# function rocprofiler_systems_get_gfx_archs(<var> [ECHO] [PREFIX <str>] [DELIM <str>]
#                                            [GFX_MATCH <regex>])
#
# Collects the gfx architecture names reported on rocminfo "Name:" lines (queried
# through check_rocminfo()). "gfx000" entries and duplicates are dropped.
# If a regex is provided, it will be used to filter the architectures.
# Otherwise, all architectures will be returned.
#
# Arguments:
#   _VAR      - Output variable to store detected architectures. It is set to an
#               empty string when nothing could be detected.
#   ECHO      - If present, print detected architectures to console
#   PREFIX    - Prefix for echo message (default: "[${PROJECT_NAME}] ")
#   DELIM     - Delimiter between architectures (default: ", ")
#   GFX_MATCH - Regex to filter architectures
#
function(ROCPROFILER_SYSTEMS_GET_GFX_ARCHS _VAR)
    cmake_parse_arguments(ARG "ECHO" "PREFIX;DELIM;GFX_MATCH" "" ${ARGN})

    if(NOT DEFINED ARG_DELIM)
        set(ARG_DELIM ", ")
    endif()

    if(NOT DEFINED ARG_PREFIX)
        set(ARG_PREFIX "[${PROJECT_NAME}] ")
    endif()

    # Match only "Name:" lines to avoid matching gfx in marketing names/descriptions.
    check_rocminfo("Name:[ \t]+gfx[0-9A-Fa-f][0-9A-Fa-f]+" _RAW_GFXINFO GET_OUTPUT)
    if(NOT _RAW_GFXINFO)
        message(AUTHOR_WARNING "Could not get system architectures")
        # Always write the output variable: callers may pass a variable that
        # still holds a placeholder (e.g. "default") and must not keep it.
        set(${_VAR} "" PARENT_SCOPE)
        return()
    endif()

    # Extract just the gfx architecture from each "Name: gfxXXXX" match. The
    # "Name:" prefix and the list separators contain no "gfx" token, so a single
    # MATCHALL over the whole list yields exactly one entry per raw match.
    string(REGEX MATCHALL "gfx[0-9A-Fa-f]+" _GFXINFO "${_RAW_GFXINFO}")

    list(REMOVE_ITEM _GFXINFO "gfx000")
    list(REMOVE_DUPLICATES _GFXINFO)

    # Filter architectures if GFX_MATCH regex is provided
    if(DEFINED ARG_GFX_MATCH)
        list(FILTER _GFXINFO INCLUDE REGEX "${ARG_GFX_MATCH}")
    endif()

    # Echo detected architectures if requested
    if(ARG_ECHO)
        string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${_GFXINFO}")
        if(DEFINED ARG_GFX_MATCH)
            message(
                STATUS
                "${ARG_PREFIX}System architectures (filtered: ${ARG_GFX_MATCH}): ${_GFXINFO_ECHO}"
            )
        else()
            message(STATUS "${ARG_PREFIX}System architectures: ${_GFXINFO_ECHO}")
        endif()
    endif()

    set(${_VAR} "${_GFXINFO}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# this function is provided to easily select which files use alternative compiler:
|
||||
#
|
||||
@@ -1003,4 +1145,79 @@ function(COMPUTE_POW2_CEIL _OUTPUT _VALUE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# ----------------------------------------------------------------------------
# function rocprofiler_systems_lookup_gfx(<gfx-id> <output-list-var>)
#
# Maps a gfx ID onto the product categories it is listed under.
#
# ARGS:
#   _TARGET:      The gfx ID to classify (e.g. "gfx90a")
#   _OUTPUT_LIST: Name of the caller-scope variable receiving the list of
#                 categories; entries are "instinct", "radeon" and/or "apu"
#
# Notes:
#   - An ID that appears in none of the tables triggers an AUTHOR_WARNING and
#     is reported as "instinct".
#   - For IDs in the instinct table, "apu" is added as well when the rocminfo
#     output contains "APU".
#     NOTE(review): that probe looks at the machine running CMake, not at
#     _TARGET itself - confirm this is intended when targets are set manually.
#
function(ROCPROFILER_SYSTEMS_LOOKUP_GFX _TARGET _OUTPUT_LIST)
    # gfx906 is MI50/MI60 here; Radeon VII (also gfx906) is deliberately ignored
    set(_instinct_ids "gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx950")

    # Also includes PRO GPUs
    set(_radeon_ids
        "gfx1012" "gfx1011" "gfx1010"
        "gfx1032" "gfx1031" "gfx1030"
        "gfx1102" "gfx1101" "gfx1100"
        "gfx1200" "gfx1201" "gfx1202"
    )

    set(_apu_ids "gfx1035" "gfx1036" "gfx1103" "gfx1151" "gfx1152" "gfx1153")

    # list(FIND) yields -1 when the ID is not in the table
    list(FIND _instinct_ids "${_TARGET}" _instinct_pos)
    list(FIND _radeon_ids "${_TARGET}" _radeon_pos)
    list(FIND _apu_ids "${_TARGET}" _apu_pos)

    set(_kinds "")

    if(NOT _instinct_pos EQUAL -1)
        list(APPEND _kinds "instinct")
        # Some instinct GPUs may also be an APU (ex: MI300A)
        check_rocminfo("APU" _host_reports_apu)
        if(_host_reports_apu)
            list(APPEND _kinds "apu")
        endif()
    endif()

    if(NOT _radeon_pos EQUAL -1)
        list(APPEND _kinds "radeon")
    endif()

    if(NOT _apu_pos EQUAL -1)
        list(APPEND _kinds "apu")
    endif()

    # Nothing matched: warn and fall back to the instinct category
    list(LENGTH _kinds _kind_count)
    if(_kind_count EQUAL 0)
        rocprofiler_systems_message(
            AUTHOR_WARNING
            "Unknown GFX target: ${_TARGET}. Defaulting to instinct"
        )
        list(APPEND _kinds "instinct")
    endif()

    set(${_OUTPUT_LIST} "${_kinds}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
cmake_policy(POP)
|
||||
|
||||
@@ -58,6 +58,35 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||
include(GNUInstallDirs)
|
||||
endif()
|
||||
|
||||
# GPU architectures to compile for
|
||||
# If not set, auto-detects from system GPU (or empty if no GPU present)
|
||||
if(
|
||||
NOT DEFINED ROCPROFSYS_GFX_TARGETS
|
||||
OR ROCPROFSYS_GFX_TARGETS STREQUAL ""
|
||||
OR ROCPROFSYS_GFX_TARGETS STREQUAL "default"
|
||||
)
|
||||
rocprofiler_systems_get_gfx_archs(ROCPROFSYS_GFX_TARGETS)
|
||||
endif()
|
||||
|
||||
set(ROCPROFSYS_GFX_TARGETS
|
||||
"${ROCPROFSYS_GFX_TARGETS}"
|
||||
CACHE STRING
|
||||
"GPU architectures to compile for (semicolon-separated)"
|
||||
FORCE
|
||||
)
|
||||
|
||||
if(ROCPROFSYS_GFX_TARGETS)
|
||||
message(STATUS "")
|
||||
rocprofiler_systems_message(STATUS "Detected targets:")
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
rocprofiler_systems_lookup_gfx(${arch} _GFX_TYPE)
|
||||
message(STATUS " ${arch} (categories: ${_GFX_TYPE})")
|
||||
endforeach()
|
||||
message(STATUS "")
|
||||
else()
|
||||
rocprofiler_systems_message(STATUS "No GPU targets detected/set")
|
||||
endif()
|
||||
|
||||
set(ROCPROFSYS_EXAMPLE_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "")
|
||||
# defines function for creating causal profiling exes
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/causal-helpers.cmake)
|
||||
|
||||
@@ -20,14 +20,12 @@ target_link_libraries(
|
||||
)
|
||||
target_compile_options(fork-example PRIVATE ${_FLAGS})
|
||||
|
||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||
if(TARGET fork-example)
|
||||
install(
|
||||
TARGETS fork-example
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||
COMPONENT rocprofiler-systems-examples
|
||||
)
|
||||
endif()
|
||||
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET fork-example)
|
||||
install(
|
||||
TARGETS fork-example
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||
COMPONENT rocprofiler-systems-examples
|
||||
)
|
||||
endif()
|
||||
|
||||
# HIP fork example (multi-process concurrency test)
|
||||
@@ -64,6 +62,18 @@ if(HIPCC_EXECUTABLE)
|
||||
)
|
||||
add_executable(hipMallocConcurrencyMproc hipMallocConcurrencyMproc.cpp)
|
||||
target_link_libraries(hipMallocConcurrencyMproc PRIVATE Threads::Threads)
|
||||
set_target_properties(
|
||||
hipMallocConcurrencyMproc
|
||||
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/fork"
|
||||
)
|
||||
# Specify GPU architectures to compile for
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
target_compile_options(
|
||||
hipMallocConcurrencyMproc
|
||||
PRIVATE --offload-arch=${arch}
|
||||
)
|
||||
target_link_options(hipMallocConcurrencyMproc PUBLIC --offload-arch=${arch})
|
||||
endforeach()
|
||||
|
||||
if(
|
||||
CMAKE_CXX_COMPILER_ID MATCHES "Clang"
|
||||
|
||||
@@ -101,22 +101,6 @@ foreach(OPENMP_EXAMPLE IN LISTS OPENMP_EXAMPLES)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(DEFAULT_GPU_TARGETS
|
||||
"gfx900"
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx942"
|
||||
"gfx950"
|
||||
"gfx1030"
|
||||
"gfx1010"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
)
|
||||
|
||||
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "GPU targets to compile for")
|
||||
|
||||
if(ROCPROFSYS_USE_ROCM)
|
||||
add_subdirectory(external)
|
||||
else()
|
||||
|
||||
+13
-5
@@ -25,6 +25,11 @@ endif()
|
||||
|
||||
rocprofiler_systems_message(STATUS "Configuring OMPVV...")
|
||||
|
||||
if(NOT ROCPROFSYS_GFX_TARGETS)
|
||||
rocprofiler_systems_message(WARNING "No GPU targets detected/set. Disabling OMPVV offload tests...")
|
||||
set(OMPVV_USE_OFFLOAD_TESTS FALSE)
|
||||
endif()
|
||||
|
||||
# Master branch contains stable releases
|
||||
rocprofiler_systems_checkout_git_submodule(
|
||||
RELATIVE_PATH
|
||||
@@ -63,9 +68,6 @@ function(configure_ompvv_tests TEST_TYPE TEST_LIST_VAR)
|
||||
elseif(TEST_TYPE STREQUAL "OFFLOAD")
|
||||
set(TARGET_PREFIX "openmp-vv-offload")
|
||||
set(FOFFLOADING_FLAGS "-fopenmp")
|
||||
foreach(arch IN LISTS DEFAULT_GPU_TARGETS)
|
||||
set(FOFFLOADING_FLAGS "${FOFFLOADING_FLAGS} --offload-arch=${arch}")
|
||||
endforeach()
|
||||
else()
|
||||
rocprofiler_systems_message(FATAL_ERROR "Unknown TEST_TYPE - Only HOST and OFFLOAD supported")
|
||||
endif()
|
||||
@@ -77,6 +79,12 @@ function(configure_ompvv_tests TEST_TYPE TEST_LIST_VAR)
|
||||
string(REPLACE "_" "-" TARGET_NAME ${TEST_NAME})
|
||||
set(TARGET_NAME "${TARGET_PREFIX}-${TARGET_NAME}")
|
||||
|
||||
if(TARGET_PREFIX STREQUAL "openmp-vv-offload")
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
set(FOFFLOADING_FLAGS "${FOFFLOADING_FLAGS} --offload-arch=${arch}")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
set(CUSTOM_FFLAGS "-lm ${FOFFLOADING_FLAGS}")
|
||||
set(CUSTOM_FLINKFLAGS "-lm ${FOFFLOADING_FLAGS}")
|
||||
|
||||
@@ -184,7 +192,7 @@ set(OMPVV_COMPILER
|
||||
CACHE FILEPATH
|
||||
"Fortran compiler used for OMPVV tests"
|
||||
)
|
||||
set(OMPVV_FC "amdflang")
|
||||
set(OMPVV_FC "${amdflang_EXECUTABLE}")
|
||||
|
||||
execute_process(
|
||||
COMMAND ${amdflang_EXECUTABLE} --version
|
||||
@@ -237,7 +245,7 @@ set(OMPVV_HOST_TESTS_TO_COMPILE
|
||||
# Set of offloading tests to be compiled (excluding reduction and simd_atomic
|
||||
# tests due to OMPVV test failure)
|
||||
set(OMPVV_OFFLOAD_TESTS_TO_COMPILE "")
|
||||
if(OMPVV_USE_OFFLOAD_TESTS)
|
||||
if(OMPVV_USE_OFFLOAD_TESTS AND AMDFLANG_VERSION_MAJOR GREATER_EQUAL 20)
|
||||
set(OMPVV_OFFLOAD_TESTS_TO_COMPILE
|
||||
"${OMPVV_TDIR}/target_simd/test_target_simd_if.F90"
|
||||
"${OMPVV_TDIR}/target_teams_distribute_parallel_for/test_target_teams_distribute_parallel_for_collapse.F90"
|
||||
|
||||
@@ -3,6 +3,11 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||
|
||||
if(NOT ROCPROFSYS_GFX_TARGETS)
|
||||
rocprofiler_systems_message(WARNING "No GPU targets detected/set. Disabling OpenMP target example...")
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(NOT OMP_TARGET_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
@@ -36,7 +41,7 @@ set(CMAKE_BUILD_TYPE "RelWithDebInfo")
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
function(add_offload_flags tgt)
|
||||
foreach(arch IN LISTS GPU_TARGETS)
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
target_compile_options(${tgt} PRIVATE --offload-arch=${arch})
|
||||
target_link_options(${tgt} PUBLIC --offload-arch=${arch})
|
||||
endforeach()
|
||||
|
||||
@@ -1,24 +1,5 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
# Copyright (c) Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||
list(APPEND CMAKE_MESSAGE_CONTEXT "rccl-tests")
|
||||
@@ -68,6 +49,33 @@ if(hip_FOUND AND rccl_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Parse comment describing supported gfx targets from rccl src/Makefile
|
||||
execute_process(
|
||||
COMMAND grep "Currently, supports" ${rccl-tests_SOURCE_DIR}/src/Makefile
|
||||
OUTPUT_VARIABLE _supported_line
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE _rccl_grep_ret
|
||||
)
|
||||
if(NOT _rccl_grep_ret EQUAL 0)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"This grep should never fail unless rccl-tests/src/Makefile was modified in an incompatible way"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Extract just the gfx architectures
|
||||
string(REGEX MATCHALL "gfx[0-9a-z]+" RCCL_SUPPORTED_TARGETS "${_supported_line}")
|
||||
set(RCCL_GPU_TARGETS "")
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
if(arch IN_LIST RCCL_SUPPORTED_TARGETS)
|
||||
list(APPEND RCCL_GPU_TARGETS ${arch})
|
||||
endif()
|
||||
endforeach()
|
||||
if(RCCL_GPU_TARGETS STREQUAL "")
|
||||
message(AUTHOR_WARNING "rccl-tests skipped. No supported GPU targets found.")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Copy source to build directory
|
||||
file(COPY ${rccl-tests_SOURCE_DIR}/ DESTINATION ${rccl-tests_BUILD_DIR})
|
||||
|
||||
@@ -78,7 +86,9 @@ if(hip_FOUND AND rccl_FOUND)
|
||||
add_custom_target(
|
||||
build-rccl-tests
|
||||
ALL
|
||||
COMMAND make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR} -j
|
||||
COMMAND
|
||||
make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR}
|
||||
GPU_TARGETS=${RCCL_GPU_TARGETS} -j
|
||||
WORKING_DIRECTORY ${rccl-tests_BUILD_DIR}
|
||||
COMMENT
|
||||
"Building rccl-tests with HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR} ..."
|
||||
|
||||
@@ -89,6 +89,12 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
|
||||
target_compile_options(roctx PRIVATE -g1)
|
||||
endif()
|
||||
|
||||
# Specify GPU architectures to compile for
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
target_compile_options(roctx PRIVATE --offload-arch=${arch})
|
||||
target_link_options(roctx PUBLIC --offload-arch=${arch})
|
||||
endforeach()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
|
||||
# defined in MacroUtilities.cmake
|
||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx)
|
||||
|
||||
@@ -118,6 +118,16 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
|
||||
target_compile_options(transferBench PRIVATE -g1)
|
||||
endif()
|
||||
|
||||
# Specify GPU architectures to compile for
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
rocprofiler_systems_lookup_gfx(${arch} GPU_CATEGORIES)
|
||||
if("apu" IN_LIST GPU_CATEGORIES AND NOT "instinct" IN_LIST GPU_CATEGORIES)
|
||||
continue()
|
||||
endif()
|
||||
target_compile_options(transferBench PRIVATE --offload-arch=${arch})
|
||||
target_link_options(transferBench PUBLIC --offload-arch=${arch})
|
||||
endforeach()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
|
||||
# defined in MacroUtilities.cmake
|
||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transferBench)
|
||||
|
||||
@@ -66,6 +66,12 @@ endif()
|
||||
add_executable(transpose transpose.cpp)
|
||||
target_link_libraries(transpose PRIVATE Threads::Threads)
|
||||
|
||||
# Specify GPU architectures to compile for
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
target_compile_options(transpose PRIVATE --offload-arch=${arch})
|
||||
target_link_options(transpose PUBLIC --offload-arch=${arch})
|
||||
endforeach()
|
||||
|
||||
if(
|
||||
CMAKE_CXX_COMPILER_ID MATCHES "Clang"
|
||||
AND NOT CMAKE_CXX_COMPILER_IS_HIPCC
|
||||
|
||||
@@ -1,24 +1,5 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
# Copyright (c) Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
#
|
||||
@@ -101,8 +82,16 @@ rocprofiler_systems_add_test(
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
if(ROCPROFSYS_USE_ROCM)
|
||||
set(NAVI_REGEX "gfx(10|11|12)[A-Fa-f0-9][A-Fa-f0-9]")
|
||||
rocprofiler_systems_get_gfx_archs(NAVI_DETECTED GFX_MATCH ${NAVI_REGEX} ECHO)
|
||||
if(ROCPROFSYS_GFX_TARGETS)
|
||||
foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
|
||||
rocprofiler_systems_lookup_gfx(${arch} GPU_CATEGORY)
|
||||
if("instinct" IN_LIST GPU_CATEGORY)
|
||||
continue()
|
||||
endif()
|
||||
set(NAVI_DETECTED TRUE)
|
||||
break()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(NAVI_DETECTED)
|
||||
set(ROCPROFSYS_ROCM_EVENTS_TEST "SQ_WAVES")
|
||||
|
||||
@@ -1,24 +1,5 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
# Copyright (c) Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
#
|
||||
# configuration and functions for testing
|
||||
@@ -422,72 +403,6 @@ ${_FILE_CONTENTS}
|
||||
set(${_ENV} "${_ENV_CONTENTS}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# -------------------------------------------------------------------------------------- #
# Check GPU architectures on the system. If a regex is provided, it will be used to filter
# the architectures. Otherwise, all architectures will be returned. Uses rocminfo to get
# the architectures.
#
# Arguments:
#   _VAR      - Output variable; receives the (de-duplicated) list of gfx names, or an
#               empty string when rocminfo is not found or fails
#   ECHO      - If present, print the detected (and, if filtered, the filtered) list
#   PREFIX    - Prefix for echo messages (default: "[${PROJECT_NAME}] ")
#   DELIM     - Delimiter used when echoing (default: ", ")
#   GFX_MATCH - Regex; only architectures matching it are returned
function(ROCPROFILER_SYSTEMS_GET_GFX_ARCHS _VAR)
    cmake_parse_arguments(ARG "ECHO" "PREFIX;DELIM;GFX_MATCH" "" ${ARGN})

    if(NOT DEFINED ARG_DELIM)
        set(ARG_DELIM ", ")
    endif()

    if(NOT DEFINED ARG_PREFIX)
        set(ARG_PREFIX "[${PROJECT_NAME}] ")
    endif()

    # Give the caller a well-defined (empty) result on every early exit below
    set(${_VAR} "" PARENT_SCOPE)

    find_program(
        rocminfo_EXECUTABLE
        NAMES rocminfo
        HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
        PATHS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
        PATH_SUFFIXES bin
    )

    if(NOT rocminfo_EXECUTABLE)
        return()
    endif()

    execute_process(
        COMMAND ${rocminfo_EXECUTABLE}
        RESULT_VARIABLE rocminfo_RET
        OUTPUT_VARIABLE rocminfo_OUT
        ERROR_VARIABLE rocminfo_ERR
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_STRIP_TRAILING_WHITESPACE
    )

    if(NOT rocminfo_RET EQUAL 0)
        message(
            AUTHOR_WARNING
            "${rocminfo_EXECUTABLE} failed with error code ${rocminfo_RET}\nstderr:\n${rocminfo_ERR}\nstdout:\n${rocminfo_OUT}"
        )
        return()
    endif()

    string(REGEX MATCHALL "gfx([0-9A-Fa-f]+)" rocminfo_GFXINFO "${rocminfo_OUT}")
    list(REMOVE_DUPLICATES rocminfo_GFXINFO)
    set(${_VAR} "${rocminfo_GFXINFO}" PARENT_SCOPE)

    if(ARG_ECHO)
        string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${rocminfo_GFXINFO}")
        message(STATUS "${ARG_PREFIX}System architectures: ${_GFXINFO_ECHO}")
    endif()

    # Filter the architectures if a regex is provided
    if(ARG_GFX_MATCH)
        # list(FILTER) keeps every matching architecture; string(REGEX MATCH)
        # would return only the first match in the whole list.
        set(_GFX_MATCH "${rocminfo_GFXINFO}")
        list(FILTER _GFX_MATCH INCLUDE REGEX "${ARG_GFX_MATCH}")
        set(${_VAR} "${_GFX_MATCH}" PARENT_SCOPE)

        if(ARG_ECHO)
            string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${_GFX_MATCH}")
            message(
                STATUS
                "${ARG_PREFIX}System architectures (filtered: ${ARG_GFX_MATCH}): ${_GFXINFO_ECHO}"
            )
        endif()
    endif()
endfunction()
|
||||
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
# extends the timeout when sanitizers are used due to slowdown
|
||||
function(ROCPROFILER_SYSTEMS_ADJUST_TIMEOUT_FOR_SANITIZER _VAR)
|
||||
|
||||
مرجع در شماره جدید
Block a user