Files
Kian Cossettini 698ac6b8bc [rocprofiler-systems] Add build option for "examples" to specify gfx-arch (#2626)
## Motivation
 - Added a `check_rocminfo` function that returns true if the provided regex is found and false otherwise. It can also use `GET_OUTPUT` to return the raw output, optionally filtered by a regex.
 - Moved `rocprofiler_systems_get_gfx_archs()` to `MacroUtilities.cmake` 
 - Added `rocprofiler_systems_lookup_gfx()`, which detects whether a given `gfx` is from the `instinct`, `radeon` or `apu` family.
 - Added `ROCPROFSYS_GFX_TARGETS` as a build argument. It specifies the offloading architectures that the GPU examples should be compiled for. If empty, it defaults to the architectures detected on the build system.
 - GPU examples now check if the given `gfx` targets (from `ROCPROFSYS_GFX_TARGETS`) are supported.
 - OMPVV offload tests now only compile if `amdflang` version is `>= 20`
 - Improve link time by reducing the number of GFX targets that binaries need to support.
   - RCCL is now passed a `GPU_TARGETS` var specifying the architectures to build/link against.
2026-01-20 12:13:21 -05:00

172 řádky
5.1 KiB
CMake

# Copyright (c) Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
# -------------------------------------------------------------------------------------- #
#
# ROCm transpose tests
#
# -------------------------------------------------------------------------------------- #
# Environment list shared by the transpose tests in this file: the entries of
# `_base_environment` followed by the ROCPROFSYS_ROCM_DOMAINS setting.
set(_transpose_environment
    "${_base_environment}"
    "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,memory_allocation,hsa_api"
)

# Enable ROCPD for tests only if valid ROCm is installed and a valid GPU is detected.
#
# The expansions are quoted on purpose: if ENABLE_ROCPD_TEST or _VALID_GPU is unset or
# empty, an unquoted `${...}` vanishes and the condition collapses to `if( AND )`,
# which is a configure-time error. Quoted, an empty value simply evaluates to false,
# while the usual boolean constants (ON/OFF, TRUE/FALSE, 1/0) behave as before.
if("${ENABLE_ROCPD_TEST}" AND "${_VALID_GPU}")
    list(APPEND _transpose_environment "ROCPROFSYS_USE_ROCPD=ON")
endif()
# Register the `transpose` test for the `transpose` target. No SKIP_* flag is passed,
# and separate argument lists are supplied for the rewrite and runtime variants.
rocprofiler_systems_add_test(
    NAME transpose
    TARGET transpose
    MPI ${TRANSPOSE_USE_MPI}
    GPU ON
    NUM_PROCS ${NUM_PROCS}
    REWRITE_ARGS
        -e
        -v
        2
        --print-instructions
        -E
        uniform_int_distribution
    RUNTIME_ARGS -e -v 1 --label file line return args -E uniform_int_distribution
    ENVIRONMENT "${_transpose_environment}"
    RUNTIME_TIMEOUT 480
)
# Register `transpose-two-kernels`: the `transpose` target run as a single non-MPI
# process with run arguments "1 2 2". SKIP_REWRITE and SKIP_RUNTIME are both passed.
rocprofiler_systems_add_test(
    SKIP_REWRITE
    SKIP_RUNTIME
    NAME transpose-two-kernels
    TARGET transpose
    MPI OFF
    GPU ON
    NUM_PROCS 1
    RUN_ARGS
        1
        2
        2
    ENVIRONMENT "${_transpose_environment}"
)
# Register `transpose-loops` (label "loops") with SKIP_BASELINE and SKIP_RUNTIME set.
# REWRITE_FAIL_REGEX is the message reporting zero instrumented loops in `transpose`.
rocprofiler_systems_add_test(
    SKIP_BASELINE
    SKIP_RUNTIME
    NAME transpose-loops
    TARGET transpose
    LABELS "loops"
    MPI ${TRANSPOSE_USE_MPI}
    GPU ON
    NUM_PROCS ${NUM_PROCS}
    REWRITE_ARGS -e -v 2 --label return args -l -i 8 -E uniform_int_distribution
    RUN_ARGS
        2
        100
        50
    ENVIRONMENT "${_transpose_environment}"
    REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose"
)
# -------------------------------------------------------------------------------------- #
#
# ROCProfiler tests (counter collection)
#
# -------------------------------------------------------------------------------------- #
if(ROCPROFSYS_USE_ROCM)
    # Walk the requested GFX targets. The first target whose GPU_CATEGORY (the second
    # argument handed to rocprofiler_systems_lookup_gfx, presumably its output
    # variable) does not contain "instinct" sets NAVI_DETECTED and stops the scan.
    if(ROCPROFSYS_GFX_TARGETS)
        foreach(arch IN LISTS ROCPROFSYS_GFX_TARGETS)
            rocprofiler_systems_lookup_gfx(${arch} GPU_CATEGORY)
            if(NOT "instinct" IN_LIST GPU_CATEGORY)
                set(NAVI_DETECTED TRUE)
                break()
            endif()
        endforeach()
    endif()

    # Select the counters to request (ROCPROFSYS_ROCM_EVENTS_TEST), the per-counter
    # output files expected to exist (ROCPROFSYS_FILE_CHECKS) and the counter names
    # passed to the validation tests (ROCPROFSYS_COUNTER_NAMES_ARG).
    if(NOT NAVI_DETECTED)
        # Full counter set.
        set(ROCPROFSYS_ROCM_EVENTS_TEST
            "GRBM_COUNT,SQ_WAVES,SQ_INSTS_VALU,TA_TA_BUSY:device=0"
        )
        set(ROCPROFSYS_FILE_CHECKS
            "rocprof-device-0-GRBM_COUNT.txt"
            "rocprof-device-0-SQ_WAVES.txt"
            "rocprof-device-0-SQ_INSTS_VALU.txt"
            "rocprof-device-0-TA_TA_BUSY.txt"
        )
        set(ROCPROFSYS_COUNTER_NAMES_ARG
            "GRBM_COUNT"
            "SQ_WAVES"
            "SQ_INSTS_VALU"
            "TA_TA_BUSY"
        )
    else()
        # A non-"instinct" target was found: restrict the test to SQ_WAVES.
        set(ROCPROFSYS_ROCM_EVENTS_TEST "SQ_WAVES")
        set(ROCPROFSYS_FILE_CHECKS "rocprof-device-0-SQ_WAVES.txt")
        set(ROCPROFSYS_COUNTER_NAMES_ARG "SQ_WAVES")
    endif()

    # Counter-collection test; the selected events are appended to the shared
    # transpose environment as ROCPROFSYS_ROCM_EVENTS.
    rocprofiler_systems_add_test(
        SKIP_BASELINE
        SKIP_RUNTIME
        NAME transpose-rocprofiler
        TARGET transpose
        LABELS "rocprofiler"
        MPI ${TRANSPOSE_USE_MPI}
        GPU ON
        NUM_PROCS ${NUM_PROCS}
        REWRITE_ARGS
            -e
            -v
            2
            -E
            uniform_int_distribution
        ENVIRONMENT
            "${_transpose_environment};ROCPROFSYS_ROCM_EVENTS=${ROCPROFSYS_ROCM_EVENTS_TEST}"
        REWRITE_RUN_PASS_REGEX "${_ROCP_PASS_REGEX}"
        SAMPLING_PASS_REGEX "${_ROCP_PASS_REGEX}"
    )

    # Validation of the perfetto trace and counter files for the sampling variant.
    rocprofiler_systems_add_validation_test(
        NAME transpose-rocprofiler-sampling
        PERFETTO_FILE "perfetto-trace.proto"
        ARGS
            --counter-names
            ${ROCPROFSYS_COUNTER_NAMES_ARG}
            -p
        EXIST_FILES ${ROCPROFSYS_FILE_CHECKS}
        LABELS "rocprofiler"
    )

    # Same validation for the binary-rewrite variant.
    rocprofiler_systems_add_validation_test(
        NAME transpose-rocprofiler-binary-rewrite
        PERFETTO_FILE "perfetto-trace.proto"
        ARGS
            --counter-names
            ${ROCPROFSYS_COUNTER_NAMES_ARG}
            -p
        EXIST_FILES ${ROCPROFSYS_FILE_CHECKS}
        LABELS "rocprofiler"
    )
endif()
# -------------------------------------------------------------------------------------- #
#
# ROCpd tests
#
# -------------------------------------------------------------------------------------- #
# When ROCpd testing is enabled, a valid GPU was detected and the `transpose-sampling`
# test exists, tag that test with the `rocpd` label and register a validation test
# that checks `rocpd.db` against the listed rule files.
#
# The variable expansions are quoted on purpose: if ENABLE_ROCPD_TEST or _VALID_GPU is
# unset or empty, an unquoted `${...}` disappears and the condition degenerates to
# `if( AND  AND TEST ...)`, which aborts configuration. Quoted, an empty value is
# simply false, and the usual boolean constants behave as before.
if("${ENABLE_ROCPD_TEST}" AND "${_VALID_GPU}" AND TEST transpose-sampling)
    set_property(TEST transpose-sampling APPEND PROPERTY LABELS rocpd)

    rocprofiler_systems_add_validation_test(
        NAME transpose-sampling
        ROCPD_FILE "rocpd.db"
        ARGS --validation-rules
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/validation-rules.json"
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/default-rules.json"
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/amd-smi-rules.json"
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/cpu-metrics-rules.json"
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/timer-sampling-rules.json"
             "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/sdk-metrics-rules.json"
        LABELS "rocpd"
    )
endif()