From 4c2497562689dae2d30adaeb994fb944363c64da Mon Sep 17 00:00:00 2001 From: anujshuk-amd <204843612+anujshuk-amd@users.noreply.github.com> Date: Thu, 22 May 2025 23:34:33 +0530 Subject: [PATCH] Update transpose-rocprofiler-* tests (#210) - Updating counters collected and tested for on Navi-based machines - Update add CMake function to query GPU architectures - Update decode tests to use new functions --- tests/rocprof-sys-decode-tests.cmake | 3 +- tests/rocprof-sys-rocm-tests.cmake | 29 ++++++--- tests/rocprof-sys-testing.cmake | 94 ++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 36 deletions(-) diff --git a/tests/rocprof-sys-decode-tests.cmake b/tests/rocprof-sys-decode-tests.cmake index b0e427ee7e..3725d0ab84 100644 --- a/tests/rocprof-sys-decode-tests.cmake +++ b/tests/rocprof-sys-decode-tests.cmake @@ -37,7 +37,8 @@ set(_jpeg_decode_environment "ROCPROFSYS_AMD_SMI_METRICS=busy,temp,power,jpeg_activity,mem_usage" "ROCPROFSYS_SAMPLING_CPUS=none") -check_gpu("MI300" MI300_DETECTED) +rocprofiler_systems_get_gfx_archs(MI300_DETECTED GFX_MATCH "gfx9[4-9][A-Fa-f0-9]" ECHO) + if(MI300_DETECTED) list(APPEND VCN_COUNTER_NAMES_ARG --counter-names "VCN Activity") list(APPEND JPEG_COUNTER_NAMES_ARG --counter-names "JPEG Activity") diff --git a/tests/rocprof-sys-rocm-tests.cmake b/tests/rocprof-sys-rocm-tests.cmake index 81e7bcf3d4..68d6685893 100644 --- a/tests/rocprof-sys-rocm-tests.cmake +++ b/tests/rocprof-sys-rocm-tests.cmake @@ -26,8 +26,6 @@ # # -------------------------------------------------------------------------------------- # -set(ROCPROFSYS_ROCM_EVENTS_TEST "GRBM_COUNT,SQ_WAVES,SQ_INSTS_VALU,TA_TA_BUSY:device=0") - rocprofiler_systems_add_test( NAME transpose TARGET transpose @@ -84,6 +82,23 @@ rocprofiler_systems_add_test( REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose") if(ROCPROFSYS_USE_ROCM) + set(NAVI_REGEX "gfx(10|11|12)[A-Fa-f0-9][A-Fa-f0-9]") + rocprofiler_systems_get_gfx_archs(NAVI_DETECTED GFX_MATCH ${NAVI_REGEX} 
ECHO) + + if(NAVI_DETECTED) + set(ROCPROFSYS_ROCM_EVENTS_TEST "SQ_WAVES") + set(ROCPROFSYS_FILE_CHECKS "rocprof-device-0-SQ_WAVES.txt") + set(ROCPROFSYS_COUNTER_NAMES_ARG "SQ_WAVES") + else() + set(ROCPROFSYS_ROCM_EVENTS_TEST + "GRBM_COUNT,SQ_WAVES,SQ_INSTS_VALU,TA_TA_BUSY:device=0") + set(ROCPROFSYS_FILE_CHECKS + "rocprof-device-0-GRBM_COUNT.txt" "rocprof-device-0-SQ_WAVES.txt" + "rocprof-device-0-SQ_INSTS_VALU.txt" "rocprof-device-0-TA_TA_BUSY.txt") + set(ROCPROFSYS_COUNTER_NAMES_ARG "GRBM_COUNT" "SQ_WAVES" "SQ_INSTS_VALU" + "TA_TA_BUSY") + endif() + rocprofiler_systems_add_test( SKIP_BASELINE SKIP_RUNTIME NAME transpose-rocprofiler @@ -101,16 +116,14 @@ if(ROCPROFSYS_USE_ROCM) rocprofiler_systems_add_validation_test( NAME transpose-rocprofiler-sampling PERFETTO_FILE "perfetto-trace.proto" - ARGS --counter-names "TA_TA_BUSY" "SQ_WAVES" "GRBM_COUNT" "SQ_INSTS_VALU" -p - EXIST_FILES rocprof-device-0-GRBM_COUNT.txt rocprof-device-0-TA_TA_BUSY.txt - rocprof-device-0-SQ_INSTS_VALU.txt rocprof-device-0-SQ_WAVES.txt + ARGS --counter-names ${ROCPROFSYS_COUNTER_NAMES_ARG} -p + EXIST_FILES ${ROCPROFSYS_FILE_CHECKS} LABELS "rocprofiler") rocprofiler_systems_add_validation_test( NAME transpose-rocprofiler-binary-rewrite PERFETTO_FILE "perfetto-trace.proto" - ARGS --counter-names "TA_TA_BUSY" "SQ_WAVES" "GRBM_COUNT" "SQ_INSTS_VALU" -p - EXIST_FILES rocprof-device-0-GRBM_COUNT.txt rocprof-device-0-TA_TA_BUSY.txt - rocprof-device-0-SQ_INSTS_VALU.txt rocprof-device-0-SQ_WAVES.txt + ARGS --counter-names ${ROCPROFSYS_COUNTER_NAMES_ARG} -p + EXIST_FILES ${ROCPROFSYS_FILE_CHECKS} LABELS "rocprofiler") endif() diff --git a/tests/rocprof-sys-testing.cmake b/tests/rocprof-sys-testing.cmake index 4d3467f9b5..a72ddaf857 100644 --- a/tests/rocprof-sys-testing.cmake +++ b/tests/rocprof-sys-testing.cmake @@ -304,33 +304,6 @@ endmacro() # -------------------------------------------------------------------------------------- # -# Define the function to check for a specific GPU 
-function(check_gpu gpu_name return_var) - # Run the rocminfo command and capture the output - execute_process( - COMMAND bash -c "rocminfo | grep ${gpu_name}" - OUTPUT_VARIABLE ROCMINFO_OUTPUT - RESULT_VARIABLE ROCMINFO_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX MATCH "${gpu_name}" gpu_matches "${ROCMINFO_OUTPUT}") - - # Check if the specified GPU is present - if(ROCMINFO_RESULT EQUAL 0 AND gpu_matches) - message(STATUS "${gpu_name} GPU detected") - set(${return_var} - TRUE - PARENT_SCOPE) - else() - message(STATUS "${gpu_name} GPU not detected") - set(${return_var} - FALSE - PARENT_SCOPE) - endif() -endfunction() - -# -------------------------------------------------------------------------------------- # - function(ROCPROFILER_SYSTEMS_WRITE_TEST_CONFIG _FILE _ENV) set(_ENV_ONLY "ROCPROFSYS_(CI|CI_TIMEOUT|MODE|USE_MPIP|DEBUG_[A-Z_]+|FORCE_ROCPROFILER_INIT|DEFAULT_MIN_INSTRUCTIONS|MONOCHROME|VERBOSE)=" @@ -376,6 +349,73 @@ ${_FILE_CONTENTS} PARENT_SCOPE) endfunction() +# -------------------------------------------------------------------------------------- # +# Check GPU architectures on the system. If a regex is provided, it will be used to filter +# the architectures. Otherwise, all architectures will be returned. Uses rocminfo to get +# the architectures. 
+function(ROCPROFILER_SYSTEMS_GET_GFX_ARCHS _VAR)
+    # ECHO: print the results; PREFIX/DELIM: message prefix and list separator;
+    # GFX_MATCH: regex selecting which of the detected architectures to return.
+    cmake_parse_arguments(ARG "ECHO" "PREFIX;DELIM;GFX_MATCH" "" ${ARGN})
+    if(NOT DEFINED ARG_DELIM)
+        set(ARG_DELIM ", ")
+    endif()
+    if(NOT DEFINED ARG_PREFIX)
+        set(ARG_PREFIX "[${PROJECT_NAME}] ")
+    endif()
+
+    find_program(
+        rocminfo_EXECUTABLE
+        NAMES rocminfo
+        HINTS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
+        PATHS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm
+        PATH_SUFFIXES bin)
+
+    # ${_VAR} is only set when rocminfo is found and exits with status 0
+    if(rocminfo_EXECUTABLE)
+        execute_process(
+            COMMAND ${rocminfo_EXECUTABLE}
+            RESULT_VARIABLE rocminfo_RET
+            OUTPUT_VARIABLE rocminfo_OUT
+            ERROR_VARIABLE rocminfo_ERR
+            OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE)
+
+        if(rocminfo_RET EQUAL 0)
+            # Collect every unique gfx<hex> token printed by rocminfo
+            string(REGEX MATCHALL "gfx([0-9A-Fa-f]+)" rocminfo_GFXINFO "${rocminfo_OUT}")
+            list(REMOVE_DUPLICATES rocminfo_GFXINFO)
+            set(${_VAR}
+                "${rocminfo_GFXINFO}"
+                PARENT_SCOPE)
+            if(ARG_ECHO)
+                string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${rocminfo_GFXINFO}")
+                message(STATUS "${ARG_PREFIX}System architectures: ${_GFXINFO_ECHO}")
+            endif()
+
+            # Keep every architecture matching the regex (empty list if none match)
+            if(ARG_GFX_MATCH)
+                set(_GFX_MATCH "${rocminfo_GFXINFO}")
+                list(FILTER _GFX_MATCH INCLUDE REGEX "${ARG_GFX_MATCH}")
+                set(${_VAR}
+                    "${_GFX_MATCH}"
+                    PARENT_SCOPE)
+                if(ARG_ECHO)
+                    string(REPLACE ";" "${ARG_DELIM}" _GFXINFO_ECHO "${_GFX_MATCH}")
+                    message(
+                        STATUS
+                        "${ARG_PREFIX}System architectures (filtered: ${ARG_GFX_MATCH}): ${_GFXINFO_ECHO}"
+                    )
+                endif()
+            endif()
+        else()
+            message(
+                AUTHOR_WARNING
+                "${rocminfo_EXECUTABLE} failed with error code ${rocminfo_RET}\nstderr:\n${rocminfo_ERR}\nstdout:\n${rocminfo_OUT}"
+            )
+        endif()
+    endif()
+endfunction()
+
 # -------------------------------------------------------------------------------------- #
 # extends the timeout when sanitizers are used due to slowdown
 function(ROCPROFILER_SYSTEMS_ADJUST_TIMEOUT_FOR_SANITIZER _VAR)