From ae8f72fa797f6eb4d45e94157a432c56e047e36b Mon Sep 17 00:00:00 2001 From: vedithal-amd Date: Tue, 18 Nov 2025 23:34:38 -0500 Subject: [PATCH] [rocprofiler-compute] Use native tool for counter collection (#1212) * Use native tool for counter collection * Add native counter collection tool which uses rocprofiler-sdk C++ library public API to get counter collection data * This is enabled by default, unless --no-native-tool option is provided or ROCPROF=rocprofv3 env. var. is provided * This tool is only supported for ROCm version >=7.x.x * This tool is not supported for attach/detach scenario * Build native tool shared object during build time * If using rocprof-compute without building then runtime compilation of the native tool shared object is performed * rocprofiler-sdk tool is still used for services other than counter collection and data collected by native tool is merged into the rocpd/csv output of rocprofiler-sdk tool * Make `rocpd` choice the default choice for `--format-rocprof-output` option * If `rocpd` public API from rocprofiler-sdk library is not present, then fallback to `csv` choice * In this case only `pmc_perf.csv` is written in workload folder instead of multiple `csv` files for each profiling run * Remove `json` choice from `--format-rocprof-output` option since it functions identically to `csv` option * Rename option `--rocprofiler-sdk-library-path` to `--rocprofiler-sdk-tool-path` since we LD_PRELOAD the rocprofiler-sdk tool shared object and not the rocprofiler-sdk library shared object * Fix the meaning of `--dispatch` option in `profile` mode to mention dispatch iteration filtering instead of dispatch id filtering * --dispatch option in analyze mode does dispatch id filtering * Move standalone binary creation logic from cmake file to docker file * fix native counter collection tool during attach/detach * improve logging * fix attach detach with native tool * fix attach detach with native tool * do not support attach/detach in native 
tool * Update changelog * add standalone binary creation functionality in cmake * address review comments * address review comments * fix formatting * address review comments * Adding paths for cmake to search. Also updated min. cmake requirement to 3.21 as this was when hip was supported. Signed-off-by: Carrie Fallows * Update hip compiler ID check, sometimes comes up as Clang, sometimes ROCMClang- depends on setup. Updated formatting. Signed-off-by: Carrie Fallows * RHEL8.10 unable to compile due to defaulting to old c++ version, need to force c++17 Signed-off-by: Carrie Fallows * Updating changelog per docs team recommendations Signed-off-by: Carrie Fallows * Apply suggestions from code review to changelog Co-authored-by: Pratik Basyal * Do not require HIP compiler to build native counter collection tool * fix cmake * gersemi formatting on latest cmake change Signed-off-by: Carrie Fallows * ex ci updated dependencies to include rocprofiler-sdk, but cmake was still not capturing the path- there was a commit that added to the cmake_prefix_path entry that specified rocprof-sdk's cmake location but was too specific for the search paths in find_package's config mode. 
removing the cmake_prefix_path var and adding hints to find_package call instead, and specifying config mode so it knows how to construct the search paths Signed-off-by: Carrie Fallows * gersemi run for formatting Signed-off-by: Carrie Fallows * Still need prefix path, should not have been removed in last commit but does need to be shortened to just the rocm path to allow for find_package config mode to do the job Signed-off-by: Carrie Fallows * include cstdint for uint32_t * Run formatting on helper.cpp Signed-off-by: Carrie Fallows * Remove rocm 7.2 release stuff from version and changelog and handle it in separate pr * fix version * fix changelog * fix changelog * run ruff formatter Signed-off-by: Carrie Fallows * fix rocprofiler-sdk attach so path --------- Signed-off-by: Carrie Fallows Co-authored-by: Carrie Fallows Co-authored-by: Pratik Basyal --- .../rocprofiler-compute-formatting.yml | 27 + projects/rocprofiler-compute/CHANGELOG.md | 24 + projects/rocprofiler-compute/CMakeLists.txt | 33 +- .../docker/Dockerfile.standalone | 29 +- .../docs/archive/docs-1.x/getting_started.md | 2 +- .../docs/archive/docs-1.x/profiling.md | 2 +- .../docs/archive/docs-2.x/getting_started.md | 2 +- .../docs/archive/docs-2.x/profiling.md | 2 +- .../docs/how-to/profile/mode.rst | 2 +- .../rocprofiler-compute/docs/how-to/use.rst | 2 +- projects/rocprofiler-compute/src/argparser.py | 42 +- .../src/lib/CMakeLists.txt | 17 + .../rocprofiler-compute/src/lib/helper.cpp | 145 +++++ .../rocprofiler-compute/src/lib/helper.hpp | 31 + .../src/lib/rocprofiler_compute_tool.cpp | 613 ++++++++++++++++++ .../src/rocprof_compute_base.py | 26 +- .../rocprof_compute_profile/profiler_base.py | 74 ++- .../profiler_rocprof_v3.py | 10 +- .../profiler_rocprofiler_sdk.py | 58 +- .../src/rocprof_compute_soc/soc_base.py | 72 +- .../src/utils/rocpd_data.py | 58 ++ .../rocprofiler-compute/src/utils/utils.py | 396 ++++++----- .../rocprofiler-compute/tests/conftest.py | 21 +- .../tests/test_profile_general.py 
| 281 ++++---- .../rocprofiler-compute/tests/test_utils.py | 203 ++---- 25 files changed, 1599 insertions(+), 573 deletions(-) create mode 100644 projects/rocprofiler-compute/src/lib/CMakeLists.txt create mode 100644 projects/rocprofiler-compute/src/lib/helper.cpp create mode 100644 projects/rocprofiler-compute/src/lib/helper.hpp create mode 100644 projects/rocprofiler-compute/src/lib/rocprofiler_compute_tool.cpp diff --git a/.github/workflows/rocprofiler-compute-formatting.yml b/.github/workflows/rocprofiler-compute-formatting.yml index 6042a177c6..92288a7b3a 100644 --- a/.github/workflows/rocprofiler-compute-formatting.yml +++ b/.github/workflows/rocprofiler-compute-formatting.yml @@ -80,6 +80,33 @@ jobs: exit 1 fi + cxx: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: projects/rocprofiler-compute + - name: Install dependencies + working-directory: projects/rocprofiler-compute + run: | + sudo apt-get update + sudo apt-get install -y python3-pip + python3 -m pip install clang-format + - name: clang-format + working-directory: projects/rocprofiler-compute + run: | + set +e + clang-format -i $(find src -type f | egrep '\.(h|hpp|hh|c|cc|cpp)(|\.in)$') + if [ $(git diff | wc -l) -gt 0 ]; then + echo -e "\nError! cxx code not formatted. Run clang-format...\n" + echo -e "\nFiles:\n" + git diff --name-only + echo -e "\nFull diff:\n" + git diff + exit 1 + fi + python-bytecode: runs-on: ubuntu-22.04 diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index d9ba678114..ec46b6848d 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -4,6 +4,30 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. ## Unreleased +### Added + +* Native tool to perform counter collection using ROCprofiler-SDK public API. It is only supported for ROCm version 7.0.0 (and later). 
+ * Native tool is now the default for counter collection. + * Native tool for counter collection will not be used under the following conditions: + * Specific profiler is provided through the ``ROCPROF`` environment variable. + * ``--no-native-tool`` option is provided, forcing usage of the default profiler. + * When performing a dynamic attach to a process for profiling. + +### Changed + +* Default output format for the underlying ROCprofiler-SDK tool has been changed from ``csv`` to ``rocpd``. + * If the ROCprofiler-SDK ``rocpd`` public library is not available, will fall back to ``csv`` format + +* Option ``--rocprofiler-sdk-library-path`` has been changed to ``--rocprofiler-sdk-tool-path`` to better reflect the fact that we provide flexibility in choosing the path to ROCprofiler-SDK tool and not the library. + +### Resolved issues + +* Fixed the meaning of --dispatch option in profile mode in argparser to convey the fact that it controls which iterations of the kernel to profile and not which dispatch ids to profile. 
+ +* The meaning of --dispatch option in analyze is still the same which is which dispatch ids to analyze + +* Fix the functioning of --dispatch option to act as 1-based index and ensure that correct kernel iterations are being profiled + ## ROCm Compute Profiler 3.4.0 for ROCm 7.2.0 ### Added diff --git a/projects/rocprofiler-compute/CMakeLists.txt b/projects/rocprofiler-compute/CMakeLists.txt index 30b10dd2cc..6b81527e8b 100644 --- a/projects/rocprofiler-compute/CMakeLists.txt +++ b/projects/rocprofiler-compute/CMakeLists.txt @@ -1,4 +1,8 @@ -cmake_minimum_required(VERSION 3.19 FATAL_ERROR) +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) + +# Set cmake_prefix_path for searching, ROCM_PATH if avail otherwise default to general rocm install path +set(CMAKE_PREFIX_PATH $ENV{ROCM_PATH} "/opt/rocm/") +message(STATUS "ROCM_PATH: $ENV{ROCM_PATH}") if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR @@ -36,7 +40,7 @@ string( project( rocprofiler-compute VERSION ${ROCPROFCOMPUTE_VERSION} - LANGUAGES C + LANGUAGES CXX DESCRIPTION "A kernel-level profiling tool for machine learning/HPC workloads running on AMD MI GPUs" HOMEPAGE_URL @@ -471,6 +475,11 @@ if(${ENABLE_COVERAGE}) ) endif() +# ------------------- +# Setup tool library +# ------------------- +add_subdirectory(src/lib) + # --------- # Install # --------- @@ -563,6 +572,12 @@ install( COMPONENT main ) +#install librocprofiler-compute-tool.so +install( + TARGETS rocprofiler-compute-tool + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rocprofiler-compute COMPONENT main +) + # top-level symlink for bin/rocprof-compute install( CODE @@ -590,21 +605,25 @@ add_custom_target( add_custom_target( standalonebinary # Change working directory to src - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + # Install nuitka + COMMAND ${Python3_EXECUTABLE} -m pip install nuitka + # Install patchelf + COMMAND ${Python3_EXECUTABLE} -m pip install patchelf # Check nuitka COMMAND ${Python3_EXECUTABLE} -m 
pip list | grep -i nuitka > /dev/null 2>&1 # Check patchelf COMMAND ${Python3_EXECUTABLE} -m pip list | grep -i patchelf > /dev/null 2>&1 # Create VERSION.sha file - COMMAND git -C ${PROJECT_SOURCE_DIR} rev-parse HEAD > VERSION.sha + COMMAND git rev-parse HEAD > VERSION.sha # Build standalone binary # NOTE: --no-deployment-flag=self-execution is used to avoid self-execution - # and fork - # bombs as explained in + # and fork bombs as explained in # https://nuitka.net/user-documentation/common-issue-solutions.html#fork-bombs-self-execution COMMAND ${Python3_EXECUTABLE} -m nuitka --mode=onefile --no-deployment-flag=self-execution --include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt + --include-data-files=src/lib/rocprofiler_compute_tool.cpp=lib/rocprofiler_compute_tool.cpp --include-package=dash_svg --include-package-data=dash_svg --include-package=dash_bootstrap_components --include-package-data=dash_bootstrap_components --include-package=plotly @@ -615,7 +634,7 @@ add_custom_target( --include-package-data=rocprof_compute_profile --include-package=rocprof_compute_tui --include-package-data=rocprof_compute_tui --include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc - --include-package=utils --include-package-data=utils rocprof-compute + --include-package=utils --include-package-data=utils src/rocprof-compute # Remove library rpath from executable COMMAND patchelf --remove-rpath rocprof-compute.bin # Move to build directory diff --git a/projects/rocprofiler-compute/docker/Dockerfile.standalone b/projects/rocprofiler-compute/docker/Dockerfile.standalone index cb1e3b07f4..a2bf836ba5 100644 --- a/projects/rocprofiler-compute/docker/Dockerfile.standalone +++ b/projects/rocprofiler-compute/docker/Dockerfile.standalone @@ -1,8 +1,8 @@ FROM redhat/ubi8:8.10 -WORKDIR /app +WORKDIR /app/projects/rocprofiler-compute -RUN yum install -y curl gcc cmake git +RUN yum install -y curl git cmake gcc-c++ # Allows running git commands in 
/app RUN git config --global --add safe.directory /app @@ -14,10 +14,25 @@ RUN yum install -y python39 python39-devel && \ python3 get-pip.py CMD ["/bin/bash", "-c", "\ - cd /app/projects/rocprofiler-compute \ - && python3 -m pip install -r requirements.txt \ + python3 -m pip install -r requirements.txt \ && python3 -m pip install nuitka patchelf \ - && rm -rf build \ - && cmake -B build -S . \ - && make -C build standalonebinary \ + && git rev-parse HEAD > VERSION.sha \ + && python3 -m nuitka --mode=onefile --no-deployment-flag=self-execution \ + --enable-plugin=no-qt \ + --include-data-files=VERSION*=./ \ + --include-data-files=src/lib/rocprofiler_compute_tool.cpp=lib/rocprofiler_compute_tool.cpp \ + --include-package=dash_svg --include-package-data=dash_svg \ + --include-package=dash_bootstrap_components \ + --include-package-data=dash_bootstrap_components \ + --include-package=plotly --include-package-data=plotly \ + --include-package=kaleido --include-package-data=kaleido \ + --include-package=rocprof_compute_analyze \ + --include-package-data=rocprof_compute_analyze \ + --include-package=rocprof_compute_profile \ + --include-package-data=rocprof_compute_profile \ + --include-package=rocprof_compute_tui --include-package-data=rocprof_compute_tui \ + --include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc \ + --include-package=utils --include-package-data=utils \ + src/rocprof-compute \ + && patchelf --remove-rpath rocprof-compute.bin \ "] diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md index 1ee28a496a..67633415ad 100644 --- a/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/getting_started.md @@ -25,7 +25,7 @@ Some common filters include: - - `-k`/`--kernel` enables filtering kernels by name. 
`-d`/`--dispatch` enables filtering based on dispatch ID + - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch iteration - `-b`/`--ipblocks` enables collects metrics for only the specified (one or more) IP Blocks. To view available metrics by IP Block you can use the `--list-metrics` argument to view a list of all available metrics organized by IP Block. diff --git a/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md index 61827add37..c5835f88a8 100644 --- a/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md +++ b/projects/rocprofiler-compute/docs/archive/docs-1.x/profiling.md @@ -90,7 +90,7 @@ Profile Options: SPI CPC CPF - -d [ ...], --dispatch [ ...] Dispatch ID filtering. + -d [ ...], --dispatch [ ...] Dispatch iteration filtering. --no-roof Profile without collecting roofline data. -- [ ...] Provide command for profiling after double dash. diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md index 9c8740de19..87a6f7db50 100644 --- a/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/getting_started.md @@ -30,7 +30,7 @@ Some common filters include: - `-k`/`--kernel` enables filtering kernels by name. - - `-d`/`--dispatch` enables filtering based on dispatch ID. + - `-d`/`--dispatch` enables filtering based on dispatch iteration. - `-b`/`--block` enables collects metrics for only the specified (one or more) hardware component blocks. 
To view available metrics by hardware Block you can use the `--list-metrics` argument: diff --git a/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md b/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md index f79a055b24..948ec85b85 100644 --- a/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md +++ b/projects/rocprofiler-compute/docs/archive/docs-2.x/profiling.md @@ -191,7 +191,7 @@ Filtering Options: - The `-k` / `--kernel` \ flag allows for kernel filtering. Usage is equivalent with the current rocProf utility ([see details below](#kernel-filtering)). -- The `-d` / `--dispatch` \ flag allows for dispatch ID filtering. Usage is equivalent with the current rocProf utility ([see details below](#dispatch-filtering)). +- The `-d` / `--dispatch` \ flag allows for dispatch iteration filtering. Usage is equivalent with the current rocProf utility ([see details below](#dispatch-filtering)). - The `-b` / `--block` \ flag allows system profiling on one or more selected hardware components to speed up the profiling process ([see details below](#hardware-component-filtering)). diff --git a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst index 4d08311e82..47397da4c1 100644 --- a/projects/rocprofiler-compute/docs/how-to/profile/mode.rst +++ b/projects/rocprofiler-compute/docs/how-to/profile/mode.rst @@ -271,7 +271,7 @@ Filtering options utility. See :ref:`profiling-kernel-filtering`. ``-d``, ``--dispatch `` - Allows for dispatch ID filtering. Usage is equivalent with the current + Allows for dispatch iteration filtering. Usage is equivalent with the current ``rocprof`` utility. See :ref:`profiling-dispatch-filtering`. 
``--set `` diff --git a/projects/rocprofiler-compute/docs/how-to/use.rst b/projects/rocprofiler-compute/docs/how-to/use.rst index 94d269c6b8..4ab1384562 100644 --- a/projects/rocprofiler-compute/docs/how-to/use.rst +++ b/projects/rocprofiler-compute/docs/how-to/use.rst @@ -54,7 +54,7 @@ Common filters to customize data collection include: Enables filtering kernels by name. ``-d``, ``--dispatch`` - Enables filtering based on dispatch ID. + Enables filtering based on dispatch iteration. ``-b``, ``--block`` Enables collection metrics for only the specified analysis report blocks. diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 826841e7ca..5036481093 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -178,7 +178,10 @@ Examples: metavar="", default=None, required=False, - help="\t\t\tProcess id to be attached for profiling.", + help=( + "\t\t\tProcess id to be attached for profiling.\n" + "\t\t\tImplies --no-native-tool" + ), ) profile_group.add_argument( "--attach-duration-msec", @@ -188,9 +191,9 @@ Examples: default=None, required=False, help=( - "\t\t\tWhen --attach-pid is used, it specifies the attach duration " - "in milliseconds. If not set, detachment occurs when " - '"Enter" key is pressed.' + "\t\t\tWhen --attach-pid is used, it specifies the attach duration\n" + "\t\t\tin milliseconds. If not set, detachment occurs when\n" + '\t\t\t"Enter" key is pressed.' ), ) profile_group.add_argument( @@ -255,7 +258,10 @@ Examples: nargs="+", dest="dispatch", required=False, - help="\t\t\tDispatch ID filtering.", + help=( + "\t\t\tWhich dispatch iterations of the kernel to filter \n" + "\t\t\t(e.g. 1 3:5 captures 1st, 3rd, 4th and 5th iterations)." 
+ ), ) profile_group.add_argument( @@ -342,8 +348,8 @@ Examples: metavar="", dest="format_rocprof_output", choices=["csv", "rocpd"], - default="csv", - help="\t\t\tSet the format of output file of rocprof.", + default="rocpd", + help=("\t\t\tSet the format of output file of rocprof."), ) profile_group.add_argument( "--pc-sampling-method", @@ -370,14 +376,28 @@ Examples: ), ) profile_group.add_argument( - "--rocprofiler-sdk-library-path", + "--rocprofiler-sdk-tool-path", type=str, - dest="rocprofiler_sdk_library_path", + dest="rocprofiler_sdk_tool_path", required=False, default=str( - Path(os.getenv("ROCM_PATH", "/opt/rocm")) / "lib/librocprofiler-sdk.so" + Path(os.getenv("ROCM_PATH", "/opt/rocm")) + / "lib/rocprofiler-sdk/librocprofiler-sdk-tool.so" + ), + help="\t\t\tSet the path to rocprofiler-sdk tool.", + ) + profile_group.add_argument( + "--no-native-tool", + required=False, + default=False, + action="store_true", + help=( + "\t\t\tDo not use the native counter collection tool.\n" + "\t\t\tNative tool is not used if ROCPROF env. var. 
is set " + "and not equal to rocprofiler-sdk.\n" + "\t\t\tNative tool is not used for ROCm version < 7.x.x.\n" + "\t\t\tNative tool is not used attach/detach scenario" ), - help="\t\t\tSet the path to rocprofiler SDK library.", ) profile_group.add_argument( "--retain-rocpd-output", diff --git a/projects/rocprofiler-compute/src/lib/CMakeLists.txt b/projects/rocprofiler-compute/src/lib/CMakeLists.txt new file mode 100644 index 0000000000..5e4afa7a15 --- /dev/null +++ b/projects/rocprofiler-compute/src/lib/CMakeLists.txt @@ -0,0 +1,17 @@ +# Minimum required c++ standard is 17 for compilation +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Set cmake_prefix_path for searching, ROCM_PATH if avail otherwise default to general rocm install path +set(CMAKE_PREFIX_PATH $ENV{ROCM_PATH} "/opt/rocm/") + +find_package( + rocprofiler-sdk + HINTS $ENV{ROCM_PATH}/lib/cmake /opt/rocm/lib/cmake + CONFIG + REQUIRED +) + +add_library(rocprofiler-compute-tool SHARED) +target_sources(rocprofiler-compute-tool PRIVATE rocprofiler_compute_tool.cpp helper.cpp) +target_link_libraries(rocprofiler-compute-tool PRIVATE rocprofiler-sdk::rocprofiler-sdk) diff --git a/projects/rocprofiler-compute/src/lib/helper.cpp b/projects/rocprofiler-compute/src/lib/helper.cpp new file mode 100644 index 0000000000..1f5d05f8b9 --- /dev/null +++ b/projects/rocprofiler-compute/src/lib/helper.cpp @@ -0,0 +1,145 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "helper.hpp" + +#include +#include +#include +#include + +namespace helper_utils { + +// The function extracts the kernel name from +// input string. By using the iterators it finds the +// window in the string which contains only the kernel name. 
+// For example 'Foo::foo(a[], int (int))' -> 'foo' +std::string truncate_name(std::string_view name) { + auto rit = name.rbegin(); + auto rend = name.rend(); + uint32_t counter = 0; + char open_token = 0; + char close_token = 0; + while (rit != rend) { + if (counter == 0) { + switch (*rit) { + case ')': + counter = 1; + open_token = ')'; + close_token = '('; + break; + case '>': + counter = 1; + open_token = '>'; + close_token = '<'; + break; + case ']': + counter = 1; + open_token = ']'; + close_token = '['; + break; + case ' ': + ++rit; + continue; + } + if (counter == 0) + break; + } else { + if (*rit == open_token) + counter++; + if (*rit == close_token) + counter--; + } + ++rit; + } + auto rbeg = rit; + while ((rit != rend) && (*rit != ' ') && (*rit != ':')) + rit++; + return std::string{name.substr(rend - rit, rit - rbeg)}; +} + +std::string cxa_demangle(std::string_view _mangled_name, int *_status) { + // return the mangled since there is no buffer + if (_mangled_name.empty()) { + *_status = -2; + return std::string{}; + } + + auto _demangled_name = std::string{_mangled_name}; + + // PARAMETERS to __cxa_demangle + // mangled_name: + // A NULL-terminated character string containing the name to be + // demangled. + // buffer: + // A region of memory, allocated with malloc, of *length bytes, into + // which the demangled name is stored. If output_buffer is not long + // enough, it is expanded using realloc. output_buffer may instead be + // NULL; in that case, the demangled name is placed in a region of memory + // allocated with malloc. + // _buflen: + // If length is non-NULL, the length of the buffer containing the + // demangled name is placed in *length. + // status: + // *status is set to one of the following values + size_t _demang_len = 0; + char *_demang = abi::__cxa_demangle(_demangled_name.c_str(), nullptr, + &_demang_len, _status); + switch (*_status) { + // 0 : The demangling operation succeeded. + // -1 : A memory allocation failure occurred. 
+ // -2 : mangled_name is not a valid name under the C++ ABI mangling rules. + // -3 : One of the arguments is invalid. + case 0: { + if (_demang) + _demangled_name = std::string{_demang}; + break; + } + case -1: { + std::clog << "[rocprofiler-compute] memory allocation failure occurred " + "demangling " + << _demangled_name << std::endl; + break; + } + case -2: { + break; + } + case -3: { + std::clog << "[rocprofiler-compute] Invalid argument in: (\"" + << _demangled_name << "\", nullptr, nullptr, " + << static_cast(_status) << ")" << std::endl; + break; + } + default: + break; + }; + + // if it "demangled" but the length is zero, set the status to -2 + if (_demang_len == 0 && *_status == 0) + *_status = -2; + + // free allocated buffer + ::free(_demang); + return _demangled_name; +} + +} // namespace helper_utils diff --git a/projects/rocprofiler-compute/src/lib/helper.hpp b/projects/rocprofiler-compute/src/lib/helper.hpp new file mode 100644 index 0000000000..26a3f17764 --- /dev/null +++ b/projects/rocprofiler-compute/src/lib/helper.hpp @@ -0,0 +1,31 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include + +namespace helper_utils { + +std::string truncate_name(std::string_view name); +std::string cxa_demangle(std::string_view _mangled_name, int *_status); + +} // namespace helper_utils \ No newline at end of file diff --git a/projects/rocprofiler-compute/src/lib/rocprofiler_compute_tool.cpp b/projects/rocprofiler-compute/src/lib/rocprofiler_compute_tool.cpp new file mode 100644 index 0000000000..5b2f311846 --- /dev/null +++ b/projects/rocprofiler-compute/src/lib/rocprofiler_compute_tool.cpp @@ -0,0 +1,613 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/* +This is a native tool for rocprofiler-compute to collect counters data for GPU +kernel dispatches using the rocprofiler-sdk public API. This C++ tool is +compiled into a shared object with hipcc/amdclang++ and dynamically links to the +rocprofiler-sdk library. The shared object is injected using the LD_PRELOAD +environment variable so that rocprofiler-sdk services can be configured before +the GPU workload starts executing. + +An experimental feature for attach/detach scenarios is also provided. + +Code Flow: + +1. Entry point - rocprofiler_configure(): + - Parses ROCPROF environment variables to configure profiling. + - Sets up tool metadata and logging. + - Returns pointers to tool_init() and tool_fini() functions. + +2. Tool Initialization - tool_init(): + - Creates a profiling context. + - Subscribes to dispatch tracing and counting services by providing function +callbacks. + - Starts the profiling context. + +3. Kernel registration callback - tool_tracing_callback(): + - Invoked when a kernel is registered. + - Stores the kernel name to kernel id mapping. + - Determines which kernel names/ids to target for profiling based on ROCPROF +environment variables. + +4. Kernel dispatch callback - dispatch_callback(): + - Invoked before a kernel dispatch is enqueued. + - Decides whether to profile this dispatch. + - If profiling is required, creates or fetches from cache a counter profile +for the agent and returns a pointer to it. + - The counter profile dictates which counters to collect for this dispatch. + +5. Kernel dispatch record callback - record_callback(): + - Invoked after a kernel dispatch is completed. + - Receives the collected counter records. 
+ - Stores the counter records in tool data for later processing. + +6. Tool Finalization - tool_fini(): + - Called when the application is terminating. + - Stops the profiling context. + - Processes and writes the collected counter records to the output file. + - Cleans up resources. +*/ + +#include "helper.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if (CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " \ + << msg << " failed with error code " << CHECKSTATUS << ": " \ + << status_msg << std::endl; \ + std::stringstream errmsg{}; \ + errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " \ + << msg " failure (" << status_msg << ")"; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + +namespace { + +// Struct to store a single counter info record +struct counter_info_record_t { + uint64_t dispatch_id; + uint64_t kernel_id; + uint64_t counter_id; + std::string counter_name; + double counter_value; +}; + +// Tool data struct, now includes a vector of counter_info_record_t +struct tool_data_t { + std::mutex mut{}; + std::unique_ptr output_stream{nullptr}; + std::unordered_map counter_id_name_map{}; + std::string requested_counters{}; + std::string kernel_filter_include_regex{}; + std::vector> kernel_filter_ranges{}; + std::vector counter_records; + std::set target_kernel_ids{}; +}; + +using kernel_symbol_data_t = + rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; + +rocprofiler_context_id_t &get_client_ctx() { + static rocprofiler_context_id_t ctx{0}; + return ctx; +} + +void record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data, + rocprofiler_counter_record_t 
*record_data, + size_t record_count, + rocprofiler_user_data_t /* user_data */, + void *callback_data_args) { + auto *tool_data_ptr = + static_cast *>(callback_data_args); + tool_data_t *tool; + { + std::lock_guard lock(tool_data_ptr->get()->mut); + tool = tool_data_ptr->get(); + } + + // For each counter, write: dispatch_id, counter_id, counter_name, + // counter_value + for (size_t i = 0; i < record_count; ++i) { + rocprofiler_counter_id_t counter_id{}; + ROCPROFILER_CALL( + rocprofiler_query_record_counter_id(record_data[i].id, &counter_id), + "query record counter id"); + + // Store the counter info record in tool_data + counter_info_record_t record{dispatch_data.dispatch_info.dispatch_id, + dispatch_data.dispatch_info.kernel_id, + counter_id.handle, + tool->counter_id_name_map[counter_id.handle], + record_data[i].counter_value}; + { + std::lock_guard lock(tool->mut); + tool->counter_records.push_back(std::move(record)); + } + } +} + +/** + * Callback from rocprofiler when a code object is loaded. + * We use this to get record kernel names as they are registered. 
+ */ +void tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t * /*user_data*/, + void *callback_data) { + if (record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD && + record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == + ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) { + auto *data = static_cast(record.payload); + int demangle_status = 0; + auto kernel_name = + helper_utils::cxa_demangle(data->kernel_name, &demangle_status); + kernel_name = helper_utils::truncate_name(kernel_name); + + // check if regex can be found in kernel name matches regex from tool data, + // if matches store kernel id + auto *tool_data_ptr = + static_cast *>(callback_data); + auto *tool = tool_data_ptr->get(); + // Lock before modifying target_kernel_ids + std::lock_guard lock(tool->mut); + if (!tool->kernel_filter_include_regex.empty()) { + try { + std::regex re(tool->kernel_filter_include_regex); + if (!kernel_name.empty() && std::regex_search(kernel_name, re)) { + tool->target_kernel_ids.insert(data->kernel_id); + } + } catch (const std::regex_error &e) { + std::cerr + << "[rocprofiler-compute] [" << __FUNCTION__ + << "] ERROR: Invalid regex in ROCPROF_KERNEL_FILTER_INCLUDE_REGEX: " + << tool->kernel_filter_include_regex << " : " << e.what() + << std::endl; + } + } + // If no regex specified, collect for all kernels + else { + tool->target_kernel_ids.insert(data->kernel_id); + } + } +} + +/** + * Checks if the given kernel dispatch should be targeted for profiling. + * Returns true if the kernel_id is in the set of target_kernel_ids (if + * non-empty), and if the kernel_iteration (1-based index) matches the + * kernel_filter_range (if specified). + * + * @param tool Pointer to the tool_data_t structure containing profiling + * configuration. + * @param kernel_id The kernel ID of the dispatch. + * @param kernel_iteration The 1-based index of this kernel_id's dispatch (first + * dispatch is 1). 
+ * @return true if the dispatch should be profiled, false otherwise. + */ +bool is_targetted_dispatch(const tool_data_t *tool, uint64_t kernel_id, + uint64_t kernel_iteration) { + // A configured include regex must filter even while target_kernel_ids is + // still empty; otherwise a regex matching nothing profiles every kernel + const bool name_filter_active = !tool->kernel_filter_include_regex.empty() || + !tool->target_kernel_ids.empty(); + if (name_filter_active && !tool->target_kernel_ids.count(kernel_id)) + return false; + + // Iteration filter: profile only iterations inside a configured range + if (!tool->kernel_filter_ranges.empty()) + return std::any_of(tool->kernel_filter_ranges.begin(), + tool->kernel_filter_ranges.end(), + [kernel_iteration](const auto &range) { + return kernel_iteration >= range.first && + kernel_iteration <= range.second; + }); + + // No iteration ranges configured, so every remaining dispatch is profiled + return true; +} + +/** + * @brief Creates a counter collection profile for performance monitoring on a + * specific GPU agent. + * + * This function parses the requested counters from the tool configuration, + * validates them against the counters supported by the target GPU agent, and + * creates a rocprofiler counter configuration for collecting the available + * requested counters during dispatch profiling. + * + * @param tool Pointer to tool data containing the requested counters string and + * counter mappings + * @param dispatch_data Dispatch counting service data containing agent + * information for the target GPU + * + * @return rocprofiler_counter_config_id_t A valid counter configuration profile + * ID that can be used for counter collection, or an invalid profile (handle = + * 0) if creation fails + * + * @details + * The function performs the following operations: + * 1. Parses the requested counters from tool->requested_counters string + * (format: "prefix:counter1 counter2 ...") + * 2. Queries all counters supported by the specified GPU agent + * 3. Filters the supported counters to match only those requested + * 4. 
Logs warnings for any requested counters that are not supported by the + * agent + * 5. Creates and returns a rocprofiler counter configuration for the valid + * counters + * 6. Updates the tool's counter ID to name mapping for later reference + * + * @note If no counters are requested or none of the requested counters are + * supported, an empty profile may be created. Unsupported counters are logged + * as warnings but do not cause the function to fail. + */ +rocprofiler_counter_config_id_t create_counter_collection_profile( + tool_data_t *tool, + rocprofiler_dispatch_counting_service_data_t dispatch_data) { + // get counters to collect + std::set counters_to_collect; + const std::string &counters_str = tool->requested_counters; + if (!counters_str.empty()) { + auto pos = counters_str.find(':'); + if (pos != std::string::npos) { + std::istringstream ss(counters_str.substr(pos + 1)); + for (std::string token; ss >> token;) + counters_to_collect.insert(token); + } + } + + // Get available counters for this agent + std::vector gpu_counters; + ROCPROFILER_CALL( + rocprofiler_iterate_agent_supported_counters( + dispatch_data.dispatch_info.agent_id, + [](rocprofiler_agent_id_t, rocprofiler_counter_id_t *counters, + size_t num_counters, void *user_data) { + std::vector *vec = + static_cast *>(user_data); + for (size_t i = 0; i < num_counters; i++) { + vec->push_back(counters[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + static_cast(&gpu_counters)), + "fetch supported counters"); + + // Identify counters requested to collect which are available + std::vector collect_counters; + std::vector collect_counters_names; + for (auto &counter : gpu_counters) { + rocprofiler_counter_info_v0_t info; + ROCPROFILER_CALL(rocprofiler_query_counter_info( + counter, ROCPROFILER_COUNTER_INFO_VERSION_0, + static_cast(&info)), + "query counter info"); + if (counters_to_collect.count(std::string(info.name)) > 0) { + collect_counters.push_back(counter); + 
collect_counters_names.push_back(std::string(info.name)); + tool->counter_id_name_map[counter.handle] = std::string(info.name); + } + } + + // Log unsupported counters in a concise, comma-separated line + std::vector unsupported_counters; + for (const auto &requested : counters_to_collect) { + if (std::find(collect_counters_names.begin(), collect_counters_names.end(), + requested) == collect_counters_names.end()) { + unsupported_counters.push_back(requested); + } + } + if (!unsupported_counters.empty()) { + std::clog << "\033[33m[rocprofiler-compute] [" << __FUNCTION__ + << "] WARNING: Requested counters not available: "; + for (size_t i = 0; i < unsupported_counters.size(); ++i) { + std::clog << unsupported_counters[i]; + if (i + 1 < unsupported_counters.size()) + std::clog << ", "; + } + std::clog << "\033[0m" << std::endl; + } + + // Create and return collection profile for the counters + rocprofiler_counter_config_id_t profile = {.handle = 0}; + ROCPROFILER_CALL( + rocprofiler_create_counter_config(dispatch_data.dispatch_info.agent_id, + collect_counters.data(), + collect_counters.size(), &profile), + "construct profile cfg"); + return profile; +} + +/** + * Callback from rocprofiler when an kernel dispatch is enqueued into the HSA + * queue. rocprofiler_counter_config_id_t* is a return to specify what counters + * to collect for this dispatch (dispatch_packet). + * We store profile in a cache to prevent constructing many identical + * profiles. We first check the cache to see if we have already constructed a + * profile for the agent. If we have, return it. Otherwise, construct a new + * profile. 
+ */ +void dispatch_callback( + rocprofiler_dispatch_counting_service_data_t dispatch_data, + rocprofiler_counter_config_id_t *config, + rocprofiler_user_data_t * /*user_data*/, void *callback_data_args) { + + auto kernel_id = dispatch_data.dispatch_info.kernel_id; + + // static map of kernel_id to its dispatch count so far; the updated count + // is this dispatch's 1-based iteration + static std::unordered_map kernel_id_iteration_map{}; + static std::shared_mutex kernel_id_iteration_mutex; + uint64_t kernel_iteration = 0; + { + // Acquire unique lock for update and ensure map is updated correctly + std::unique_lock lock(kernel_id_iteration_mutex); + auto &iter = kernel_id_iteration_map[kernel_id]; + iter += 1; + kernel_iteration = iter; + } + + // static cast tool + auto *tool_data_ptr = + static_cast *>(callback_data_args); + tool_data_t *tool = tool_data_ptr->get(); + + // kernel filtering; tool_tracing_callback inserts into target_kernel_ids + // under tool->mut, so the filter has to read the set under the same mutex + { + std::lock_guard lock(tool->mut); + if (!is_targetted_dispatch(tool, kernel_id, kernel_iteration)) { + return; + } + } + + static std::shared_mutex m_mutex = {}; + static std::unordered_map + profile_cache = {}; + + // check cache for existing profile for this agent + auto search_cache = [&]() { + if (auto pos = + profile_cache.find(dispatch_data.dispatch_info.agent_id.handle); + pos != profile_cache.end()) { + *config = pos->second; + return true; + } + return false; + }; + { + auto rlock = std::shared_lock{m_mutex}; + if (search_cache()) + return; + } + + // get write lock to update cache + auto wlock = std::unique_lock{m_mutex}; + if (search_cache()) + return; + + // cache the profile for this agent + rocprofiler_counter_config_id_t profile = + create_counter_collection_profile(tool, dispatch_data); + profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile); + // Return the profile to collect those counters for this dispatch + *config = profile; +} + +int tool_init(rocprofiler_client_finalize_t, void *user_data) { + std::clog << "[rocprofiler-compute] 
In tool init\n"; + ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), + "context creation"); + + ROCPROFILER_CALL(rocprofiler_configure_callback_dispatch_counting_service( + get_client_ctx(), dispatch_callback, user_data, + record_callback, user_data), + "setup counting service"); + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + get_client_ctx(), + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, + tool_tracing_callback, user_data), + "setup code object tracing service"); + ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), + "start context"); + + return 0; +} + +void generate_output(tool_data_t *tool_data) { + // Dispatches before the kernel to be filtered was registered may have been + // profiled. Remove any records whose kernel id does not match the + // target_kernel_ids + if (!tool_data->target_kernel_ids.empty()) { + tool_data->counter_records.erase( + std::remove_if(tool_data->counter_records.begin(), + tool_data->counter_records.end(), + [tool_data](const counter_info_record_t &record) { + return tool_data->target_kernel_ids.find( + record.kernel_id) == + tool_data->target_kernel_ids.end(); + }), + tool_data->counter_records.end()); + } + + // Write collected counter records and clean up + if (auto &os = tool_data->output_stream) { + for (const auto &r : tool_data->counter_records) + *os << r.dispatch_id << ',' << r.counter_id << ',' << r.counter_name + << ',' << r.counter_value << '\n'; + os->flush(); + } +} + +void tool_fini(void *user_data) { + assert(user_data); + std::clog << "[rocprofiler-compute] In tool fini\n"; + rocprofiler_stop_context(get_client_ctx()); + + auto *tool_data_ptr = static_cast *>(user_data); + generate_output(tool_data_ptr->get()); + + delete tool_data_ptr; +} + +} // namespace + +std::unique_ptr create_tool_data(rocprofiler_client_id_t *id) { + auto tool_data = std::make_unique(); + + // Generate a unique output filename using a random hex string (no libuuid + // dependency) + 
std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, 0xFFFFFFFF); + std::stringstream filename_ss; + filename_ss << std::hex << dis(gen); + std::string base_filename = + "counter_collection_" + filename_ss.str().substr(0, 8) + ".csv"; + + // Require ROCPROF_OUTPUT_PATH to be set, otherwise error out + std::string filename; + const char *output_path = getenv("ROCPROF_OUTPUT_PATH"); + if (!output_path || !*output_path) { + throw std::runtime_error( + "ROCPROF_OUTPUT_PATH environment variable must be set"); + } + filename = output_path; + if (filename.back() != '/') + filename += '/'; + // Use the generated base filename along with ROCPROF_OUTPUT_PATH + filename += base_filename; + + // Set output stream to file + // Set output stream to file + auto ofs = std::make_unique(filename); + if (!ofs->is_open()) { + throw std::runtime_error("Failed to open output file: " + filename); + } + tool_data->output_stream = std::move(ofs); + // Write header at the beginning of the file + *tool_data->output_stream + << "dispatch_id,counter_id,counter_name,counter_value\n"; + tool_data->output_stream->flush(); + + // Write to clog the path of the logging file + std::clog << id->name << " [" << __FUNCTION__ + << "] Logging counter collection to: " << filename << std::endl; + + // Store ROCPROF env. vars. in tool_data + + // ROCPROF_COUNTERS env. var. is a string like "pmc: counter1 counter2 ..." + if (const char *v = getenv("ROCPROF_COUNTERS")) + tool_data->requested_counters = v; + + // ROCPROF_KERNEL_FILTER_INCLUDE_REGEX env. var. is a regex string like + // kernel_name_1|kernel_name_2|... Used to collect counters only for kernels + // with names matching the regex + if (const char *v = getenv("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX")) + tool_data->kernel_filter_include_regex = v; + + // ROCPROF_KERNEL_FILTER_RANGE env. var. 
is a string like "[4,7-9,...]" + if (const char *v = getenv("ROCPROF_KERNEL_FILTER_RANGE")) { + // Remove square brackets at the ends if present + std::string v_str = v; + if (!v_str.empty() && v_str.front() == '[') + v_str.erase(0, 1); + if (!v_str.empty() && v_str.back() == ']') + v_str.pop_back(); + v = v_str.c_str(); + // Parse the range string into vector of pairs + std::istringstream ss(v); + for (std::string token; std::getline(ss, token, ',');) { + size_t dash_pos = token.find('-'); + try { + if (dash_pos == std::string::npos) { + // single number + uint64_t num = std::stoull(token); + tool_data->kernel_filter_ranges.emplace_back(num, num); + } else { + // range of numbers + uint64_t start = std::stoull(token.substr(0, dash_pos)); + uint64_t end = std::stoull(token.substr(dash_pos + 1)); + tool_data->kernel_filter_ranges.emplace_back(start, end); + } + } catch (const std::invalid_argument &) { + std::cerr << "[rocprofiler-compute] [" << __FUNCTION__ + << "] ERROR: Invalid entry in ROCPROF_KERNEL_FILTER_RANGE: " + << token << std::endl; + } + } + } + + return tool_data; +} + +rocprofiler_tool_configure_result_t * +rocprofiler_configure(uint32_t version, const char *runtime_version, + uint32_t priority, rocprofiler_client_id_t *id) { + // set the client name + id->name = "[rocprofiler-compute]"; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " [" << __FUNCTION__ << "] (priority=" << priority + << ") is using rocprofiler-sdk v" << major << "." << minor << "." 
+ << patch << " (" << runtime_version << ")"; + + std::clog << info.str() << std::endl; + + // init tool data + auto tool_data = create_tool_data(id); + + // create configure data + static auto cfg = rocprofiler_tool_configure_result_t{ + sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, + static_cast( + new std::unique_ptr(std::move(tool_data)))}; + + // return pointer to configure data + return &cfg; +} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_base.py b/projects/rocprofiler-compute/src/rocprof_compute_base.py index 26da0157d0..1837ec5535 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_base.py @@ -151,6 +151,22 @@ class RocProfCompute: ) and block: console_error("Cannot use --list-available-metrics with --blocks") + # fallback to csv output format, if rocpd public api not available + if ( + self.__mode == "profile" + and self.__args.format_rocprof_output == "rocpd" + and not ( + Path(self.__args.rocprofiler_sdk_tool_path).parents[1] + / "librocprofiler-sdk-rocpd.so" + ).exists() + ): + console_warning( + "rocpd output format is not supported with the " + "current rocprofiler-sdk version. " + "Falling back to csv output format." + ) + self.__args.format_rocprof_output = "csv" + @demarcate def load_soc_specs(self, sysinfo: Optional[dict] = None) -> None: """Load OmniSoC instance for RocProfCompute run""" @@ -180,16 +196,6 @@ class RocProfCompute: ) self.__args = parser.parse_args() - if ( - hasattr(self.__args, "format_rocprof_output") - and self.__args.format_rocprof_output != "rocpd" - ): - console_warning( - f"The option --format-rocprof-output currently set to " - f"{self.__args.format_rocprof_output} will default to rocpd " - "in a future release." 
- ) - if self.__args.mode is None: if self.__args.specs: print(generate_machine_specs(self.__args)) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index 1aeabcd653..079a9b3045 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -27,6 +27,8 @@ import argparse import csv import shlex import shutil +import sys +import tempfile import time from abc import abstractmethod from pathlib import Path @@ -67,9 +69,7 @@ class RocProfCompute_Base: def get_args(self) -> argparse.Namespace: return self.__args - def get_profiler_options( - self, fname: str, soc: OmniSoC_Base - ) -> Union[list[str], dict[str, Any]]: + def get_profiler_options(self) -> Union[list[str], dict[str, Any]]: """Fetch any version specific arguments required by profiler""" # assume no SoC specific options and return empty list by default return [] @@ -415,6 +415,58 @@ class RocProfCompute_Base: total_runs = len(input_files) total_profiling_time = 0.0 + native_tool_path = None + # Native counter collection tool is only compatible with + # rocprofiler-sdk public API for ROCm version >= 7.x.x + # Do not use native tool in attach + # mode until we figure out how multiple tools can attach + # TODO: Figure out how multiple tools can attach + if ( + self.__profiler == "rocprofiler-sdk" + and not args.no_native_tool + and int(self._soc._mspec.rocm_version.split(".")[0]) >= 7 + and not args.attach_pid + ): + # Use native counter collection tool + native_tool_path = str( + Path(sys.argv[0]).resolve().parents[2] + / "lib" + / "rocprofiler-compute" + / "librocprofiler-compute-tool.so" + ) + if not Path(native_tool_path).is_file(): + # Build native counter collection tool if not exists + native_tool_path = str( + Path( + tempfile.mkdtemp(prefix="rocprofiler-compute-tool-", dir="/tmp") + ) + 
/ "librocprofiler-compute-tool.so" + ) + link_libraries = ("rocprofiler-sdk",) + build_command = ( + # Create shared object + "hipcc -shared -fPIC " + # Link with dependant libraries + + " ".join(f"-l{lib}" for lib in link_libraries) + + " " + # Compliler flags + "-std=c++17 -W -Wall -Wextra -Wshadow -O2 " + # rocprofiler sdk library path + f"-L {str(Path(args.rocprofiler_sdk_tool_path).parent.parent)} " + # native tool source files (tool.cpp and helper.cpp) + f"{str(Path(__file__).parent.parent)}/" + "lib/rocprofiler_compute_tool.cpp " + f"{str(Path(__file__).parent.parent)}/" + "lib/helper.cpp " + # temporary shared object for native tool + f"-o {native_tool_path}" + ) + console_debug(f"Building native tool using command: {build_command}") + success, output = capture_subprocess_output(shlex.split(build_command)) + console_debug(f"Build output: {output}") + if not success: + console_error("Failed to build native counter collection tool.") + for i, fname in enumerate(input_files): run_number = i + 1 @@ -465,7 +517,10 @@ class RocProfCompute_Base: console_debug(output) console_log("profiling", f"Current input file: {fname}") - options = self.get_profiler_options(str(fname), self._soc) + if self.__profiler == "rocprofiler-sdk": + options = self.get_profiler_options(native_tool_path=native_tool_path) + else: + options = self.get_profiler_options() start_time = time.time() if self.__profiler == "rocprofv3" or self.__profiler == "rocprofiler-sdk": # Only 1-run case is permitted for attach/detach @@ -502,6 +557,10 @@ class RocProfCompute_Base: else: console_error("Profiler not supported") + # Delete temporary native tool if created + if native_tool_path and native_tool_path.startswith("/tmp"): + shutil.rmtree(Path(native_tool_path).parent, ignore_errors=True) + # PC sampling data is only collected when block "21" is specified if not "21" in args.filter_blocks: console_warning( @@ -514,14 +573,13 @@ class RocProfCompute_Base: console_log(f"[Run {total_runs + 
1}/{total_runs + 1}][PC sampling profile run]") start_time = time.time() + # No native tool for pc sampling + options = self.get_profiler_options() pc_sampling_prof( + profiler_options=options, method=args.pc_sampling_method, interval=args.pc_sampling_interval, workload_dir=args.path, - appcmd=shlex.split( - args.remaining - ), # FIXME: the right solution is applying it when argparsing once! - rocprofiler_sdk_library_path=args.rocprofiler_sdk_library_path, ) end_time = time.time() diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py index b796bee1f4..661732ebb1 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py @@ -46,7 +46,7 @@ class rocprof_v3_profiler(RocProfCompute_Base): or not self.get_args().roof_only ) - def get_profiler_options(self, fname: str, soc: OmniSoC_Base) -> list[str]: + def get_profiler_options(self) -> list[str]: args = self.get_args() app_cmd = shlex.split(args.remaining) @@ -90,12 +90,12 @@ class rocprof_v3_profiler(RocProfCompute_Base): if args.dispatch: for dispatch_id in args.dispatch: if ":" in dispatch_id: - # 4:7 -> 5-7 + # 4:7 -> 4-7 start, end = dispatch_id.split(":") - dispatch.append(f"{int(start) + 1}-{end}") + dispatch.append(f"{start}-{end}") else: - # 4 -> 5 - dispatch.append(f"{int(dispatch_id) + 1}") + # 4 -> 4 + dispatch.append(f"{dispatch_id}") if dispatch: profiling_options.extend([ "--kernel-iteration-range", diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py index ab18e4be54..aa75baeda8 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py +++ 
b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprofiler_sdk.py @@ -26,7 +26,7 @@ import argparse import shlex from pathlib import Path -from typing import Union +from typing import Optional, Union from rocprof_compute_profile.profiler_base import RocProfCompute_Base from rocprof_compute_soc.soc_base import OmniSoC_Base @@ -48,35 +48,44 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base): ) def get_profiler_options( - self, fname: str, soc: OmniSoC_Base + self, native_tool_path: Optional[str] = None ) -> dict[str, Union[str, list[str]]]: args = self.get_args() app_cmd = shlex.split(args.remaining) - rocm_libdir = Path(args.rocprofiler_sdk_library_path).parent - rocprofiler_sdk_tool_path = str( - rocm_libdir / "rocprofiler-sdk" / "librocprofiler-sdk-tool.so" - ) - rocm_dir = Path(args.rocprofiler_sdk_library_path).parent.parent - rocprofiler_attach_tool_path = str( - rocm_dir / "lib" / "librocprofiler-sdk-rocattach.so" - ) - ld_preload = [ - rocprofiler_sdk_tool_path, - args.rocprofiler_sdk_library_path, - rocprofiler_attach_tool_path, - ] - options = { - "ROCPROFILER_LIBRARY_CTOR": "1", + ld_preload = [args.rocprofiler_sdk_tool_path] + if native_tool_path: + # Use native tool to collect counters + ld_preload.append(native_tool_path) + options = {"ROCPROF_COUNTER_COLLECTION": "0"} + console_log( + f"Using native counter collection tool: {str(native_tool_path)}" + ) + else: + options = {"ROCPROF_COUNTER_COLLECTION": "1"} + + options.update({ "LD_PRELOAD": ":".join(ld_preload), - "ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path, - "LD_LIBRARY_PATH": str(rocm_libdir), "ROCPROF_KERNEL_TRACE": "1", "ROCPROF_OUTPUT_FORMAT": args.format_rocprof_output, "ROCPROF_OUTPUT_PATH": f"{args.path}/out/pmc_1", - } + }) + + # Create folder pointed by ROCPROF_OUTPUT_PATH + Path(options["ROCPROF_OUTPUT_PATH"]).mkdir(parents=True, exist_ok=True) if args.attach_pid: + # In attach mode, tools are provided using ROCP_TOOL_LIBRARIES + # instead of LD_PRELOAD. 
+ options.update({ + "ROCP_TOOL_LIBRARIES": ":".join(ld_preload), + }) + options.pop("LD_PRELOAD", None) + + rocprofiler_attach_tool_path = str( + Path(args.rocprofiler_sdk_tool_path).parent.parent + / "librocprofiler-sdk-rocattach.so" + ) options.update({ "ROCPROF_ATTACH_TOOL_LIBRARY": rocprofiler_attach_tool_path, "ROCPROF_ATTACH_PID": args.attach_pid, @@ -108,13 +117,12 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base): if args.dispatch: for dispatch_id in args.dispatch: if ":" in dispatch_id: - # 4:7 -> 5-7 + # 4:7 -> 4-7 start, end = dispatch_id.split(":") - dispatch.append(f"{int(start) + 1}-{end}") + dispatch.append(f"{start}-{end}") else: - # 4 -> 5 - dispatch.append(f"{int(dispatch_id) + 1}") - + # 4 -> 4 + dispatch.append(f"{dispatch_id}") if dispatch: options["ROCPROF_KERNEL_FILTER_RANGE"] = f"[{','.join(dispatch)}]" if not args.attach_pid: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index aaa27bb009..ad96b19462 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -51,9 +51,7 @@ from utils.utils import ( METRIC_ID_RE, add_counter_extra_config_input_yaml, convert_metric_id_to_panel_info, - detect_rocprof, get_panel_alias, - get_submodules, is_tcc_channel_counter, mibench, parse_sets_yaml, @@ -409,55 +407,35 @@ class OmniSoC_Base: def get_rocprof_supported_counters(self) -> set[str]: args = self.get_args() - rocprof_cmd = detect_rocprof(args) - - if rocprof_cmd != "rocprofiler-sdk": - console_warning( - "rocprofv3 interface is deprecated and will be removed " - "in a future release." - ) - rocprof_counters: set[str] = set() - if not ( - str(rocprof_cmd).endswith("rocprofv3") - or str(rocprof_cmd) == "rocprofiler-sdk" - ): - console_error( - f"Incompatible profiler: {rocprof_cmd}. 
" - "Supported profilers include: " - f"{get_submodules('rocprof_compute_profile')}" + # Point to counter definition + old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH") + os.environ["ROCPROFILER_METRICS_PATH"] = str( + config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs" + ) + sys.path.append( + str( + Path(args.rocprofiler_sdk_tool_path).parents[1] + / "python3/site-packages" ) + ) + from rocprofv3 import avail - # Point to counter definition - old_rocprofiler_metrics_path = os.environ.get("ROCPROFILER_METRICS_PATH") - os.environ["ROCPROFILER_METRICS_PATH"] = str( - config.rocprof_compute_home / "rocprof_compute_soc" / "profile_configs" - ) - sys.path.append( - str( - Path(self.get_args().rocprofiler_sdk_library_path).parent - / "python3/site-packages" - ) - ) - from rocprofv3 import avail - - avail.loadLibrary.libname = str( - Path(args.rocprofiler_sdk_library_path).parent - / "rocprofiler-sdk" - / "librocprofv3-list-avail.so" - ) - counters = avail.get_counters() - rocprof_counters = { - counter.name - for counter in counters[list(counters.keys())[0]] - if hasattr(counter, "block") or hasattr(counter, "expression") - } - # Reset env. var. - if old_rocprofiler_metrics_path is None: - del os.environ["ROCPROFILER_METRICS_PATH"] - else: - os.environ["ROCPROFILER_METRICS_PATH"] = old_rocprofiler_metrics_path + avail.loadLibrary.libname = str( + Path(args.rocprofiler_sdk_tool_path).parent / "librocprofv3-list-avail.so" + ) + counters = avail.get_counters() + rocprof_counters = { + counter.name + for counter in counters[list(counters.keys())[0]] + if hasattr(counter, "block") or hasattr(counter, "expression") + } + # Reset env. var. 
+ if old_rocprofiler_metrics_path is None: + del os.environ["ROCPROFILER_METRICS_PATH"] + else: + os.environ["ROCPROFILER_METRICS_PATH"] = old_rocprofiler_metrics_path return rocprof_counters diff --git a/projects/rocprofiler-compute/src/utils/rocpd_data.py b/projects/rocprofiler-compute/src/utils/rocpd_data.py index 8eec4a06a3..564073786e 100644 --- a/projects/rocprofiler-compute/src/utils/rocpd_data.py +++ b/projects/rocprofiler-compute/src/utils/rocpd_data.py @@ -53,6 +53,12 @@ SELECT value as Counter_Value FROM counters_collection """ +ROCPD_PMC_EVENT_TABLE_NAME_PREFIX = "rocpd_pmc_event_" +TABLE_NAME_PREFIX_QUERY = ( + "SELECT name FROM sqlite_master WHERE type='table' " + "AND name LIKE '{table_name_prefix}%'" +) +INSERT_QUERY = "INSERT INTO {table_name} ({columns}) VALUES ({placeholders})" def convert_db_to_csv( @@ -120,3 +126,67 @@ def process_rocpd_csv(df: pd.DataFrame) -> pd.DataFrame: # Reset dispatch IDs df["Dispatch_ID"] = range(len(df)) return df + + +def update_rocpd_pmc_events(counter_info: pd.DataFrame, rocpd_db_path: str) -> None: + """Insert native-tool counter rows into the rocpd pmc_event table. + + The target is the first table whose name starts with + ``rocpd_pmc_event_``; the rest of its name, with "_" replaced by "-", + is written as the guid of every inserted row. + + Args: + counter_info: Rows to insert; the ``dispatch_id``, ``counter_id`` + and ``counter_value`` columns fill event_id, pmc_id and value. + rocpd_db_path: Path of the SQLite database to update. + + Failures are reported through ``console_error``. + """ + try: + with closing(sqlite3.connect(rocpd_db_path)) as conn: + # Get pmc_event table name + with closing( + conn.execute( + TABLE_NAME_PREFIX_QUERY.format( + table_name_prefix=ROCPD_PMC_EVENT_TABLE_NAME_PREFIX + ) + ) + ) as cursor: + table_name = cursor.fetchone() + if table_name is None: + console_error("No pmc_event table found in the rocpd database") + table_name = table_name[0] + + # get pmc_event table data + guid = table_name[len(ROCPD_PMC_EVENT_TABLE_NAME_PREFIX) :].replace( + "_", "-" + ) + columns = ("guid", "event_id", "pmc_id", "value") + values = list( + zip( + # guid + [guid] * len(counter_info), + # event_id + counter_info["dispatch_id"], + # pmc_id + counter_info["counter_id"], + # value + counter_info["counter_value"], + ) + ) + + # insert into pmc_event table + with conn: + placeholders = ", ".join(["?"] * len(columns)) + 
conn.executemany( + INSERT_QUERY.format( + table_name=table_name, + columns=", ".join(columns), + placeholders=placeholders, + ), + values, + ) + except (sqlite3.Error, OSError) as e: + console_error(f"Database error while updating pmc_event table: {e}") + except Exception as e: + console_error(f"Unexpected error updating pmc_event table: {e}") diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index dd819a4973..0950c47366 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -41,6 +41,7 @@ import sys import tempfile import threading import time +import traceback import uuid from collections.abc import Generator from contextlib import contextmanager @@ -210,14 +211,14 @@ def detect_rocprof(args: argparse.Namespace) -> str: # Default is rocprofiler-sdk if os.environ.get("ROCPROF", "rocprofiler-sdk") == "rocprofiler-sdk": - if not Path(args.rocprofiler_sdk_library_path).exists(): + if not Path(args.rocprofiler_sdk_tool_path).exists(): console_error( - "Could not find rocprofiler-sdk library at " - f"{args.rocprofiler_sdk_library_path}" + "Could not find rocprofiler-sdk tool at " + f"{args.rocprofiler_sdk_tool_path}" ) rocprof_cmd = "rocprofiler-sdk" console_debug(f"rocprof_cmd is {rocprof_cmd}") - console_debug(f"rocprofiler_sdk_path is {args.rocprofiler_sdk_library_path}") + console_debug(f"rocprofiler_sdk_tool_path is {args.rocprofiler_sdk_tool_path}") else: # If ROCPROF is not set to rocprofiler-sdk rocprof_cmd = os.environ["ROCPROF"] @@ -705,16 +706,12 @@ def run_prof( # standard rocprof options if rocprof_cmd == "rocprofiler-sdk": - options = cast(dict[str, Union[str, list[str]]], profiler_options) - options["ROCPROF_COUNTER_COLLECTION"] = "1" + options = cast(dict[str, Union[str, list[str]]], profiler_options).copy() options["ROCPROF_COUNTERS"] = f"pmc: {' '.join(parse_text(fname))}" + options["ROCPROF_AGENT_INDEX"] = "absolute" else: default_options = 
["-i", fname] options = default_options + cast(list[str], profiler_options) - - if rocprof_cmd == "rocprofiler-sdk": - options["ROCPROF_AGENT_INDEX"] = "absolute" - else: options = ["-A", "absolute"] + options new_env = os.environ.copy() @@ -758,7 +755,6 @@ def run_prof( ): new_env["ROCPROFILER_INDIVIDUAL_XCC_MODE"] = "1" - is_timestamps = Path(fname).name == "timestamps.txt" time_1 = time.time() if rocprof_cmd == "rocprofiler-sdk": @@ -849,6 +845,16 @@ def run_prof( results_files: list[str] = [] if format_rocprof_output == "rocpd": + # If using native tool for counter collection + if ( + rocprof_cmd == "rocprofiler-sdk" + and options["ROCPROF_COUNTER_COLLECTION"] == "0" + ): + # Update rocpd database with counter csv created by native tool + rocpd_data.update_rocpd_pmc_events( + pd.read_csv(glob.glob(workload_dir + "/out/pmc_1/*.csv")[0]), + glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0], + ) # Write results_fbase.csv rocpd_data.convert_db_to_csv( glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0], @@ -865,88 +871,95 @@ def run_prof( # Remove temp directory shutil.rmtree(workload_dir + "/" + "out") return - - # rocprofv3 requires additional processing for each process - results_files = process_rocprofv3_output( - format_rocprof_output, workload_dir, is_timestamps - ) - - if rocprof_cmd == "rocprofiler-sdk": - # TODO: as rocprofv3 --kokkos-trace feature improves, - # rocprof-compute should make updates accordingly - if "ROCPROF_HIP_RUNTIME_API_TRACE" in options: - process_hip_trace_output(workload_dir, fbase) - else: - if "--kokkos-trace" in options: + elif format_rocprof_output == "csv": + if rocprof_cmd == "rocprofiler-sdk": + # rocprofv3 requires additional processing for each process + results_files = process_rocprofv3_output( + workload_dir, + # counter data collected using native tool + using_native_tool=options["ROCPROF_COUNTER_COLLECTION"] == "0", + ) # TODO: as rocprofv3 --kokkos-trace feature improves, # rocprof-compute should make updates accordingly - 
process_kokkos_trace_output(workload_dir, fbase) - elif "--hip-trace" in options: - process_hip_trace_output(workload_dir, fbase) + if "ROCPROF_HIP_RUNTIME_API_TRACE" in options: + process_hip_trace_output(workload_dir, fbase) + else: + # rocprofv3 requires additional processing for each process + # rocprofv3 cannot use native tool + results_files = process_rocprofv3_output( + workload_dir, using_native_tool=False + ) + if "--kokkos-trace" in options: + # TODO: as rocprofv3 --kokkos-trace feature improves, + # rocprof-compute should make updates accordingly + process_kokkos_trace_output(workload_dir, fbase) + elif "--hip-trace" in options: + process_hip_trace_output(workload_dir, fbase) - # Combine results into single CSV file - if results_files: - combined_results = pd.concat( - [pd.read_csv(f) for f in results_files], ignore_index=True + # Combine results into single CSV file + if results_files: + combined_results = pd.concat( + [pd.read_csv(f) for f in results_files], ignore_index=True + ) + else: + console_warning( + f"Cannot write results for {fbase}.csv due to no counter " + "csv files generated." + ) + return + + # Overwrite column to ensure unique IDs. 
+ combined_results["Dispatch_ID"] = range(0, len(combined_results)) + + combined_results.to_csv( + workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False ) + + if Path(f"{workload_dir}/out").exists(): + # copy and remove out directory if needed + shutil.copyfile( + f"{workload_dir}/out/pmc_1/results_{fbase}.csv", + f"{workload_dir}/{fbase}.csv", + ) + # Remove temp directory + shutil.rmtree(f"{workload_dir}/out") + + # Standardize rocprof headers via overwrite + # {: } + output_headers = { + # ROCm-6.1.0 specific csv headers + "KernelName": "Kernel_Name", + "Index": "Dispatch_ID", + "grd": "Grid_Size", + "gpu-id": "GPU_ID", + "wgr": "Workgroup_Size", + "lds": "LDS_Per_Workgroup", + "scr": "Scratch_Per_Workitem", + "sgpr": "SGPR", + "arch_vgpr": "Arch_VGPR", + "accum_vgpr": "Accum_VGPR", + "BeginNs": "Start_Timestamp", + "EndNs": "End_Timestamp", + # ROCm-6.0.0 specific csv headers + "GRD": "Grid_Size", + "WGR": "Workgroup_Size", + "LDS": "LDS_Per_Workgroup", + "SCR": "Scratch_Per_Workitem", + "ACCUM_VGPR": "Accum_VGPR", + } + csv_path = Path(workload_dir) / f"{fbase}.csv" + df = pd.read_csv(csv_path) + df.rename(columns=output_headers, inplace=True) + df.to_csv(csv_path, index=False) else: - console_warning( - f"Cannot write results for {fbase}.csv due to no counter " - "csv files generated." - ) - return - - # Overwrite column to ensure unique IDs. 
- combined_results["Dispatch_ID"] = range(0, len(combined_results)) - - combined_results.to_csv( - workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False - ) - - if Path(f"{workload_dir}/out").exists(): - # copy and remove out directory if needed - shutil.copyfile( - f"{workload_dir}/out/pmc_1/results_{fbase}.csv", - f"{workload_dir}/{fbase}.csv", - ) - # Remove temp directory - shutil.rmtree(f"{workload_dir}/out") - - # Standardize rocprof headers via overwrite - # {: } - output_headers = { - # ROCm-6.1.0 specific csv headers - "KernelName": "Kernel_Name", - "Index": "Dispatch_ID", - "grd": "Grid_Size", - "gpu-id": "GPU_ID", - "wgr": "Workgroup_Size", - "lds": "LDS_Per_Workgroup", - "scr": "Scratch_Per_Workitem", - "sgpr": "SGPR", - "arch_vgpr": "Arch_VGPR", - "accum_vgpr": "Accum_VGPR", - "BeginNs": "Start_Timestamp", - "EndNs": "End_Timestamp", - # ROCm-6.0.0 specific csv headers - "GRD": "Grid_Size", - "WGR": "Workgroup_Size", - "LDS": "LDS_Per_Workgroup", - "SCR": "Scratch_Per_Workitem", - "ACCUM_VGPR": "Accum_VGPR", - } - csv_path = Path(workload_dir) / f"{fbase}.csv" - df = pd.read_csv(csv_path) - df.rename(columns=output_headers, inplace=True) - df.to_csv(csv_path, index=False) + console_error(f"Unknown format_rocprof_output: {format_rocprof_output}") def pc_sampling_prof( + profiler_options: Union[list[str], dict[str, Union[str, list[str]]]], method: str, interval: int, workload_dir: str, - appcmd: list[str], - rocprofiler_sdk_library_path: str, ) -> None: """ Run rocprof with pc sampling. Current support v3 only. 
@@ -957,19 +970,12 @@ def pc_sampling_prof( unit = "time" if method == "host_trap" else "cycles" if rocprof_cmd == "rocprofiler-sdk": - rocm_libdir = str(Path(rocprofiler_sdk_library_path).parent) - rocprofiler_sdk_tool_path = str( - Path(rocm_libdir) / "rocprofiler-sdk/librocprofiler-sdk-tool.so" - ) - ld_preload = [ - rocprofiler_sdk_tool_path, - rocprofiler_sdk_library_path, - ] - options = { - "ROCPROFILER_LIBRARY_CTOR": "1", - "LD_PRELOAD": ":".join(ld_preload), - "ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path, - "LD_LIBRARY_PATH": rocm_libdir, + options = cast(dict[str, Union[str, list[str]]], profiler_options).copy() + options.update({ + # no counter collection for pc sampling + "ROCPROF_COUNTER_COLLECTION": "0", + # no kernel tracing for pc sampling + "ROCPROF_KERNEL_TRACE": "0", "ROCPROF_OUTPUT_FORMAT": "csv,json", "ROCPROF_OUTPUT_PATH": workload_dir, "ROCPROF_OUTPUT_FILE_NAME": "ps_file", @@ -977,15 +983,15 @@ def pc_sampling_prof( "ROCPROF_PC_SAMPLING_UNIT": unit, "ROCPROF_PC_SAMPLING_INTERVAL": str(interval), "ROCPROF_PC_SAMPLING_METHOD": method, - "ROCPROF_KERNEL_TRACE": "1", - } + }) + app_cmd = options.pop("APP_CMD") if "APP_CMD" in options else None new_env = os.environ.copy() for key, value in options.items(): new_env[key] = value console_debug(f"pc sampling rocprof sdk env vars: {new_env}") - console_debug(f"pc sampling rocprof sdk user provided command: {appcmd}") + console_debug(f"pc sampling rocprof sdk user provided command: {app_cmd}") success, output = capture_subprocess_output( - appcmd, new_env=new_env, profileMode=True + app_cmd, new_env=new_env, profileMode=True ) else: options = [ @@ -1005,9 +1011,11 @@ def pc_sampling_prof( "-o", "ps_file", # TODO: sync up with the name from source in 2100_.yaml "--", + cast(str, profiler_options[-1]), # app command ] - options.extend(appcmd) + console_debug(f"rocprof command: {shlex.join([rocprof_cmd] + options)}") + # profile the app success, output = capture_subprocess_output( [rocprof_cmd] + 
def convert_native_counter_collection_csv(workload_dir: str) -> None:
    """
    Use native counter collection csv and rocprofiler-sdk kernel
    trace to write counter collection csv in rocprofiler-sdk format
    for further processing to pmc_perf.csv file

    Reads the first ``{workload_dir}/out/pmc_1/*.csv`` (native counter data)
    and the first ``{workload_dir}/out/pmc_1/*/*_kernel_trace.csv``, then
    writes ``*_counter_collection.csv`` next to the kernel trace file.

    Args:
        workload_dir: Workload directory containing the ``out/pmc_1`` tree.

    Raises:
        IndexError: If either input csv file cannot be found.
    """
    counter_data = pd.read_csv(
        glob.glob(f"{workload_dir}/out/pmc_1/*.csv")[0], index_col=False
    )
    # Sum counter_value over every (dispatch_id, counter_name) pair
    counter_data = counter_data.groupby(
        ["dispatch_id", "counter_name"], as_index=False
    ).agg({"counter_value": "sum"})

    kernel_data_path = Path(
        glob.glob(f"{workload_dir}/out/pmc_1/*/*_kernel_trace.csv")[0]
    )
    kernel_data = pd.read_csv(kernel_data_path)

    # Select the kernel trace row of every counter sample once.
    # NOTE(review): positional lookup assumes dispatch_id N is described by
    # row N-1 of the kernel trace - confirm against the native tool output.
    dispatch_rows = kernel_data.iloc[counter_data["dispatch_id"] - 1]

    def kernel_column(column: str):
        """Values of one kernel trace column, aligned with counter_data."""
        return dispatch_rows[column].values

    def dimension_product(prefix: str):
        """Per-row product of the X, Y and Z columns sharing ``prefix``."""
        return (
            dispatch_rows[[f"{prefix}_X", f"{prefix}_Y", f"{prefix}_Z"]]
            .prod(axis=1)
            .values
        )

    rocprofv3_counter_data = pd.DataFrame({
        "Correlation_Id": counter_data["dispatch_id"],
        "Dispatch_Id": counter_data["dispatch_id"],
        "Agent_Id": kernel_column("Agent_Id"),
        "Queue_Id": kernel_column("Queue_Id"),
        # NOTE(review): Process_Id is filled from Thread_Id, as before -
        # confirm whether the kernel trace offers a process id column.
        "Process_Id": kernel_column("Thread_Id"),
        "Thread_Id": kernel_column("Thread_Id"),
        "Grid_Size": dimension_product("Grid_Size"),
        "Kernel_Id": kernel_column("Kernel_Id"),
        "Kernel_Name": kernel_column("Kernel_Name"),
        "Workgroup_Size": dimension_product("Workgroup_Size"),
        "LDS_Block_Size": kernel_column("LDS_Block_Size"),
        "Scratch_Size": kernel_column("Scratch_Size"),
        "VGPR_Count": kernel_column("VGPR_Count"),
        "Accum_VGPR_Count": kernel_column("Accum_VGPR_Count"),
        "SGPR_Count": kernel_column("SGPR_Count"),
        "Counter_Name": counter_data["counter_name"],
        "Counter_Value": counter_data["counter_value"],
        "Start_Timestamp": kernel_column("Start_Timestamp"),
        "End_Timestamp": kernel_column("End_Timestamp"),
    })

    # Rename within the file name only, so that a directory containing
    # "kernel_trace" does not redirect the output elsewhere.
    counter_collection_path = kernel_data_path.with_name(
        kernel_data_path.name.replace("kernel_trace", "counter_collection")
    )
    rocprofv3_counter_data.to_csv(counter_collection_path, index=False)


def process_rocprofv3_output(workload_dir: str, using_native_tool: bool) -> list[str]:
    """
    rocprofv3 specific output processing for csv format.

    Converts every ``*_counter_collection.csv`` found under
    ``{workload_dir}/out/pmc_1/*/`` into a ``*_converted.csv`` file using the
    agent info csv file sitting next to it.

    Args:
        workload_dir: Workload directory containing the ``out/pmc_1`` tree.
        using_native_tool: When True, the counter collection csv is first
            generated from the native tool csv and the kernel trace.

    Returns:
        Paths of all ``*_converted.csv`` files, or an empty list when no
        counter collection csv exists or a conversion fails.

    Raises:
        ValueError: If a counter collection csv has no agent info csv.
    """
    if using_native_tool:
        try:
            convert_native_counter_collection_csv(workload_dir)
        except Exception:
            console_error(
                "Error converting native counter collection csv.\n"
                f"Stacktrace:\n{traceback.format_exc()}"
            )

    counter_info_csvs = glob.glob(
        f"{workload_dir}/out/pmc_1/*/*_counter_collection.csv"
    )
    existing_counter_files_csv = [f for f in counter_info_csvs if Path(f).is_file()]

    # Without counter files there is nothing to convert or merge
    if not existing_counter_files_csv:
        return []

    for counter_file in existing_counter_files_csv:
        counter_path = Path(counter_file)
        current_dir = counter_path.parent

        agent_info_filepath = current_dir / counter_path.name.replace(
            "_counter_collection", "_agent_info"
        )
        if not agent_info_filepath.is_file():
            raise ValueError(
                f'{counter_file} has no corresponding "agent info" file'
            )

        converted_csv_file = current_dir / counter_path.name.replace(
            "_counter_collection", "_converted"
        )

        try:
            v3_counter_csv_to_v2_csv(
                counter_file, str(agent_info_filepath), str(converted_csv_file)
            )
        except Exception as e:
            # A single failed conversion discards the results of this run
            console_warning(
                f"Error converting {counter_file} from v3 to v2 csv: {e}"
            )
            return []

    return glob.glob(f"{workload_dir}/out/pmc_1/*/*_converted.csv")
request.config.getoption("--rocprofiler-sdk-library-path"): + if request.config.getoption("--rocprofiler-sdk-tool-path"): options.extend( [ - "--rocprofiler-sdk-library-path", - request.config.getoption("--rocprofiler-sdk-library-path"), + "--rocprofiler-sdk-tool-path", + request.config.getoption("--rocprofiler-sdk-tool-path"), ], ) if request.config.getoption("--call-binary"): @@ -114,7 +115,13 @@ def binary_handler_profile_rocprof_compute(request): assert process.returncode == 0 return process.returncode else: - baseline_opts = ["rocprof-compute", "profile", "-n", app_name, "-VVV"] + baseline_opts = [ + "install/bin/rocprof-compute", + "profile", + "-n", + app_name, + "-VVV", + ] if not roof: baseline_opts.append("--no-roof") diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 758b2e9e5e..70fb8ef904 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -74,99 +74,23 @@ num_devices = 1 attach_detach_interval_msec_no_delay = 10000 attach_detach_interval_msec_with_delay = 60000 - DEFAULT_ABS_DIFF = 15 DEFAULT_REL_DIFF = 50 MAX_REOCCURING_COUNT = 28 -ALL_CSVS_MI100 = sorted([ - "SQC_DCACHE_INFLIGHT_LEVEL.csv", - "SQC_ICACHE_INFLIGHT_LEVEL.csv", - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_LEVEL_WAVES.csv", +CSVS = sorted([ "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "sysinfo.csv", -]) - -ALL_CSVS_MI200 = sorted([ - "SQC_DCACHE_INFLIGHT_LEVEL.csv", - "SQC_ICACHE_INFLIGHT_LEVEL.csv", - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "sysinfo.csv", -]) 
-ALL_CSVS_MI300 = sorted([ - "SQC_DCACHE_INFLIGHT_LEVEL.csv", - "SQC_ICACHE_INFLIGHT_LEVEL.csv", - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "sysinfo.csv", -]) -ALL_CSVS_MI350 = sorted([ - "SQC_DCACHE_INFLIGHT_LEVEL.csv", - "SQC_ICACHE_INFLIGHT_LEVEL.csv", - "SQ_IFETCH_LEVEL.csv", - "SQ_INST_LEVEL_LDS.csv", - "SQ_INST_LEVEL_SMEM.csv", - "SQ_INST_LEVEL_VMEM.csv", - "SQ_LEVEL_WAVES.csv", - "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", - "pmc_perf_3.csv", - "pmc_perf_4.csv", - "pmc_perf_5.csv", - "pmc_perf_6.csv", - "pmc_perf_7.csv", - "pmc_perf_8.csv", - "pmc_perf_9.csv", - "pmc_perf_10.csv", - "pmc_perf_11.csv", - "pmc_perf_12.csv", "sysinfo.csv", ]) ROOF_ONLY_FILES = sorted([ "empirRoof_gpu-0_FP32.pdf", "pmc_perf.csv", - "pmc_perf_0.csv", - "pmc_perf_1.csv", - "pmc_perf_2.csv", "roofline.csv", "sysinfo.csv", ]) PC_SAMPLING_HOST_TRAP_FILES = sorted([ - "pmc_perf_0.csv", "pmc_perf.csv", "ps_file_agent_info.csv", "ps_file_kernel_trace.csv", @@ -176,7 +100,6 @@ PC_SAMPLING_HOST_TRAP_FILES = sorted([ ]) PC_SAMPLING_STOCHASTIC_FILES = sorted([ - "pmc_perf_0.csv", "pmc_perf.csv", "ps_file_agent_info.csv", "ps_file_kernel_trace.csv", @@ -550,13 +473,38 @@ def test_path(binary_handler_profile_rocprof_compute): file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) 
@pytest.mark.path
def test_path_csv(
    binary_handler_profile_rocprof_compute, binary_handler_analyze_rocprof_compute
):
    """Profile with csv rocprof output and check the per-SoC set of csv files."""
    workload_dir = test_utils.get_output_dir()
    binary_handler_profile_rocprof_compute(
        config, workload_dir, ["--format-rocprof-output", "csv"]
    )

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)

    # Files expected on every SoC
    shared_csvs = [
        "SQC_DCACHE_INFLIGHT_LEVEL.csv",
        "SQC_ICACHE_INFLIGHT_LEVEL.csv",
        "SQ_IFETCH_LEVEL.csv",
        "SQ_INST_LEVEL_LDS.csv",
        "SQ_LEVEL_WAVES.csv",
        "pmc_perf.csv",
        "sysinfo.csv",
    ]
    # Files expected on every SoC except MI100
    mem_level_csvs = [
        "SQ_INST_LEVEL_SMEM.csv",
        "SQ_INST_LEVEL_VMEM.csv",
    ]

    def expected_csvs(pmc_file_count, with_mem_levels=True):
        """Sorted csv names for a SoC writing pmc_perf_0..pmc_perf_{count-1}."""
        names = list(shared_csvs)
        if with_mem_levels:
            names.extend(mem_level_csvs)
        names.extend(f"pmc_perf_{idx}.csv" for idx in range(pmc_file_count))
        return sorted(names)

    produced_csvs = sorted(file_dict)

    if soc == "MI100":
        assert produced_csvs == expected_csvs(7, with_mem_levels=False)
    elif soc == "MI200":
        assert produced_csvs == expected_csvs(6)
    elif "MI300" in soc:
        assert produced_csvs == expected_csvs(6)
    elif "MI350" in soc:
        assert produced_csvs == expected_csvs(13)
    else:
        print(f"This test is not supported for {soc}")
        assert 0

    # inspect.stack()[0][3] is the name of this test function
    validate(inspect.stack()[0][3], workload_dir, file_dict)

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
"MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1452,13 +1501,13 @@ def test_kernel(binary_handler_profile_rocprof_compute): file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1474,19 +1523,19 @@ def test_kernel(binary_handler_profile_rocprof_compute): @pytest.mark.dispatch def test_dispatch_0(binary_handler_profile_rocprof_compute): - options = ["--dispatch", "0"] + options = ["--dispatch", "1"] workload_dir = test_utils.get_output_dir() binary_handler_profile_rocprof_compute(config, workload_dir, options) file_dict = test_utils.check_csv_files(workload_dir, num_devices, 1) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert 
sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1497,7 +1546,7 @@ def test_dispatch_0(binary_handler_profile_rocprof_compute): file_dict, [ "--dispatch", - "0", + "1", ], ) @@ -1506,19 +1555,19 @@ def test_dispatch_0(binary_handler_profile_rocprof_compute): @pytest.mark.dispatch def test_dispatch_0_1(binary_handler_profile_rocprof_compute): - options = ["--dispatch", "0:2"] + options = ["--dispatch", "1:2"] workload_dir = test_utils.get_output_dir() binary_handler_profile_rocprof_compute(config, workload_dir, options) file_dict = test_utils.check_csv_files(workload_dir, num_devices, 2) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1527,7 +1576,7 @@ def test_dispatch_0_1(binary_handler_profile_rocprof_compute): inspect.stack()[0][3], workload_dir, file_dict, - ["--dispatch", "0", "1"], + ["--dispatch", "1", "2"], ) test_utils.clean_output_dir(config["cleanup"], workload_dir) @@ -1535,19 +1584,19 @@ def test_dispatch_0_1(binary_handler_profile_rocprof_compute): @pytest.mark.dispatch def test_dispatch_2(binary_handler_profile_rocprof_compute): - options = ["--dispatch", "0"] + options = ["--dispatch", "1"] workload_dir = test_utils.get_output_dir() binary_handler_profile_rocprof_compute(config, workload_dir, options) file_dict = test_utils.check_csv_files(workload_dir, num_devices, 1) if soc == "MI100": - assert 
sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1558,7 +1607,7 @@ def test_dispatch_2(binary_handler_profile_rocprof_compute): file_dict, [ "--dispatch", - "0", + "1", ], ) @@ -1573,13 +1622,13 @@ def test_join_type_grid(binary_handler_profile_rocprof_compute): file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 @@ -1602,13 +1651,13 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute): file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels) if soc == "MI100": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + assert sorted(list(file_dict.keys())) == CSVS elif soc == "MI200": - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI200 + assert sorted(list(file_dict.keys())) == CSVS elif "MI300" in soc: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI300 + assert sorted(list(file_dict.keys())) == CSVS elif "MI350" in soc: - assert 
sorted(list(file_dict.keys())) == ALL_CSVS_MI350 + assert sorted(list(file_dict.keys())) == CSVS else: print(f"Testing isn't supported yet for {soc}") assert 0 diff --git a/projects/rocprofiler-compute/tests/test_utils.py b/projects/rocprofiler-compute/tests/test_utils.py index 48bd0e02ce..4e00b2c544 100644 --- a/projects/rocprofiler-compute/tests/test_utils.py +++ b/projects/rocprofiler-compute/tests/test_utils.py @@ -381,7 +381,7 @@ def test_detect_rocprof_env_rocprof_not_found(monkeypatch): """ class DummyArgs: - rocprofiler_sdk_library_path = "/fake/path" + rocprofiler_sdk_tool_path = "/fake/path" # Set ROCPROF to 'rocprof' monkeypatch.setenv("ROCPROF", "rocprofv3") @@ -416,7 +416,7 @@ def test_detect_rocprof_env_rocprof_found(monkeypatch): """ class DummyArgs: - rocprofiler_sdk_library_path = "/fake/path" + rocprofiler_sdk_tool_path = "/fake/path" monkeypatch.setenv("ROCPROF", "rocprof") # shutil.which returns a fake path for 'rocprof' @@ -448,7 +448,7 @@ def test_detect_rocprof_env_not_set(monkeypatch): """ class DummyArgs: - rocprofiler_sdk_library_path = "/fake/path" + rocprofiler_sdk_tool_path = "/fake/path" monkeypatch.delenv("ROCPROF", raising=False) monkeypatch.setattr("pathlib.Path.exists", lambda _: True) @@ -475,7 +475,7 @@ def test_detect_rocprof_sdk(monkeypatch): """ class DummyArgs: - rocprofiler_sdk_library_path = "/some/sdk/path" + rocprofiler_sdk_tool_path = "/some/sdk/path" monkeypatch.setenv("ROCPROF", "rocprofiler-sdk") monkeypatch.setattr("pathlib.Path.exists", lambda self: True) @@ -2500,6 +2500,7 @@ def test_run_prof_success_rocprofiler_sdk(tmp_path, monkeypatch): profiler_options = { "APP_CMD": ["./test_app"], "ROCPROF_OUTPUT_PATH": workload_dir, + "ROCPROF_COUNTER_COLLECTION": "1", "ROCP_TOOL_LIBRARIES": "/opt/rocm/lib/rocprofiler-sdk/" "librocprofiler-sdk-tool.so", } @@ -3061,13 +3062,14 @@ def test_run_prof_v3_sdk_and_cli_calls_trace_processing(tmp_path, monkeypatch): mspec = MockMSpec() loglevel = logging.INFO - format_rocprof_output 
= True + format_rocprof_output = "csv" monkeypatch.setattr("utils.utils.rocprof_cmd", "rocprofiler-sdk") profiler_options_sdk_hip = { "APP_CMD": "my_app", "ROCPROF_HIP_RUNTIME_API_TRACE": "1", + "ROCPROF_COUNTER_COLLECTION": "1", "ROCP_TOOL_LIBRARIES": "/opt/rocm/lib/rocprofiler-sdk/" "librocprofiler-sdk-tool.so", } @@ -3123,44 +3125,6 @@ def test_run_prof_v3_sdk_and_cli_calls_trace_processing(tmp_path, monkeypatch): # ============================================================================= -def test_process_rocprofv3_output_json_format(tmp_path, monkeypatch): - """ - Test process_rocprofv3_output with json format converts JSON files to CSV. - - Args: - tmp_path (Path): Temporary directory for test files. - monkeypatch (pytest.MonkeyPatch): Pytest fixture for patching. - - Returns: - None: Asserts CSV files are created from JSON files. - """ - workload_dir = str(tmp_path) - output_dir = tmp_path / "out" / "pmc_1" / "subdir" - output_dir.mkdir(parents=True) - - json_file1 = output_dir / "test1.json" - json_file2 = output_dir / "test2.json" - json_file1.write_text('{"test": "data1"}') - json_file2.write_text('{"test": "data2"}') - - monkeypatch.setattr("glob.glob", lambda pattern: [str(json_file1), str(json_file2)]) - - def mock_v3_json_to_csv(json_path, csv_path): - Path(csv_path).write_text("csv,data\ntest,value") - - monkeypatch.setattr("utils.utils.v3_json_to_csv", mock_v3_json_to_csv) - - import utils.utils as utils_mod - - result = utils_mod.process_rocprofv3_output("json", workload_dir, False) - - assert len(result) == 2 - csv_file1 = output_dir / "test1.csv" - csv_file2 = output_dir / "test2.csv" - assert csv_file1.exists() - assert csv_file2.exists() - - def test_process_rocprofv3_output_csv_format_with_counter_files(tmp_path, monkeypatch): """ Test process_rocprofv3_output with csv format processes counter collection files. 
@@ -3201,7 +3165,7 @@ def test_process_rocprofv3_output_csv_format_with_counter_files(tmp_path, monkey import utils.utils as utils_mod - result = utils_mod.process_rocprofv3_output("csv", workload_dir, False) + result = utils_mod.process_rocprofv3_output(workload_dir, False) assert len(result) == 1 assert str(converted_file) in result @@ -3247,7 +3211,7 @@ def test_process_rocprofv3_output_csv_format_conversion_error(tmp_path, monkeypa import utils.utils as utils_mod - result = utils_mod.process_rocprofv3_output("csv", workload_dir, False) + result = utils_mod.process_rocprofv3_output(workload_dir, False) assert result == [] assert len(warnings) == 1 @@ -3282,42 +3246,7 @@ def test_process_rocprofv3_output_csv_format_missing_agent_file(tmp_path, monkey import utils.utils as utils_mod with pytest.raises(ValueError, match='has no corresponding "agent info" file'): - utils_mod.process_rocprofv3_output("csv", workload_dir, False) - - -def test_process_rocprofv3_output_csv_format_timestamps_fallback(tmp_path, monkeypatch): - """ - Test process_rocprofv3_output falls back to kernel trace files for timestamps. - - Args: - tmp_path (Path): Temporary directory for test files. - monkeypatch (pytest.MonkeyPatch): Pytest fixture for patching. - - Returns: - None: Asserts kernel trace files are used when is_timestamps is True. 
- """ - workload_dir = str(tmp_path) - output_dir = tmp_path / "out" / "pmc_1" / "subdir" - output_dir.mkdir(parents=True) - - trace_file = output_dir / "test_kernel_trace.csv" - trace_file.write_text("kernel,trace\ntest,data") - - def mock_glob(pattern): - if "_counter_collection.csv" in pattern: - return [] - elif "_kernel_trace.csv" in pattern: - return [str(trace_file)] - return [] - - monkeypatch.setattr("glob.glob", mock_glob) - - import utils.utils as utils_mod - - result = utils_mod.process_rocprofv3_output("csv", workload_dir, True) - - assert len(result) == 1 - assert str(trace_file) in result + utils_mod.process_rocprofv3_output(workload_dir, False) def test_process_rocprofv3_output_csv_format_no_files_non_timestamps( @@ -3340,53 +3269,7 @@ def test_process_rocprofv3_output_csv_format_no_files_non_timestamps( import utils.utils as utils_mod - result = utils_mod.process_rocprofv3_output("csv", workload_dir, False) - - assert result == [] - - -def test_process_rocprofv3_output_invalid_format(monkeypatch): - """ - Test process_rocprofv3_output raises error for invalid output format. - - Args: - monkeypatch (pytest.MonkeyPatch): Pytest fixture for patching. - - Returns: - None: Asserts console_error is called for invalid format. - """ - - def mock_console_error(msg): - raise RuntimeError(f"console_error: {msg}") - - monkeypatch.setattr("utils.utils.console_error", mock_console_error) - - import utils.utils as utils_mod - - with pytest.raises( - RuntimeError, match="The output file of rocprofv3 can only support json or csv" - ): - utils_mod.process_rocprofv3_output("invalid", "/tmp", False) - - -def test_process_rocprofv3_output_json_format_no_files(tmp_path, monkeypatch): - """ - Test process_rocprofv3_output with json format when no JSON files exist. - - Args: - tmp_path (Path): Temporary directory for test files. - monkeypatch (pytest.MonkeyPatch): Pytest fixture for patching. - - Returns: - None: Asserts empty list returned when no JSON files found. 
- """ - workload_dir = str(tmp_path) - - monkeypatch.setattr("glob.glob", lambda pattern: []) - - import utils.utils as utils_mod - - result = utils_mod.process_rocprofv3_output("json", workload_dir, False) + result = utils_mod.process_rocprofv3_output(workload_dir, False) assert result == [] @@ -3439,7 +3322,7 @@ def test_process_rocprofv3_output_csv_format_multiple_counter_files( import utils.utils as utils_mod - result = utils_mod.process_rocprofv3_output("csv", workload_dir, False) + result = utils_mod.process_rocprofv3_output(workload_dir, False) assert len(result) == 2 assert str(converted_file1) in result @@ -8180,8 +8063,8 @@ def test_add_counter_overwrite_existing(): # additional test detect_rocprof console error # ============================================================================= class MockArgs: - def __init__(self, rocprofiler_sdk_library_path): - self.rocprofiler_sdk_library_path = rocprofiler_sdk_library_path + def __init__(self, rocprofiler_sdk_tool_path): + self.rocprofiler_sdk_tool_path = rocprofiler_sdk_tool_path @mock.patch.dict(os.environ, {"ROCPROF": "rocprofiler-sdk"}, clear=True) @@ -8192,7 +8075,7 @@ def test_detect_rocprof_calls_console_error_if_sdk_path_invalid( ): """ Tests that detect_rocprof calls console_error when ROCPROF is 'rocprofiler-sdk' - and the rocprofiler_sdk_library_path does not exist. + and the rocprofiler_sdk_tool_path does not exist. Focuses on the console_error call. 
""" mock_path_instance = mock.Mock() @@ -8200,13 +8083,13 @@ def test_detect_rocprof_calls_console_error_if_sdk_path_invalid( mock_path_constructor.return_value = mock_path_instance fake_library_path = "/some/invalid/path/to/librocprofiler_sdk.so" - args = MockArgs(rocprofiler_sdk_library_path=fake_library_path) + args = MockArgs(rocprofiler_sdk_tool_path=fake_library_path) with mock.patch("utils.utils.console_debug") as mock_console_debug: # noqa utils.detect_rocprof(args) expected_error_message = ( - "Could not find rocprofiler-sdk library at " + fake_library_path + "Could not find rocprofiler-sdk tool at " + fake_library_path ) mock_console_error_func.assert_called_once_with(expected_error_message) @@ -8442,7 +8325,7 @@ def test_pc_sampling_prof_sdk_path_nonexistent_librocprofiler_sdk_tool( mock_console_debug, mock_console_error, mock_capture_subprocess, tmp_path ): """ - Edge Case: rocprofiler_sdk_library_path is valid, but librocprofiler-sdk-tool.so + Edge Case: rocprofiler_sdk_tool_path is valid, but librocprofiler-sdk-tool.so is NOT found next to it (or in rocprofiler-sdk subdir). This test primarily checks if the paths are constructed. 
The actual check for file existence before `capture_subprocess_output` is not in the provided snippet, @@ -8452,31 +8335,29 @@ def test_pc_sampling_prof_sdk_path_nonexistent_librocprofiler_sdk_tool( method = "host_trap" interval = 1000 workload_dir = str(tmp_path) - appcmd = "my_app --arg" + options = {"APP_CMD": "my_app --arg"} sdk_lib_dir = tmp_path / "rocm_sdk" / "lib" sdk_lib_dir.mkdir(parents=True, exist_ok=True) - rocprofiler_sdk_library_path = str(sdk_lib_dir / "librocprofiler_sdk.so") - Path(rocprofiler_sdk_library_path).touch() + rocprofiler_sdk_tool_path = str(sdk_lib_dir / "librocprofiler_sdk.so") + Path(rocprofiler_sdk_tool_path).touch() expected_tool_path = str( sdk_lib_dir / "rocprofiler-sdk" / "librocprofiler-sdk-tool.so" ) + options["LD_PRELOAD"] = expected_tool_path + mock_capture_subprocess.return_value = (True, "Success output") - utils.pc_sampling_prof( - method, interval, workload_dir, appcmd, rocprofiler_sdk_library_path - ) + utils.pc_sampling_prof(options, method, interval, workload_dir) assert mock_capture_subprocess.called call_args = mock_capture_subprocess.call_args called_env = call_args.kwargs.get("new_env", {}) assert "LD_PRELOAD" in called_env - ld_preload_paths = called_env["LD_PRELOAD"].split(":") - assert expected_tool_path in ld_preload_paths - assert rocprofiler_sdk_library_path in ld_preload_paths + assert called_env["LD_PRELOAD"] == expected_tool_path mock_console_error.assert_not_called() @@ -8495,14 +8376,12 @@ def test_pc_sampling_prof_subprocess_fails( method = "stochastic" interval = 5000 workload_dir = str(tmp_path) - appcmd = "another_app" - rocprofiler_sdk_library_path = "/some/path/librocprofiler_sdk.so" + options = ["another_app"] + rocprofiler_sdk_tool_path = "/some/path/librocprofiler_sdk.so" # noqa: F841 mock_capture_subprocess.return_value = (False, "Error output from subprocess") - utils.pc_sampling_prof( - method, interval, workload_dir, appcmd, rocprofiler_sdk_library_path - ) + utils.pc_sampling_prof(options, 
method, interval, workload_dir) mock_capture_subprocess.assert_called_once() mock_console_error.assert_called_once_with("PC sampling failed.") @@ -8510,10 +8389,11 @@ def test_pc_sampling_prof_subprocess_fails( mock_capture_subprocess.reset_mock() mock_console_error.reset_mock() with mock.patch("utils.utils.rocprof_cmd", "rocprofiler-sdk"): + options = {"APP_CMD": "another_app"} sdk_lib_dir = tmp_path / "rocm_sdk_fail" / "lib" sdk_lib_dir.mkdir(parents=True, exist_ok=True) - rocprofiler_sdk_library_path_sdk = str(sdk_lib_dir / "librocprofiler_sdk.so") - Path(rocprofiler_sdk_library_path_sdk).touch() + rocprofiler_sdk_tool_path_sdk = str(sdk_lib_dir / "librocprofiler_sdk.so") + Path(rocprofiler_sdk_tool_path_sdk).touch() tool_dir = sdk_lib_dir / "rocprofiler-sdk" tool_dir.mkdir(parents=True, exist_ok=True) @@ -8524,9 +8404,7 @@ def test_pc_sampling_prof_subprocess_fails( "Error output from SDK subprocess", ) - utils.pc_sampling_prof( - method, interval, workload_dir, appcmd, rocprofiler_sdk_library_path_sdk - ) + utils.pc_sampling_prof(options, method, interval, workload_dir) mock_capture_subprocess.assert_called_once() mock_console_error.assert_called_once_with("PC sampling failed.") @@ -8547,14 +8425,12 @@ def test_pc_sampling_prof_empty_appcmd( method = "host_trap" interval = 100 workload_dir = str(tmp_path) - appcmd = "" - rocprofiler_sdk_library_path = "/some/path/librocprofiler_sdk.so" + options = ["--"] + rocprofiler_sdk_tool_path = "/some/path/librocprofiler_sdk.so" # noqa: F841 mock_capture_subprocess.return_value = (True, "Output with empty appcmd") - utils.pc_sampling_prof( - method, interval, workload_dir, appcmd, rocprofiler_sdk_library_path - ) + utils.pc_sampling_prof(options, method, interval, workload_dir) assert mock_capture_subprocess.called options_list = mock_capture_subprocess.call_args[0][0] @@ -8566,17 +8442,16 @@ def test_pc_sampling_prof_empty_appcmd( with mock.patch("utils.utils.rocprof_cmd", "rocprofiler-sdk"): sdk_lib_dir = tmp_path / 
"rocm_sdk_empty" / "lib" sdk_lib_dir.mkdir(parents=True, exist_ok=True) - rocprofiler_sdk_library_path_sdk = str(sdk_lib_dir / "librocprofiler_sdk.so") - Path(rocprofiler_sdk_library_path_sdk).touch() + rocprofiler_sdk_tool_path_sdk = str(sdk_lib_dir / "librocprofiler_sdk.so") + Path(rocprofiler_sdk_tool_path_sdk).touch() tool_dir = sdk_lib_dir / "rocprofiler-sdk" tool_dir.mkdir(parents=True, exist_ok=True) (tool_dir / "librocprofiler-sdk-tool.so").touch() mock_capture_subprocess.return_value = (True, "Output with empty appcmd SDK") + options = {"APP_CMD": ""} - utils.pc_sampling_prof( - method, interval, workload_dir, appcmd, rocprofiler_sdk_library_path_sdk - ) + utils.pc_sampling_prof(options, method, interval, workload_dir) assert mock_capture_subprocess.called assert mock_capture_subprocess.call_args[0][0] == ""