From 2c9d92be33175faa0916220ac44811355fa4513c Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 7 Feb 2025 23:27:58 -0500 Subject: [PATCH] Remove remaining roctracer references (#82) [ROCm/rocprofiler-systems commit: e437200e9e970c73fb3a09df9e0df73351d7d51a] --- .../cmake/Modules/Findrocprofiler.cmake | 109 ------ .../cmake/Modules/Findroctracer.cmake | 186 --------- .../rocprofiler-systems/cmake/Packages.cmake | 13 - .../docker/Dockerfile.opensuse | 2 +- .../docker/Dockerfile.rhel | 2 +- .../docker/Dockerfile.ubuntu | 2 +- .../how-to/configuring-runtime-options.rst | 1 - ...rumenting-rewriting-binary-application.rst | 7 +- .../how-to/performing-causal-profiling.rst | 2 +- .../docs/how-to/sampling-call-stack.rst | 4 +- .../understanding-rocprof-sys-output.rst | 353 ++++++++++-------- .../docs/how-to/using-rocprof-sys-api.rst | 85 ++--- .../docs/reference/development-guide.rst | 2 +- .../rocprof-sys-instrument/internal_libs.cpp | 4 +- .../rocprof-sys-instrument.cpp | 4 +- .../source/bin/rocprof-sys-sample/impl.cpp | 19 +- .../source/lib/core/argparse.cpp | 7 +- .../source/lib/core/components/fwd.hpp | 12 - .../source/lib/core/hip_runtime.hpp | 51 --- .../library/components/backtrace.cpp | 1 - .../components/pthread_create_gotcha.cpp | 2 +- .../source/lib/rocprof-sys/library/ptl.cpp | 51 --- .../source/lib/rocprof-sys/library/ptl.hpp | 11 - .../lib/rocprof-sys/library/thread_info.hpp | 2 +- .../tests/rocprof-sys-openmp-tests.cmake | 4 +- 25 files changed, 254 insertions(+), 682 deletions(-) delete mode 100644 projects/rocprofiler-systems/cmake/Modules/Findrocprofiler.cmake delete mode 100644 projects/rocprofiler-systems/cmake/Modules/Findroctracer.cmake delete mode 100644 projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp diff --git a/projects/rocprofiler-systems/cmake/Modules/Findrocprofiler.cmake b/projects/rocprofiler-systems/cmake/Modules/Findrocprofiler.cmake deleted file mode 100644 index 2c91fa6833..0000000000 --- 
a/projects/rocprofiler-systems/cmake/Modules/Findrocprofiler.cmake +++ /dev/null @@ -1,109 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file -# Copyright.txt or https://cmake.org/licensing for details. - -include(FindPackageHandleStandardArgs) - -# ----------------------------------------------------------------------------------------# - -if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "") - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler) - if(EXISTS ${_DIR}) - get_filename_component(_ABS_DIR "${_DIR}" REALPATH) - list(APPEND _ROCM_ROCPROFILER_PATHS ${_ABS_DIR}) - endif() -endforeach() - -# ----------------------------------------------------------------------------------------# - -find_path( - rocprofiler_ROOT_DIR - NAMES include/rocprofiler/rocprofiler.h include/rocprofiler.h - HINTS ${_ROCM_ROCPROFILER_PATHS} - PATHS ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES rocprofiler) - -mark_as_advanced(rocprofiler_ROOT_DIR) - -# ----------------------------------------------------------------------------------------# - -find_path( - rocprofiler_INCLUDE_DIR - NAMES rocprofiler.h - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES include include/rocprofiler rocprofiler/include) - -mark_as_advanced(rocprofiler_INCLUDE_DIR) - -find_path( - rocprofiler_hsa_INCLUDE_DIR - NAMES hsa.h - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES include include/hsa) - -mark_as_advanced(rocprofiler_hsa_INCLUDE_DIR) - -# ----------------------------------------------------------------------------------------# - -find_library( - rocprofiler_LIBRARY - NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}rocprofiler64${CMAKE_SHARED_LIBRARY_SUFFIX}.1 - rocprofiler64 rocprofiler - HINTS ${rocprofiler_ROOT_DIR}/rocprofiler 
${rocprofiler_ROOT_DIR} - ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR} - ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES lib lib64 - NO_DEFAULT_PATH) - -find_library( - rocprofiler_hsa-runtime_LIBRARY - NAMES hsa-runtime64 hsa-runtime - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES lib lib64) - -if(rocprofiler_LIBRARY) - get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" PATH CACHE) -endif() - -mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY) -unset(_ROCM_ROCPROFILER_PATHS) - -if(ROCmVersion_NUMERIC_VERSION EQUAL 50500) - find_library( - rocprofiler_pciaccess_LIBRARY - NAMES pciaccess - PATH_SUFFIXES lib lib64) - mark_as_advanced(rocprofiler_pciaccess_LIBRARY) -endif() - -# ----------------------------------------------------------------------------------------# - -find_package_handle_standard_args( - rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR - rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY) - -# ----------------------------------------------------------------------------------------# - -if(rocprofiler_FOUND) - add_library(rocprofiler::rocprofiler INTERFACE IMPORTED) - add_library(rocprofiler::roctx INTERFACE IMPORTED) - set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR} - ${rocprofiler_hsa_INCLUDE_DIR}) - set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR}) - set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY}) - if(rocprofiler_pciaccess_LIBRARY) - list(APPEND rocprofiler_LIBRARIES ${rocprofiler_pciaccess_LIBRARY}) - endif() - - target_include_directories( - rocprofiler::rocprofiler INTERFACE ${rocprofiler_INCLUDE_DIR} - ${rocprofiler_hsa_INCLUDE_DIR}) - - target_link_libraries(rocprofiler::rocprofiler INTERFACE ${rocprofiler_LIBRARIES}) -endif() diff --git 
a/projects/rocprofiler-systems/cmake/Modules/Findroctracer.cmake b/projects/rocprofiler-systems/cmake/Modules/Findroctracer.cmake deleted file mode 100644 index 7b94f9645c..0000000000 --- a/projects/rocprofiler-systems/cmake/Modules/Findroctracer.cmake +++ /dev/null @@ -1,186 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file -# Copyright.txt or https://cmake.org/licensing for details. - -include(FindPackageHandleStandardArgs) - -# ----------------------------------------------------------------------------------------# - -if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "") - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/roctracer) - if(EXISTS ${_DIR}) - get_filename_component(_ABS_DIR "${_DIR}" REALPATH) - list(APPEND _ROCM_ROCTRACER_PATHS ${_ABS_DIR}) - endif() -endforeach() - -# ----------------------------------------------------------------------------------------# - -find_path( - roctracer_ROOT_DIR - NAMES include/roctracer/roctracer.h include/roctracer.h - HINTS ${_ROCM_ROCTRACER_PATHS} - PATHS ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES roctracer) - -mark_as_advanced(roctracer_ROOT_DIR) - -# ----------------------------------------------------------------------------------------# - -find_path( - roctracer_INCLUDE_DIR - NAMES roctracer.h - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES include include/roctracer roctracer/include) - -mark_as_advanced(roctracer_INCLUDE_DIR) - -find_path( - roctracer_hsa_INCLUDE_DIR - NAMES hsa.h - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES include include/hsa) - -mark_as_advanced(roctracer_hsa_INCLUDE_DIR) - -# ----------------------------------------------------------------------------------------# - -find_library( - roctracer_LIBRARY - NAMES roctracer64 roctracer - 
HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES lib lib64) - -find_library( - roctracer_roctx_LIBRARY - NAMES roctx64 roctx - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES lib lib64) - -find_library( - roctracer_kfdwrapper_LIBRARY - NAMES kfdwrapper64 kfdwrapper - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES lib lib64) - -find_library( - roctracer_hsa-runtime_LIBRARY - NAMES hsa-runtime64 hsa-runtime - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES lib lib64) - -# try not to directly use the hsakmt::hsakmt target because it hardcodes the -# INTERFACE_LINK_LIBRARIES used when it was built -find_package(hsakmt HINTS ${_ROCM_ROCTRACER_PATHS} PATHS ${_ROCM_ROCTRACER_PATHS}) - -if(hsakmt_FOUND) - add_library(roctracer::hsakmt INTERFACE IMPORTED) - get_target_property(hsakmt_INCLUDE_DIR hsakmt::hsakmt INTERFACE_INCLUDE_DIRECTORIES) - target_include_directories(roctracer::hsakmt INTERFACE ${hsakmt_INCLUDE_DIR}) - set(hsakmt_FOUND_LIBS ON) - foreach(_LIB drm drm_amdgpu rt c numa udev) - set(_LIB_NAMES ${_LIB}) - foreach(_EXT 2 1) - list( - APPEND - _LIB_NAMES - ${CMAKE_SHARED_LIBRARY_PREFIX}${_LIB}${CMAKE_SHARED_LIBRARY_SUFFIX}.${_EXT} - ) - endforeach() - find_library( - hsakmt_${_LIB}_LIBRARY - NAMES ${_LIB_NAMES} - HINTS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu - PATHS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu - PATH_SUFFIXES ${CMAKE_INSTALL_LIBDIR} lib lib64) - if(NOT hsakmt_${_LIB}_LIBRARY) - set(hsakmt_FOUND_LIBS OFF) - else() - target_link_libraries(roctracer::hsakmt INTERFACE ${hsakmt_${_LIB}_LIBRARY}) - endif() - endforeach() - if(hsakmt_FOUND_LIBS) - find_package(Threads REQUIRED) - target_link_libraries(roctracer::hsakmt INTERFACE Threads::Threads) - 
set(roctracer_hsakmt_LIBRARY - roctracer::hsakmt - CACHE STRING "Generated hsakmt target for roctracer") - else() - set(roctracer_hsakmt_LIBRARY - hsakmt::hsakmt - CACHE STRING "Imported hsakmt target") - endif() -else() - find_library( - roctracer_hsakmt_LIBRARY - NAMES hsakmt - HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS} - PATH_SUFFIXES lib lib64) -endif() - -if(roctracer_LIBRARY) - get_filename_component(roctracer_LIBRARY_DIR "${roctracer_LIBRARY}" PATH CACHE) -endif() - -mark_as_advanced(roctracer_LIBRARY roctracer_roctx_LIBRARY roctracer_hsakmt_LIBRARY - roctracer_hsa-runtime_LIBRARY) - -# ----------------------------------------------------------------------------------------# - -find_package_handle_standard_args( - roctracer DEFAULT_MSG roctracer_ROOT_DIR roctracer_INCLUDE_DIR - roctracer_hsa_INCLUDE_DIR roctracer_LIBRARY roctracer_roctx_LIBRARY) - -# ------------------------------------------------------------------------------# - -if(roctracer_FOUND) - add_library(roctracer::roctracer INTERFACE IMPORTED) - add_library(roctracer::roctx INTERFACE IMPORTED) - set(roctracer_INCLUDE_DIRS ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR}) - set(roctracer_LIBRARIES ${roctracer_LIBRARY} ${roctracer_roctx_LIBRARY}) - set(roctracer_LIBRARY_DIRS ${roctracer_LIBRARY_DIR}) - - target_include_directories( - roctracer::roctracer INTERFACE ${roctracer_INCLUDE_DIR} - ${roctracer_hsa_INCLUDE_DIR}) - target_include_directories(roctracer::roctx INTERFACE ${roctracer_INCLUDE_DIR} - ${roctracer_hsa_INCLUDE_DIR}) - - target_link_libraries(roctracer::roctracer INTERFACE ${roctracer_LIBRARY}) - target_link_libraries(roctracer::roctx INTERFACE ${roctracer_roctx_LIBRARY}) - - if(roctracer_kfdwrapper_LIBRARY) - list(APPEND roctracer_LIBRARIES ${roctracer_kfdwrapper_LIBRARY}) - target_link_libraries(roctracer::roctracer - INTERFACE ${roctracer_kfdwrapper_LIBRARY}) - target_link_libraries(roctracer::roctx INTERFACE 
${roctracer_kfdwrapper_LIBRARY}) - endif() - - if(roctracer_hsakmt_LIBRARY) - list(APPEND roctracer_LIBRARIES ${roctracer_hsakmt_LIBRARY}) - target_link_libraries(roctracer::roctracer INTERFACE ${roctracer_hsakmt_LIBRARY}) - target_link_libraries(roctracer::roctx INTERFACE ${roctracer_hsakmt_LIBRARY}) - endif() - - if(roctracer_hsa-runtime_LIBRARY) - list(APPEND roctracer_LIBRARIES ${roctracer_hsa-runtime_LIBRARY}) - target_link_libraries(roctracer::roctracer - INTERFACE ${roctracer_hsa-runtime_LIBRARY}) - endif() - -endif() - -# ------------------------------------------------------------------------------# - -unset(_ROCM_ROCTRACER_PATHS) - -# ------------------------------------------------------------------------------# diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake b/projects/rocprofiler-systems/cmake/Packages.cmake index c508a459d2..8aa398c54f 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -17,10 +17,6 @@ rocprofiler_systems_add_interface_library( "Provides flags and libraries for Dyninst (dynamic instrumentation)") rocprofiler_systems_add_interface_library(rocprofiler-systems-rocm "Provides flags and libraries for ROCm") -rocprofiler_systems_add_interface_library(rocprofiler-systems-roctracer - "Provides flags and libraries for roctracer") -rocprofiler_systems_add_interface_library(rocprofiler-systems-rocprofiler - "Provides flags and libraries for rocprofiler") rocprofiler_systems_add_interface_library( rocprofiler-systems-rccl "Provides flags for ROCm Communication Collectives Library (RCCL)") @@ -161,15 +157,6 @@ if(ROCPROFSYS_USE_ROCM) set(ROCPROFSYS_ROCM_VERSION_PATCH ${ROCmVersion_PATCH_VERSION}) set(ROCPROFSYS_ROCM_VERSION ${ROCmVersion_TRIPLE_VERSION}) - if(ROCPROFSYS_ROCM_VERSION_MAJOR GREATER_EQUAL 4 AND ROCPROFSYS_ROCM_VERSION_MINOR - GREATER 3) - set(roctracer_kfdwrapper_LIBRARY) - endif() - - if(NOT roctracer_kfdwrapper_LIBRARY) - 
set(roctracer_kfdwrapper_LIBRARY) - endif() - rocprofiler_systems_add_feature(ROCPROFSYS_ROCM_VERSION "ROCm version used by rocprofiler-systems") else() diff --git a/projects/rocprofiler-systems/docker/Dockerfile.opensuse b/projects/rocprofiler-systems/docker/Dockerfile.opensuse index 31594cf2c1..a48a4147e4 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.opensuse +++ b/projects/rocprofiler-systems/docker/Dockerfile.opensuse @@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \ zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ zypper --non-interactive --gpg-auto-import-keys refresh && \ - zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \ + zypper --non-interactive install -y rocm-dev rccl-devel libpciaccess0 && \ zypper --non-interactive clean --all; \ fi diff --git a/projects/rocprofiler-systems/docker/Dockerfile.rhel b/projects/rocprofiler-systems/docker/Dockerfile.rhel index 97394b8b19..486952de3f 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.rhel +++ b/projects/rocprofiler-systems/docker/Dockerfile.rhel @@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \ dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \ yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ - yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \ + yum install -y rocm-dev && \ yum clean all; \ fi diff --git a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu index a98d692242..a97367e7b7 100644 --- a/projects/rocprofiler-systems/docker/Dockerfile.ubuntu +++ 
b/projects/rocprofiler-systems/docker/Dockerfile.ubuntu @@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \ apt-get install -y ./${AMDGPU_DEB} && \ apt-get update && \ - apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \ + apt-get install -y rocm-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \ apt-get autoclean; \ fi diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index a910e9df5d..9694582148 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -468,7 +468,6 @@ Viewing components | written_bytes | Number of bytes sent to the storage layer. | | written_char | Number of bytes which this task has cause... | | rocprof-sys | Invokes instrumentation functions rocprof... | - | roctracer | High-precision ROCm API and kernel tracing. | | sampling_wall_clock | Wall-clock timing. Derived from statistic... | | sampling_cpu_clock | CPU-clock timing. Derived from statistica... | | sampling_percent | Fraction of wall-clock time spent in func... | diff --git a/projects/rocprofiler-systems/docs/how-to/instrumenting-rewriting-binary-application.rst b/projects/rocprofiler-systems/docs/how-to/instrumenting-rewriting-binary-application.rst index 1c7329f602..d2709f65cb 100644 --- a/projects/rocprofiler-systems/docs/how-to/instrumenting-rewriting-binary-application.rst +++ b/projects/rocprofiler-systems/docs/how-to/instrumenting-rewriting-binary-application.rst @@ -247,10 +247,7 @@ view the help menu. 
libpthread.so.0 libresolv.so.2 librocm_smi64.so - librocmtools.so - librocprofiler64.so - libroctracer64.so - libroctx64.so + librocprofiler-sdk.so librt.so.1 libstdc++.so.6 libtbb.so @@ -854,7 +851,7 @@ By default, ``rocprof-sys-instrument`` uses ``--mode trace`` for instrumentation only instruments ``main`` in an executable. It activates both CPU call-stack sampling and background system-level thread sampling by default. Tracing capabilities which do not rely on instrumentation, such as the HIP API and kernel tracing -(which is collected by roctracer), are still available. +are still available. The ROCm Systems Profiler sampling capabilities are always available, even in trace mode, but are deactivated by default. To activate sampling in trace mode, set ``ROCPROFSYS_USE_SAMPLING=ON`` in the environment diff --git a/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst b/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst index c95a6d12a4..2520c01cb3 100644 --- a/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst +++ b/projects/rocprofiler-systems/docs/how-to/performing-causal-profiling.rst @@ -80,7 +80,7 @@ in between samples. Progress points must be triggered in a deterministic manner This can happen in three different ways: * `ROCm Systems Profiler `_ can leverage the callbacks from - Kokkos-Tools, OpenMP-Tools, roctracer, etc. and the wrappers around functions for + Kokkos-Tools, OpenMP-Tools, rocprofiler-sdk, etc. and the wrappers around functions for MPI, NUMA, RCCL, etc. 
to act as progress points * Users can leverage the :doc:`runtime instrumentation capabilities <./instrumenting-rewriting-binary-application>` to insert progress points diff --git a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst index 0f3408704c..d9e62e2672 100644 --- a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst +++ b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst @@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option [BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation - -I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] + -I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ] Include data from these backends (count: unlimited) - -E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] + -E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ] Exclude data from these backends (count: unlimited) [HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H diff --git a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst index de31e938d4..73e27fe354 100644 --- a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst +++ b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst @@ -77,164 +77,207 @@ Metadata JSON Sample .. 
code-block:: json { - "rocprof-sys": { + "rocprofiler-systems": { "metadata": { - "info": { - "HW_L1_CACHE_SIZE": 32768, - "HW_L2_CACHE_SIZE": 524288, - "HW_L3_CACHE_SIZE": 16777216, - "HW_PHYSICAL_CPU": 12, - "HW_CONCURRENCY": 24, - "LAUNCH_TIME": "02:04", - "LAUNCH_DATE": "05/08/22", - "TIMEMORY_GIT_REVISION": "52e7034fd419ff296506cdef43084f6071dbaba1", - "TIMEMORY_VERSION": "3.3.0rc4", - "TIMEMORY_API": "tim::project::timemory", - "TIMEMORY_GIT_DESCRIBE": "v3.2.0-263-g52e7034f", - "PWD": "/home/jrmadsen/devel/c++/AARInternal/hosttrace-dyninst/build-vscode", - "USER": "jrmadsen", - "HOME": "/home/jrmadsen", - "SHELL": "/bin/bash", - "CPU_MODEL": "AMD Ryzen Threadripper PRO 3945WX 12-Cores", - "CPU_FREQUENCY": 2400, - "CPU_VENDOR": "AuthenticAMD", - "CPU_FEATURES": [ - "fpu", - "msr", - "sse", - "sse2", - "constant_tsc", - "ssse3", - "fma", - "sse4_1", - "sse4_2", - "popcnt", - "avx2", - "... etc. ..." - ], - "memory_maps": [ - { - "end_address": "7f4013797000", - "start_address": "7f4012e58000", - "pathname": "/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000", - "offset": "34a000", - "device": "103:05", - "inode": 4331165, - "permissions": "rw-p" - }, - { - "end_address": "7f4013902000", - "start_address": "7f4013901000", - "pathname": "/usr/lib/x86_64-linux-gnu/libm-2.31.so", - "offset": "14d000", - "device": "103:05", - "inode": 42078854, - "permissions": "rwxp" - }, - { - "end_address": "7f4013919000", - "start_address": "7f4013908000", - "pathname": "/usr/lib/x86_64-linux-gnu/libpthread-2.31.so", - "offset": "6000", - "device": "103:05", - "inode": 42078874, - "permissions": "r-xp" - }, - { - "...": "etc." 
- }, - ], - "memory_maps_files": [ - "/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000", - "/opt/rocm-5.0.0/hsa-amd-aqlprofile/lib/libhsa-amd-aqlprofile64.so.1.0.50000", - "/opt/rocm-5.0.0/lib/libamd_comgr.so.2.4.50000", - "/opt/rocm-5.0.0/lib/libhsa-runtime64.so.1.5.50000", - "/opt/rocm-5.0.0/rocm_smi/lib/librocm_smi64.so.5.0.50000", - "/opt/rocm-5.0.0/roctracer/lib/libroctracer64.so.1.0.50000", - "/usr/lib/x86_64-linux-gnu/ld-2.31.so", - "/usr/lib/x86_64-linux-gnu/libc-2.31.so", - "/usr/lib/x86_64-linux-gnu/libdl-2.31.so", - "... etc. ..." - ], - }, - "output": { - "text": [ - { - "value": [ - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.txt" - ], - "key": "roctracer" - }, - { - "value": [ - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt" - ], - "key": "wall_clock" - } - ], - "json": [ - { - "value": [ - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.json", - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.tree.json" - ], - "key": "roctracer" - }, - { - "value": [ - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json", - "rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.tree.json" - ], - "key": "wall_clock" - } - ] - }, - "environment": [ - { - "value": "/home/jrmadsen", - "key": "HOME" - }, - { - "value": "/bin/bash", - "key": "SHELL" - }, - { - "value": "jrmadsen", - "key": "USER" - }, - { - "value": "true", - "key": "... etc. ..." 
- } + "info": { + "CPU_MODEL": "AMD Ryzen 5 3600 6-Core Processor", + "CPU_VENDOR": "AuthenticAMD", + "HOME": "/home/rocm-dev", + "LAUNCH_DATE": "01/15/25", + "LAUNCH_TIME": "16:49", + "PWD": "/home/rocm-dev/code/rocprofiler-systems", + "ROCPROFSYS_COMPILER_ID": "GNU", + "ROCPROFSYS_COMPILER_VERSION": "11.4.0", + "ROCPROFSYS_GIT_DESCRIBE": "", + "ROCPROFSYS_GIT_REVISION": "3213dc652728f7ed01b62bf55f6af76c43bfcbdb", + "ROCPROFSYS_LIBRARY_ARCH": "x86_64-linux-gnu", + "ROCPROFSYS_ROCM_VERSION": "6.3.1", + "ROCPROFSYS_SYSTEM_NAME": "Linux", + "ROCPROFSYS_SYSTEM_PROCESSOR": "x86_64", + "ROCPROFSYS_SYSTEM_VERSION": "6.8.0-51-generic", + "ROCPROFSYS_VERSION": "1.0.0", + "SHELL": "/usr/bin/zsh", + "TIMEMORY_API": "tim::project::timemory", + "TIMEMORY_GIT_DESCRIBE": "v3.2.0-703-gba3c6486", + "TIMEMORY_GIT_REVISION": "ba3c648677b3c6f217abe147ef3198f36239e234", + "TIMEMORY_VERSION": "4.0.0rc0", + "USER": "rocm-dev", + "CPU_FREQUENCY": 1972, + "CPU_FEATURES": [ + "fpu", + "vme", + "de", + "pse", + "tsc", + "msr", + "pae", + "... etc. ..." + ], + "HW_CONCURRENCY": 12, + "HW_PHYSICAL_CPU": 6, + "HW_L1_CACHE_SIZE": 32768, + "HW_L2_CACHE_SIZE": 524288, + "HW_L3_CACHE_SIZE": 16777216, + "ROCPROFSYS_VERSION_MAJOR": 1, + "ROCPROFSYS_VERSION_MINOR": 0, + "ROCPROFSYS_VERSION_PATCH": 0, + "ROCPROFSYS_ROCM_VERSION_MAJOR": 6, + "ROCPROFSYS_ROCM_VERSION_MINOR": 3, + "ROCPROFSYS_ROCM_VERSION_PATCH": 1, + "memory_maps_files": [ + "/opt/rocm-6.3.1/lib/libhsa-amd-aqlprofile64.so.1.0.60301", + "/opt/rocm-6.3.1/lib/libhsa-runtime64.so.1.14.60301", + "/opt/rocm-6.3.1/lib/librocm_smi64.so.7.4.60301", + "/opt/rocm-6.3.1/lib/librocprofiler-register.so.0.4.0", + "/opt/rocm-6.3.1/lib/librocprofiler-sdk.so.0.5.0", + "/opt/rocm/lib/libhsa-amd-aqlprofile64.so.1", + "/opt/rocm/lib/libhsa-runtime64.so.1", + "/opt/rocm/lib/librocm_smi64.so.7", + "/opt/rocm/lib/librocprofiler-register.so.0", + "/opt/rocm/lib/librocprofiler-sdk.so.0", + "... etc. ..." 
], - "settings": { - "ROCPROFSYS_JSON_OUTPUT": { - "count": -1, - "environ_updated": false, - "name": "json_output", - "data_type": "bool", - "initial": true, - "enabled": true, - "value": true, - "max_count": 1, - "cmdline": [ - "--rocprof-sys-json-output" - ], - "environ": "ROCPROFSYS_JSON_OUTPUT", - "config_updated": false, - "categories": [ - "io", - "json", - "native" - ], - "description": "Write json output files" - }, - "... etc. ...": { - "etc.": true - } + "memory_maps": [ + { + "cereal_class_version": 0, + "load_address": "76005b800000", + "last_address": "76005b81b000", + "permissions": "r---", + "offset": "0", + "device": "", + "inode": 0, + "pathname": "/opt/rocm/lib/libhsa-runtime64.so.1" + }, + { + "load_address": "76005b81b000", + "last_address": "76005b93400d", + "permissions": "r-x-", + "offset": "1b000", + "device": "", + "inode": 0, + "pathname": "/opt/rocm/lib/libhsa-runtime64.so.1" + }, + { + "load_address": "76005b935000", + "last_address": "76005b9aeab8", + "permissions": "r---", + "offset": "135000", + "device": "", + "inode": 0, + "pathname": "/opt/rocm/lib/libhsa-runtime64.so.1" + }, + { + "load_address": "76005b9b0638", + "last_address": "76005bb2d598", + "permissions": "rw--", + "offset": "1af638", + "device": "", + "inode": 0, + "pathname": "/opt/rocm/lib/libhsa-runtime64.so.1" + }, + { + "load_address": "76005bc00000", + "last_address": "76005bc26140", + "permissions": "r---", + "offset": "0", + "device": "", + "inode": 0, + "pathname": "/opt/rocm/lib/librocprofiler-sdk.so.0" + }, + { + "... etc. ..." 
} - } - } + ], + "settings": { + "cereal_class_version": 2, + "ROCPROFSYS_OUTPUT_PREFIX": { + "name": "output_prefix", + "environ": "ROCPROFSYS_OUTPUT_PREFIX", + "description": "Explicitly specify a prefix for all output files", + "count": 1, + "max_count": -1, + "cmdline": [ + "--rocprofiler-systems-output-prefix" + ], + "categories": [ + "filename", + "io", + "librocprof-sys", + "native", + "rocprofsys" + ], + "data_type": "string", + "initial": "parallel-overhead-binary-rewrite/", + "value": "parallel-overhead-binary-rewrite/", + "updated": "config", + "enabled": true + }, + { + ... etc. ... + }, + "command_line": [ + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/parallel-overhead.inst", + "--", + "10", + "12", + "1000" + ], + "environment": [ + ... etc . ... + ] + }, + "environment": [ + { + "key": "GOTCHA_DEBUG", + "value": "0" + }, + { + "key": "HIP_VISIBLE_DEVICES", + "value": "" + }, + { + "key": "HOME", + "value": "/home/rocm-dev" + }, + { + "key": "LD_LIBRARY_PATH", + "value": "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib:/opt/rocm/lib" + }, + { + "key": "LIBRARY_PATH", + "value": "" + }, + { + etc ... 
+ } + ], + "output": { + "json": [ + { + "key": "wall_clock", + "value": [ + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json" + ] + } + ], + "protobuf": [ + { + "key": "perfetto", + "value": [ + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/perfetto-trace.proto" + ] + } + ], + "text": [ + { + "key": "wall_clock", + "value": [ + "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt" + ] + } + ] + } + } } Configuring the ROCm Systems Profiler output diff --git a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst index 55b87297ee..94874acd08 100644 --- a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst +++ b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst @@ -192,75 +192,52 @@ First, instrument and run the program. ... 
$ rocprof-sys-run --profile --trace -- ./user-api.inst 10 12 1000 - ROCPROFSYS: HSA_TOOLS_LIB=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0 - ROCPROFSYS: HSA_TOOLS_REPORT_LOAD_FAILURE=1 - ROCPROFSYS: LD_PRELOAD=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0 - ROCPROFSYS: OMP_TOOL_LIBRARIES=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0 + ROCPROFSYS: LD_PRELOAD=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0 + ROCPROFSYS: OMP_TOOL_LIBRARIES=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0 ROCPROFSYS: ROCPROFSYS_PROFILE=true ROCPROFSYS: ROCPROFSYS_TRACE=true ROCPROFSYS: ROCPROFSYS_VERBOSE=0 - ROCPROFSYS: ROCP_HSA_INTERCEPT=1 - ROCPROFSYS: ROCP_TOOL_LIB=/opt/rocm-6.3.1/lib/librocprof-sys.so.0.1.0 - [rocprof-sys][dl][297646] rocprofsys_main - [rocprof-sys][297646][rocprofsys_init_tooling] Instrumentation mode: Trace + [rocprof-sys][dl][1827155] rocprofsys_main + [rocprof-sys][1827155][rocprofsys_init_tooling] Instrumentation mode: Trace - ____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____ + ____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____ | _ \ / _ \ / ___| \/ | / ___\ \ / / ___|_ _| ____| \/ / ___| | _ \| _ \ / _ \| ___|_ _| | | ____| _ \ | |_) | | | | | | |\/| | \___ \\ V /\___ \ | | | _| | |\/| \___ \ | |_) | |_) | | | | |_ | || | | _| | |_) | | _ <| |_| | |___| | | | ___) || | ___) || | | |___| | | |___) | | __/| _ <| |_| | _| | || |___| |___| _ < |_| \_\\___/ \____|_| |_| |____/ |_| |____/ |_| |_____|_| |_|____/ |_| |_| \_\\___/|_| |___|_____|_____|_| \_\ - rocprof-sys v0.1.0 (rev: b569c837e455f71dd76d06392d0b901ae927deca, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x) - [105.947] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: "" - Pushing custom region :: ./user-api.inst - Pushing 
custom region :: initialization + rocprof-sys v1.0.0 (rev: 3213dc652728f7ed01b62bf55f6af76c43bfcbdb, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x) + [790.763] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: "" [./user-api.inst] Threads: 12 [./user-api.inst] Iterations: 1000 [./user-api.inst] fibonacci(10)... - Pushing custom region :: thread_creation - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: run(10) x 1000 - Pushing custom region :: thread_wait - Pushing custom region :: run(10) x 1000 [./user-api.inst] fibonacci(10) x 12 = 715000 - [rocprof-sys][297646][0][rocprofsys_finalize] finalizing... 
- [rocprof-sys][297646][0][rocprofsys_finalize] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646 : 0.978014 sec wall_clock, 26.752 MB peak_rss, 27.394 MB page_rss, 1.520000 sec cpu_clock, 155.4 % cpu_util [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/0 : 0.976068 sec wall_clock, 0.789948 sec thread_cpu_clock, 80.9 % thread_cpu_util, 26.112 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/1 : 0.027517 sec wall_clock, 0.027510 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/2 : 0.027828 sec wall_clock, 0.027811 sec thread_cpu_clock, 99.9 % thread_cpu_util, 3.584 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/3 : 0.027585 sec wall_clock, 0.027585 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/4 : 0.033449 sec wall_clock, 0.033443 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/5 : 0.027727 sec wall_clock, 0.027726 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.328 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/6 : 0.032228 sec wall_clock, 0.032220 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.712 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/7 : 0.030201 sec wall_clock, 0.030202 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/8 : 0.027960 sec wall_clock, 0.027951 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1] - 
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/9 : 0.034698 sec wall_clock, 0.034699 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/10 : 0.033414 sec wall_clock, 0.033399 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.512 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/11 : 0.028161 sec wall_clock, 0.028149 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/12 : 0.027791 sec wall_clock, 0.027767 sec thread_cpu_clock, 99.9 % thread_cpu_util, 0.256 MB peak_rss [laps: 1] - [rocprof-sys][297646][0][rocprofsys_finalize] - [rocprof-sys][297646][0][rocprofsys_finalize] Finalizing perfetto... - [rocprofiler-systems][297646][perfetto]> Outputting '/home/gliff/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-02_19.29/perfetto-trace-297646.proto' (16728.58 KB / 16.73 MB / 0.02 GB)... 
Done - [rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.json' - [rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.txt' - [rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.json' - [rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.txt' - [rocprofiler-systems][297646][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/metadata-297646.json' and 'rocprofsys-user-api.inst-output/2025-01-02_19.29/functions-297646.json' - [rocprof-sys][297646][0][rocprofsys_finalize] Finalized: 0.314368 sec wall_clock, 19.040 MB peak_rss, 3.498 MB page_rss, 0.280000 sec cpu_clock, 89.1 % cpu_util - [107.243] perfetto.cc:49204 Tracing session 1 ended, total sessions:0 + [rocprof-sys][1827155][0][rocprofsys_finalize] finalizing... 
+ [rocprof-sys][1827155][0][rocprofsys_finalize] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155 : 0.137404 sec wall_clock, 6.528 MB peak_rss, 6.685 MB page_rss, 0.540000 sec cpu_clock, 393.0 % cpu_util [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/0 : 0.135815 sec wall_clock, 0.035171 sec thread_cpu_clock, 25.9 % thread_cpu_util, 6.016 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/1 : 0.028336 sec wall_clock, 0.028336 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/2 : 0.030380 sec wall_clock, 0.030380 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/3 : 0.035233 sec wall_clock, 0.035227 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/4 : 0.035275 sec wall_clock, 0.035267 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/5 : 0.035452 sec wall_clock, 0.035452 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/6 : 0.036198 sec wall_clock, 0.036190 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/7 : 0.034709 sec wall_clock, 0.034702 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/8 : 0.036590 sec wall_clock, 0.033590 sec thread_cpu_clock, 91.8 % thread_cpu_util, 0.512 MB peak_rss [laps: 
1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/9 : 0.033108 sec wall_clock, 0.033098 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/10 : 0.032993 sec wall_clock, 0.032994 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.256 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/11 : 0.035687 sec wall_clock, 0.035368 sec thread_cpu_clock, 99.1 % thread_cpu_util, 0.128 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/12 : 0.035359 sec wall_clock, 0.035354 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.128 MB peak_rss [laps: 1] + [rocprof-sys][1827155][0][rocprofsys_finalize] + [rocprof-sys][1827155][0][rocprofsys_finalize] Finalizing perfetto... + [rocprofiler-systems][1827155][perfetto]> Outputting '/home/rocm-dev/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-15_17.57/perfetto-trace-1827155.proto' (17.20 KB / 0.02 MB / 0.00 GB)... Done + [rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.json' + [rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.txt' + [rocprofiler-systems][1827155][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/metadata-1827155.json' and 'rocprofsys-user-api.inst-output/2025-01-15_17.57/functions-1827155.json' + [rocprof-sys][1827155][0][rocprofsys_finalize] Finalized: 0.048039 sec wall_clock, 0.640 MB peak_rss, 0.655 MB page_rss, 0.020000 sec cpu_clock, 41.6 % cpu_util + [790.953] perfetto.cc:49204 Tracing session 1 ended, total sessions:0 Then review the output. 
diff --git a/projects/rocprofiler-systems/docs/reference/development-guide.rst b/projects/rocprofiler-systems/docs/reference/development-guide.rst index e71eee033b..6003021484 100644 --- a/projects/rocprofiler-systems/docs/reference/development-guide.rst +++ b/projects/rocprofiler-systems/docs/reference/development-guide.rst @@ -301,7 +301,7 @@ Collected data is generally handled in one of the three following ways: In general, only instrumentation for relatively simple data is directly passed to Perfetto and/or Timemory during runtime. For example, the callbacks from binary instrumentation, user API instrumentation, -and roctracer directly invoke +and rocprofiler-sdk directly invoke calls to Perfetto or Timemory's storage model. Otherwise, the data is stored by ROCm Systems Profiler in the thread-data model which is more persistent than simply using ``thread_local`` static data, which gets deleted diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp index 32fc522f5a..8f2b58b4d7 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/internal_libs.cpp @@ -320,13 +320,13 @@ get_internal_basic_libs_impl() "libunwind-setjmp.so", "libunwind.so", "libunwind-x86_64.so", + "libpapi.so", + "libpfm.so", "librocm_smi64.so", "libroctx64.so", "librocmtools.so", "libroctracer64.so", "librocprofiler64.so", - "libpapi.so", - "libpfm.so", "librocprofiler-register.so", "librocprofiler-sdk.so", "librocprofiler-sdk-roctx.so", diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp index 58c9965497..247b00e8b3 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp +++ 
b/projects/rocprofiler-systems/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp @@ -360,8 +360,8 @@ main(int argc, char** argv) itr, std::regex{ "lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|" "instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|" - "tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|" - "amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|" + "tbbmalloc_proxy|gotcha|libunwind|hsa-runtime|amdhip|" + "amd_comgr|rocm_smi64|rocprofiler-register|" "rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" })) { if(!find(filepath::dirname(itr), lib_search_paths)) diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp index 28b083b28c..9a6856bc5e 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp @@ -713,20 +713,11 @@ parse_args(int argc, char** argv, std::vector& _env) } }); - std::set _backend_choices = { "all", - "kokkosp", - "mpip", - "ompt", - "rcclp", - "rocm-smi", - "roctracer", - "rocprofiler", - "roctx", - "mutex-locks", - "spin-locks", - "rw-locks", - "rocprofiler-sdk", - "rocm" }; + std::set _backend_choices = { + "all", "kokkosp", "mpip", "ompt", + "rcclp", "rocm-smi", "mutex-locks", "spin-locks", + "rw-locks", "rocprofiler-sdk", "rocm" + }; #if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS) _backend_choices.erase("mpip"); diff --git a/projects/rocprofiler-systems/source/lib/core/argparse.cpp b/projects/rocprofiler-systems/source/lib/core/argparse.cpp index f2c274749b..2a87338dc5 100644 --- a/projects/rocprofiler-systems/source/lib/core/argparse.cpp +++ b/projects/rocprofiler-systems/source/lib/core/argparse.cpp @@ -543,9 +543,10 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _data.processed_environs.emplace("periods"); } - strset_t _backend_choices = { "all", 
"kokkosp", "mpip", "ompt", - "rcclp", "rocm-smi", "roctracer", "rocprofiler", - "roctx", "mutex-locks", "spin-locks", "rw-locks" }; + strset_t _backend_choices = { + "all", "kokkosp", "mpip", "ompt", "rcclp", + "rocm-smi", "rocprofiler-sdk", "mutex-locks", "spin-locks", "rw-locks" + }; #if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS) _backend_choices.erase("mpip"); diff --git a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp index 5d16fa5812..8259378ff6 100644 --- a/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/core/components/fwd.hpp @@ -39,8 +39,6 @@ #include -ROCPROFSYS_DECLARE_COMPONENT(roctracer) -ROCPROFSYS_DECLARE_COMPONENT(rocprofiler) ROCPROFSYS_DECLARE_COMPONENT(rcclp_handle) ROCPROFSYS_DECLARE_COMPONENT(comm_data) @@ -127,12 +125,6 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, f ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type) #endif -TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer, project::rocprofsys, - tpls::rocm, device::gpu, os::supports_linux, - category::external) -TIMEMORY_SET_COMPONENT_API(rocprofsys::component::rocprofiler, project::rocprofsys, - tpls::rocm, device::gpu, os::supports_linux, - category::external, category::hardware_counter) TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_wall_clock, project::rocprofsys, category::timing, os::supports_unix, category::sampling, category::interrupt_sampling) @@ -160,10 +152,6 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::roc tpls::rocm, device::gpu, os::supports_linux, category::sampling, category::process_sampling) -TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::roctracer, "roctracer", - "High-precision ROCm API and kernel tracing", "") 
-TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::rocprofiler, "rocprofiler", - "ROCm kernel hardware counters", "") TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_wall_clock, "sampling_wall_clock", "Wall-clock timing", "Derived from statistical sampling") diff --git a/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp b/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp deleted file mode 100644 index 2d3ba1efa7..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/hip_runtime.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#include "core/defines.hpp" - -#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 - -# if defined(HIP_INCLUDE_HIP_HIP_RUNTIME_H) || \ - defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H) -# error \ - "include core/hip_runtime.hpp before or " -# endif - -# define HIP_PROF_HIP_API_STRING 1 - -// following must be included before for ROCm 6.0+ -# if defined(USE_PROF_API) -# undef USE_PROF_API -# endif -# include -# include -// must be included after hip_runtime_api.h -# include -// must be included after hip_runtime_api.h -# include -// must be included after hip_runtime_api.h -# include - -# include -#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace.cpp index 2f4bb236f4..30718886ca 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace.cpp @@ -126,7 +126,6 @@ backtrace::filter_and_patch(const std::vector& _data) if(_lbl.find("DYNINST_") != _npos) return 0; if(_lbl.find("rocprofsys_") != _npos) return -1; if(_lbl.find("rocprofiler_") != _npos) return -1; - if(_lbl.find("roctracer_") != _npos) return -1; if(_lbl.find("perfetto::") != _npos) return -1; if(_lbl.find("protozero::") == 0) return -1; if(_lbl.find("gotcha_") != _npos) return -1; diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp index b4d43de2f8..1e7249d09a 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/pthread_create_gotcha.cpp @@ -121,7 +121,7 @@ stop_bundle(bundle_t& _bundle, int64_t _tid, Args&&... 
_args) { auto _wc = *_bundle.get(); _wc.stop(); - // update roctracer_data + // update data _bundle.store(std::plus{}, _wc.get() * _wc.unit()); // stop all _bundle.stop(); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.cpp index 86b5f127cc..1bacd5f202 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.cpp @@ -116,19 +116,6 @@ get_thread_pool_state() } // namespace } // namespace general -namespace roctracer -{ -namespace -{ -auto& -get_thread_pool_state() -{ - static auto _v = State::PreInit; - return _v; -} -} // namespace -} // namespace roctracer - void setup() { @@ -140,17 +127,6 @@ setup() void join() { - if(roctracer::get_thread_pool_state() == State::Active) - { - ROCPROFSYS_DEBUG_F("waiting for all roctracer tasks to complete...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - roctracer::get_task_group(i).join(); - } - else - { - ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { ROCPROFSYS_DEBUG_F("waiting for all general tasks to complete...\n"); @@ -162,22 +138,6 @@ join() void shutdown() { - if(roctracer::get_thread_pool_state() == State::Active) - { - ROCPROFSYS_DEBUG_F("Waiting on completion of roctracer tasks...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - roctracer::get_task_group(i).join(); - roctracer::get_task_group(i).clear(); - roctracer::get_task_group(i).set_pool(nullptr); - } - roctracer::get_thread_pool_state() = State::Finalized; - } - else - { - ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { ROCPROFSYS_DEBUG_F("Waiting on completion of general tasks...\n"); @@ -219,16 +179,5 @@ general::get_task_group(int64_t _tid) return *_v; } 
-PTL::TaskGroup& -roctracer::get_task_group(int64_t _tid) -{ - struct local - {}; - using thread_data_t = thread_data, local>; - static thread_local auto& _v = (roctracer::get_thread_pool_state() = State::Active, - thread_data_t::instance(construct_on_thread{ _tid }, - &tasking::get_thread_pool())); - return *_v; -} } // namespace tasking } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.hpp index 7414e59071..9d702293e0 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ptl.hpp @@ -56,16 +56,5 @@ PTL::TaskGroup& get_task_group(int64_t _tid = utility::get_thread_index()); } -//--------------------------------------------------------------------------------------// -// -// roctracer -// -//--------------------------------------------------------------------------------------// - -namespace roctracer -{ -PTL::TaskGroup& -get_task_group(int64_t _tid = utility::get_thread_index()); -} // namespace roctracer } // namespace tasking } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/thread_info.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/thread_info.hpp index 3bd912c452..b54b4051ed 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/thread_info.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/thread_info.hpp @@ -39,7 +39,7 @@ namespace rocprofsys // InternalTID: zero-based, process-local thread-ID from atomic increment // from user-created threads and rocprof-sys-created threads. // This value may vary based on threads created by different -// backends, e.g., roctracer will create threads +// backends. // // SystemTID: system thread-ID. Should be same value as what is seen // in debugger, etc. 
diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake index 91a5cb9dc6..18c4cd027f 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake @@ -44,9 +44,7 @@ rocprofiler_systems_add_test( TARGET openmp-target GPU ON LABELS "openmp;openmp-target" - ENVIRONMENT - "${_ompt_environment};ROCPROFSYS_ROCTRACER_HSA_ACTIVITY=OFF;ROCPROFSYS_ROCTRACER_HSA_API=OFF" - ) + ENVIRONMENT "${_ompt_environment}") set(_ompt_sampling_environ "${_ompt_environment}"