Remove remaining roctracer references (#82)

[ROCm/rocprofiler-systems commit: e437200e9e]
This commit is contained in:
David Galiffi
2025-02-07 23:27:58 -05:00
committed by GitHub
orang tua 9fcea73122
melakukan 2c9d92be33
25 mengubah file dengan 254 tambahan dan 682 penghapusan
@@ -1,109 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
# Copyright.txt or https://cmake.org/licensing for details.
include(FindPackageHandleStandardArgs)
# ----------------------------------------------------------------------------------------#
# Fall back to the ROCM_PATH environment variable when the CMake variable of the
# same name is unset (or false) and the environment provides a non-empty value.
if(NOT ROCM_PATH)
    if(NOT "$ENV{ROCM_PATH}" STREQUAL "")
        set(ROCM_PATH "$ENV{ROCM_PATH}")
    endif()
endif()

# Build the list of search prefixes used by the find_* calls below. Candidates
# that do not exist on disk are dropped; the remaining ones are stored with
# symlinks resolved (REALPATH).
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler)
    if(NOT EXISTS ${_DIR})
        continue()
    endif()
    get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
    list(APPEND _ROCM_ROCPROFILER_PATHS ${_ABS_DIR})
endforeach()
# ----------------------------------------------------------------------------------------#
# Installation prefix: the directory that contains either
# include/rocprofiler/rocprofiler.h or include/rocprofiler.h. Each search prefix
# is also tried with a trailing "rocprofiler" sub-directory.
find_path(
    rocprofiler_ROOT_DIR
    NAMES
        include/rocprofiler/rocprofiler.h
        include/rocprofiler.h
    HINTS ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES rocprofiler)

# ----------------------------------------------------------------------------------------#

# Directory that directly contains rocprofiler.h.
find_path(
    rocprofiler_INCLUDE_DIR
    NAMES rocprofiler.h
    HINTS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATHS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES
        include
        include/rocprofiler
        rocprofiler/include)

# Directory that directly contains hsa.h.
find_path(
    rocprofiler_hsa_INCLUDE_DIR
    NAMES hsa.h
    HINTS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATHS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES
        include
        include/hsa)

# Hide the three result entries from the default cache view.
mark_as_advanced(rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR
                 rocprofiler_hsa_INCLUDE_DIR)
# ----------------------------------------------------------------------------------------#
# Prefixes searched for the rocprofiler library itself; shared by HINTS and PATHS.
set(_rocprofiler_lib_prefixes
    ${rocprofiler_ROOT_DIR}/rocprofiler
    ${rocprofiler_ROOT_DIR}
    ${_ROCM_ROCPROFILER_PATHS})

# The versioned shared-object name (suffix ".1") is tried first, then the plain
# library names. NO_DEFAULT_PATH restricts the search to the prefixes above.
find_library(
    rocprofiler_LIBRARY
    NAMES
        ${CMAKE_SHARED_LIBRARY_PREFIX}rocprofiler64${CMAKE_SHARED_LIBRARY_SUFFIX}.1
        rocprofiler64
        rocprofiler
    HINTS ${_rocprofiler_lib_prefixes}
    PATHS ${_rocprofiler_lib_prefixes}
    PATH_SUFFIXES lib lib64
    NO_DEFAULT_PATH)
unset(_rocprofiler_lib_prefixes)

# HSA runtime library; default system locations are searched as well.
find_library(
    rocprofiler_hsa-runtime_LIBRARY
    NAMES
        hsa-runtime64
        hsa-runtime
    HINTS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATHS
        ${rocprofiler_ROOT_DIR}
        ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES lib lib64)

# Cache the directory containing the rocprofiler library once it is known.
if(rocprofiler_LIBRARY)
    get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" DIRECTORY
                           CACHE)
endif()

mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)

# pciaccess is only looked up when the numeric ROCm version is exactly 50500;
# the lookup relies on the default search locations.
if(ROCmVersion_NUMERIC_VERSION EQUAL 50500)
    find_library(
        rocprofiler_pciaccess_LIBRARY
        NAMES pciaccess
        PATH_SUFFIXES lib lib64)
    mark_as_advanced(rocprofiler_pciaccess_LIBRARY)
endif()

# The prefix list is not needed past this point.
unset(_ROCM_ROCPROFILER_PATHS)
# ----------------------------------------------------------------------------------------#
# Sets rocprofiler_FOUND when every variable listed below holds a valid value
# and reports the standard found / not-found message.
find_package_handle_standard_args(
    rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR
    rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)

# ----------------------------------------------------------------------------------------#

if(rocprofiler_FOUND)
    # Result variables are refreshed on every invocation of this module.
    set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR}
                                 ${rocprofiler_hsa_INCLUDE_DIR})
    set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR})
    set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY})

    # rocprofiler_pciaccess_LIBRARY is only searched for under a version check
    # earlier in this module, so it may be undefined here.
    if(rocprofiler_pciaccess_LIBRARY)
        list(APPEND rocprofiler_LIBRARIES ${rocprofiler_pciaccess_LIBRARY})
    endif()

    # Imported targets are created only once: add_library() is a hard error when
    # a target of the same name is already visible, which would otherwise happen
    # if this module is processed again in the same (or a nested) directory scope.
    if(NOT TARGET rocprofiler::rocprofiler)
        add_library(rocprofiler::rocprofiler INTERFACE IMPORTED)
        target_include_directories(rocprofiler::rocprofiler
                                   INTERFACE ${rocprofiler_INCLUDE_DIRS})
        target_link_libraries(rocprofiler::rocprofiler
                              INTERFACE ${rocprofiler_LIBRARIES})
    endif()

    # Kept for backward compatibility with consumers that reference this name;
    # this module attaches no include directories or libraries to it.
    if(NOT TARGET rocprofiler::roctx)
        add_library(rocprofiler::roctx INTERFACE IMPORTED)
    endif()
endif()
@@ -1,186 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
# Copyright.txt or https://cmake.org/licensing for details.
include(FindPackageHandleStandardArgs)
# ----------------------------------------------------------------------------------------#
# Fall back to the ROCM_PATH environment variable when the CMake variable of the
# same name is unset (or false) and the environment provides a non-empty value.
if(NOT ROCM_PATH)
    if(NOT "$ENV{ROCM_PATH}" STREQUAL "")
        set(ROCM_PATH "$ENV{ROCM_PATH}")
    endif()
endif()

# Build the list of search prefixes used by the find_* calls below. Candidates
# that do not exist on disk are dropped; the remaining ones are stored with
# symlinks resolved (REALPATH).
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/roctracer)
    if(NOT EXISTS ${_DIR})
        continue()
    endif()
    get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
    list(APPEND _ROCM_ROCTRACER_PATHS ${_ABS_DIR})
endforeach()
# ----------------------------------------------------------------------------------------#
# Installation prefix: the directory that contains either
# include/roctracer/roctracer.h or include/roctracer.h. Each search prefix is
# also tried with a trailing "roctracer" sub-directory.
find_path(
    roctracer_ROOT_DIR
    NAMES
        include/roctracer/roctracer.h
        include/roctracer.h
    HINTS ${_ROCM_ROCTRACER_PATHS}
    PATHS ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES roctracer)

# ----------------------------------------------------------------------------------------#

# Directory that directly contains roctracer.h.
find_path(
    roctracer_INCLUDE_DIR
    NAMES roctracer.h
    HINTS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATHS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES
        include
        include/roctracer
        roctracer/include)

# Directory that directly contains hsa.h.
find_path(
    roctracer_hsa_INCLUDE_DIR
    NAMES hsa.h
    HINTS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATHS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES
        include
        include/hsa)

# Hide the three result entries from the default cache view.
mark_as_advanced(roctracer_ROOT_DIR roctracer_INCLUDE_DIR roctracer_hsa_INCLUDE_DIR)
# ----------------------------------------------------------------------------------------#
# Main roctracer library. Its cache variable does not follow the
# roctracer_<component>_LIBRARY pattern, so it is looked up on its own.
find_library(
    roctracer_LIBRARY
    NAMES
        roctracer64
        roctracer
    HINTS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATHS
        ${roctracer_ROOT_DIR}
        ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES lib lib64)

# Companion libraries. Each is searched as "<name>64" first and "<name>" second,
# in the same locations, and stored in roctracer_<name>_LIBRARY.
foreach(_COMPONENT roctx kfdwrapper hsa-runtime)
    find_library(
        roctracer_${_COMPONENT}_LIBRARY
        NAMES
            ${_COMPONENT}64
            ${_COMPONENT}
        HINTS
            ${roctracer_ROOT_DIR}
            ${_ROCM_ROCTRACER_PATHS}
        PATHS
            ${roctracer_ROOT_DIR}
            ${_ROCM_ROCTRACER_PATHS}
        PATH_SUFFIXES lib lib64)
endforeach()
# try not to directly use the hsakmt::hsakmt target because it hardcodes the
# INTERFACE_LINK_LIBRARIES used when it was built
#
# Strategy:
#   * hsakmt package config found -> build a roctracer::hsakmt wrapper target that
#     re-discovers hsakmt's system dependencies on this machine. If any of them
#     is missing, fall back to the imported hsakmt::hsakmt target as-is.
#   * hsakmt package config not found -> plain find_library() for libhsakmt.
# Result: roctracer_hsakmt_LIBRARY (cache) holds a target name or a library path.
find_package(hsakmt HINTS ${_ROCM_ROCTRACER_PATHS} PATHS ${_ROCM_ROCTRACER_PATHS})

if(hsakmt_FOUND)
    get_target_property(hsakmt_INCLUDE_DIR hsakmt::hsakmt INTERFACE_INCLUDE_DIRECTORIES)

    # Create and populate the wrapper only once. add_library() is a hard error if
    # the target is already visible, e.g. when this module is processed a second
    # time in the same (or a nested) directory scope.
    if(TARGET roctracer::hsakmt)
        set(_roctracer_hsakmt_created OFF)
    else()
        set(_roctracer_hsakmt_created ON)
        add_library(roctracer::hsakmt INTERFACE IMPORTED)
        # get_target_property() yields "<var>-NOTFOUND" when the property is not
        # set; do not forward that placeholder as an include directory.
        if(hsakmt_INCLUDE_DIR)
            target_include_directories(roctracer::hsakmt
                                       INTERFACE ${hsakmt_INCLUDE_DIR})
        endif()
    endif()

    # Stays ON only if every dependency below is located.
    set(hsakmt_FOUND_LIBS ON)
    foreach(_LIB drm drm_amdgpu rt c numa udev)
        # Accept the plain library name as well as the versioned shared-object
        # names with suffix ".2" and ".1".
        set(_LIB_NAMES ${_LIB})
        foreach(_EXT 2 1)
            list(
                APPEND
                _LIB_NAMES
                ${CMAKE_SHARED_LIBRARY_PREFIX}${_LIB}${CMAKE_SHARED_LIBRARY_SUFFIX}.${_EXT}
                )
        endforeach()
        find_library(
            hsakmt_${_LIB}_LIBRARY
            NAMES ${_LIB_NAMES}
            HINTS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu
            PATHS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu
            PATH_SUFFIXES ${CMAKE_INSTALL_LIBDIR} lib lib64)
        mark_as_advanced(hsakmt_${_LIB}_LIBRARY)
        if(NOT hsakmt_${_LIB}_LIBRARY)
            set(hsakmt_FOUND_LIBS OFF)
        elseif(_roctracer_hsakmt_created)
            target_link_libraries(roctracer::hsakmt
                                  INTERFACE ${hsakmt_${_LIB}_LIBRARY})
        endif()
    endforeach()

    if(hsakmt_FOUND_LIBS)
        find_package(Threads REQUIRED)
        if(_roctracer_hsakmt_created)
            target_link_libraries(roctracer::hsakmt INTERFACE Threads::Threads)
        endif()
        set(roctracer_hsakmt_LIBRARY
            roctracer::hsakmt
            CACHE STRING "Generated hsakmt target for roctracer")
    else()
        set(roctracer_hsakmt_LIBRARY
            hsakmt::hsakmt
            CACHE STRING "Imported hsakmt target")
    endif()
    unset(_roctracer_hsakmt_created)
else()
    find_library(
        roctracer_hsakmt_LIBRARY
        NAMES hsakmt
        HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
        PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
        PATH_SUFFIXES lib lib64)
endif()
# Cache the directory containing the roctracer library once it is known.
if(roctracer_LIBRARY)
    get_filename_component(roctracer_LIBRARY_DIR "${roctracer_LIBRARY}" PATH CACHE)
endif()

# Hide every library result from the default cache view.
mark_as_advanced(
    roctracer_LIBRARY roctracer_roctx_LIBRARY roctracer_kfdwrapper_LIBRARY
    roctracer_hsakmt_LIBRARY roctracer_hsa-runtime_LIBRARY)

# ----------------------------------------------------------------------------------------#

# Sets roctracer_FOUND when every variable listed below holds a valid value and
# reports the standard found / not-found message. The kfdwrapper, hsakmt and
# hsa-runtime libraries are optional and therefore not listed.
find_package_handle_standard_args(
    roctracer DEFAULT_MSG roctracer_ROOT_DIR roctracer_INCLUDE_DIR
    roctracer_hsa_INCLUDE_DIR roctracer_LIBRARY roctracer_roctx_LIBRARY)

# ------------------------------------------------------------------------------#

if(roctracer_FOUND)
    # Result variables are refreshed on every invocation of this module.
    set(roctracer_INCLUDE_DIRS ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR})
    set(roctracer_LIBRARIES ${roctracer_LIBRARY} ${roctracer_roctx_LIBRARY})
    set(roctracer_LIBRARY_DIRS ${roctracer_LIBRARY_DIR})

    # Per-target link lines, assembled alongside roctracer_LIBRARIES so that the
    # targets can be populated in one step below.
    set(_roctracer_link_libs ${roctracer_LIBRARY})
    set(_roctx_link_libs ${roctracer_roctx_LIBRARY})

    # kfdwrapper and hsakmt are linked into both targets when available.
    if(roctracer_kfdwrapper_LIBRARY)
        list(APPEND roctracer_LIBRARIES ${roctracer_kfdwrapper_LIBRARY})
        list(APPEND _roctracer_link_libs ${roctracer_kfdwrapper_LIBRARY})
        list(APPEND _roctx_link_libs ${roctracer_kfdwrapper_LIBRARY})
    endif()

    if(roctracer_hsakmt_LIBRARY)
        list(APPEND roctracer_LIBRARIES ${roctracer_hsakmt_LIBRARY})
        list(APPEND _roctracer_link_libs ${roctracer_hsakmt_LIBRARY})
        list(APPEND _roctx_link_libs ${roctracer_hsakmt_LIBRARY})
    endif()

    # hsa-runtime is linked into roctracer::roctracer only.
    if(roctracer_hsa-runtime_LIBRARY)
        list(APPEND roctracer_LIBRARIES ${roctracer_hsa-runtime_LIBRARY})
        list(APPEND _roctracer_link_libs ${roctracer_hsa-runtime_LIBRARY})
    endif()

    # Imported targets are created only once: add_library() is a hard error when
    # a target of the same name is already visible, which would otherwise happen
    # if this module is processed again in the same (or a nested) directory scope.
    if(NOT TARGET roctracer::roctracer)
        add_library(roctracer::roctracer INTERFACE IMPORTED)
        target_include_directories(roctracer::roctracer
                                   INTERFACE ${roctracer_INCLUDE_DIRS})
        target_link_libraries(roctracer::roctracer INTERFACE ${_roctracer_link_libs})
    endif()

    if(NOT TARGET roctracer::roctx)
        add_library(roctracer::roctx INTERFACE IMPORTED)
        target_include_directories(roctracer::roctx
                                   INTERFACE ${roctracer_INCLUDE_DIRS})
        target_link_libraries(roctracer::roctx INTERFACE ${_roctx_link_libs})
    endif()

    unset(_roctracer_link_libs)
    unset(_roctx_link_libs)
endif()

# ------------------------------------------------------------------------------#

# The prefix list is not needed past this point.
unset(_ROCM_ROCTRACER_PATHS)
# ------------------------------------------------------------------------------#
@@ -17,10 +17,6 @@ rocprofiler_systems_add_interface_library(
"Provides flags and libraries for Dyninst (dynamic instrumentation)")
rocprofiler_systems_add_interface_library(rocprofiler-systems-rocm
"Provides flags and libraries for ROCm")
rocprofiler_systems_add_interface_library(rocprofiler-systems-roctracer
"Provides flags and libraries for roctracer")
rocprofiler_systems_add_interface_library(rocprofiler-systems-rocprofiler
"Provides flags and libraries for rocprofiler")
rocprofiler_systems_add_interface_library(
rocprofiler-systems-rccl
"Provides flags for ROCm Communication Collectives Library (RCCL)")
@@ -161,15 +157,6 @@ if(ROCPROFSYS_USE_ROCM)
set(ROCPROFSYS_ROCM_VERSION_PATCH ${ROCmVersion_PATCH_VERSION})
set(ROCPROFSYS_ROCM_VERSION ${ROCmVersion_TRIPLE_VERSION})
if(ROCPROFSYS_ROCM_VERSION_MAJOR GREATER_EQUAL 4 AND ROCPROFSYS_ROCM_VERSION_MINOR
GREATER 3)
set(roctracer_kfdwrapper_LIBRARY)
endif()
if(NOT roctracer_kfdwrapper_LIBRARY)
set(roctracer_kfdwrapper_LIBRARY)
endif()
rocprofiler_systems_add_feature(ROCPROFSYS_ROCM_VERSION
"ROCm version used by rocprofiler-systems")
else()
@@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \
zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
zypper --non-interactive --gpg-auto-import-keys refresh && \
zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
zypper --non-interactive install -y rocm-dev rccl-devel libpciaccess0 && \
zypper --non-interactive clean --all; \
fi
@@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \
yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
yum install -y rocm-dev && \
yum clean all; \
fi
@@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \
apt-get install -y ./${AMDGPU_DEB} && \
apt-get update && \
apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
apt-get install -y rocm-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
apt-get autoclean; \
fi
@@ -468,7 +468,6 @@ Viewing components
| written_bytes | Number of bytes sent to the storage layer. |
| written_char | Number of bytes which this task has cause... |
| rocprof-sys | Invokes instrumentation functions rocprof... |
| roctracer | High-precision ROCm API and kernel tracing. |
| sampling_wall_clock | Wall-clock timing. Derived from statistic... |
| sampling_cpu_clock | CPU-clock timing. Derived from statistica... |
| sampling_percent | Fraction of wall-clock time spent in func... |
@@ -247,10 +247,7 @@ view the help menu.
libpthread.so.0
libresolv.so.2
librocm_smi64.so
librocmtools.so
librocprofiler64.so
libroctracer64.so
libroctx64.so
librocprofiler-sdk.so
librt.so.1
libstdc++.so.6
libtbb.so
@@ -854,7 +851,7 @@ By default, ``rocprof-sys-instrument`` uses ``--mode trace`` for instrumentation
only instruments ``main`` in an executable. It activates both CPU call-stack sampling and
background system-level thread sampling by default.
Tracing capabilities which do not rely on instrumentation, such as the HIP API and kernel tracing
(which is collected by roctracer), are still available.
are still available.
The ROCm Systems Profiler sampling capabilities are always available, even in trace mode, but are deactivated by default.
To activate sampling in trace mode, set ``ROCPROFSYS_USE_SAMPLING=ON`` in the environment
@@ -80,7 +80,7 @@ in between samples. Progress points must be triggered in a deterministic manner
This can happen in three different ways:
* `ROCm Systems Profiler <https://github.com/ROCm/rocprofiler-systems>`_ can leverage the callbacks from
Kokkos-Tools, OpenMP-Tools, roctracer, etc. and the wrappers around functions for
Kokkos-Tools, OpenMP-Tools, rocprofiler-sdk, etc. and the wrappers around functions for
MPI, NUMA, RCCL, etc. to act as progress points
* Users can leverage the :doc:`runtime instrumentation capabilities <./instrumenting-rewriting-binary-application>`
to insert progress points
@@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option
[BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ]
Include data from these backends (count: unlimited)
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ]
Exclude data from these backends (count: unlimited)
[HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H
@@ -77,164 +77,207 @@ Metadata JSON Sample
.. code-block:: json
{
"rocprof-sys": {
"rocprofiler-systems": {
"metadata": {
"info": {
"HW_L1_CACHE_SIZE": 32768,
"HW_L2_CACHE_SIZE": 524288,
"HW_L3_CACHE_SIZE": 16777216,
"HW_PHYSICAL_CPU": 12,
"HW_CONCURRENCY": 24,
"LAUNCH_TIME": "02:04",
"LAUNCH_DATE": "05/08/22",
"TIMEMORY_GIT_REVISION": "52e7034fd419ff296506cdef43084f6071dbaba1",
"TIMEMORY_VERSION": "3.3.0rc4",
"TIMEMORY_API": "tim::project::timemory",
"TIMEMORY_GIT_DESCRIBE": "v3.2.0-263-g52e7034f",
"PWD": "/home/jrmadsen/devel/c++/AARInternal/hosttrace-dyninst/build-vscode",
"USER": "jrmadsen",
"HOME": "/home/jrmadsen",
"SHELL": "/bin/bash",
"CPU_MODEL": "AMD Ryzen Threadripper PRO 3945WX 12-Cores",
"CPU_FREQUENCY": 2400,
"CPU_VENDOR": "AuthenticAMD",
"CPU_FEATURES": [
"fpu",
"msr",
"sse",
"sse2",
"constant_tsc",
"ssse3",
"fma",
"sse4_1",
"sse4_2",
"popcnt",
"avx2",
"... etc. ..."
],
"memory_maps": [
{
"end_address": "7f4013797000",
"start_address": "7f4012e58000",
"pathname": "/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000",
"offset": "34a000",
"device": "103:05",
"inode": 4331165,
"permissions": "rw-p"
},
{
"end_address": "7f4013902000",
"start_address": "7f4013901000",
"pathname": "/usr/lib/x86_64-linux-gnu/libm-2.31.so",
"offset": "14d000",
"device": "103:05",
"inode": 42078854,
"permissions": "rwxp"
},
{
"end_address": "7f4013919000",
"start_address": "7f4013908000",
"pathname": "/usr/lib/x86_64-linux-gnu/libpthread-2.31.so",
"offset": "6000",
"device": "103:05",
"inode": 42078874,
"permissions": "r-xp"
},
{
"...": "etc."
},
],
"memory_maps_files": [
"/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000",
"/opt/rocm-5.0.0/hsa-amd-aqlprofile/lib/libhsa-amd-aqlprofile64.so.1.0.50000",
"/opt/rocm-5.0.0/lib/libamd_comgr.so.2.4.50000",
"/opt/rocm-5.0.0/lib/libhsa-runtime64.so.1.5.50000",
"/opt/rocm-5.0.0/rocm_smi/lib/librocm_smi64.so.5.0.50000",
"/opt/rocm-5.0.0/roctracer/lib/libroctracer64.so.1.0.50000",
"/usr/lib/x86_64-linux-gnu/ld-2.31.so",
"/usr/lib/x86_64-linux-gnu/libc-2.31.so",
"/usr/lib/x86_64-linux-gnu/libdl-2.31.so",
"... etc. ..."
],
},
"output": {
"text": [
{
"value": [
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.txt"
],
"key": "roctracer"
},
{
"value": [
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt"
],
"key": "wall_clock"
}
],
"json": [
{
"value": [
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.json",
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.tree.json"
],
"key": "roctracer"
},
{
"value": [
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json",
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.tree.json"
],
"key": "wall_clock"
}
]
},
"environment": [
{
"value": "/home/jrmadsen",
"key": "HOME"
},
{
"value": "/bin/bash",
"key": "SHELL"
},
{
"value": "jrmadsen",
"key": "USER"
},
{
"value": "true",
"key": "... etc. ..."
}
"info": {
"CPU_MODEL": "AMD Ryzen 5 3600 6-Core Processor",
"CPU_VENDOR": "AuthenticAMD",
"HOME": "/home/rocm-dev",
"LAUNCH_DATE": "01/15/25",
"LAUNCH_TIME": "16:49",
"PWD": "/home/rocm-dev/code/rocprofiler-systems",
"ROCPROFSYS_COMPILER_ID": "GNU",
"ROCPROFSYS_COMPILER_VERSION": "11.4.0",
"ROCPROFSYS_GIT_DESCRIBE": "",
"ROCPROFSYS_GIT_REVISION": "3213dc652728f7ed01b62bf55f6af76c43bfcbdb",
"ROCPROFSYS_LIBRARY_ARCH": "x86_64-linux-gnu",
"ROCPROFSYS_ROCM_VERSION": "6.3.1",
"ROCPROFSYS_SYSTEM_NAME": "Linux",
"ROCPROFSYS_SYSTEM_PROCESSOR": "x86_64",
"ROCPROFSYS_SYSTEM_VERSION": "6.8.0-51-generic",
"ROCPROFSYS_VERSION": "1.0.0",
"SHELL": "/usr/bin/zsh",
"TIMEMORY_API": "tim::project::timemory",
"TIMEMORY_GIT_DESCRIBE": "v3.2.0-703-gba3c6486",
"TIMEMORY_GIT_REVISION": "ba3c648677b3c6f217abe147ef3198f36239e234",
"TIMEMORY_VERSION": "4.0.0rc0",
"USER": "rocm-dev",
"CPU_FREQUENCY": 1972,
"CPU_FEATURES": [
"fpu",
"vme",
"de",
"pse",
"tsc",
"msr",
"pae",
"... etc. ..."
],
"HW_CONCURRENCY": 12,
"HW_PHYSICAL_CPU": 6,
"HW_L1_CACHE_SIZE": 32768,
"HW_L2_CACHE_SIZE": 524288,
"HW_L3_CACHE_SIZE": 16777216,
"ROCPROFSYS_VERSION_MAJOR": 1,
"ROCPROFSYS_VERSION_MINOR": 0,
"ROCPROFSYS_VERSION_PATCH": 0,
"ROCPROFSYS_ROCM_VERSION_MAJOR": 6,
"ROCPROFSYS_ROCM_VERSION_MINOR": 3,
"ROCPROFSYS_ROCM_VERSION_PATCH": 1,
"memory_maps_files": [
"/opt/rocm-6.3.1/lib/libhsa-amd-aqlprofile64.so.1.0.60301",
"/opt/rocm-6.3.1/lib/libhsa-runtime64.so.1.14.60301",
"/opt/rocm-6.3.1/lib/librocm_smi64.so.7.4.60301",
"/opt/rocm-6.3.1/lib/librocprofiler-register.so.0.4.0",
"/opt/rocm-6.3.1/lib/librocprofiler-sdk.so.0.5.0",
"/opt/rocm/lib/libhsa-amd-aqlprofile64.so.1",
"/opt/rocm/lib/libhsa-runtime64.so.1",
"/opt/rocm/lib/librocm_smi64.so.7",
"/opt/rocm/lib/librocprofiler-register.so.0",
"/opt/rocm/lib/librocprofiler-sdk.so.0",
"... etc. ..."
],
"settings": {
"ROCPROFSYS_JSON_OUTPUT": {
"count": -1,
"environ_updated": false,
"name": "json_output",
"data_type": "bool",
"initial": true,
"enabled": true,
"value": true,
"max_count": 1,
"cmdline": [
"--rocprof-sys-json-output"
],
"environ": "ROCPROFSYS_JSON_OUTPUT",
"config_updated": false,
"categories": [
"io",
"json",
"native"
],
"description": "Write json output files"
},
"... etc. ...": {
"etc.": true
}
"memory_maps": [
{
"cereal_class_version": 0,
"load_address": "76005b800000",
"last_address": "76005b81b000",
"permissions": "r---",
"offset": "0",
"device": "",
"inode": 0,
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
},
{
"load_address": "76005b81b000",
"last_address": "76005b93400d",
"permissions": "r-x-",
"offset": "1b000",
"device": "",
"inode": 0,
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
},
{
"load_address": "76005b935000",
"last_address": "76005b9aeab8",
"permissions": "r---",
"offset": "135000",
"device": "",
"inode": 0,
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
},
{
"load_address": "76005b9b0638",
"last_address": "76005bb2d598",
"permissions": "rw--",
"offset": "1af638",
"device": "",
"inode": 0,
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
},
{
"load_address": "76005bc00000",
"last_address": "76005bc26140",
"permissions": "r---",
"offset": "0",
"device": "",
"inode": 0,
"pathname": "/opt/rocm/lib/librocprofiler-sdk.so.0"
},
{
"... etc. ..."
}
}
}
],
"settings": {
"cereal_class_version": 2,
"ROCPROFSYS_OUTPUT_PREFIX": {
"name": "output_prefix",
"environ": "ROCPROFSYS_OUTPUT_PREFIX",
"description": "Explicitly specify a prefix for all output files",
"count": 1,
"max_count": -1,
"cmdline": [
"--rocprofiler-systems-output-prefix"
],
"categories": [
"filename",
"io",
"librocprof-sys",
"native",
"rocprofsys"
],
"data_type": "string",
"initial": "parallel-overhead-binary-rewrite/",
"value": "parallel-overhead-binary-rewrite/",
"updated": "config",
"enabled": true
},
{
... etc. ...
},
"command_line": [
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/parallel-overhead.inst",
"--",
"10",
"12",
"1000"
],
"environment": [
... etc. ...
]
},
"environment": [
{
"key": "GOTCHA_DEBUG",
"value": "0"
},
{
"key": "HIP_VISIBLE_DEVICES",
"value": ""
},
{
"key": "HOME",
"value": "/home/rocm-dev"
},
{
"key": "LD_LIBRARY_PATH",
"value": "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib:/opt/rocm/lib"
},
{
"key": "LIBRARY_PATH",
"value": ""
},
{
... etc. ...
}
]
"output": {
"json": [
{
"key": "wall_clock",
"value": [
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json"
]
}
],
"protobuf": [
{
"key": "perfetto",
"value": [
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/perfetto-trace.proto"
]
}
],
"text": [
{
"key": "wall_clock",
"value": [
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt"
]
}
]
},
},
}
Configuring the ROCm Systems Profiler output
@@ -192,75 +192,52 @@ First, instrument and run the program.
...
$ rocprof-sys-run --profile --trace -- ./user-api.inst 10 12 1000
ROCPROFSYS: HSA_TOOLS_LIB=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
ROCPROFSYS: HSA_TOOLS_REPORT_LOAD_FAILURE=1
ROCPROFSYS: LD_PRELOAD=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
ROCPROFSYS: OMP_TOOL_LIBRARIES=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
ROCPROFSYS: LD_PRELOAD=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
ROCPROFSYS: OMP_TOOL_LIBRARIES=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
ROCPROFSYS: ROCPROFSYS_PROFILE=true
ROCPROFSYS: ROCPROFSYS_TRACE=true
ROCPROFSYS: ROCPROFSYS_VERBOSE=0
ROCPROFSYS: ROCP_HSA_INTERCEPT=1
ROCPROFSYS: ROCP_TOOL_LIB=/opt/rocm-6.3.1/lib/librocprof-sys.so.0.1.0
[rocprof-sys][dl][297646] rocprofsys_main
[rocprof-sys][297646][rocprofsys_init_tooling] Instrumentation mode: Trace
[rocprof-sys][dl][1827155] rocprofsys_main
[rocprof-sys][1827155][rocprofsys_init_tooling] Instrumentation mode: Trace
____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____
____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____
| _ \ / _ \ / ___| \/ | / ___\ \ / / ___|_ _| ____| \/ / ___| | _ \| _ \ / _ \| ___|_ _| | | ____| _ \
| |_) | | | | | | |\/| | \___ \\ V /\___ \ | | | _| | |\/| \___ \ | |_) | |_) | | | | |_ | || | | _| | |_) |
| _ <| |_| | |___| | | | ___) || | ___) || | | |___| | | |___) | | __/| _ <| |_| | _| | || |___| |___| _ <
|_| \_\\___/ \____|_| |_| |____/ |_| |____/ |_| |_____|_| |_|____/ |_| |_| \_\\___/|_| |___|_____|_____|_| \_\
rocprof-sys v0.1.0 (rev: b569c837e455f71dd76d06392d0b901ae927deca, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x)
[105.947] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: ""
Pushing custom region :: ./user-api.inst
Pushing custom region :: initialization
rocprof-sys v1.0.0 (rev: 3213dc652728f7ed01b62bf55f6af76c43bfcbdb, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x)
[790.763] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: ""
[./user-api.inst] Threads: 12
[./user-api.inst] Iterations: 1000
[./user-api.inst] fibonacci(10)...
Pushing custom region :: thread_creation
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: run(10) x 1000
Pushing custom region :: thread_wait
Pushing custom region :: run(10) x 1000
[./user-api.inst] fibonacci(10) x 12 = 715000
[rocprof-sys][297646][0][rocprofsys_finalize] finalizing...
[rocprof-sys][297646][0][rocprofsys_finalize]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646 : 0.978014 sec wall_clock, 26.752 MB peak_rss, 27.394 MB page_rss, 1.520000 sec cpu_clock, 155.4 % cpu_util [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/0 : 0.976068 sec wall_clock, 0.789948 sec thread_cpu_clock, 80.9 % thread_cpu_util, 26.112 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/1 : 0.027517 sec wall_clock, 0.027510 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/2 : 0.027828 sec wall_clock, 0.027811 sec thread_cpu_clock, 99.9 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/3 : 0.027585 sec wall_clock, 0.027585 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/4 : 0.033449 sec wall_clock, 0.033443 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/5 : 0.027727 sec wall_clock, 0.027726 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.328 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/6 : 0.032228 sec wall_clock, 0.032220 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.712 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/7 : 0.030201 sec wall_clock, 0.030202 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/8 : 0.027960 sec wall_clock, 0.027951 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/9 : 0.034698 sec wall_clock, 0.034699 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/10 : 0.033414 sec wall_clock, 0.033399 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.512 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/11 : 0.028161 sec wall_clock, 0.028149 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/12 : 0.027791 sec wall_clock, 0.027767 sec thread_cpu_clock, 99.9 % thread_cpu_util, 0.256 MB peak_rss [laps: 1]
[rocprof-sys][297646][0][rocprofsys_finalize]
[rocprof-sys][297646][0][rocprofsys_finalize] Finalizing perfetto...
[rocprofiler-systems][297646][perfetto]> Outputting '/home/gliff/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-02_19.29/perfetto-trace-297646.proto' (16728.58 KB / 16.73 MB / 0.02 GB)... Done
[rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.json'
[rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.txt'
[rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.json'
[rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.txt'
[rocprofiler-systems][297646][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/metadata-297646.json' and 'rocprofsys-user-api.inst-output/2025-01-02_19.29/functions-297646.json'
[rocprof-sys][297646][0][rocprofsys_finalize] Finalized: 0.314368 sec wall_clock, 19.040 MB peak_rss, 3.498 MB page_rss, 0.280000 sec cpu_clock, 89.1 % cpu_util
[107.243] perfetto.cc:49204 Tracing session 1 ended, total sessions:0
[rocprof-sys][1827155][0][rocprofsys_finalize] finalizing...
[rocprof-sys][1827155][0][rocprofsys_finalize]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155 : 0.137404 sec wall_clock, 6.528 MB peak_rss, 6.685 MB page_rss, 0.540000 sec cpu_clock, 393.0 % cpu_util [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/0 : 0.135815 sec wall_clock, 0.035171 sec thread_cpu_clock, 25.9 % thread_cpu_util, 6.016 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/1 : 0.028336 sec wall_clock, 0.028336 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/2 : 0.030380 sec wall_clock, 0.030380 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/3 : 0.035233 sec wall_clock, 0.035227 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/4 : 0.035275 sec wall_clock, 0.035267 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/5 : 0.035452 sec wall_clock, 0.035452 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/6 : 0.036198 sec wall_clock, 0.036190 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/7 : 0.034709 sec wall_clock, 0.034702 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/8 : 0.036590 sec wall_clock, 0.033590 sec thread_cpu_clock, 91.8 % thread_cpu_util, 0.512 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/9 : 0.033108 sec wall_clock, 0.033098 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/10 : 0.032993 sec wall_clock, 0.032994 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.256 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/11 : 0.035687 sec wall_clock, 0.035368 sec thread_cpu_clock, 99.1 % thread_cpu_util, 0.128 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/12 : 0.035359 sec wall_clock, 0.035354 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.128 MB peak_rss [laps: 1]
[rocprof-sys][1827155][0][rocprofsys_finalize]
[rocprof-sys][1827155][0][rocprofsys_finalize] Finalizing perfetto...
[rocprofiler-systems][1827155][perfetto]> Outputting '/home/rocm-dev/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-15_17.57/perfetto-trace-1827155.proto' (17.20 KB / 0.02 MB / 0.00 GB)... Done
[rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.json'
[rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.txt'
[rocprofiler-systems][1827155][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/metadata-1827155.json' and 'rocprofsys-user-api.inst-output/2025-01-15_17.57/functions-1827155.json'
[rocprof-sys][1827155][0][rocprofsys_finalize] Finalized: 0.048039 sec wall_clock, 0.640 MB peak_rss, 0.655 MB page_rss, 0.020000 sec cpu_clock, 41.6 % cpu_util
[790.953] perfetto.cc:49204 Tracing session 1 ended, total sessions:0
Then review the output.
@@ -301,7 +301,7 @@ Collected data is generally handled in one of the three following ways:
In general, only instrumentation for relatively simple data is directly passed to
Perfetto and/or Timemory during runtime.
For example, the callbacks from binary instrumentation, user API instrumentation,
and roctracer directly invoke
and rocprofiler-sdk directly invoke
calls to Perfetto or Timemory's storage model. Otherwise, the data is stored
by ROCm Systems Profiler in the thread-data model
which is more persistent than simply using ``thread_local`` static data, which gets deleted
@@ -320,13 +320,13 @@ get_internal_basic_libs_impl()
"libunwind-setjmp.so",
"libunwind.so",
"libunwind-x86_64.so",
"libpapi.so",
"libpfm.so",
"librocm_smi64.so",
"libroctx64.so",
"librocmtools.so",
"libroctracer64.so",
"librocprofiler64.so",
"libpapi.so",
"libpfm.so",
"librocprofiler-register.so",
"librocprofiler-sdk.so",
"librocprofiler-sdk-roctx.so",
@@ -360,8 +360,8 @@ main(int argc, char** argv)
itr, std::regex{
"lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|"
"instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|"
"tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|"
"amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|"
"tbbmalloc_proxy|gotcha|libunwind|hsa-runtime|amdhip|"
"amd_comgr|rocm_smi64|rocprofiler-register|"
"rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" }))
{
if(!find(filepath::dirname(itr), lib_search_paths))
@@ -713,20 +713,11 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
}
});
std::set<std::string> _backend_choices = { "all",
"kokkosp",
"mpip",
"ompt",
"rcclp",
"rocm-smi",
"roctracer",
"rocprofiler",
"roctx",
"mutex-locks",
"spin-locks",
"rw-locks",
"rocprofiler-sdk",
"rocm" };
std::set<std::string> _backend_choices = {
"all", "kokkosp", "mpip", "ompt",
"rcclp", "rocm-smi", "mutex-locks", "spin-locks",
"rw-locks", "rocprofiler-sdk", "rocm"
};
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
_backend_choices.erase("mpip");
@@ -543,9 +543,10 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
_data.processed_environs.emplace("periods");
}
strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt",
"rcclp", "rocm-smi", "roctracer", "rocprofiler",
"roctx", "mutex-locks", "spin-locks", "rw-locks" };
strset_t _backend_choices = {
"all", "kokkosp", "mpip", "ompt", "rcclp",
"rocm-smi", "rocprofiler-sdk", "mutex-locks", "spin-locks", "rw-locks"
};
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
_backend_choices.erase("mpip");
@@ -39,8 +39,6 @@
#include <type_traits>
ROCPROFSYS_DECLARE_COMPONENT(roctracer)
ROCPROFSYS_DECLARE_COMPONENT(rocprofiler)
ROCPROFSYS_DECLARE_COMPONENT(rcclp_handle)
ROCPROFSYS_DECLARE_COMPONENT(comm_data)
@@ -127,12 +125,6 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, f
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type)
#endif
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer, project::rocprofsys,
tpls::rocm, device::gpu, os::supports_linux,
category::external)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::rocprofiler, project::rocprofsys,
tpls::rocm, device::gpu, os::supports_linux,
category::external, category::hardware_counter)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_wall_clock,
project::rocprofsys, category::timing, os::supports_unix,
category::sampling, category::interrupt_sampling)
@@ -160,10 +152,6 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::roc
tpls::rocm, device::gpu, os::supports_linux,
category::sampling, category::process_sampling)
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::roctracer, "roctracer",
"High-precision ROCm API and kernel tracing", "")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::rocprofiler, "rocprofiler",
"ROCm kernel hardware counters", "")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_wall_clock,
"sampling_wall_clock", "Wall-clock timing",
"Derived from statistical sampling")
@@ -1,51 +0,0 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "core/defines.hpp"
// Wrapper around the HIP runtime headers. Everything below is compiled only when
// ROCPROFSYS_USE_ROCM is defined to a value greater than zero; otherwise this header
// contributes nothing beyond core/defines.hpp.
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
// Reject translation units in which a HIP runtime header has already been included
// (presumably these macros are the include guards of those headers -- confirm), since
// the macro setup below has to be in place before HIP is first included.
# if defined(HIP_INCLUDE_HIP_HIP_RUNTIME_H) || \
defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H)
# error \
"include core/hip_runtime.hpp before <hip/hip_runtime.h> or <hip/hip_runtime_api.h>"
# endif
// NOTE(review): presumably consumed by <hip/amd_detail/hip_prof_str.h> to enable the
// HIP API string helpers -- confirm against the HIP headers.
# define HIP_PROF_HIP_API_STRING 1
// following must be included before <roctracer_hip.h> for ROCm 6.0+
// USE_PROF_API is undefined (when it was set) before any HIP header is pulled in.
# if defined(USE_PROF_API)
# undef USE_PROF_API
# endif
// The include order below is intentional; see the per-header notes.
# include <hip/hip_runtime.h>
# include <hip/hip_runtime_api.h>
// must be included after hip_runtime_api.h
# include <hip/hip_deprecated.h>
// must be included after hip_runtime_api.h
# include <roctracer/hip_ostream_ops.h>
// must be included after hip_runtime_api.h
# include <hip/amd_detail/hip_prof_str.h>
# include <hip/hip_version.h>
#endif
@@ -126,7 +126,6 @@ backtrace::filter_and_patch(const std::vector<entry_type>& _data)
if(_lbl.find("DYNINST_") != _npos) return 0;
if(_lbl.find("rocprofsys_") != _npos) return -1;
if(_lbl.find("rocprofiler_") != _npos) return -1;
if(_lbl.find("roctracer_") != _npos) return -1;
if(_lbl.find("perfetto::") != _npos) return -1;
if(_lbl.find("protozero::") == 0) return -1;
if(_lbl.find("gotcha_") != _npos) return -1;
@@ -121,7 +121,7 @@ stop_bundle(bundle_t& _bundle, int64_t _tid, Args&&... _args)
{
auto _wc = *_bundle.get<comp::wall_clock>();
_wc.stop();
// update roctracer_data
// update data
_bundle.store(std::plus<double>{}, _wc.get() * _wc.unit());
// stop all
_bundle.stop();
@@ -116,19 +116,6 @@ get_thread_pool_state()
} // namespace
} // namespace general
namespace roctracer
{
namespace
{
// Accessor for the state flag of the roctracer thread pool.
//
// The flag is a function-local static that starts out as State::PreInit. A mutable
// reference is returned, so callers use the same accessor both to query the flag
// (e.g. `get_thread_pool_state() == State::Active`) and to assign a new value to it.
auto&
get_thread_pool_state()
{
    static auto _pool_state = State::PreInit;
    return _pool_state;
}
}  // namespace
}  // namespace roctracer
void
setup()
{
@@ -140,17 +127,6 @@ setup()
void
join()
{
if(roctracer::get_thread_pool_state() == State::Active)
{
ROCPROFSYS_DEBUG_F("waiting for all roctracer tasks to complete...\n");
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
roctracer::get_task_group(i).join();
}
else
{
ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n");
}
if(general::get_thread_pool_state() == State::Active)
{
ROCPROFSYS_DEBUG_F("waiting for all general tasks to complete...\n");
@@ -162,22 +138,6 @@ join()
void
shutdown()
{
if(roctracer::get_thread_pool_state() == State::Active)
{
ROCPROFSYS_DEBUG_F("Waiting on completion of roctracer tasks...\n");
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
{
roctracer::get_task_group(i).join();
roctracer::get_task_group(i).clear();
roctracer::get_task_group(i).set_pool(nullptr);
}
roctracer::get_thread_pool_state() = State::Finalized;
}
else
{
ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n");
}
if(general::get_thread_pool_state() == State::Active)
{
ROCPROFSYS_DEBUG_F("Waiting on completion of general tasks...\n");
@@ -219,16 +179,5 @@ general::get_task_group(int64_t _tid)
return *_v;
}
// Returns a reference to the PTL::TaskGroup<void> obtained for the calling thread.
//
// The reference `_v` is a `static thread_local`, so its initializer runs only on the
// first call made from each thread; later calls from that same thread return the same
// object and the `_tid` they pass is not consulted again.
PTL::TaskGroup<void>&
roctracer::get_task_group(int64_t _tid)
{
// Empty tag type passed as the second template argument of thread_data
// (presumably to keep this instantiation distinct from other users -- confirm).
struct local
{};
using thread_data_t = thread_data<PTL::TaskGroup<void>, local>;
// Comma operator: first mark the roctracer thread-pool state as State::Active, then
// evaluate thread_data_t::instance(...) with `_tid` and the address of the pool
// returned by tasking::get_thread_pool(); the result of instance(...) is what `_v`
// binds to.
static thread_local auto& _v = (roctracer::get_thread_pool_state() = State::Active,
thread_data_t::instance(construct_on_thread{ _tid },
&tasking::get_thread_pool()));
// `_v` is dereferenced to yield the task group itself.
return *_v;
}
} // namespace tasking
} // namespace rocprofsys
@@ -56,16 +56,5 @@ PTL::TaskGroup<void>&
get_task_group(int64_t _tid = utility::get_thread_index());
}
//--------------------------------------------------------------------------------------//
//
// roctracer
//
//--------------------------------------------------------------------------------------//
namespace roctracer
{
// Declares the accessor returning a reference to a PTL::TaskGroup<void>.
// When the caller omits `_tid`, it defaults to the value returned by
// utility::get_thread_index().
PTL::TaskGroup<void>&
get_task_group(int64_t _tid = utility::get_thread_index());
} // namespace roctracer
} // namespace tasking
} // namespace rocprofsys
@@ -39,7 +39,7 @@ namespace rocprofsys
// InternalTID: zero-based, process-local thread-ID from atomic increment
// from user-created threads and rocprof-sys-created threads.
// This value may vary based on threads created by different
// backends, e.g., roctracer will create threads
// backends.
//
// SystemTID: system thread-ID. Should be same value as what is seen
// in debugger, etc.
@@ -44,9 +44,7 @@ rocprofiler_systems_add_test(
TARGET openmp-target
GPU ON
LABELS "openmp;openmp-target"
ENVIRONMENT
"${_ompt_environment};ROCPROFSYS_ROCTRACER_HSA_ACTIVITY=OFF;ROCPROFSYS_ROCTRACER_HSA_API=OFF"
)
ENVIRONMENT "${_ompt_environment}")
set(_ompt_sampling_environ
"${_ompt_environment}"