Remove remaining roctracer references (#82)
[ROCm/rocprofiler-systems commit: e437200e9e]
This commit is contained in:
committed by
GitHub
parent
9fcea73122
commit
2c9d92be33
@@ -1,109 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
|
||||
# Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Seed ROCM_PATH from the environment when it was not supplied as a CMake variable.
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()

# Build the list of search prefixes for rocprofiler. Candidates are visited in priority
# order (ROCmVersion_DIR, ROCM_PATH, then the conventional /opt/rocm locations). Only
# candidates that exist are kept, and each is stored with symlinks resolved.
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler)
    if(EXISTS "${_DIR}")
        get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
        list(APPEND _ROCM_ROCPROFILER_PATHS "${_ABS_DIR}")
    endif()
endforeach()

# Different candidates can resolve to the same real directory (for example when
# ROCM_PATH is itself /opt/rocm). Drop the repeats so every prefix is searched once;
# REMOVE_DUPLICATES keeps the first occurrence, so the priority order is unchanged.
# The guard avoids calling list() on a variable that was never created.
if(_ROCM_ROCPROFILER_PATHS)
    list(REMOVE_DUPLICATES _ROCM_ROCPROFILER_PATHS)
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Installation prefix: the directory that holds include/rocprofiler/rocprofiler.h or
# include/rocprofiler.h. Every search prefix is also tried with a rocprofiler/ suffix.
find_path(
    rocprofiler_ROOT_DIR
    NAMES include/rocprofiler/rocprofiler.h include/rocprofiler.h
    HINTS ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES rocprofiler)

# ----------------------------------------------------------------------------------------#

# Directory that directly contains rocprofiler.h.
find_path(
    rocprofiler_INCLUDE_DIR
    NAMES rocprofiler.h
    HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES include include/rocprofiler rocprofiler/include)

# Directory that directly contains hsa.h.
find_path(
    rocprofiler_hsa_INCLUDE_DIR
    NAMES hsa.h
    HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES include include/hsa)

# Keep the three result entries out of the non-advanced cache view.
mark_as_advanced(rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR rocprofiler_hsa_INCLUDE_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# rocprofiler library. The versioned shared-object name (<prefix>rocprofiler64<suffix>.1)
# is tried before the plain names, and NO_DEFAULT_PATH restricts the search to the
# prefixes listed here.
find_library(
    rocprofiler_LIBRARY
    NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}rocprofiler64${CMAKE_SHARED_LIBRARY_SUFFIX}.1
          rocprofiler64
          rocprofiler
    HINTS ${rocprofiler_ROOT_DIR}/rocprofiler
          ${rocprofiler_ROOT_DIR}
          ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${rocprofiler_ROOT_DIR}/rocprofiler
          ${rocprofiler_ROOT_DIR}
          ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES lib lib64
    NO_DEFAULT_PATH)

# HSA runtime library, searched in the same prefixes (default paths are allowed here).
find_library(
    rocprofiler_hsa-runtime_LIBRARY
    NAMES hsa-runtime64 hsa-runtime
    HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
    PATH_SUFFIXES lib lib64)

# Cache the directory that holds the rocprofiler library once the library is known.
if(rocprofiler_LIBRARY)
    get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" DIRECTORY
                           CACHE)
endif()

mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)

# The search-prefix list is not needed past this point.
unset(_ROCM_ROCPROFILER_PATHS)

# pciaccess is looked up only when ROCmVersion_NUMERIC_VERSION is exactly 50500; it is
# searched in the default locations rather than in the ROCm prefixes.
if(ROCmVersion_NUMERIC_VERSION EQUAL 50500)
    find_library(
        rocprofiler_pciaccess_LIBRARY
        NAMES pciaccess
        PATH_SUFFIXES lib lib64)
    mark_as_advanced(rocprofiler_pciaccess_LIBRARY)
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Sets rocprofiler_FOUND once every required path/library variable holds a real result.
find_package_handle_standard_args(
    rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR
    rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)

# ----------------------------------------------------------------------------------------#

if(rocprofiler_FOUND)
    # Classic result variables for consumers that do not use the imported target.
    set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR}
                                 ${rocprofiler_hsa_INCLUDE_DIR})
    set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR})
    set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY})
    if(rocprofiler_pciaccess_LIBRARY)
        list(APPEND rocprofiler_LIBRARIES ${rocprofiler_pciaccess_LIBRARY})
    endif()

    # Imported targets can only be created once per scope. Guarding on TARGET lets this
    # module be processed again (e.g. a second find_package(rocprofiler) call) without
    # add_library() failing because the name is already taken.
    if(NOT TARGET rocprofiler::rocprofiler)
        add_library(rocprofiler::rocprofiler INTERFACE IMPORTED)
        target_include_directories(
            rocprofiler::rocprofiler INTERFACE ${rocprofiler_INCLUDE_DIR}
                                               ${rocprofiler_hsa_INCLUDE_DIR})
        target_link_libraries(rocprofiler::rocprofiler
                              INTERFACE ${rocprofiler_LIBRARIES})
    endif()

    # NOTE(review): this target has never carried include directories or libraries in
    # this module. It is still declared so that existing references to the name keep
    # resolving -- confirm whether anything uses it before removing.
    if(NOT TARGET rocprofiler::roctx)
        add_library(rocprofiler::roctx INTERFACE IMPORTED)
    endif()
endif()
|
||||
@@ -1,186 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
|
||||
# Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Seed ROCM_PATH from the environment when it was not supplied as a CMake variable.
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()

# Build the list of search prefixes for roctracer. Candidates are visited in priority
# order (ROCmVersion_DIR, ROCM_PATH, then the conventional /opt/rocm locations). Only
# candidates that exist are kept, and each is stored with symlinks resolved.
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/roctracer)
    if(EXISTS "${_DIR}")
        get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
        list(APPEND _ROCM_ROCTRACER_PATHS "${_ABS_DIR}")
    endif()
endforeach()

# Different candidates can resolve to the same real directory (for example when
# ROCM_PATH is itself /opt/rocm). Drop the repeats so every prefix is searched once;
# REMOVE_DUPLICATES keeps the first occurrence, so the priority order is unchanged.
# The guard avoids calling list() on a variable that was never created.
if(_ROCM_ROCTRACER_PATHS)
    list(REMOVE_DUPLICATES _ROCM_ROCTRACER_PATHS)
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Installation prefix: the directory that holds include/roctracer/roctracer.h or
# include/roctracer.h. Every search prefix is also tried with a roctracer/ suffix.
find_path(
    roctracer_ROOT_DIR
    NAMES include/roctracer/roctracer.h include/roctracer.h
    HINTS ${_ROCM_ROCTRACER_PATHS}
    PATHS ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES roctracer)

# ----------------------------------------------------------------------------------------#

# Directory that directly contains roctracer.h.
find_path(
    roctracer_INCLUDE_DIR
    NAMES roctracer.h
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES include include/roctracer roctracer/include)

# Directory that directly contains hsa.h.
find_path(
    roctracer_hsa_INCLUDE_DIR
    NAMES hsa.h
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES include include/hsa)

# Keep the three result entries out of the non-advanced cache view.
mark_as_advanced(roctracer_ROOT_DIR roctracer_INCLUDE_DIR roctracer_hsa_INCLUDE_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# The four libraries below share the same search recipe: the detected roctracer root
# first, then every collected ROCm prefix, each with a lib/ or lib64/ suffix. The
# 64-suffixed library name is preferred over the plain one.

# Core roctracer library.
find_library(
    roctracer_LIBRARY
    NAMES roctracer64 roctracer
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES lib lib64)

# roctx library.
find_library(
    roctracer_roctx_LIBRARY
    NAMES roctx64 roctx
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES lib lib64)

# kfdwrapper library (not part of the required set checked later in this module).
find_library(
    roctracer_kfdwrapper_LIBRARY
    NAMES kfdwrapper64 kfdwrapper
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES lib lib64)

# HSA runtime library (also not part of the required set).
find_library(
    roctracer_hsa-runtime_LIBRARY
    NAMES hsa-runtime64 hsa-runtime
    HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
    PATH_SUFFIXES lib lib64)
|
||||
|
||||
# try not to directly use the hsakmt::hsakmt target because it hardcodes the
# INTERFACE_LINK_LIBRARIES used when it was built
find_package(hsakmt HINTS ${_ROCM_ROCTRACER_PATHS} PATHS ${_ROCM_ROCTRACER_PATHS})

if(hsakmt_FOUND AND TARGET roctracer::hsakmt)
    # The wrapper target and the roctracer_hsakmt_LIBRARY cache entry were already
    # produced by an earlier pass through this module. Creating the target again would
    # make add_library() fail, so there is nothing left to do.
elseif(hsakmt_FOUND AND TARGET hsakmt::hsakmt)
    # Re-create hsakmt as roctracer::hsakmt: same include directories, but linked
    # against the system libraries located on *this* machine.
    add_library(roctracer::hsakmt INTERFACE IMPORTED)

    # get_target_property() yields a *-NOTFOUND value when the property is unset;
    # only forward a real result so a bogus include path never reaches the target.
    get_target_property(hsakmt_INCLUDE_DIR hsakmt::hsakmt INTERFACE_INCLUDE_DIRECTORIES)
    if(hsakmt_INCLUDE_DIR)
        target_include_directories(roctracer::hsakmt INTERFACE ${hsakmt_INCLUDE_DIR})
    endif()

    # Stays ON only if every system library in the loop below is found.
    set(hsakmt_FOUND_LIBS ON)
    foreach(_LIB drm drm_amdgpu rt c numa udev)
        # Accept the plain name as well as the versioned shared objects
        # <prefix><lib><suffix>.2 and <prefix><lib><suffix>.1.
        set(_LIB_NAMES ${_LIB})
        foreach(_EXT 2 1)
            list(
                APPEND
                _LIB_NAMES
                ${CMAKE_SHARED_LIBRARY_PREFIX}${_LIB}${CMAKE_SHARED_LIBRARY_SUFFIX}.${_EXT}
                )
        endforeach()

        find_library(
            hsakmt_${_LIB}_LIBRARY
            NAMES ${_LIB_NAMES}
            HINTS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu
            PATHS ${_ROCM_ROCTRACER_PATHS} /opt/amdgpu
            PATH_SUFFIXES ${CMAKE_INSTALL_LIBDIR} lib lib64)

        if(hsakmt_${_LIB}_LIBRARY)
            target_link_libraries(roctracer::hsakmt
                                  INTERFACE ${hsakmt_${_LIB}_LIBRARY})
        else()
            set(hsakmt_FOUND_LIBS OFF)
        endif()
    endforeach()

    if(hsakmt_FOUND_LIBS)
        # Complete set found: finish the wrapper target and publish it.
        find_package(Threads REQUIRED)
        target_link_libraries(roctracer::hsakmt INTERFACE Threads::Threads)
        set(roctracer_hsakmt_LIBRARY
            roctracer::hsakmt
            CACHE STRING "Generated hsakmt target for roctracer")
    else()
        # Incomplete set: fall back to the target exported by the hsakmt package.
        set(roctracer_hsakmt_LIBRARY
            hsakmt::hsakmt
            CACHE STRING "Imported hsakmt target")
    endif()
else()
    # No usable hsakmt package (or it does not define hsakmt::hsakmt): look for the
    # bare library instead.
    find_library(
        roctracer_hsakmt_LIBRARY
        NAMES hsakmt
        HINTS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
        PATHS ${roctracer_ROOT_DIR} ${_ROCM_ROCTRACER_PATHS}
        PATH_SUFFIXES lib lib64)
endif()
|
||||
|
||||
# Cache the directory that holds the roctracer library once the library is known.
if(roctracer_LIBRARY)
    get_filename_component(roctracer_LIBRARY_DIR "${roctracer_LIBRARY}" PATH CACHE)
endif()

mark_as_advanced(roctracer_LIBRARY roctracer_roctx_LIBRARY roctracer_hsakmt_LIBRARY
                 roctracer_hsa-runtime_LIBRARY)

# ----------------------------------------------------------------------------------------#

# Sets roctracer_FOUND once every required path/library variable holds a real result.
# kfdwrapper, hsakmt and hsa-runtime are not part of the required set.
find_package_handle_standard_args(
    roctracer DEFAULT_MSG roctracer_ROOT_DIR roctracer_INCLUDE_DIR
    roctracer_hsa_INCLUDE_DIR roctracer_LIBRARY roctracer_roctx_LIBRARY)

# ------------------------------------------------------------------------------#

if(roctracer_FOUND)
    # Optional libraries linked by BOTH imported targets (only those that were found).
    set(_ROCTRACER_SHARED_LIBS)
    if(roctracer_kfdwrapper_LIBRARY)
        list(APPEND _ROCTRACER_SHARED_LIBS ${roctracer_kfdwrapper_LIBRARY})
    endif()
    if(roctracer_hsakmt_LIBRARY)
        list(APPEND _ROCTRACER_SHARED_LIBS ${roctracer_hsakmt_LIBRARY})
    endif()

    # The HSA runtime is linked by roctracer::roctracer only, not by roctracer::roctx.
    set(_ROCTRACER_HSA_RUNTIME_LIB)
    if(roctracer_hsa-runtime_LIBRARY)
        set(_ROCTRACER_HSA_RUNTIME_LIB ${roctracer_hsa-runtime_LIBRARY})
    endif()

    # Classic result variables for consumers that do not use the imported targets.
    set(roctracer_INCLUDE_DIRS ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR})
    set(roctracer_LIBRARY_DIRS ${roctracer_LIBRARY_DIR})
    set(roctracer_LIBRARIES
        ${roctracer_LIBRARY} ${roctracer_roctx_LIBRARY} ${_ROCTRACER_SHARED_LIBS}
        ${_ROCTRACER_HSA_RUNTIME_LIB})

    # Imported targets can only be created once per scope. Guarding on TARGET lets this
    # module be processed again (e.g. a second find_package(roctracer) call) without
    # add_library() failing because the name is already taken.
    if(NOT TARGET roctracer::roctracer)
        add_library(roctracer::roctracer INTERFACE IMPORTED)
        target_include_directories(
            roctracer::roctracer INTERFACE ${roctracer_INCLUDE_DIR}
                                           ${roctracer_hsa_INCLUDE_DIR})
        target_link_libraries(
            roctracer::roctracer
            INTERFACE ${roctracer_LIBRARY} ${_ROCTRACER_SHARED_LIBS}
                      ${_ROCTRACER_HSA_RUNTIME_LIB})
    endif()

    if(NOT TARGET roctracer::roctx)
        add_library(roctracer::roctx INTERFACE IMPORTED)
        target_include_directories(
            roctracer::roctx INTERFACE ${roctracer_INCLUDE_DIR}
                                       ${roctracer_hsa_INCLUDE_DIR})
        target_link_libraries(roctracer::roctx INTERFACE ${roctracer_roctx_LIBRARY}
                                                         ${_ROCTRACER_SHARED_LIBS})
    endif()

    unset(_ROCTRACER_SHARED_LIBS)
    unset(_ROCTRACER_HSA_RUNTIME_LIB)
endif()

# ------------------------------------------------------------------------------#

# The search-prefix list is internal to this module.
unset(_ROCM_ROCTRACER_PATHS)
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
@@ -17,10 +17,6 @@ rocprofiler_systems_add_interface_library(
|
||||
"Provides flags and libraries for Dyninst (dynamic instrumentation)")
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-rocm
|
||||
"Provides flags and libraries for ROCm")
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-roctracer
|
||||
"Provides flags and libraries for roctracer")
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-rocprofiler
|
||||
"Provides flags and libraries for rocprofiler")
|
||||
rocprofiler_systems_add_interface_library(
|
||||
rocprofiler-systems-rccl
|
||||
"Provides flags for ROCm Communication Collectives Library (RCCL)")
|
||||
@@ -161,15 +157,6 @@ if(ROCPROFSYS_USE_ROCM)
|
||||
set(ROCPROFSYS_ROCM_VERSION_PATCH ${ROCmVersion_PATCH_VERSION})
|
||||
set(ROCPROFSYS_ROCM_VERSION ${ROCmVersion_TRIPLE_VERSION})
|
||||
|
||||
if(ROCPROFSYS_ROCM_VERSION_MAJOR GREATER_EQUAL 4 AND ROCPROFSYS_ROCM_VERSION_MINOR
|
||||
GREATER 3)
|
||||
set(roctracer_kfdwrapper_LIBRARY)
|
||||
endif()
|
||||
|
||||
if(NOT roctracer_kfdwrapper_LIBRARY)
|
||||
set(roctracer_kfdwrapper_LIBRARY)
|
||||
endif()
|
||||
|
||||
rocprofiler_systems_add_feature(ROCPROFSYS_ROCM_VERSION
|
||||
"ROCm version used by rocprofiler-systems")
|
||||
else()
|
||||
|
||||
@@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \
|
||||
zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
zypper --non-interactive --gpg-auto-import-keys refresh && \
|
||||
zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
|
||||
zypper --non-interactive install -y rocm-dev rccl-devel libpciaccess0 && \
|
||||
zypper --non-interactive clean --all; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
|
||||
dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \
|
||||
yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
|
||||
yum install -y rocm-dev && \
|
||||
yum clean all; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \
|
||||
apt-get install -y ./${AMDGPU_DEB} && \
|
||||
apt-get update && \
|
||||
apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
|
||||
apt-get install -y rocm-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
|
||||
apt-get autoclean; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -468,7 +468,6 @@ Viewing components
|
||||
| written_bytes | Number of bytes sent to the storage layer. |
|
||||
| written_char | Number of bytes which this task has cause... |
|
||||
| rocprof-sys | Invokes instrumentation functions rocprof... |
|
||||
| roctracer | High-precision ROCm API and kernel tracing. |
|
||||
| sampling_wall_clock | Wall-clock timing. Derived from statistic... |
|
||||
| sampling_cpu_clock | CPU-clock timing. Derived from statistica... |
|
||||
| sampling_percent | Fraction of wall-clock time spent in func... |
|
||||
|
||||
+2
-5
@@ -247,10 +247,7 @@ view the help menu.
|
||||
libpthread.so.0
|
||||
libresolv.so.2
|
||||
librocm_smi64.so
|
||||
librocmtools.so
|
||||
librocprofiler64.so
|
||||
libroctracer64.so
|
||||
libroctx64.so
|
||||
librocprofiler-sdk.so
|
||||
librt.so.1
|
||||
libstdc++.so.6
|
||||
libtbb.so
|
||||
@@ -854,7 +851,7 @@ By default, ``rocprof-sys-instrument`` uses ``--mode trace`` for instrumentation
|
||||
only instruments ``main`` in an executable. It activates both CPU call-stack sampling and
|
||||
background system-level thread sampling by default.
|
||||
Tracing capabilities which do not rely on instrumentation, such as the HIP API and kernel tracing
|
||||
(which is collected by roctracer), are still available.
|
||||
are still available.
|
||||
|
||||
The ROCm Systems Profiler sampling capabilities are always available, even in trace mode, but are deactivated by default.
|
||||
To activate sampling in trace mode, set ``ROCPROFSYS_USE_SAMPLING=ON`` in the environment
|
||||
|
||||
@@ -80,7 +80,7 @@ in between samples. Progress points must be triggered in a deterministic manner
|
||||
This can happen in three different ways:
|
||||
|
||||
* `ROCm Systems Profiler <https://github.com/ROCm/rocprofiler-systems>`_ can leverage the callbacks from
|
||||
Kokkos-Tools, OpenMP-Tools, roctracer, etc. and the wrappers around functions for
|
||||
Kokkos-Tools, OpenMP-Tools, rocprofiler-sdk, etc. and the wrappers around functions for
|
||||
MPI, NUMA, RCCL, etc. to act as progress points
|
||||
* Users can leverage the :doc:`runtime instrumentation capabilities <./instrumenting-rewriting-binary-application>`
|
||||
to insert progress points
|
||||
|
||||
@@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option
|
||||
|
||||
[BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation
|
||||
|
||||
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ]
|
||||
Include data from these backends (count: unlimited)
|
||||
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler-sdk | rw-locks | spin-locks ]
|
||||
Exclude data from these backends (count: unlimited)
|
||||
|
||||
[HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H
|
||||
|
||||
+198
-155
@@ -77,164 +77,207 @@ Metadata JSON Sample
|
||||
.. code-block:: json
|
||||
|
||||
{
|
||||
"rocprof-sys": {
|
||||
"rocprofiler-systems": {
|
||||
"metadata": {
|
||||
"info": {
|
||||
"HW_L1_CACHE_SIZE": 32768,
|
||||
"HW_L2_CACHE_SIZE": 524288,
|
||||
"HW_L3_CACHE_SIZE": 16777216,
|
||||
"HW_PHYSICAL_CPU": 12,
|
||||
"HW_CONCURRENCY": 24,
|
||||
"LAUNCH_TIME": "02:04",
|
||||
"LAUNCH_DATE": "05/08/22",
|
||||
"TIMEMORY_GIT_REVISION": "52e7034fd419ff296506cdef43084f6071dbaba1",
|
||||
"TIMEMORY_VERSION": "3.3.0rc4",
|
||||
"TIMEMORY_API": "tim::project::timemory",
|
||||
"TIMEMORY_GIT_DESCRIBE": "v3.2.0-263-g52e7034f",
|
||||
"PWD": "/home/jrmadsen/devel/c++/AARInternal/hosttrace-dyninst/build-vscode",
|
||||
"USER": "jrmadsen",
|
||||
"HOME": "/home/jrmadsen",
|
||||
"SHELL": "/bin/bash",
|
||||
"CPU_MODEL": "AMD Ryzen Threadripper PRO 3945WX 12-Cores",
|
||||
"CPU_FREQUENCY": 2400,
|
||||
"CPU_VENDOR": "AuthenticAMD",
|
||||
"CPU_FEATURES": [
|
||||
"fpu",
|
||||
"msr",
|
||||
"sse",
|
||||
"sse2",
|
||||
"constant_tsc",
|
||||
"ssse3",
|
||||
"fma",
|
||||
"sse4_1",
|
||||
"sse4_2",
|
||||
"popcnt",
|
||||
"avx2",
|
||||
"... etc. ..."
|
||||
],
|
||||
"memory_maps": [
|
||||
{
|
||||
"end_address": "7f4013797000",
|
||||
"start_address": "7f4012e58000",
|
||||
"pathname": "/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000",
|
||||
"offset": "34a000",
|
||||
"device": "103:05",
|
||||
"inode": 4331165,
|
||||
"permissions": "rw-p"
|
||||
},
|
||||
{
|
||||
"end_address": "7f4013902000",
|
||||
"start_address": "7f4013901000",
|
||||
"pathname": "/usr/lib/x86_64-linux-gnu/libm-2.31.so",
|
||||
"offset": "14d000",
|
||||
"device": "103:05",
|
||||
"inode": 42078854,
|
||||
"permissions": "rwxp"
|
||||
},
|
||||
{
|
||||
"end_address": "7f4013919000",
|
||||
"start_address": "7f4013908000",
|
||||
"pathname": "/usr/lib/x86_64-linux-gnu/libpthread-2.31.so",
|
||||
"offset": "6000",
|
||||
"device": "103:05",
|
||||
"inode": 42078874,
|
||||
"permissions": "r-xp"
|
||||
},
|
||||
{
|
||||
"...": "etc."
|
||||
},
|
||||
],
|
||||
"memory_maps_files": [
|
||||
"/opt/rocm-5.0.0/hip/lib/libamdhip64.so.5.0.50000",
|
||||
"/opt/rocm-5.0.0/hsa-amd-aqlprofile/lib/libhsa-amd-aqlprofile64.so.1.0.50000",
|
||||
"/opt/rocm-5.0.0/lib/libamd_comgr.so.2.4.50000",
|
||||
"/opt/rocm-5.0.0/lib/libhsa-runtime64.so.1.5.50000",
|
||||
"/opt/rocm-5.0.0/rocm_smi/lib/librocm_smi64.so.5.0.50000",
|
||||
"/opt/rocm-5.0.0/roctracer/lib/libroctracer64.so.1.0.50000",
|
||||
"/usr/lib/x86_64-linux-gnu/ld-2.31.so",
|
||||
"/usr/lib/x86_64-linux-gnu/libc-2.31.so",
|
||||
"/usr/lib/x86_64-linux-gnu/libdl-2.31.so",
|
||||
"... etc. ..."
|
||||
],
|
||||
},
|
||||
"output": {
|
||||
"text": [
|
||||
{
|
||||
"value": [
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.txt"
|
||||
],
|
||||
"key": "roctracer"
|
||||
},
|
||||
{
|
||||
"value": [
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt"
|
||||
],
|
||||
"key": "wall_clock"
|
||||
}
|
||||
],
|
||||
"json": [
|
||||
{
|
||||
"value": [
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.json",
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/roctracer.tree.json"
|
||||
],
|
||||
"key": "roctracer"
|
||||
},
|
||||
{
|
||||
"value": [
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json",
|
||||
"rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.tree.json"
|
||||
],
|
||||
"key": "wall_clock"
|
||||
}
|
||||
]
|
||||
},
|
||||
"environment": [
|
||||
{
|
||||
"value": "/home/jrmadsen",
|
||||
"key": "HOME"
|
||||
},
|
||||
{
|
||||
"value": "/bin/bash",
|
||||
"key": "SHELL"
|
||||
},
|
||||
{
|
||||
"value": "jrmadsen",
|
||||
"key": "USER"
|
||||
},
|
||||
{
|
||||
"value": "true",
|
||||
"key": "... etc. ..."
|
||||
}
|
||||
"info": {
|
||||
"CPU_MODEL": "AMD Ryzen 5 3600 6-Core Processor",
|
||||
"CPU_VENDOR": "AuthenticAMD",
|
||||
"HOME": "/home/rocm-dev",
|
||||
"LAUNCH_DATE": "01/15/25",
|
||||
"LAUNCH_TIME": "16:49",
|
||||
"PWD": "/home/rocm-dev/code/rocprofiler-systems",
|
||||
"ROCPROFSYS_COMPILER_ID": "GNU",
|
||||
"ROCPROFSYS_COMPILER_VERSION": "11.4.0",
|
||||
"ROCPROFSYS_GIT_DESCRIBE": "",
|
||||
"ROCPROFSYS_GIT_REVISION": "3213dc652728f7ed01b62bf55f6af76c43bfcbdb",
|
||||
"ROCPROFSYS_LIBRARY_ARCH": "x86_64-linux-gnu",
|
||||
"ROCPROFSYS_ROCM_VERSION": "6.3.1",
|
||||
"ROCPROFSYS_SYSTEM_NAME": "Linux",
|
||||
"ROCPROFSYS_SYSTEM_PROCESSOR": "x86_64",
|
||||
"ROCPROFSYS_SYSTEM_VERSION": "6.8.0-51-generic",
|
||||
"ROCPROFSYS_VERSION": "1.0.0",
|
||||
"SHELL": "/usr/bin/zsh",
|
||||
"TIMEMORY_API": "tim::project::timemory",
|
||||
"TIMEMORY_GIT_DESCRIBE": "v3.2.0-703-gba3c6486",
|
||||
"TIMEMORY_GIT_REVISION": "ba3c648677b3c6f217abe147ef3198f36239e234",
|
||||
"TIMEMORY_VERSION": "4.0.0rc0",
|
||||
"USER": "rocm-dev",
|
||||
"CPU_FREQUENCY": 1972,
|
||||
"CPU_FEATURES": [
|
||||
"fpu",
|
||||
"vme",
|
||||
"de",
|
||||
"pse",
|
||||
"tsc",
|
||||
"msr",
|
||||
"pae",
|
||||
"... etc. ..."
|
||||
],
|
||||
"HW_CONCURRENCY": 12,
|
||||
"HW_PHYSICAL_CPU": 6,
|
||||
"HW_L1_CACHE_SIZE": 32768,
|
||||
"HW_L2_CACHE_SIZE": 524288,
|
||||
"HW_L3_CACHE_SIZE": 16777216,
|
||||
"ROCPROFSYS_VERSION_MAJOR": 1,
|
||||
"ROCPROFSYS_VERSION_MINOR": 0,
|
||||
"ROCPROFSYS_VERSION_PATCH": 0,
|
||||
"ROCPROFSYS_ROCM_VERSION_MAJOR": 6,
|
||||
"ROCPROFSYS_ROCM_VERSION_MINOR": 3,
|
||||
"ROCPROFSYS_ROCM_VERSION_PATCH": 1,
|
||||
"memory_maps_files": [
|
||||
"/opt/rocm-6.3.1/lib/libhsa-amd-aqlprofile64.so.1.0.60301",
|
||||
"/opt/rocm-6.3.1/lib/libhsa-runtime64.so.1.14.60301",
|
||||
"/opt/rocm-6.3.1/lib/librocm_smi64.so.7.4.60301",
|
||||
"/opt/rocm-6.3.1/lib/librocprofiler-register.so.0.4.0",
|
||||
"/opt/rocm-6.3.1/lib/librocprofiler-sdk.so.0.5.0",
|
||||
"/opt/rocm/lib/libhsa-amd-aqlprofile64.so.1",
|
||||
"/opt/rocm/lib/libhsa-runtime64.so.1",
|
||||
"/opt/rocm/lib/librocm_smi64.so.7",
|
||||
"/opt/rocm/lib/librocprofiler-register.so.0",
|
||||
"/opt/rocm/lib/librocprofiler-sdk.so.0",
|
||||
"... etc. ..."
|
||||
],
|
||||
"settings": {
|
||||
"ROCPROFSYS_JSON_OUTPUT": {
|
||||
"count": -1,
|
||||
"environ_updated": false,
|
||||
"name": "json_output",
|
||||
"data_type": "bool",
|
||||
"initial": true,
|
||||
"enabled": true,
|
||||
"value": true,
|
||||
"max_count": 1,
|
||||
"cmdline": [
|
||||
"--rocprof-sys-json-output"
|
||||
],
|
||||
"environ": "ROCPROFSYS_JSON_OUTPUT",
|
||||
"config_updated": false,
|
||||
"categories": [
|
||||
"io",
|
||||
"json",
|
||||
"native"
|
||||
],
|
||||
"description": "Write json output files"
|
||||
},
|
||||
"... etc. ...": {
|
||||
"etc.": true
|
||||
}
|
||||
"memory_maps": [
|
||||
{
|
||||
"cereal_class_version": 0,
|
||||
"load_address": "76005b800000",
|
||||
"last_address": "76005b81b000",
|
||||
"permissions": "r---",
|
||||
"offset": "0",
|
||||
"device": "",
|
||||
"inode": 0,
|
||||
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
|
||||
},
|
||||
{
|
||||
"load_address": "76005b81b000",
|
||||
"last_address": "76005b93400d",
|
||||
"permissions": "r-x-",
|
||||
"offset": "1b000",
|
||||
"device": "",
|
||||
"inode": 0,
|
||||
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
|
||||
},
|
||||
{
|
||||
"load_address": "76005b935000",
|
||||
"last_address": "76005b9aeab8",
|
||||
"permissions": "r---",
|
||||
"offset": "135000",
|
||||
"device": "",
|
||||
"inode": 0,
|
||||
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
|
||||
},
|
||||
{
|
||||
"load_address": "76005b9b0638",
|
||||
"last_address": "76005bb2d598",
|
||||
"permissions": "rw--",
|
||||
"offset": "1af638",
|
||||
"device": "",
|
||||
"inode": 0,
|
||||
"pathname": "/opt/rocm/lib/libhsa-runtime64.so.1"
|
||||
},
|
||||
{
|
||||
"load_address": "76005bc00000",
|
||||
"last_address": "76005bc26140",
|
||||
"permissions": "r---",
|
||||
"offset": "0",
|
||||
"device": "",
|
||||
"inode": 0,
|
||||
"pathname": "/opt/rocm/lib/librocprofiler-sdk.so.0"
|
||||
},
|
||||
{
|
||||
"...": "etc."
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"cereal_class_version": 2,
|
||||
"ROCPROFSYS_OUTPUT_PREFIX": {
|
||||
"name": "output_prefix",
|
||||
"environ": "ROCPROFSYS_OUTPUT_PREFIX",
|
||||
"description": "Explicitly specify a prefix for all output files",
|
||||
"count": 1,
|
||||
"max_count": -1,
|
||||
"cmdline": [
|
||||
"--rocprofiler-systems-output-prefix"
|
||||
],
|
||||
"categories": [
|
||||
"filename",
|
||||
"io",
|
||||
"librocprof-sys",
|
||||
"native",
|
||||
"rocprofsys"
|
||||
],
|
||||
"data_type": "string",
|
||||
"initial": "parallel-overhead-binary-rewrite/",
|
||||
"value": "parallel-overhead-binary-rewrite/",
|
||||
"updated": "config",
|
||||
"enabled": true
|
||||
},
|
||||
"... etc. ...": {
|
||||
"etc.": true
|
||||
},
|
||||
"command_line": [
|
||||
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/parallel-overhead.inst",
|
||||
"--",
|
||||
"10",
|
||||
"12",
|
||||
"1000"
|
||||
],
|
||||
"environment": [
|
||||
"... etc. ..."
|
||||
]
|
||||
},
|
||||
"environment": [
|
||||
{
|
||||
"key": "GOTCHA_DEBUG",
|
||||
"value": "0"
|
||||
},
|
||||
{
|
||||
"key": "HIP_VISIBLE_DEVICES",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"key": "HOME",
|
||||
"value": "/home/rocm-dev"
|
||||
},
|
||||
{
|
||||
"key": "LD_LIBRARY_PATH",
|
||||
"value": "/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib:/opt/rocm/lib"
|
||||
},
|
||||
{
|
||||
"key": "LIBRARY_PATH",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"key": "... etc. ...", "value": "..."
|
||||
}
|
||||
]
|
||||
"output": {
|
||||
"json": [
|
||||
{
|
||||
"key": "wall_clock",
|
||||
"value": [
|
||||
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.json"
|
||||
]
|
||||
}
|
||||
],
|
||||
"protobuf": [
|
||||
{
|
||||
"key": "perfetto",
|
||||
"value": [
|
||||
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/perfetto-trace.proto"
|
||||
]
|
||||
}
|
||||
],
|
||||
"text": [
|
||||
{
|
||||
"key": "wall_clock",
|
||||
"value": [
|
||||
"/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/rocprof-sys-tests-output/parallel-overhead-binary-rewrite/wall_clock.txt"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
Configuring the ROCm Systems Profiler output
|
||||
|
||||
@@ -192,75 +192,52 @@ First, instrument and run the program.
|
||||
...
|
||||
$ rocprof-sys-run --profile --trace -- ./user-api.inst 10 12 1000
|
||||
|
||||
ROCPROFSYS: HSA_TOOLS_LIB=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
|
||||
ROCPROFSYS: HSA_TOOLS_REPORT_LOAD_FAILURE=1
|
||||
ROCPROFSYS: LD_PRELOAD=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
|
||||
ROCPROFSYS: OMP_TOOL_LIBRARIES=/opt/rocm-6.3.1/lib/librocprof-sys-dl.so.0.1.0
|
||||
ROCPROFSYS: LD_PRELOAD=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
|
||||
ROCPROFSYS: OMP_TOOL_LIBRARIES=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
|
||||
ROCPROFSYS: ROCPROFSYS_PROFILE=true
|
||||
ROCPROFSYS: ROCPROFSYS_TRACE=true
|
||||
ROCPROFSYS: ROCPROFSYS_VERBOSE=0
|
||||
ROCPROFSYS: ROCP_HSA_INTERCEPT=1
|
||||
ROCPROFSYS: ROCP_TOOL_LIB=/opt/rocm-6.3.1/lib/librocprof-sys.so.0.1.0
|
||||
[rocprof-sys][dl][297646] rocprofsys_main
|
||||
[rocprof-sys][297646][rocprofsys_init_tooling] Instrumentation mode: Trace
|
||||
[rocprof-sys][dl][1827155] rocprofsys_main
|
||||
[rocprof-sys][1827155][rocprofsys_init_tooling] Instrumentation mode: Trace
|
||||
|
||||
|
||||
____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____
|
||||
____ ___ ____ __ __ ______ ______ _____ _____ __ __ ____ ____ ____ ___ _____ ___ _ _____ ____
|
||||
| _ \ / _ \ / ___| \/ | / ___\ \ / / ___|_ _| ____| \/ / ___| | _ \| _ \ / _ \| ___|_ _| | | ____| _ \
|
||||
| |_) | | | | | | |\/| | \___ \\ V /\___ \ | | | _| | |\/| \___ \ | |_) | |_) | | | | |_ | || | | _| | |_) |
|
||||
| _ <| |_| | |___| | | | ___) || | ___) || | | |___| | | |___) | | __/| _ <| |_| | _| | || |___| |___| _ <
|
||||
|_| \_\\___/ \____|_| |_| |____/ |_| |____/ |_| |_____|_| |_|____/ |_| |_| \_\\___/|_| |___|_____|_____|_| \_\
|
||||
|
||||
rocprof-sys v0.1.0 (rev: b569c837e455f71dd76d06392d0b901ae927deca, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x)
|
||||
[105.947] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: ""
|
||||
Pushing custom region :: ./user-api.inst
|
||||
Pushing custom region :: initialization
|
||||
rocprof-sys v1.0.0 (rev: 3213dc652728f7ed01b62bf55f6af76c43bfcbdb, x86_64-linux-gnu, compiler: GNU v11.4.0, rocm: v6.3.x)
|
||||
[790.763] perfetto.cc:47606 Configured tracing session 1, #sources:1, duration:0 ms, #buffers:1, total buffer size:1024000 KB, total sessions:1, uid:0 session name: ""
|
||||
[./user-api.inst] Threads: 12
|
||||
[./user-api.inst] Iterations: 1000
|
||||
[./user-api.inst] fibonacci(10)...
|
||||
Pushing custom region :: thread_creation
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: run(10) x 1000
|
||||
Pushing custom region :: thread_wait
|
||||
Pushing custom region :: run(10) x 1000
|
||||
[./user-api.inst] fibonacci(10) x 12 = 715000
|
||||
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] finalizing...
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646 : 0.978014 sec wall_clock, 26.752 MB peak_rss, 27.394 MB page_rss, 1.520000 sec cpu_clock, 155.4 % cpu_util [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/0 : 0.976068 sec wall_clock, 0.789948 sec thread_cpu_clock, 80.9 % thread_cpu_util, 26.112 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/1 : 0.027517 sec wall_clock, 0.027510 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/2 : 0.027828 sec wall_clock, 0.027811 sec thread_cpu_clock, 99.9 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/3 : 0.027585 sec wall_clock, 0.027585 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/4 : 0.033449 sec wall_clock, 0.033443 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.584 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/5 : 0.027727 sec wall_clock, 0.027726 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.328 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/6 : 0.032228 sec wall_clock, 0.032220 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.712 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/7 : 0.030201 sec wall_clock, 0.030202 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.768 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/8 : 0.027960 sec wall_clock, 0.027951 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/9 : 0.034698 sec wall_clock, 0.034699 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/10 : 0.033414 sec wall_clock, 0.033399 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.512 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/11 : 0.028161 sec wall_clock, 0.028149 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] rocprofsys/process/297646/thread/12 : 0.027791 sec wall_clock, 0.027767 sec thread_cpu_clock, 99.9 % thread_cpu_util, 0.256 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize]
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] Finalizing perfetto...
|
||||
[rocprofiler-systems][297646][perfetto]> Outputting '/home/gliff/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-02_19.29/perfetto-trace-297646.proto' (16728.58 KB / 16.73 MB / 0.02 GB)... Done
|
||||
[rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.json'
|
||||
[rocprofiler-systems][297646][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/wall_clock-297646.txt'
|
||||
[rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.json'
|
||||
[rocprofiler-systems][297646][roctracer]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/roctracer-297646.txt'
|
||||
[rocprofiler-systems][297646][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-02_19.29/metadata-297646.json' and 'rocprofsys-user-api.inst-output/2025-01-02_19.29/functions-297646.json'
|
||||
[rocprof-sys][297646][0][rocprofsys_finalize] Finalized: 0.314368 sec wall_clock, 19.040 MB peak_rss, 3.498 MB page_rss, 0.280000 sec cpu_clock, 89.1 % cpu_util
|
||||
[107.243] perfetto.cc:49204 Tracing session 1 ended, total sessions:0
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] finalizing...
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155 : 0.137404 sec wall_clock, 6.528 MB peak_rss, 6.685 MB page_rss, 0.540000 sec cpu_clock, 393.0 % cpu_util [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/0 : 0.135815 sec wall_clock, 0.035171 sec thread_cpu_clock, 25.9 % thread_cpu_util, 6.016 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/1 : 0.028336 sec wall_clock, 0.028336 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/2 : 0.030380 sec wall_clock, 0.030380 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/3 : 0.035233 sec wall_clock, 0.035227 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/4 : 0.035275 sec wall_clock, 0.035267 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/5 : 0.035452 sec wall_clock, 0.035452 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/6 : 0.036198 sec wall_clock, 0.036190 sec thread_cpu_clock, 100.0 % thread_cpu_util, 3.840 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/7 : 0.034709 sec wall_clock, 0.034702 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.640 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/8 : 0.036590 sec wall_clock, 0.033590 sec thread_cpu_clock, 91.8 % thread_cpu_util, 0.512 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/9 : 0.033108 sec wall_clock, 0.033098 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.384 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/10 : 0.032993 sec wall_clock, 0.032994 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.256 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/11 : 0.035687 sec wall_clock, 0.035368 sec thread_cpu_clock, 99.1 % thread_cpu_util, 0.128 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] rocprofsys/process/1827155/thread/12 : 0.035359 sec wall_clock, 0.035354 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.128 MB peak_rss [laps: 1]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize]
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] Finalizing perfetto...
|
||||
[rocprofiler-systems][1827155][perfetto]> Outputting '/home/rocm-dev/opt/user-api-test/rocprofsys-user-api.inst-output/2025-01-15_17.57/perfetto-trace-1827155.proto' (17.20 KB / 0.02 MB / 0.00 GB)... Done
|
||||
[rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.json'
|
||||
[rocprofiler-systems][1827155][wall_clock]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/wall_clock-1827155.txt'
|
||||
[rocprofiler-systems][1827155][metadata]> Outputting 'rocprofsys-user-api.inst-output/2025-01-15_17.57/metadata-1827155.json' and 'rocprofsys-user-api.inst-output/2025-01-15_17.57/functions-1827155.json'
|
||||
[rocprof-sys][1827155][0][rocprofsys_finalize] Finalized: 0.048039 sec wall_clock, 0.640 MB peak_rss, 0.655 MB page_rss, 0.020000 sec cpu_clock, 41.6 % cpu_util
|
||||
[790.953] perfetto.cc:49204 Tracing session 1 ended, total sessions:0
|
||||
|
||||
Then review the output.
|
||||
|
||||
|
||||
@@ -301,7 +301,7 @@ Collected data is generally handled in one of the three following ways:
|
||||
In general, only instrumentation for relatively simple data is directly passed to
|
||||
Perfetto and/or Timemory during runtime.
|
||||
For example, the callbacks from binary instrumentation, user API instrumentation,
|
||||
and roctracer directly invoke
|
||||
and rocprofiler-sdk directly invoke
|
||||
calls to Perfetto or Timemory's storage model. Otherwise, the data is stored
|
||||
by ROCm Systems Profiler in the thread-data model
|
||||
which is more persistent than simply using ``thread_local`` static data, which gets deleted
|
||||
|
||||
@@ -320,13 +320,13 @@ get_internal_basic_libs_impl()
|
||||
"libunwind-setjmp.so",
|
||||
"libunwind.so",
|
||||
"libunwind-x86_64.so",
|
||||
"libpapi.so",
|
||||
"libpfm.so",
|
||||
"librocm_smi64.so",
|
||||
"libroctx64.so",
|
||||
"librocmtools.so",
|
||||
"libroctracer64.so",
|
||||
"librocprofiler64.so",
|
||||
"libpapi.so",
|
||||
"libpfm.so",
|
||||
"librocprofiler-register.so",
|
||||
"librocprofiler-sdk.so",
|
||||
"librocprofiler-sdk-roctx.so",
|
||||
|
||||
+2
-2
@@ -360,8 +360,8 @@ main(int argc, char** argv)
|
||||
itr, std::regex{
|
||||
"lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|"
|
||||
"instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|"
|
||||
"tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|"
|
||||
"amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|"
|
||||
"tbbmalloc_proxy|gotcha|libunwind|hsa-runtime|amdhip|"
|
||||
"amd_comgr|rocm_smi64|rocprofiler-register|"
|
||||
"rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" }))
|
||||
{
|
||||
if(!find(filepath::dirname(itr), lib_search_paths))
|
||||
|
||||
@@ -713,20 +713,11 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
}
|
||||
});
|
||||
|
||||
std::set<std::string> _backend_choices = { "all",
|
||||
"kokkosp",
|
||||
"mpip",
|
||||
"ompt",
|
||||
"rcclp",
|
||||
"rocm-smi",
|
||||
"roctracer",
|
||||
"rocprofiler",
|
||||
"roctx",
|
||||
"mutex-locks",
|
||||
"spin-locks",
|
||||
"rw-locks",
|
||||
"rocprofiler-sdk",
|
||||
"rocm" };
|
||||
std::set<std::string> _backend_choices = {
|
||||
"all", "kokkosp", "mpip", "ompt",
|
||||
"rcclp", "rocm-smi", "mutex-locks", "spin-locks",
|
||||
"rw-locks", "rocprofiler-sdk", "rocm"
|
||||
};
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
|
||||
_backend_choices.erase("mpip");
|
||||
|
||||
@@ -543,9 +543,10 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
_data.processed_environs.emplace("periods");
|
||||
}
|
||||
|
||||
strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt",
|
||||
"rcclp", "rocm-smi", "roctracer", "rocprofiler",
|
||||
"roctx", "mutex-locks", "spin-locks", "rw-locks" };
|
||||
strset_t _backend_choices = {
|
||||
"all", "kokkosp", "mpip", "ompt", "rcclp",
|
||||
"rocm-smi", "rocprofiler-sdk", "mutex-locks", "spin-locks", "rw-locks"
|
||||
};
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
|
||||
_backend_choices.erase("mpip");
|
||||
|
||||
@@ -39,8 +39,6 @@
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
ROCPROFSYS_DECLARE_COMPONENT(roctracer)
|
||||
ROCPROFSYS_DECLARE_COMPONENT(rocprofiler)
|
||||
ROCPROFSYS_DECLARE_COMPONENT(rcclp_handle)
|
||||
ROCPROFSYS_DECLARE_COMPONENT(comm_data)
|
||||
|
||||
@@ -127,12 +125,6 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, f
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type)
|
||||
#endif
|
||||
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer, project::rocprofsys,
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::external)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::rocprofiler, project::rocprofsys,
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::external, category::hardware_counter)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_wall_clock,
|
||||
project::rocprofsys, category::timing, os::supports_unix,
|
||||
category::sampling, category::interrupt_sampling)
|
||||
@@ -160,10 +152,6 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::roc
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::sampling, category::process_sampling)
|
||||
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::roctracer, "roctracer",
|
||||
"High-precision ROCm API and kernel tracing", "")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::rocprofiler, "rocprofiler",
|
||||
"ROCm kernel hardware counters", "")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_wall_clock,
|
||||
"sampling_wall_clock", "Wall-clock timing",
|
||||
"Derived from statistical sampling")
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/defines.hpp"
|
||||
|
||||
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
|
||||
|
||||
# if defined(HIP_INCLUDE_HIP_HIP_RUNTIME_H) || \
|
||||
defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H)
|
||||
# error \
|
||||
"include core/hip_runtime.hpp before <hip/hip_runtime.h> or <hip/hip_runtime_api.h>"
|
||||
# endif
|
||||
|
||||
# define HIP_PROF_HIP_API_STRING 1
|
||||
|
||||
// following must be included before <roctracer_hip.h> for ROCm 6.0+
|
||||
# if defined(USE_PROF_API)
|
||||
# undef USE_PROF_API
|
||||
# endif
|
||||
# include <hip/hip_runtime.h>
|
||||
# include <hip/hip_runtime_api.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <hip/hip_deprecated.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <roctracer/hip_ostream_ops.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <hip/amd_detail/hip_prof_str.h>
|
||||
|
||||
# include <hip/hip_version.h>
|
||||
#endif
|
||||
-1
@@ -126,7 +126,6 @@ backtrace::filter_and_patch(const std::vector<entry_type>& _data)
|
||||
if(_lbl.find("DYNINST_") != _npos) return 0;
|
||||
if(_lbl.find("rocprofsys_") != _npos) return -1;
|
||||
if(_lbl.find("rocprofiler_") != _npos) return -1;
|
||||
if(_lbl.find("roctracer_") != _npos) return -1;
|
||||
if(_lbl.find("perfetto::") != _npos) return -1;
|
||||
if(_lbl.find("protozero::") == 0) return -1;
|
||||
if(_lbl.find("gotcha_") != _npos) return -1;
|
||||
|
||||
+1
-1
@@ -121,7 +121,7 @@ stop_bundle(bundle_t& _bundle, int64_t _tid, Args&&... _args)
|
||||
{
|
||||
auto _wc = *_bundle.get<comp::wall_clock>();
|
||||
_wc.stop();
|
||||
// update roctracer_data
|
||||
// update data
|
||||
_bundle.store(std::plus<double>{}, _wc.get() * _wc.unit());
|
||||
// stop all
|
||||
_bundle.stop();
|
||||
|
||||
@@ -116,19 +116,6 @@ get_thread_pool_state()
|
||||
} // namespace
|
||||
} // namespace general
|
||||
|
||||
namespace roctracer
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto&
|
||||
get_thread_pool_state()
|
||||
{
|
||||
static auto _v = State::PreInit;
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace roctracer
|
||||
|
||||
void
|
||||
setup()
|
||||
{
|
||||
@@ -140,17 +127,6 @@ setup()
|
||||
void
|
||||
join()
|
||||
{
|
||||
if(roctracer::get_thread_pool_state() == State::Active)
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("waiting for all roctracer tasks to complete...\n");
|
||||
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
|
||||
roctracer::get_task_group(i).join();
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n");
|
||||
}
|
||||
|
||||
if(general::get_thread_pool_state() == State::Active)
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("waiting for all general tasks to complete...\n");
|
||||
@@ -162,22 +138,6 @@ join()
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
if(roctracer::get_thread_pool_state() == State::Active)
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("Waiting on completion of roctracer tasks...\n");
|
||||
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
|
||||
{
|
||||
roctracer::get_task_group(i).join();
|
||||
roctracer::get_task_group(i).clear();
|
||||
roctracer::get_task_group(i).set_pool(nullptr);
|
||||
}
|
||||
roctracer::get_thread_pool_state() = State::Finalized;
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("roctracer thread-pool is not active...\n");
|
||||
}
|
||||
|
||||
if(general::get_thread_pool_state() == State::Active)
|
||||
{
|
||||
ROCPROFSYS_DEBUG_F("Waiting on completion of general tasks...\n");
|
||||
@@ -219,16 +179,5 @@ general::get_task_group(int64_t _tid)
|
||||
return *_v;
|
||||
}
|
||||
|
||||
PTL::TaskGroup<void>&
|
||||
roctracer::get_task_group(int64_t _tid)
|
||||
{
|
||||
struct local
|
||||
{};
|
||||
using thread_data_t = thread_data<PTL::TaskGroup<void>, local>;
|
||||
static thread_local auto& _v = (roctracer::get_thread_pool_state() = State::Active,
|
||||
thread_data_t::instance(construct_on_thread{ _tid },
|
||||
&tasking::get_thread_pool()));
|
||||
return *_v;
|
||||
}
|
||||
} // namespace tasking
|
||||
} // namespace rocprofsys
|
||||
|
||||
@@ -56,16 +56,5 @@ PTL::TaskGroup<void>&
|
||||
get_task_group(int64_t _tid = utility::get_thread_index());
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
//
|
||||
// roctracer
|
||||
//
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
namespace roctracer
|
||||
{
|
||||
PTL::TaskGroup<void>&
|
||||
get_task_group(int64_t _tid = utility::get_thread_index());
|
||||
} // namespace roctracer
|
||||
} // namespace tasking
|
||||
} // namespace rocprofsys
|
||||
|
||||
@@ -39,7 +39,7 @@ namespace rocprofsys
|
||||
// InternalTID: zero-based, process-local thread-ID from atomic increment
|
||||
// from user-created threads and rocprof-sys-created threads.
|
||||
// This value may vary based on threads created by different
|
||||
// backends, e.g., roctracer will create threads
|
||||
// backends.
|
||||
//
|
||||
// SystemTID: system thread-ID. Should be same value as what is seen
|
||||
// in debugger, etc.
|
||||
|
||||
@@ -44,9 +44,7 @@ rocprofiler_systems_add_test(
|
||||
TARGET openmp-target
|
||||
GPU ON
|
||||
LABELS "openmp;openmp-target"
|
||||
ENVIRONMENT
|
||||
"${_ompt_environment};ROCPROFSYS_ROCTRACER_HSA_ACTIVITY=OFF;ROCPROFSYS_ROCTRACER_HSA_API=OFF"
|
||||
)
|
||||
ENVIRONMENT "${_ompt_environment}")
|
||||
|
||||
set(_ompt_sampling_environ
|
||||
"${_ompt_environment}"
|
||||
|
||||
Reference in New Issue
Block a user