Upgrade ROCm-SMI to AMD SMI (#86)

* Integrating amd-smi into rocprofiler-systems due to rocm-smi deprecation.
* No functionality changes to users other than naming conventions.
* New tracks available in Perfetto: the GPU busy percentage metric is now split into separate gfx, umc, and mm engine measurements.

---------

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
Co-authored-by: David Galiffi <David.Galiffi@amd.com>

[ROCm/rocprofiler-systems commit: 0c32dfd6bc]
Cette révision appartient à :
cfallows-amd
2025-01-30 21:32:07 -05:00
révisé par GitHub
Parent ba360d552c
révision 8c5db3f1d8
32 fichiers modifiés avec 533 ajouts et 363 suppressions
+1 -1
Voir le fichier
@@ -59,7 +59,7 @@ The documentation source files reside in the [`/docs`](/docs) folder of this rep
- HIP kernel tracing
- HSA API tracing
- HSA operation tracing
- System-level sampling (via rocm-smi)
- System-level sampling (via amd-smi)
- Memory usage
- Power usage
- Temperature
+2 -2
Voir le fichier
@@ -157,11 +157,11 @@ if(NOT ROCPROFSYS_BUILD_DYNINST)
endif()
endif()
if(ROCmVersion_FOUND)
set(_ROCM_SMI_SUFFIX
set(_AMD_SMI_SUFFIX
" (>= ${ROCmVersion_MAJOR_VERSION}.0.0.${ROCmVersion_NUMERIC_VERSION})")
endif()
if(ROCPROFSYS_USE_ROCM)
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocm-smi-lib${_ROCM_SMI_SUFFIX}")
list(APPEND _DEBIAN_PACKAGE_DEPENDS "amd-smi-lib${_AMD_SMI_SUFFIX}")
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocprofiler-sdk (>= ${rocprofiler-sdk_VERSION})")
endif()
if(ROCPROFSYS_USE_MPI)
+79
Voir le fichier
@@ -0,0 +1,79 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
# Copyright.txt or https://cmake.org/licensing for details.
# ----------------------------------------------------------------------------------------#
# Find module for the AMD SMI library (header amd_smi/amdsmi.h, library amd_smi).
#
# Search roots (only those that exist on disk are used, resolved to real paths):
#   ROCmVersion_DIR, ROCM_PATH (or the ROCM_PATH environment variable when the
#   CMake variable is not set), /opt/rocm, /opt/rocm/amd_smi
#
# Cache variables:
#   amd-smi_ROOT_DIR     - prefix containing include/amd_smi/amdsmi.h
#   amd-smi_INCLUDE_DIR  - directory containing amd_smi/amdsmi.h
#   amd-smi_LIBRARY      - full path to the amd_smi library
#   amd-smi_LIBRARY_DIR  - directory containing amd-smi_LIBRARY
#
# Result variables (set only when found):
#   amd-smi_FOUND, amd-smi_INCLUDE_DIRS, amd-smi_LIBRARIES, amd-smi_LIBRARY_DIRS
#
# Imported targets (created only when found):
#   amd-smi::amd-smi     - INTERFACE target carrying the include dir and library
#   amd-smi::roctx       - empty INTERFACE target (see note at its definition)
# ----------------------------------------------------------------------------------------#
include(FindPackageHandleStandardArgs)
# ----------------------------------------------------------------------------------------#
# Fall back to the environment when ROCM_PATH was not provided as a CMake variable.
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()

# Start from an empty list so a stale value in the including scope cannot leak in.
set(_AMD_SMI_PATHS)
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/amd_smi)
    # Quoted so that a path containing spaces or semicolons is tested as one argument.
    if(EXISTS "${_DIR}")
        get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
        list(APPEND _AMD_SMI_PATHS "${_ABS_DIR}")
    endif()
endforeach()
unset(_ABS_DIR)
# ----------------------------------------------------------------------------------------#
# Installation prefix: the directory that holds include/amd_smi/amdsmi.h.
find_path(
    amd-smi_ROOT_DIR
    NAMES include/amd_smi/amdsmi.h
    HINTS ${_AMD_SMI_PATHS}
    PATHS ${_AMD_SMI_PATHS}
    PATH_SUFFIXES amd_smi)
mark_as_advanced(amd-smi_ROOT_DIR)
# ----------------------------------------------------------------------------------------#
# Include directory: consumers include <amd_smi/amdsmi.h>.
find_path(
    amd-smi_INCLUDE_DIR
    NAMES amd_smi/amdsmi.h
    HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATH_SUFFIXES include amd_smi/include)
mark_as_advanced(amd-smi_INCLUDE_DIR)
# ----------------------------------------------------------------------------------------#
# Library. The sub-directory is spelled "amd_smi" (underscore) everywhere else in this
# module, so search amd_smi/lib{,64}; the hyphenated "amd-smi/lib" suffix is retained so
# that any layout found by the previous suffix list is still found. lib64 is searched
# explicitly for distributions that install 64-bit libraries there.
find_library(
    amd-smi_LIBRARY
    NAMES amd_smi
    HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATH_SUFFIXES amd_smi/lib amd_smi/lib64 amd-smi/lib lib lib64)
if(amd-smi_LIBRARY)
    # DIRECTORY is the current spelling of the legacy PATH mode.
    get_filename_component(amd-smi_LIBRARY_DIR "${amd-smi_LIBRARY}" DIRECTORY CACHE)
endif()
mark_as_advanced(amd-smi_LIBRARY)
# ----------------------------------------------------------------------------------------#
find_package_handle_standard_args(amd-smi DEFAULT_MSG amd-smi_ROOT_DIR
                                  amd-smi_INCLUDE_DIR amd-smi_LIBRARY)
# ------------------------------------------------------------------------------#
if(amd-smi_FOUND)
    set(amd-smi_INCLUDE_DIRS "${amd-smi_INCLUDE_DIR}")
    set(amd-smi_LIBRARIES "${amd-smi_LIBRARY}")
    set(amd-smi_LIBRARY_DIRS "${amd-smi_LIBRARY_DIR}")

    # Guard target creation: add_library() on an existing imported target is a hard
    # error, which would break a second find_package(amd-smi) in the same scope.
    if(NOT TARGET amd-smi::amd-smi)
        add_library(amd-smi::amd-smi INTERFACE IMPORTED)
        target_include_directories(amd-smi::amd-smi
                                   INTERFACE "${amd-smi_INCLUDE_DIR}")
        target_link_libraries(amd-smi::amd-smi INTERFACE "${amd-smi_LIBRARY}")
    endif()

    # NOTE(review): this target has no properties set in this module and looks like a
    # carry-over from the module this one was derived from. It is kept (guarded) only
    # so that anything already linking amd-smi::roctx keeps configuring; confirm
    # whether it can be dropped.
    if(NOT TARGET amd-smi::roctx)
        add_library(amd-smi::roctx INTERFACE IMPORTED)
    endif()
endif()
# ------------------------------------------------------------------------------#
unset(_AMD_SMI_PATHS)
# ------------------------------------------------------------------------------#
-79
Voir le fichier
@@ -1,79 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
# Copyright.txt or https://cmake.org/licensing for details.
# Find module for the ROCm SMI library (header rocm_smi/rocm_smi.h, library
# rocm_smi64 or rocm_smi). On success it sets rocm-smi_FOUND, rocm-smi_INCLUDE_DIRS,
# rocm-smi_LIBRARIES and rocm-smi_LIBRARY_DIRS and creates the imported targets
# rocm-smi::rocm-smi and rocm-smi::roctx.
include(FindPackageHandleStandardArgs)
# ----------------------------------------------------------------------------------------#
# Use the ROCM_PATH environment variable when the CMake variable is not set.
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()
# Collect the candidate roots that exist on disk, resolved to their real paths.
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocm_smi)
if(EXISTS ${_DIR})
get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
list(APPEND _ROCM_SMI_PATHS ${_ABS_DIR})
endif()
endforeach()
# ----------------------------------------------------------------------------------------#
# Installation prefix: the directory that holds include/rocm_smi/rocm_smi.h.
find_path(
rocm-smi_ROOT_DIR
NAMES include/rocm_smi/rocm_smi.h
HINTS ${_ROCM_SMI_PATHS}
PATHS ${_ROCM_SMI_PATHS}
PATH_SUFFIXES rocm_smi)
mark_as_advanced(rocm-smi_ROOT_DIR)
# ----------------------------------------------------------------------------------------#
# Include directory; the root found above is searched before the generic roots.
find_path(
rocm-smi_INCLUDE_DIR
NAMES rocm_smi/rocm_smi.h
HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
PATH_SUFFIXES include rocm_smi/include)
mark_as_advanced(rocm-smi_INCLUDE_DIR)
# ----------------------------------------------------------------------------------------#
# Library: rocm_smi64 is tried before rocm_smi, in lib and lib64 layouts.
find_library(
rocm-smi_LIBRARY
NAMES rocm_smi64 rocm_smi
HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
PATH_SUFFIXES rocm_smi/lib rocm_smi/lib64 lib lib64)
if(rocm-smi_LIBRARY)
# Cache the directory that contains the library that was found.
get_filename_component(rocm-smi_LIBRARY_DIR "${rocm-smi_LIBRARY}" PATH CACHE)
endif()
mark_as_advanced(rocm-smi_LIBRARY)
# ----------------------------------------------------------------------------------------#
# Sets rocm-smi_FOUND only when the root, include directory and library were all found.
find_package_handle_standard_args(rocm-smi DEFAULT_MSG rocm-smi_ROOT_DIR
rocm-smi_INCLUDE_DIR rocm-smi_LIBRARY)
# ------------------------------------------------------------------------------#
if(rocm-smi_FOUND)
# Imported interface target that carries the include directory and the library.
add_library(rocm-smi::rocm-smi INTERFACE IMPORTED)
# NOTE(review): rocm-smi::roctx is created but no properties are set on it in this
# module - confirm whether anything relies on it.
add_library(rocm-smi::roctx INTERFACE IMPORTED)
set(rocm-smi_INCLUDE_DIRS ${rocm-smi_INCLUDE_DIR})
set(rocm-smi_LIBRARIES ${rocm-smi_LIBRARY})
set(rocm-smi_LIBRARY_DIRS ${rocm-smi_LIBRARY_DIR})
target_include_directories(rocm-smi::rocm-smi INTERFACE ${rocm-smi_INCLUDE_DIR})
target_link_libraries(rocm-smi::rocm-smi INTERFACE ${rocm-smi_LIBRARY})
endif()
# ------------------------------------------------------------------------------#
# Drop the temporary search-root list so it does not leak into the including scope.
unset(_ROCM_SMI_PATHS)
# ------------------------------------------------------------------------------#
+2 -2
Voir le fichier
@@ -192,8 +192,8 @@ if(ROCPROFSYS_USE_ROCM)
target_link_libraries(rocprofiler-systems-rocm
INTERFACE rocprofiler-sdk::rocprofiler-sdk)
find_package(rocm-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
target_link_libraries(rocprofiler-systems-rocm INTERFACE rocm-smi::rocm-smi)
find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi)
# find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
# target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi)
+1 -1
Voir le fichier
@@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \
zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
zypper --non-interactive --gpg-auto-import-keys refresh && \
zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
zypper --non-interactive install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
zypper --non-interactive clean --all; \
fi
+1 -1
Voir le fichier
@@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \
yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
yum install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
yum clean all; \
fi
+1 -1
Voir le fichier
@@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \
apt-get install -y ./${AMDGPU_DEB} && \
apt-get update && \
apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
apt-get install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
apt-get autoclean; \
fi
+1 -1
Voir le fichier
@@ -52,7 +52,7 @@ GPU metrics
* HIP kernel tracing
* HSA API tracing
* HSA operation tracing
* System-level sampling (via rocm-smi)
* System-level sampling (via amd-smi)
* Memory usage
* Power usage
Fichier binaire non affiché.

Avant

Largeur:  |  Hauteur:  |  Taille: 313 KiB

Après

Largeur:  |  Hauteur:  |  Taille: 433 KiB

+7 -7
Voir le fichier
@@ -229,7 +229,7 @@ Generating a default configuration file
ROCPROFSYS_USE_SAMPLING = false
ROCPROFSYS_USE_PROCESS_SAMPLING = true
ROCPROFSYS_USE_ROCM = true
ROCPROFSYS_USE_ROCM_SMI = true
ROCPROFSYS_USE_AMD_SMI = true
ROCPROFSYS_USE_KOKKOSP = false
ROCPROFSYS_USE_CODE_COVERAGE = false
ROCPROFSYS_USE_PID = true
@@ -384,7 +384,7 @@ Viewing the setting descriptions
| ROCPROFSYS_USE_OMPT | Enable support for OpenMP-Tools |
| ROCPROFSYS_TRACE | Enable perfetto backend |
| ROCPROFSYS_USE_PID | Enable tagging filenames with proces... |
| ROCPROFSYS_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... |
| ROCPROFSYS_USE_AMD_SMI | Enable sampling GPU power, temp, uti... |
| ROCPROFSYS_USE_ROCM | Enable ROCM tracing |
| ROCPROFSYS_USE_SAMPLING | Enable statistical sampling of call-... |
| ROCPROFSYS_USE_PROCESS_SAMPLING | Enable a background thread which sam... |
@@ -461,11 +461,11 @@ Viewing components
| sampling_wall_clock | Wall-clock timing. Derived from statistic... |
| sampling_cpu_clock | CPU-clock timing. Derived from statistica... |
| sampling_percent | Fraction of wall-clock time spent in func... |
| sampling_gpu_power | GPU Power Usage via ROCm-SMI. Derived fro... |
| sampling_gpu_temp | GPU Temperature via ROCm-SMI. Derived fro... |
| sampling_gpu_busy | GPU Utilization (% busy) via ROCm-SMI. De... |
| sampling_vcn_busy | GPU VCN Utilization (% activity) via ROCm... |
| sampling_gpu_memory_usage | GPU Memory Usage via ROCm-SMI. Derived fr... |
| sampling_gpu_power | GPU Power Usage via AMD-SMI. Derived from... |
| sampling_gpu_temp | GPU Temperature via AMD-SMI. Derived from... |
| sampling_gpu_busy | GPU Utilization (% busy) via AMD-SMI. Der... |
| sampling_vcn_busy | GPU VCN Utilization (% activity) via AMD-... |
| sampling_gpu_memory_usage | GPU Memory Usage via AMD-SMI. Derived fro... |
|-----------------------------------|----------------------------------------------|
Viewing hardware counters
@@ -246,7 +246,7 @@ view the help menu.
libprofiler.so
libpthread.so.0
libresolv.so.2
librocm_smi64.so
libamd_smi.so
librocmtools.so
librocprofiler64.so
libroctracer64.so
+5 -5
Voir le fichier
@@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option
[BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
Include data from these backends (count: unlimited)
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
Exclude data from these backends (count: unlimited)
[HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H
@@ -293,7 +293,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_RCCLP=true
ROCPROFSYS_USE_ROCM_SMI=true
ROCPROFSYS_USE_AMD_SMI=true
ROCPROFSYS_USE_ROCM=true
ROCPROFSYS_USE_SAMPLING=true
ROCPROFSYS_PROFILE=true
@@ -323,7 +323,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_RCCLP=false
ROCPROFSYS_USE_ROCM_SMI=false
ROCPROFSYS_USE_AMD_SMI=false
ROCPROFSYS_USE_ROCM=false
ROCPROFSYS_USE_SAMPLING=true
ROCPROFSYS_PROFILE=true
@@ -354,7 +354,7 @@ Here is the full output from the previous
ROCPROFSYS_USE_OMPT=false
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_RCCLP=false
ROCPROFSYS_USE_ROCM_SMI=false
ROCPROFSYS_USE_AMD_SMI=false
ROCPROFSYS_USE_ROCM=false
ROCPROFSYS_USE_SAMPLING=true
[rocprof-sys][dl][1785877] rocprofsys_main
+1 -1
Voir le fichier
@@ -340,7 +340,7 @@ generate_config(std::string _config_file, const std::set<std::string>& _config_f
{ "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE",
"ROCPROFSYS_PROFILE", "ROCPROFSYS_USE_SAMPLING",
"ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCM",
"ROCPROFSYS_USE_ROCM_SMI", "ROCPROFSYS_USE_KOKKOSP",
"ROCPROFSYS_USE_AMD_SMI", "ROCPROFSYS_USE_KOKKOSP",
"ROCPROFSYS_USE_OMPT", "ROCPROFSYS_USE", "ROCPROFSYS_OUTPUT" })
{
if(_lhs->get_env_name().find(itr) == 0 &&
@@ -361,7 +361,7 @@ main(int argc, char** argv)
"lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|"
"instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|"
"tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|"
"amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|"
"amd_comgr|amd_smi|rocprofiler64|rocprofiler-register|"
"rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" }))
{
if(!find(filepath::dirname(itr), lib_search_paths))
+5 -5
Voir le fichier
@@ -441,7 +441,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
auto _h = p.get<bool>("host");
auto _d = p.get<bool>("device");
update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_env, "ROCPROFSYS_USE_ROCM_SMI", _d);
update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d);
});
parser
.add_argument({ "-w", "--wait" },
@@ -718,7 +718,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
"mpip",
"ompt",
"rcclp",
"rocm-smi",
"amd-smi",
"roctracer",
"rocprofiler",
"roctx",
@@ -742,7 +742,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
#if !defined(ROCPROFSYS_USE_ROCM)
_backend_choices.erase("rocm");
_backend_choices.erase("rocm-smi");
_backend_choices.erase("amd-smi");
_backend_choices.erase("rocprofiler-sdk");
#endif
@@ -761,7 +761,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
@@ -785,7 +785,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
+6 -8
Voir le fichier
@@ -475,11 +475,11 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
auto _h = p.get<bool>("host");
auto _d = p.get<bool>("device");
update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", _d);
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d);
});
_data.processed_environs.emplace("device");
_data.processed_environs.emplace("rocm_smi");
_data.processed_environs.emplace("amd_smi");
}
if(_data.environ_filter("wait", _data))
@@ -544,7 +544,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
}
strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt",
"rcclp", "rocm-smi", "roctracer", "rocprofiler",
"rcclp", "amd-smi", "roctracer", "rocprofiler",
"roctx", "mutex-locks", "spin-locks", "rw-locks" };
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
@@ -561,7 +561,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
#if !defined(ROCPROFSYS_USE_ROCM)
_backend_choices.erase("amd-smi");
_backend_choices.erase("rocm-smi");
_backend_choices.erase("rocprofiler-sdk");
_backend_choices.erase("rocm");
#endif
@@ -571,7 +570,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
// remove GPU-specific backends
_backend_choices.erase("rcclp");
_backend_choices.erase("amd-smi");
_backend_choices.erase("rocm-smi");
_backend_choices.erase("rocprofiler-sdk");
_backend_choices.erase("rocm");
@@ -580,7 +578,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
#endif
#if defined(ROCPROFSYS_USE_ROCM)
update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", false);
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", false);
update_env(_data, "ROCPROFSYS_USE_ROCM", false);
#endif
}
@@ -606,7 +604,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
@@ -640,7 +638,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
+16 -12
Voir le fichier
@@ -100,12 +100,14 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_RO
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_counter_collection, ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, "rocm_counter_collection", "ROCm device counter collection")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_MARKER_API, "rocm_marker_api", "ROCTx labels")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi, ROCPROFSYS_CATEGORY_ROCM_SMI, "rocm_smi", "rocm-smi data")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, "device_busy", "Busy percentage of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi, ROCPROFSYS_CATEGORY_AMD_SMI, "amd_smi", "amd-smi data")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_gfxbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, "device_busy_gfx", "Busy percentage of GFX engine on a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_umcbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, "device_busy_umc", "Busy percentage of UMC on a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_mmbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM, "device_busy_mm", "Busy percentage of a MM engine on a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_temp, ROCPROFSYS_CATEGORY_AMD_SMI_TEMP, "device_temp", "Temperature of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_power, ROCPROFSYS_CATEGORY_AMD_SMI_POWER, "device_power", "Power consumption of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_memory_usage, ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_vcn_activity, ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions")
ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions")
ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions")
@@ -163,12 +165,14 @@ using name = perfetto_category<Tp...>;
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_counter_collection), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_marker_api), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_busy), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_gfxbusy), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_umcbusy), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_mmbusy), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_temp), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_power), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_memory_usage), \
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_vcn_activity), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \
ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \
ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \
+59 -23
Voir le fichier
@@ -74,7 +74,11 @@ struct backtrace_cpu_clock
{};
struct backtrace_fraction
{};
struct backtrace_gpu_busy
struct backtrace_gpu_busy_gfx
{};
struct backtrace_gpu_busy_umc
{};
struct backtrace_gpu_busy_mm
{};
struct backtrace_gpu_temp
{};
@@ -84,14 +88,16 @@ struct backtrace_gpu_memory
{};
struct backtrace_gpu_vcn
{};
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
using sampling_percent = data_tracker<double, backtrace_fraction>;
using sampling_gpu_busy = data_tracker<double, backtrace_gpu_busy>;
using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
using sampling_percent = data_tracker<double, backtrace_fraction>;
using sampling_gpu_busy_gfx = data_tracker<double, backtrace_gpu_busy_gfx>;
using sampling_gpu_busy_umc = data_tracker<double, backtrace_gpu_busy_umc>;
using sampling_gpu_busy_mm = data_tracker<double, backtrace_gpu_busy_mm>;
using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;
template <typename ApiT, typename StartFuncT = default_functor_t,
typename StopFuncT = default_functor_t>
@@ -120,7 +126,12 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_percent, fals
#endif
#if !defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_gfx,
false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_umc,
false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_mm,
false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type)
@@ -142,9 +153,18 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_cpu_clock, project::r
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_percent, project::rocprofsys,
category::timing, os::supports_unix, category::sampling,
category::interrupt_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy, project::rocprofsys,
tpls::rocm, device::gpu, os::supports_linux,
category::sampling, category::process_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_gfx,
project::rocprofsys, tpls::rocm, device::gpu,
os::supports_linux, category::sampling,
category::process_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_umc,
project::rocprofsys, tpls::rocm, device::gpu,
os::supports_linux, category::sampling,
category::process_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_mm,
project::rocprofsys, tpls::rocm, device::gpu,
os::supports_linux, category::sampling,
category::process_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_memory,
project::rocprofsys, tpls::rocm, device::gpu,
os::supports_linux, category::memory, category::sampling,
@@ -174,28 +194,38 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_percent,
"sampling_percent",
"Fraction of wall-clock time spent in functions",
"Derived from statistical sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy,
"sampling_gpu_busy",
"GPU Utilization (% busy) via ROCm-SMI",
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_gfx,
"sampling_gpu_busy_gfx",
"GFX engine GPU Utilization (% busy) via AMD SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_umc,
"sampling_gpu_busy_umc",
"Memory controller GPU Utilization (% busy) via AMD SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_mm,
"sampling_gpu_busy_mm",
"Multimedia engine GPU Utilization (% busy) via AMD SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_memory,
"sampling_gpu_memory_usage",
"GPU Memory Usage via ROCm-SMI", "Derived from sampling")
"GPU Memory Usage via AMD SMI", "Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_power,
"sampling_gpu_power", "GPU Power Usage via ROCm-SMI",
"sampling_gpu_power", "GPU Power Usage via AMD SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_temp,
"sampling_gpu_temp", "GPU Temperature via ROCm-SMI",
"sampling_gpu_temp", "GPU Temperature via AMD SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn,
"sampling_gpu_vcn",
"GPU VCN Utilization (% activity) via ROCm-SMI",
"GPU VCN Utilization (% activity) via AMD SMI",
"Derived from sampling")
// statistics type
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_cpu_clock, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_gfx, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_umc, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_mm, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double)
@@ -215,7 +245,11 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_timing_units, component::sampling_cpu_cloc
true_type)
// enable percent units
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy,
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_gfx,
true_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_umc,
true_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_mm,
true_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_percent,
true_type)
@@ -227,7 +261,9 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_memory_units, component::sampling_gpu_memo
true_type)
// reporting categories (sum)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_gfx, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_umc, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_mm, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type)
+17 -22
Voir le fichier
@@ -316,9 +316,9 @@ configure_settings(bool _init)
"rocm");
ROCPROFSYS_CONFIG_SETTING(
bool, "ROCPROFSYS_USE_ROCM_SMI",
bool, "ROCPROFSYS_USE_AMD_SMI",
"Enable sampling GPU power, temp, utilization, vcn_activity and memory usage",
true, "backend", "rocm_smi", "rocm", "process_sampling");
true, "backend", "amd_smi", "rocm", "process_sampling");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING",
"Enable statistical sampling of call-stack", false,
@@ -478,17 +478,12 @@ configure_settings(bool _init)
"'none' suppresses all CPU frequency sampling",
std::string{}, "process_sampling");
ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_DEVICES",
"[DEPRECATED] Renamed to ROCPROFSYS_SAMPLING_GPUS",
std::string{ "all" }, "rocm_smi", "rocm",
"process_sampling", "deprecated", "advanced");
ROCPROFSYS_CONFIG_SETTING(
std::string, "ROCPROFSYS_SAMPLING_GPUS",
"Devices to query when ROCPROFSYS_USE_ROCM_SMI=ON. Values should be separated by "
"Devices to query when ROCPROFSYS_USE_AMD_SMI=ON. Values should be separated by "
"commas and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies "
"'all' and 'none' suppresses all GPU sampling",
std::string{ "all" }, "rocm_smi", "rocm", "process_sampling");
std::string{ "all" }, "amd_smi", "rocm", "process_sampling");
ROCPROFSYS_CONFIG_SETTING(
std::string, "ROCPROFSYS_SAMPLING_TIDS",
@@ -627,9 +622,9 @@ configure_settings(bool _init)
rocprofiler_sdk::config_settings(_config);
ROCPROFSYS_CONFIG_SETTING(
std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
"rocm-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage",
"busy,temp,power,vcn_activity,mem_usage", "backend", "rocm_smi", "rocm",
std::string, "ROCPROFSYS_AMD_SMI_METRICS",
"amd-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage",
"busy,temp,power,vcn_activity,mem_usage", "backend", "amd_smi", "rocm",
"process_sampling", "advanced");
ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB",
@@ -1030,7 +1025,7 @@ configure_settings(bool _init)
_combine_perfetto_traces->second->set(_config->get<bool>("collapse_processes"));
}
handle_deprecated_setting("ROCPROFSYS_ROCM_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS");
handle_deprecated_setting("ROCPROFSYS_AMD_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS");
handle_deprecated_setting("ROCPROFSYS_USE_THREAD_SAMPLING",
"ROCPROFSYS_USE_PROCESS_SAMPLING");
handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE");
@@ -1104,7 +1099,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
_set("ROCPROFSYS_TRACE", false);
_set("ROCPROFSYS_PROFILE", false);
_set("ROCPROFSYS_USE_CAUSAL", false);
_set("ROCPROFSYS_USE_ROCM_SMI", false);
_set("ROCPROFSYS_USE_AMD_SMI", false);
_set("ROCPROFSYS_USE_KOKKOSP", false);
_set("ROCPROFSYS_USE_RCCLP", false);
_set("ROCPROFSYS_USE_OMPT", false);
@@ -1129,10 +1124,10 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
{
#if ROCPROFSYS_ROCM_VERSION > 0
ROCPROFSYS_BASIC_VERBOSE(
1, "No ROCm devices were found: disabling rocm and rocm_smi...\n");
1, "No ROCm devices were found: disabling rocm and amd_smi...\n");
#endif
_set("ROCPROFSYS_USE_ROCM", false);
_set("ROCPROFSYS_USE_ROCM_SMI", false);
_set("ROCPROFSYS_USE_AMD_SMI", false);
}
if(_config->get<bool>("ROCPROFSYS_USE_KOKKOSP"))
@@ -1165,7 +1160,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
_set("ROCPROFSYS_PROFILE", false);
_set("ROCPROFSYS_USE_CAUSAL", false);
_set("ROCPROFSYS_USE_ROCM", false);
_set("ROCPROFSYS_USE_ROCM_SMI", false);
_set("ROCPROFSYS_USE_AMD_SMI", false);
_set("ROCPROFSYS_USE_KOKKOSP", false);
_set("ROCPROFSYS_USE_RCCLP", false);
_set("ROCPROFSYS_USE_OMPT", false);
@@ -1349,12 +1344,12 @@ configure_disabled_settings(const std::shared_ptr<settings>& _config)
_handle_use_option("ROCPROFSYS_PROFILE", "timemory");
_handle_use_option("ROCPROFSYS_USE_OMPT", "ompt");
_handle_use_option("ROCPROFSYS_USE_RCCLP", "rcclp");
_handle_use_option("ROCPROFSYS_USE_ROCM_SMI", "rocm_smi");
_handle_use_option("ROCPROFSYS_USE_AMD_SMI", "amd_smi");
_handle_use_option("ROCPROFSYS_USE_ROCM", "rocm");
#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0
_config->find("ROCPROFSYS_USE_ROCM_SMI")->second->set_hidden(true);
for(const auto& itr : _config->disable_category("rocm_smi"))
_config->find("ROCPROFSYS_USE_AMD_SMI")->second->set_hidden(true);
for(const auto& itr : _config->disable_category("amd_smi"))
_config->find(itr)->second->set_hidden(true);
#endif
@@ -1813,10 +1808,10 @@ get_use_causal()
}
bool
get_use_rocm_smi()
get_use_amd_smi()
{
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
static auto _v = get_config()->find("ROCPROFSYS_USE_ROCM_SMI");
static auto _v = get_config()->find("ROCPROFSYS_USE_AMD_SMI");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
#else
return false;
+1 -1
Voir le fichier
@@ -205,7 +205,7 @@ bool&
get_use_causal() ROCPROFSYS_HOT;
bool
get_use_rocm_smi() ROCPROFSYS_HOT;
get_use_amd_smi() ROCPROFSYS_HOT;
bool&
get_use_sampling() ROCPROFSYS_HOT;
+91 -45
Voir le fichier
@@ -42,7 +42,7 @@
#include <timemory/manager.hpp>
#if ROCPROFSYS_USE_ROCM > 0
# include <rocm_smi/rocm_smi.h>
# include <amd_smi/amdsmi.h>
# include <rocprofiler-sdk/agent.h>
# include <rocprofiler-sdk/cxx/serialization.hpp>
# include <rocprofiler-sdk/fwd.h>
@@ -55,40 +55,43 @@ namespace gpu
namespace
{
#if ROCPROFSYS_USE_ROCM > 0
# define ROCPROFSYS_ROCM_SMI_CALL(ERROR_CODE) \
::rocprofsys::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__)
# define ROCPROFSYS_AMD_SMI_CALL(ERROR_CODE) \
::rocprofsys::gpu::check_amdsmi_error(ERROR_CODE, __FILE__, __LINE__)
void
check_rsmi_error(rsmi_status_t _code, const char* _file, int _line)
check_amdsmi_error(amdsmi_status_t _code, const char* _file, int _line)
{
if(_code == RSMI_STATUS_SUCCESS) return;
if(_code == AMDSMI_STATUS_SUCCESS) return;
const char* _msg = nullptr;
auto _err = rsmi_status_string(_code, &_msg);
if(_err != RSMI_STATUS_SUCCESS)
ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
auto _err = amdsmi_status_code_to_string(_code, &_msg);
if(_err != AMDSMI_STATUS_SUCCESS)
ROCPROFSYS_THROW(
"amdsmi_status_code_to_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
_msg);
}
bool
rsmi_init()
amdsmi_init()
{
auto _rsmi_init = []() {
auto _amdsmi_init = []() {
try
{
ROCPROFSYS_ROCM_SMI_CALL(::rsmi_init(0));
// Currently, only AMDSMI_INIT_AMD_GPUS is supported
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
get_processor_handles();
} catch(std::exception& _e)
{
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n",
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n",
_e.what());
return false;
}
return true;
}();
return _rsmi_init;
return _amdsmi_init;
}
#endif // ROCPROFSYS_USE_ROCM > 0
@@ -126,7 +129,7 @@ query_rocm_gpu_agents()
} // namespace
int
rocm_device_count()
device_count()
{
#if ROCPROFSYS_USE_ROCM > 0
static int _num_devices = query_rocm_gpu_agents();
@@ -136,38 +139,13 @@ rocm_device_count()
#endif
}
int
rsmi_device_count()
bool
initialize_amdsmi()
{
#if ROCPROFSYS_USE_ROCM > 0
if(!rsmi_init()) return 0;
static auto _num_devices = []() {
uint32_t _v = 0;
try
{
ROCPROFSYS_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v));
} catch(std::exception& _e)
{
ROCPROFSYS_BASIC_VERBOSE(
1, "Exception thrown getting the rocm-smi devices: %s\n", _e.what());
}
return _v;
}();
return _num_devices;
return (amdsmi_init()) ? true : false;
#else
return 0;
#endif
}
int
device_count()
{
#if ROCPROFSYS_USE_ROCM > 0
return rocm_device_count();
#else
return 0;
return false;
#endif
}
@@ -217,5 +195,73 @@ add_device_metadata()
}
});
}
#if ROCPROFSYS_USE_ROCM > 0
/*
* Required amdsmi methods to get processors and handles
*/
uint32_t processors::total_processor_count = 0;
std::vector<amdsmi_processor_handle> processors::processors_list = {};
void
get_processor_handles()
{
uint32_t socket_count;
uint32_t processor_count;
// Passing nullptr will return us the number of sockets available for read in this
// system
auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
if(ret != AMDSMI_STATUS_SUCCESS)
{
return;
}
std::vector<amdsmi_socket_handle> sockets(socket_count);
ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
for(auto& socket : sockets)
{
// Passing nullptr will return us the number of processors available for read for
// this socket
ret = amdsmi_get_processor_handles(socket, &processor_count, nullptr);
if(ret != AMDSMI_STATUS_SUCCESS)
{
return;
}
std::vector<amdsmi_processor_handle> all_processors(processor_count);
ret =
amdsmi_get_processor_handles(socket, &processor_count, all_processors.data());
if(ret != AMDSMI_STATUS_SUCCESS)
{
return;
}
for(auto& processor : all_processors)
{
processor_type_t processor_type = {};
ret = amdsmi_get_processor_type(processor, &processor_type);
if(processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU)
{
ROCPROFSYS_THROW("Not AMD_GPU device type!");
return;
}
processors::processors_list.push_back(processor);
}
}
processors::total_processor_count = processors::processors_list.size();
}
// Number of processor handles recorded by the last get_processor_handles() call
// (zero until that call has stored a result).
uint32_t
get_processor_count()
{
    const uint32_t discovered = processors::total_processor_count;
    return discovered;
}
// Look up the amd-smi processor handle stored at index dev_id in
// processors::processors_list.
//
// Returns a value-initialized (null) handle when dev_id is outside the list instead
// of indexing out of bounds, which was undefined behaviour.
// NOTE(review): callers forward the handle to amd-smi calls wrapped in the error
// checking macros; a null handle is presumably rejected there with an error status --
// confirm against the amd-smi API.
amdsmi_processor_handle
get_handle_from_id(uint32_t dev_id)
{
    const auto& handles = processors::processors_list;
    if(dev_id >= handles.size()) return amdsmi_processor_handle{};
    return handles[dev_id];
}
#endif
} // namespace gpu
} // namespace rocprofsys
+28 -5
Voir le fichier
@@ -22,18 +22,41 @@
#pragma once
#if ROCPROFSYS_USE_ROCM > 0
# include <amd_smi/amdsmi.h>
#endif
namespace rocprofsys
{
namespace gpu
{
#if ROCPROFSYS_USE_ROCM > 0
void
get_processor_handles();
uint32_t
get_processor_count();
amdsmi_processor_handle
get_handle_from_id(uint32_t dev_id);
// Static storage for the amd-smi processor handles: filled by
// get_processor_handles() and read through get_processor_count() and
// get_handle_from_id().
struct processors
{
    // Processor handles; get_handle_from_id() indexes this list by device id
    static std::vector<amdsmi_processor_handle> processors_list;
    // Size of processors_list as recorded by get_processor_handles()
    static uint32_t total_processor_count;

private:
    // Free functions that populate and query the storage above
    friend amdsmi_processor_handle rocprofsys::gpu::get_handle_from_id(uint32_t dev_id);
    friend uint32_t                rocprofsys::gpu::get_processor_count();
    friend void                    rocprofsys::gpu::get_processor_handles();
};
#endif
int
device_count();
int
rocm_device_count();
int
rsmi_device_count();
bool
initialize_amdsmi();
void
add_device_metadata();
@@ -52,12 +52,14 @@ extern "C"
ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION,
ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION,
ROCPROFSYS_CATEGORY_ROCM_MARKER_API,
ROCPROFSYS_CATEGORY_ROCM_SMI,
ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY,
ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP,
ROCPROFSYS_CATEGORY_ROCM_SMI_POWER,
ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE,
ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY,
ROCPROFSYS_CATEGORY_AMD_SMI,
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX,
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC,
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM,
ROCPROFSYS_CATEGORY_AMD_SMI_TEMP,
ROCPROFSYS_CATEGORY_AMD_SMI_POWER,
ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE,
ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY,
ROCPROFSYS_CATEGORY_ROCM_RCCL,
ROCPROFSYS_CATEGORY_SAMPLING,
ROCPROFSYS_CATEGORY_PTHREAD,
+3 -3
Voir le fichier
@@ -22,7 +22,7 @@ set(library_headers
${CMAKE_CURRENT_LIST_DIR}/ptl.hpp
${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp
${CMAKE_CURRENT_LIST_DIR}/rocm.hpp
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.hpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp
${CMAKE_CURRENT_LIST_DIR}/runtime.hpp
${CMAKE_CURRENT_LIST_DIR}/sampling.hpp
@@ -44,7 +44,7 @@ if(ROCPROFSYS_USE_ROCM)
rocprofiler-systems-object-library
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm.cpp
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp)
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp)
add_subdirectory(rocprofiler-sdk)
endif()
@@ -58,7 +58,7 @@ set(ndebug_sources
${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp
${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp
${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
${CMAKE_CURRENT_LIST_DIR}/ompt.cpp)
set_source_files_properties(
@@ -30,7 +30,7 @@
# undef NDEBUG
#endif
#include "library/rocm_smi.hpp"
#include "library/amd_smi.hpp"
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
@@ -48,8 +48,6 @@
#include <timemory/utility/delimit.hpp>
#include <timemory/utility/locking.hpp>
#include <rocm_smi/rocm_smi.h>
#include <cassert>
#include <chrono>
#include <ios>
@@ -59,22 +57,22 @@
#include <sys/resource.h>
#include <thread>
#define ROCPROFSYS_ROCM_SMI_CALL(...) \
::rocprofsys::rocm_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
#define ROCPROFSYS_AMD_SMI_CALL(...) \
::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
namespace rocprofsys
{
namespace rocm_smi
namespace amd_smi
{
using bundle_t = std::deque<data>;
using sampler_instances = thread_data<bundle_t, category::rocm_smi>;
using sampler_instances = thread_data<bundle_t, category::amd_smi>;
namespace
{
auto&
get_settings(uint32_t _dev_id)
{
static auto _v = std::unordered_map<uint32_t, rocm_smi::settings>{};
static auto _v = std::unordered_map<uint32_t, amd_smi::settings>{};
return _v[_dev_id];
}
@@ -86,22 +84,23 @@ is_initialized()
}
void
check_error(const char* _file, int _line, rsmi_status_t _code, bool* _option = nullptr)
check_error(const char* _file, int _line, amdsmi_status_t _code, bool* _option = nullptr)
{
if(_code == RSMI_STATUS_SUCCESS)
if(_code == AMDSMI_STATUS_SUCCESS)
return;
else if(_code == RSMI_STATUS_NOT_SUPPORTED && _option)
else if(_code == AMDSMI_STATUS_NOT_SUPPORTED && _option)
{
*_option = false;
return;
}
const char* _msg = nullptr;
auto _err = rsmi_status_string(_code, &_msg);
if(_err != RSMI_STATUS_SUCCESS)
ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
auto _err = amdsmi_status_code_to_string(_code, &_msg);
if(_err != AMDSMI_STATUS_SUCCESS)
ROCPROFSYS_THROW(
"amdsmi_status_code_to_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
_msg);
}
@@ -127,7 +126,7 @@ data::sample(uint32_t _dev_id)
{
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
assert(_ts < std::numeric_limits<int64_t>::max());
rsmi_gpu_metrics_t _gpu_metrics;
amdsmi_gpu_metrics_t _gpu_metrics;
auto _state = get_state().load();
@@ -136,47 +135,55 @@ data::sample(uint32_t _dev_id)
m_dev_id = _dev_id;
m_ts = _ts;
#define ROCPROFSYS_RSMI_GET(OPTION, FUNCTION, ...) \
#define ROCPROFSYS_AMDSMI_GET(OPTION, FUNCTION, ...) \
if(OPTION) \
{ \
try \
{ \
ROCPROFSYS_ROCM_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \
ROCPROFSYS_AMD_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \
} catch(std::runtime_error & _e) \
{ \
ROCPROFSYS_VERBOSE_F( \
0, "[%s] Exception: %s. Disabling future samples from rocm-smi...\n", \
0, "[%s] Exception: %s. Disabling future samples from amd-smi...\n", \
#FUNCTION, _e.what()); \
get_state().store(State::Disabled); \
} \
}
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).busy, rsmi_dev_busy_percent_get, _dev_id,
&m_busy_perc);
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).temp, rsmi_dev_temp_metric_get, _dev_id,
RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &m_temp);
RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER;
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).power, rsmi_dev_power_get, _dev_id,
&m_power, &power_type)
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get,
_dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage);
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity,
rsmi_dev_gpu_metrics_info_get, _dev_id, &_gpu_metrics);
amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_dev_id);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity,
sample_handle, &m_busy_perc);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric,
sample_handle, AMDSMI_TEMPERATURE_TYPE_JUNCTION,
AMDSMI_TEMP_CURRENT, &m_temp);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).power, amdsmi_get_power_info,
sample_handle, &m_power)
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage,
sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).vcn_activity,
amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics);
for(const auto& activity : _gpu_metrics.vcn_activity)
{
if(activity != UINT16_MAX) m_vcn_metrics.push_back(activity);
}
#undef ROCPROFSYS_RSMI_GET
#undef ROCPROFSYS_AMDSMI_GET
}
void
data::print(std::ostream& _os) const
{
std::stringstream _ss{};
_ss << "device: " << m_dev_id << ", busy = " << m_busy_perc << "%, temp = " << m_temp
<< ", power = " << m_power << ", memory usage = " << m_mem_usage;
#if ROCPROFSYS_USE_ROCM > 0
_ss << "device: " << m_dev_id << ", gpu busy: = " << m_busy_perc.gfx_activity
<< "%, mm busy: = " << m_busy_perc.mm_activity
<< "%, umc busy: = " << m_busy_perc.umc_activity << "%, temp = " << m_temp
<< ", current power = " << m_power.current_socket_power
<< ", memory usage = " << m_mem_usage;
#endif
_os << _ss.str();
}
@@ -209,8 +216,8 @@ sample()
{
for(auto itr : data::device_list)
{
if(rocm_smi::get_state() != State::Active) continue;
ROCPROFSYS_DEBUG_F("Polling rocm-smi for device %u...\n", itr);
if(amd_smi::get_state() != State::Active) continue;
ROCPROFSYS_DEBUG_F("Polling amd-smi for device %u...\n", itr);
auto& _data = *_bundle_data.at(itr);
if(!_data) continue;
_data->emplace_back(data{ itr });
@@ -221,7 +228,7 @@ sample()
void
set_state(State _v)
{
rocm_smi::get_state().store(_v);
amd_smi::get_state().store(_v);
}
std::vector<data>&
@@ -235,15 +242,15 @@ bool
data::setup()
{
perfetto_counter_track<data>::init();
rocm_smi::set_state(State::PreInit);
amd_smi::set_state(State::PreInit);
return true;
}
bool
data::shutdown()
{
ROCPROFSYS_DEBUG("Shutting down rocm-smi...\n");
rocm_smi::set_state(State::Finalized);
ROCPROFSYS_DEBUG("Shutting down amd-smi...\n");
amd_smi::set_state(State::Finalized);
return true;
}
@@ -261,7 +268,9 @@ data::shutdown()
void
data::post_process(uint32_t _dev_id)
{
using component::sampling_gpu_busy;
using component::sampling_gpu_busy_gfx;
using component::sampling_gpu_busy_mm;
using component::sampling_gpu_busy_umc;
using component::sampling_gpu_memory;
using component::sampling_gpu_power;
using component::sampling_gpu_temp;
@@ -269,12 +278,12 @@ data::post_process(uint32_t _dev_id)
if(device_count < _dev_id) return;
auto& _rocm_smi_v = sampler_instances::get()->at(_dev_id);
auto _rocm_smi = (_rocm_smi_v) ? *_rocm_smi_v : std::deque<rocm_smi::data>{};
auto& _amd_smi_v = sampler_instances::get()->at(_dev_id);
auto _amd_smi = (_amd_smi_v) ? *_amd_smi_v : std::deque<amd_smi::data>{};
const auto& _thread_info = thread_info::get(0, InternalTID);
ROCPROFSYS_VERBOSE(1, "Post-processing %zu rocm-smi samples from device %u\n",
_rocm_smi.size(), _dev_id);
ROCPROFSYS_VERBOSE(1, "Post-processing %zu amd-smi samples from device %u\n",
_amd_smi.size(), _dev_id);
ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0");
if(!_thread_info) return;
@@ -282,18 +291,23 @@ data::post_process(uint32_t _dev_id)
auto _settings = get_settings(_dev_id);
auto _process_perfetto = [&]() {
auto _idx = std::array<uint64_t, 5>{};
auto _idx = std::array<uint64_t, 7>{};
{
_idx.fill(_idx.size());
uint64_t nidx = 0;
if(_settings.busy) _idx.at(0) = nidx++;
if(_settings.temp) _idx.at(1) = nidx++;
if(_settings.power) _idx.at(2) = nidx++;
if(_settings.mem_usage) _idx.at(3) = nidx++;
if(_settings.vcn_activity) _idx.at(4) = nidx++;
if(_settings.busy)
{
_idx.at(0) = nidx++;
_idx.at(1) = nidx++;
_idx.at(2) = nidx++;
}
if(_settings.temp) _idx.at(3) = nidx++;
if(_settings.power) _idx.at(4) = nidx++;
if(_settings.mem_usage) _idx.at(5) = nidx++;
if(_settings.vcn_activity) _idx.at(6) = nidx++;
}
for(auto& itr : _rocm_smi)
for(auto& itr : _amd_smi)
{
using counter_track = perfetto_counter_track<data>;
if(itr.m_dev_id != _dev_id) continue;
@@ -303,11 +317,16 @@ data::post_process(uint32_t _dev_id)
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
};
if(_settings.busy) counter_track::emplace(_dev_id, addendum("Busy"), "%");
if(_settings.busy)
{
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
}
if(_settings.temp)
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
if(_settings.power)
counter_track::emplace(_dev_id, addendum("Power"), "watts");
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
if(_settings.mem_usage)
counter_track::emplace(_dev_id, addendum("Memory Usage"),
"megabytes");
@@ -323,26 +342,34 @@ data::post_process(uint32_t _dev_id)
uint64_t _ts = itr.m_ts;
if(!_thread_info->is_valid_time(_ts)) continue;
double _busy = itr.m_busy_perc;
double _temp = itr.m_temp / 1.0e3;
double _power = itr.m_power / 1.0e6;
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
double _gfxbusy = itr.m_busy_perc.gfx_activity;
double _umcbusy = itr.m_busy_perc.umc_activity;
double _mmbusy = itr.m_busy_perc.mm_activity;
double _temp = itr.m_temp;
double _power = itr.m_power.current_socket_power;
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
if(_settings.busy)
TRACE_COUNTER("device_busy", counter_track::at(_dev_id, _idx.at(0)), _ts,
_busy);
{
TRACE_COUNTER("device_busy_gfx", counter_track::at(_dev_id, _idx.at(0)),
_ts, _gfxbusy);
TRACE_COUNTER("device_busy_umc", counter_track::at(_dev_id, _idx.at(1)),
_ts, _umcbusy);
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, _idx.at(2)),
_ts, _mmbusy);
}
if(_settings.temp)
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(1)), _ts,
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(3)), _ts,
_temp);
if(_settings.power)
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(2)), _ts,
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(4)), _ts,
_power);
if(_settings.mem_usage)
TRACE_COUNTER("device_memory_usage",
counter_track::at(_dev_id, _idx.at(3)), _ts, _usage);
counter_track::at(_dev_id, _idx.at(5)), _ts, _usage);
if(_settings.vcn_activity)
{
uint64_t idx = _idx.at(4);
uint64_t idx = _idx.at(6);
for(const auto& temp : itr.m_vcn_metrics)
{
TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx),
@@ -361,14 +388,14 @@ data::post_process(uint32_t _dev_id)
void
setup()
{
auto_lock_t _lk{ type_mutex<category::rocm_smi>() };
auto_lock_t _lk{ type_mutex<category::amd_smi>() };
if(is_initialized() || !get_use_rocm_smi()) return;
if(is_initialized() || !get_use_amd_smi()) return;
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
// assign the data value to determined by rocm-smi
data::device_count = device_count();
if(!gpu::initialize_amdsmi()) return;
data::device_count = gpu::get_processor_count();
auto _devices_v = get_sampling_gpus();
for(auto& itr : _devices_v)
@@ -421,14 +448,15 @@ setup()
data::device_list = _devices;
auto _metrics = get_setting_value<std::string>("ROCPROFSYS_ROCM_SMI_METRICS");
auto _metrics = get_setting_value<std::string>("ROCPROFSYS_AMD_SMI_METRICS");
try
{
for(auto itr : _devices)
{
uint16_t dev_id = 0;
ROCPROFSYS_ROCM_SMI_CALL(rsmi_dev_id_get(itr, &dev_id));
ROCPROFSYS_AMD_SMI_CALL(
amdsmi_get_gpu_id(gpu::get_handle_from_id(itr), &dev_id));
// dev_id holds the device ID of device i, upon a successful call
if(_metrics && !_metrics->empty())
@@ -447,10 +475,10 @@ setup()
{
auto iitr = supported.find(metric);
if(iitr == supported.end())
ROCPROFSYS_FAIL_F("unsupported rocm-smi metric: %s\n",
ROCPROFSYS_FAIL_F("unsupported amd-smi metric: %s\n",
metric.c_str());
ROCPROFSYS_VERBOSE_F(1, "Enabling rocm-smi metric '%s'\n",
ROCPROFSYS_VERBOSE_F(1, "Enabling amd-smi metric '%s'\n",
metric.c_str());
iitr->second = true;
}
@@ -462,7 +490,7 @@ setup()
data::setup();
} catch(std::runtime_error& _e)
{
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing rocm-smi: %s\n",
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n",
_e.what());
data::device_list = {};
}
@@ -471,7 +499,7 @@ setup()
void
shutdown()
{
auto_lock_t _lk{ type_mutex<category::rocm_smi>() };
auto_lock_t _lk{ type_mutex<category::amd_smi>() };
if(!is_initialized()) return;
@@ -479,11 +507,11 @@ shutdown()
{
if(data::shutdown())
{
ROCPROFSYS_ROCM_SMI_CALL(rsmi_shut_down());
ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down());
}
} catch(std::runtime_error& _e)
{
ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down rocm-smi: %s\n",
ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down amd-smi: %s\n",
_e.what());
}
@@ -500,14 +528,22 @@ post_process()
uint32_t
device_count()
{
return gpu::rsmi_device_count();
return gpu::device_count();
}
} // namespace rocm_smi
} // namespace amd_smi
} // namespace rocprofsys
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy>), true,
double)
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_gfx>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_umc>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_mm>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_temp>), true,
@@ -34,6 +34,10 @@
#include "core/state.hpp"
#include "library/thread_data.hpp"
#if ROCPROFSYS_USE_ROCM > 0
# include <amd_smi/amdsmi.h>
#endif
#include <chrono>
#include <cstdint>
#include <deque>
@@ -47,7 +51,7 @@
namespace rocprofsys
{
namespace rocm_smi
namespace amd_smi
{
void
setup();
@@ -66,9 +70,6 @@ post_process();
void set_state(State);
uint32_t
device_count();
struct settings
{
bool busy = true;
@@ -86,7 +87,7 @@ struct data
using promise_t = std::promise<void>;
using timestamp_t = int64_t;
using power_t = uint64_t;
using power_t = uint32_t;
using busy_perc_t = uint32_t;
using mem_usage_t = uint64_t;
using temp_t = int64_t;
@@ -102,11 +103,16 @@ struct data
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
timestamp_t m_ts = 0;
busy_perc_t m_busy_perc = 0;
temp_t m_temp = 0;
power_t m_power = 0;
mem_usage_t m_mem_usage = 0;
std::vector<uint16_t> m_vcn_metrics = {};
#if ROCPROFSYS_USE_ROCM > 0
amdsmi_engine_usage_t m_busy_perc = {};
amdsmi_power_info_t m_power = {};
#else
std::vector<busy_perc_t> m_busy_perc = {};
std::vector<power_t> m_power = {};
#endif
friend std::ostream& operator<<(std::ostream& _os, const data& _v)
{
@@ -115,11 +121,11 @@ struct data
}
private:
friend void rocprofsys::rocm_smi::setup();
friend void rocprofsys::rocm_smi::config();
friend void rocprofsys::rocm_smi::sample();
friend void rocprofsys::rocm_smi::shutdown();
friend void rocprofsys::rocm_smi::post_process();
friend void rocprofsys::amd_smi::setup();
friend void rocprofsys::amd_smi::config();
friend void rocprofsys::amd_smi::sample();
friend void rocprofsys::amd_smi::shutdown();
friend void rocprofsys::amd_smi::post_process();
static size_t device_count;
static std::set<uint32_t> device_list;
@@ -154,7 +160,7 @@ post_process()
inline void set_state(State) {}
#endif
} // namespace rocm_smi
} // namespace amd_smi
} // namespace rocprofsys
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
@@ -166,8 +172,16 @@ inline void set_state(State) {}
# include <timemory/operations.hpp>
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy>), true,
double)
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_gfx>),
true, double)
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_umc>),
true, double)
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_mm>),
true, double)
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_temp>), true,
+8 -8
Voir le fichier
@@ -23,8 +23,8 @@
#include "library/process_sampler.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "library/amd_smi.hpp"
#include "library/cpu_freq.hpp"
#include "library/rocm_smi.hpp"
#include "library/runtime.hpp"
#include <memory>
@@ -140,14 +140,14 @@ sampler::setup()
// shutdown if already running
shutdown();
if(get_use_rocm_smi())
if(get_use_amd_smi())
{
auto& _rocm_smi = instances.emplace_back(std::make_unique<instance>());
_rocm_smi->setup = []() { rocm_smi::setup(); };
_rocm_smi->shutdown = []() { rocm_smi::shutdown(); };
_rocm_smi->post_process = []() { rocm_smi::post_process(); };
_rocm_smi->config = []() { rocm_smi::config(); };
_rocm_smi->sample = []() { rocm_smi::sample(); };
auto& _amd_smi = instances.emplace_back(std::make_unique<instance>());
_amd_smi->setup = []() { amd_smi::setup(); };
_amd_smi->shutdown = []() { amd_smi::shutdown(); };
_amd_smi->post_process = []() { amd_smi::post_process(); };
_amd_smi->config = []() { amd_smi::config(); };
_amd_smi->sample = []() { amd_smi::sample(); };
}
auto& _cpu_freq = instances.emplace_back(std::make_unique<instance>());
+1 -1
Voir le fichier
@@ -25,7 +25,7 @@
#include "core/debug.hpp"
#include "core/dynamic_library.hpp"
#include "core/gpu.hpp"
#include "library/rocm_smi.hpp"
#include "library/amd_smi.hpp"
#include "library/rocprofiler-sdk.hpp"
#include "library/runtime.hpp"
#include "library/thread_data.hpp"
+6 -6
Voir le fichier
@@ -30,8 +30,8 @@
#include "core/perfetto.hpp"
#include "core/rocprofiler-sdk.hpp"
#include "core/state.hpp"
#include "library/amd_smi.hpp"
#include "library/components/category_region.hpp"
#include "library/rocm_smi.hpp"
#include "library/rocprofiler-sdk/counters.hpp"
#include "library/rocprofiler-sdk/fwd.hpp"
#include "library/thread_info.hpp"
@@ -1116,10 +1116,10 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
gpu::add_device_metadata();
if(config::get_use_process_sampling() && config::get_use_rocm_smi())
if(config::get_use_process_sampling() && config::get_use_amd_smi())
{
ROCPROFSYS_VERBOSE_F(1, "Setting rocm_smi state to active...\n");
rocm_smi::set_state(State::Active);
ROCPROFSYS_VERBOSE_F(1, "Setting amd_smi state to active...\n");
amd_smi::set_state(State::Active);
}
start();
@@ -1137,8 +1137,8 @@ tool_fini(void* callback_data)
flush();
stop();
if(config::get_use_process_sampling() && config::get_use_rocm_smi())
rocm_smi::shutdown();
if(config::get_use_process_sampling() && config::get_use_amd_smi())
amd_smi::shutdown();
if(get_counter_storage())
{
+22 -6
Voir le fichier
@@ -125,7 +125,9 @@ using component::backtrace_timestamp;
using component::backtrace_wall_clock; // NOLINT
using component::callchain;
using component::sampling_cpu_clock;
using component::sampling_gpu_busy;
using component::sampling_gpu_busy_gfx;
using component::sampling_gpu_busy_mm;
using component::sampling_gpu_busy_umc;
using component::sampling_gpu_memory;
using component::sampling_gpu_power;
using component::sampling_gpu_temp;
@@ -1551,11 +1553,25 @@ struct sampling_initialization
sampling_percent::description() = "Percentage of samples";
sampling_percent::set_precision(3);
sampling_gpu_busy::label() = "sampling_gpu_busy_percent";
sampling_gpu_busy::description() = "Utilization of GPU(s)";
sampling_gpu_busy::set_precision(0);
sampling_gpu_busy::set_format_flags(sampling_gpu_busy::get_format_flags() &
std::ios_base::showpoint);
sampling_gpu_busy_gfx::label() = "sampling_gpu_busy_gfx_percent";
sampling_gpu_busy_gfx::description() = "Utilization of GFX engines on GPU(s)";
sampling_gpu_busy_gfx::set_precision(0);
sampling_gpu_busy_gfx::set_format_flags(
sampling_gpu_busy_gfx::get_format_flags() & std::ios_base::showpoint);
sampling_gpu_busy_umc::label() = "sampling_gpu_busy_umc_percent";
sampling_gpu_busy_umc::description() =
"Utilization of memory controller on GPU(s)";
sampling_gpu_busy_umc::set_precision(0);
sampling_gpu_busy_umc::set_format_flags(
sampling_gpu_busy_umc::get_format_flags() & std::ios_base::showpoint);
sampling_gpu_busy_mm::label() = "sampling_gpu_busy_mm_percent";
sampling_gpu_busy_mm::description() =
"Utilization of multimedia engines on GPU(s)";
sampling_gpu_busy_mm::set_precision(0);
sampling_gpu_busy_mm::set_format_flags(sampling_gpu_busy_mm::get_format_flags() &
std::ios_base::showpoint);
sampling_gpu_memory::label() = "sampling_gpu_memory_usage";
sampling_gpu_memory::description() = "Memory usage of GPU(s)";
+14 -14
Voir le fichier
@@ -229,19 +229,19 @@ set(_VALID_GPU OFF)
if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU))
set(_VALID_GPU ON)
find_program(
ROCPROFSYS_ROCM_SMI_EXE
NAMES rocm-smi
ROCPROFSYS_AMD_SMI_EXE
NAMES amd-smi
HINTS ${ROCmVersion_DIR}
PATHS ${ROCmVersion_DIR}
PATH_SUFFIXES bin)
if(ROCPROFSYS_ROCM_SMI_EXE)
if(ROCPROFSYS_AMD_SMI_EXE)
execute_process(
COMMAND ${ROCPROFSYS_ROCM_SMI_EXE}
OUTPUT_VARIABLE _RSMI_OUT
ERROR_VARIABLE _RSMI_ERR
RESULT_VARIABLE _RSMI_RET)
if(_RSMI_RET EQUAL 0)
if("${_RSMI_OUTPUT}" MATCHES "ERROR" OR "${_RSMI_ERR}" MATCHES "ERROR")
COMMAND ${ROCPROFSYS_AMD_SMI_EXE}
OUTPUT_VARIABLE _AMDSMI_OUT
ERROR_VARIABLE _AMDSMI_ERR
RESULT_VARIABLE _AMDSMI_RET)
if(_AMDSMI_RET EQUAL 0)
# A zero exit status is not sufficient: also treat "ERROR" in either captured stream
# as a failed amd-smi run. execute_process() stores stdout in _AMDSMI_OUT (the check
# previously read the never-set _AMDSMI_OUTPUT, so stdout was not inspected at all).
if("${_AMDSMI_OUT}" MATCHES "ERROR" OR "${_AMDSMI_ERR}" MATCHES "ERROR")
    set(_VALID_GPU OFF)
endif()
else()
@@ -250,7 +250,7 @@ if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU))
endif()
if(NOT _VALID_GPU)
rocprofiler_systems_message(
AUTHOR_WARNING "rocm-smi did not successfully run. Disabling GPU tests...")
AUTHOR_WARNING "amd-smi did not successfully run. Disabling GPU tests...")
endif()
endif()
@@ -433,7 +433,7 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST)
endif()
if(NOT "ROCPROFSYS_USE_ROCM=OFF" IN_LIST TEST_ENVIRONMENT)
list(APPEND TEST_LABELS "rocm-smi")
list(APPEND TEST_LABELS "amd-smi")
endif()
endif()
@@ -442,9 +442,9 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST)
list(APPEND TEST_LABELS "rocm")
endif()
if("ROCPROFSYS_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm-smi" IN_LIST
TEST_ENVIRONMENT)
list(APPEND TEST_LABELS "rocm-smi")
# Tag tests that explicitly enable amd-smi sampling. The duplicate guard must look at
# TEST_LABELS: TEST_ENVIRONMENT holds VAR=VALUE entries, so the bare label "amd-smi"
# is never found there and the label could be appended twice.
if("ROCPROFSYS_USE_AMD_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "amd-smi" IN_LIST
                                                            TEST_LABELS)
    list(APPEND TEST_LABELS "amd-smi")
endif()
if(TARGET ${TEST_TARGET})