Upgrade ROCm-SMI to AMD SMI (#86)
* Integrating amd-smi into rocprofiler-systems due to rocm-smi deprecation. * No functionality changes for users other than naming conventions. * New tracks are available in Perfetto: the GPU busy percentage metric is now split into separate gfx, umc, and mm engine measurements. --------- Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com> Co-authored-by: David Galiffi <David.Galiffi@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
af3a8f695f
Коммит
0c32dfd6bc
+1
-1
@@ -59,7 +59,7 @@ The documentation source files reside in the [`/docs`](/docs) folder of this rep
|
||||
- HIP kernel tracing
|
||||
- HSA API tracing
|
||||
- HSA operation tracing
|
||||
- System-level sampling (via rocm-smi)
|
||||
- System-level sampling (via amd-smi)
|
||||
- Memory usage
|
||||
- Power usage
|
||||
- Temperature
|
||||
|
||||
@@ -157,11 +157,11 @@ if(NOT ROCPROFSYS_BUILD_DYNINST)
|
||||
endif()
|
||||
endif()
|
||||
if(ROCmVersion_FOUND)
|
||||
set(_ROCM_SMI_SUFFIX
|
||||
set(_AMD_SMI_SUFFIX
|
||||
" (>= ${ROCmVersion_MAJOR_VERSION}.0.0.${ROCmVersion_NUMERIC_VERSION})")
|
||||
endif()
|
||||
if(ROCPROFSYS_USE_ROCM)
|
||||
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocm-smi-lib${_ROCM_SMI_SUFFIX}")
|
||||
list(APPEND _DEBIAN_PACKAGE_DEPENDS "amd-smi-lib${_AMD_SMI_SUFFIX}")
|
||||
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocprofiler-sdk (>= ${rocprofiler-sdk_VERSION})")
|
||||
endif()
|
||||
if(ROCPROFSYS_USE_MPI)
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
|
||||
# Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Fall back to the ROCM_PATH environment variable when no ROCM_PATH CMake variable
# has been provided (read once, at configure time).
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()

# Build the list of candidate prefixes: only directories that exist are kept, and
# each one is appended in its REALPATH (symlink-resolved) form.
foreach(_DIR IN ITEMS ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/amd_smi)
    if(EXISTS "${_DIR}")
        get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
        list(APPEND _AMD_SMI_PATHS "${_ABS_DIR}")
    endif()
endforeach()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Locate the install prefix, i.e. the directory that contains
# include/amd_smi/amdsmi.h. The candidate list is passed as both HINTS and PATHS,
# and PATH_SUFFIXES additionally tries an amd_smi/ subdirectory of each entry.
find_path(amd-smi_ROOT_DIR
          NAMES include/amd_smi/amdsmi.h
          HINTS ${_AMD_SMI_PATHS}
          PATHS ${_AMD_SMI_PATHS}
          PATH_SUFFIXES amd_smi)

# Hide the resulting cache entry from the non-advanced cache view.
mark_as_advanced(amd-smi_ROOT_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Locate the include directory, i.e. the directory from which the header can be
# included as <amd_smi/amdsmi.h>. The detected root (if any) is searched ahead of
# the generic candidate prefixes; include/ and amd_smi/include/ are tried under each.
find_path(amd-smi_INCLUDE_DIR
          NAMES amd_smi/amdsmi.h
          HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
          PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
          PATH_SUFFIXES include amd_smi/include)

# Hide the resulting cache entry from the non-advanced cache view.
mark_as_advanced(amd-smi_INCLUDE_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Locate the AMD SMI library (searched under the name "amd_smi").
#
# The suffix list uses the underscore spelling (amd_smi/...) so it agrees with the
# /opt/rocm/amd_smi search root and the amd_smi/include header suffix used by the
# other lookups in this module. lib64 variants are searched as well, for installs
# that place libraries there. The hyphenated amd-smi/lib suffix from the previous
# revision is retained so any layout that resolved before still resolves.
find_library(
    amd-smi_LIBRARY
    NAMES amd_smi
    HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS}
    PATH_SUFFIXES amd_smi/lib amd_smi/lib64 amd-smi/lib lib lib64)

# Record the directory that holds the library as a cache entry; it is later
# exported as amd-smi_LIBRARY_DIRS. Skipped when the library was not found.
if(amd-smi_LIBRARY)
    get_filename_component(amd-smi_LIBRARY_DIR "${amd-smi_LIBRARY}" DIRECTORY CACHE)
endif()

# Hide the resulting cache entry from the non-advanced cache view.
mark_as_advanced(amd-smi_LIBRARY)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Set amd-smi_FOUND only when the root directory, include directory and library
# were all located; emits the standard found / not-found message (and the fatal
# error when the package was requested as REQUIRED).
find_package_handle_standard_args(amd-smi DEFAULT_MSG amd-smi_ROOT_DIR
                                  amd-smi_INCLUDE_DIR amd-smi_LIBRARY)

# ------------------------------------------------------------------------------#

if(amd-smi_FOUND)
    # Legacy-style result variables for consumers that do not link the target.
    set(amd-smi_INCLUDE_DIRS ${amd-smi_INCLUDE_DIR})
    set(amd-smi_LIBRARIES ${amd-smi_LIBRARY})
    set(amd-smi_LIBRARY_DIRS ${amd-smi_LIBRARY_DIR})

    # add_library() errors out if the target name already exists, so guard the
    # definition: this keeps a repeated find_package(amd-smi) in the same
    # directory scope from failing the configure step.
    if(NOT TARGET amd-smi::amd-smi)
        add_library(amd-smi::amd-smi INTERFACE IMPORTED)
        target_include_directories(amd-smi::amd-smi
                                   INTERFACE "${amd-smi_INCLUDE_DIR}")
        target_link_libraries(amd-smi::amd-smi INTERFACE "${amd-smi_LIBRARY}")
    endif()

    # NOTE(review): this target carries no include directories or libraries and
    # looks like a leftover from the module this file was derived from. It is
    # kept (guarded) so any existing reference keeps resolving -- confirm whether
    # it can be removed.
    if(NOT TARGET amd-smi::roctx)
        add_library(amd-smi::roctx INTERFACE IMPORTED)
    endif()
endif()

# ------------------------------------------------------------------------------#

# Drop the module-private search list so it does not leak into the caller's scope.
unset(_AMD_SMI_PATHS)
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
@@ -1,79 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
|
||||
# Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
|
||||
set(ROCM_PATH "$ENV{ROCM_PATH}")
|
||||
endif()
|
||||
|
||||
foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocm_smi)
|
||||
if(EXISTS ${_DIR})
|
||||
get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
|
||||
list(APPEND _ROCM_SMI_PATHS ${_ABS_DIR})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_path(
|
||||
rocm-smi_ROOT_DIR
|
||||
NAMES include/rocm_smi/rocm_smi.h
|
||||
HINTS ${_ROCM_SMI_PATHS}
|
||||
PATHS ${_ROCM_SMI_PATHS}
|
||||
PATH_SUFFIXES rocm_smi)
|
||||
|
||||
mark_as_advanced(rocm-smi_ROOT_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_path(
|
||||
rocm-smi_INCLUDE_DIR
|
||||
NAMES rocm_smi/rocm_smi.h
|
||||
HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
|
||||
PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
|
||||
PATH_SUFFIXES include rocm_smi/include)
|
||||
|
||||
mark_as_advanced(rocm-smi_INCLUDE_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_library(
|
||||
rocm-smi_LIBRARY
|
||||
NAMES rocm_smi64 rocm_smi
|
||||
HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
|
||||
PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS}
|
||||
PATH_SUFFIXES rocm_smi/lib rocm_smi/lib64 lib lib64)
|
||||
|
||||
if(rocm-smi_LIBRARY)
|
||||
get_filename_component(rocm-smi_LIBRARY_DIR "${rocm-smi_LIBRARY}" PATH CACHE)
|
||||
endif()
|
||||
|
||||
mark_as_advanced(rocm-smi_LIBRARY)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_package_handle_standard_args(rocm-smi DEFAULT_MSG rocm-smi_ROOT_DIR
|
||||
rocm-smi_INCLUDE_DIR rocm-smi_LIBRARY)
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
if(rocm-smi_FOUND)
|
||||
add_library(rocm-smi::rocm-smi INTERFACE IMPORTED)
|
||||
add_library(rocm-smi::roctx INTERFACE IMPORTED)
|
||||
set(rocm-smi_INCLUDE_DIRS ${rocm-smi_INCLUDE_DIR})
|
||||
set(rocm-smi_LIBRARIES ${rocm-smi_LIBRARY})
|
||||
set(rocm-smi_LIBRARY_DIRS ${rocm-smi_LIBRARY_DIR})
|
||||
|
||||
target_include_directories(rocm-smi::rocm-smi INTERFACE ${rocm-smi_INCLUDE_DIR})
|
||||
target_link_libraries(rocm-smi::rocm-smi INTERFACE ${rocm-smi_LIBRARY})
|
||||
|
||||
endif()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
unset(_ROCM_SMI_PATHS)
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
@@ -192,8 +192,8 @@ if(ROCPROFSYS_USE_ROCM)
|
||||
target_link_libraries(rocprofiler-systems-rocm
|
||||
INTERFACE rocprofiler-sdk::rocprofiler-sdk)
|
||||
|
||||
find_package(rocm-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
|
||||
target_link_libraries(rocprofiler-systems-rocm INTERFACE rocm-smi::rocm-smi)
|
||||
find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
|
||||
target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi)
|
||||
|
||||
# find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED)
|
||||
# target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi)
|
||||
|
||||
@@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \
|
||||
zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
zypper --non-interactive --gpg-auto-import-keys refresh && \
|
||||
zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
|
||||
zypper --non-interactive install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
|
||||
zypper --non-interactive clean --all; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
|
||||
dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \
|
||||
yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
|
||||
yum install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
|
||||
yum clean all; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \
|
||||
apt-get install -y ./${AMDGPU_DEB} && \
|
||||
apt-get update && \
|
||||
apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
|
||||
apt-get install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \
|
||||
apt-get autoclean; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ GPU metrics
|
||||
* HIP kernel tracing
|
||||
* HSA API tracing
|
||||
* HSA operation tracing
|
||||
* System-level sampling (via rocm-smi)
|
||||
* System-level sampling (via amd-smi)
|
||||
|
||||
* Memory usage
|
||||
* Power usage
|
||||
|
||||
Двоичные данные
Двоичный файл не отображается.
|
До Ширина: | Высота: | Размер: 313 KiB После Ширина: | Высота: | Размер: 433 KiB |
@@ -229,7 +229,7 @@ Generating a default configuration file
|
||||
ROCPROFSYS_USE_SAMPLING = false
|
||||
ROCPROFSYS_USE_PROCESS_SAMPLING = true
|
||||
ROCPROFSYS_USE_ROCM = true
|
||||
ROCPROFSYS_USE_ROCM_SMI = true
|
||||
ROCPROFSYS_USE_AMD_SMI = true
|
||||
ROCPROFSYS_USE_KOKKOSP = false
|
||||
ROCPROFSYS_USE_CODE_COVERAGE = false
|
||||
ROCPROFSYS_USE_PID = true
|
||||
@@ -384,7 +384,7 @@ Viewing the setting descriptions
|
||||
| ROCPROFSYS_USE_OMPT | Enable support for OpenMP-Tools |
|
||||
| ROCPROFSYS_TRACE | Enable perfetto backend |
|
||||
| ROCPROFSYS_USE_PID | Enable tagging filenames with proces... |
|
||||
| ROCPROFSYS_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... |
|
||||
| ROCPROFSYS_USE_AMD_SMI | Enable sampling GPU power, temp, uti... |
|
||||
| ROCPROFSYS_USE_ROCM | Enable ROCM tracing |
|
||||
| ROCPROFSYS_USE_SAMPLING | Enable statistical sampling of call-... |
|
||||
| ROCPROFSYS_USE_PROCESS_SAMPLING | Enable a background thread which sam... |
|
||||
@@ -461,11 +461,11 @@ Viewing components
|
||||
| sampling_wall_clock | Wall-clock timing. Derived from statistic... |
|
||||
| sampling_cpu_clock | CPU-clock timing. Derived from statistica... |
|
||||
| sampling_percent | Fraction of wall-clock time spent in func... |
|
||||
| sampling_gpu_power | GPU Power Usage via ROCm-SMI. Derived fro... |
|
||||
| sampling_gpu_temp | GPU Temperature via ROCm-SMI. Derived fro... |
|
||||
| sampling_gpu_busy | GPU Utilization (% busy) via ROCm-SMI. De... |
|
||||
| sampling_vcn_busy | GPU VCN Utilization (% activity) via ROCm... |
|
||||
| sampling_gpu_memory_usage | GPU Memory Usage via ROCm-SMI. Derived fr... |
|
||||
| sampling_gpu_power | GPU Power Usage via AMD-SMI. Derived from... |
|
||||
| sampling_gpu_temp | GPU Temperature via AMD-SMI. Derived from... |
|
||||
| sampling_gpu_busy | GPU Utilization (% busy) via AMD-SMI. Der... |
|
||||
| sampling_vcn_busy | GPU VCN Utilization (% activity) via AMD-... |
|
||||
| sampling_gpu_memory_usage | GPU Memory Usage via AMD-SMI. Derived fro... |
|
||||
|-----------------------------------|----------------------------------------------|
|
||||
|
||||
Viewing hardware counters
|
||||
|
||||
@@ -246,7 +246,7 @@ view the help menu.
|
||||
libprofiler.so
|
||||
libpthread.so.0
|
||||
libresolv.so.2
|
||||
librocm_smi64.so
|
||||
libamd_smi.so
|
||||
librocmtools.so
|
||||
librocprofiler64.so
|
||||
libroctracer64.so
|
||||
|
||||
@@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option
|
||||
|
||||
[BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation
|
||||
|
||||
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
-I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
Include data from these backends (count: unlimited)
|
||||
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
-E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ]
|
||||
Exclude data from these backends (count: unlimited)
|
||||
|
||||
[HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H
|
||||
@@ -293,7 +293,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
|
||||
ROCPROFSYS_TRACE=true
|
||||
ROCPROFSYS_USE_PROCESS_SAMPLING=true
|
||||
ROCPROFSYS_USE_RCCLP=true
|
||||
ROCPROFSYS_USE_ROCM_SMI=true
|
||||
ROCPROFSYS_USE_AMD_SMI=true
|
||||
ROCPROFSYS_USE_ROCM=true
|
||||
ROCPROFSYS_USE_SAMPLING=true
|
||||
ROCPROFSYS_PROFILE=true
|
||||
@@ -323,7 +323,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
|
||||
ROCPROFSYS_TRACE=true
|
||||
ROCPROFSYS_USE_PROCESS_SAMPLING=true
|
||||
ROCPROFSYS_USE_RCCLP=false
|
||||
ROCPROFSYS_USE_ROCM_SMI=false
|
||||
ROCPROFSYS_USE_AMD_SMI=false
|
||||
ROCPROFSYS_USE_ROCM=false
|
||||
ROCPROFSYS_USE_SAMPLING=true
|
||||
ROCPROFSYS_PROFILE=true
|
||||
@@ -354,7 +354,7 @@ Here is the full output from the previous
|
||||
ROCPROFSYS_USE_OMPT=false
|
||||
ROCPROFSYS_USE_PROCESS_SAMPLING=true
|
||||
ROCPROFSYS_USE_RCCLP=false
|
||||
ROCPROFSYS_USE_ROCM_SMI=false
|
||||
ROCPROFSYS_USE_AMD_SMI=false
|
||||
ROCPROFSYS_USE_ROCM=false
|
||||
ROCPROFSYS_USE_SAMPLING=true
|
||||
[rocprof-sys][dl][1785877] rocprofsys_main
|
||||
|
||||
@@ -340,7 +340,7 @@ generate_config(std::string _config_file, const std::set<std::string>& _config_f
|
||||
{ "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE",
|
||||
"ROCPROFSYS_PROFILE", "ROCPROFSYS_USE_SAMPLING",
|
||||
"ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCM",
|
||||
"ROCPROFSYS_USE_ROCM_SMI", "ROCPROFSYS_USE_KOKKOSP",
|
||||
"ROCPROFSYS_USE_AMD_SMI", "ROCPROFSYS_USE_KOKKOSP",
|
||||
"ROCPROFSYS_USE_OMPT", "ROCPROFSYS_USE", "ROCPROFSYS_OUTPUT" })
|
||||
{
|
||||
if(_lhs->get_env_name().find(itr) == 0 &&
|
||||
|
||||
@@ -361,7 +361,7 @@ main(int argc, char** argv)
|
||||
"lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|"
|
||||
"instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|"
|
||||
"tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|"
|
||||
"amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|"
|
||||
"amd_comgr|amd_smi|rocprofiler64|rocprofiler-register|"
|
||||
"rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" }))
|
||||
{
|
||||
if(!find(filepath::dirname(itr), lib_search_paths))
|
||||
|
||||
@@ -441,7 +441,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
auto _h = p.get<bool>("host");
|
||||
auto _d = p.get<bool>("device");
|
||||
update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
|
||||
update_env(_env, "ROCPROFSYS_USE_ROCM_SMI", _d);
|
||||
update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d);
|
||||
});
|
||||
parser
|
||||
.add_argument({ "-w", "--wait" },
|
||||
@@ -718,7 +718,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
"mpip",
|
||||
"ompt",
|
||||
"rcclp",
|
||||
"rocm-smi",
|
||||
"amd-smi",
|
||||
"roctracer",
|
||||
"rocprofiler",
|
||||
"roctx",
|
||||
@@ -742,7 +742,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_ROCM)
|
||||
_backend_choices.erase("rocm");
|
||||
_backend_choices.erase("rocm-smi");
|
||||
_backend_choices.erase("amd-smi");
|
||||
_backend_choices.erase("rocprofiler-sdk");
|
||||
#endif
|
||||
|
||||
@@ -761,7 +761,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
|
||||
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
|
||||
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
@@ -785,7 +785,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
|
||||
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
|
||||
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
@@ -475,11 +475,11 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
auto _h = p.get<bool>("host");
|
||||
auto _d = p.get<bool>("device");
|
||||
update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
|
||||
update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", _d);
|
||||
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d);
|
||||
});
|
||||
|
||||
_data.processed_environs.emplace("device");
|
||||
_data.processed_environs.emplace("rocm_smi");
|
||||
_data.processed_environs.emplace("amd_smi");
|
||||
}
|
||||
|
||||
if(_data.environ_filter("wait", _data))
|
||||
@@ -544,7 +544,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
}
|
||||
|
||||
strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt",
|
||||
"rcclp", "rocm-smi", "roctracer", "rocprofiler",
|
||||
"rcclp", "amd-smi", "roctracer", "rocprofiler",
|
||||
"roctx", "mutex-locks", "spin-locks", "rw-locks" };
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
|
||||
@@ -561,7 +561,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_ROCM)
|
||||
_backend_choices.erase("amd-smi");
|
||||
_backend_choices.erase("rocm-smi");
|
||||
_backend_choices.erase("rocprofiler-sdk");
|
||||
_backend_choices.erase("rocm");
|
||||
#endif
|
||||
@@ -571,7 +570,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
// remove GPU-specific backends
|
||||
_backend_choices.erase("rcclp");
|
||||
_backend_choices.erase("amd-smi");
|
||||
_backend_choices.erase("rocm-smi");
|
||||
_backend_choices.erase("rocprofiler-sdk");
|
||||
_backend_choices.erase("rocm");
|
||||
|
||||
@@ -580,7 +578,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
#endif
|
||||
|
||||
#if defined(ROCPROFSYS_USE_ROCM)
|
||||
update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", false);
|
||||
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", false);
|
||||
update_env(_data, "ROCPROFSYS_USE_ROCM", false);
|
||||
#endif
|
||||
}
|
||||
@@ -606,7 +604,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
|
||||
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
|
||||
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
@@ -640,7 +638,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
_update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0);
|
||||
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0);
|
||||
_update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0);
|
||||
_update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
@@ -100,12 +100,14 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_RO
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_counter_collection, ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, "rocm_counter_collection", "ROCm device counter collection")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_MARKER_API, "rocm_marker_api", "ROCTx labels")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi, ROCPROFSYS_CATEGORY_ROCM_SMI, "rocm_smi", "rocm-smi data")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, "device_busy", "Busy percentage of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi, ROCPROFSYS_CATEGORY_AMD_SMI, "amd_smi", "amd-smi data")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_gfxbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, "device_busy_gfx", "Busy percentage of GFX engine on a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_umcbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, "device_busy_umc", "Busy percentage of UMC on a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_mmbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM, "device_busy_mm", "Busy percentage of a MM engine on a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_temp, ROCPROFSYS_CATEGORY_AMD_SMI_TEMP, "device_temp", "Temperature of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_power, ROCPROFSYS_CATEGORY_AMD_SMI_POWER, "device_power", "Power consumption of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_memory_usage, ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_vcn_activity, ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions")
|
||||
@@ -163,12 +165,14 @@ using name = perfetto_category<Tp...>;
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_counter_collection), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_marker_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_busy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_gfxbusy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_umcbusy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_mmbusy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_temp), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_power), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_memory_usage), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_vcn_activity), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \
|
||||
|
||||
@@ -74,7 +74,11 @@ struct backtrace_cpu_clock
|
||||
{};
|
||||
struct backtrace_fraction
|
||||
{};
|
||||
struct backtrace_gpu_busy
|
||||
struct backtrace_gpu_busy_gfx
|
||||
{};
|
||||
struct backtrace_gpu_busy_umc
|
||||
{};
|
||||
struct backtrace_gpu_busy_mm
|
||||
{};
|
||||
struct backtrace_gpu_temp
|
||||
{};
|
||||
@@ -84,14 +88,16 @@ struct backtrace_gpu_memory
|
||||
{};
|
||||
struct backtrace_gpu_vcn
|
||||
{};
|
||||
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
|
||||
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
|
||||
using sampling_percent = data_tracker<double, backtrace_fraction>;
|
||||
using sampling_gpu_busy = data_tracker<double, backtrace_gpu_busy>;
|
||||
using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
|
||||
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
|
||||
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
|
||||
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;
|
||||
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
|
||||
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
|
||||
using sampling_percent = data_tracker<double, backtrace_fraction>;
|
||||
using sampling_gpu_busy_gfx = data_tracker<double, backtrace_gpu_busy_gfx>;
|
||||
using sampling_gpu_busy_umc = data_tracker<double, backtrace_gpu_busy_umc>;
|
||||
using sampling_gpu_busy_mm = data_tracker<double, backtrace_gpu_busy_mm>;
|
||||
using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
|
||||
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
|
||||
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
|
||||
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;
|
||||
|
||||
template <typename ApiT, typename StartFuncT = default_functor_t,
|
||||
typename StopFuncT = default_functor_t>
|
||||
@@ -120,7 +126,12 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_percent, fals
|
||||
#endif
|
||||
|
||||
#if !defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_gfx,
|
||||
false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_umc,
|
||||
false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_mm,
|
||||
false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type)
|
||||
@@ -142,9 +153,18 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_cpu_clock, project::r
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_percent, project::rocprofsys,
|
||||
category::timing, os::supports_unix, category::sampling,
|
||||
category::interrupt_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy, project::rocprofsys,
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::sampling, category::process_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_gfx,
|
||||
project::rocprofsys, tpls::rocm, device::gpu,
|
||||
os::supports_linux, category::sampling,
|
||||
category::process_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_umc,
|
||||
project::rocprofsys, tpls::rocm, device::gpu,
|
||||
os::supports_linux, category::sampling,
|
||||
category::process_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_mm,
|
||||
project::rocprofsys, tpls::rocm, device::gpu,
|
||||
os::supports_linux, category::sampling,
|
||||
category::process_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_memory,
|
||||
project::rocprofsys, tpls::rocm, device::gpu,
|
||||
os::supports_linux, category::memory, category::sampling,
|
||||
@@ -174,28 +194,38 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_percent,
|
||||
"sampling_percent",
|
||||
"Fraction of wall-clock time spent in functions",
|
||||
"Derived from statistical sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy,
|
||||
"sampling_gpu_busy",
|
||||
"GPU Utilization (% busy) via ROCm-SMI",
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_gfx,
|
||||
"sampling_gpu_busy_gfx",
|
||||
"GFX engine GPU Utilization (% busy) via AMD SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_umc,
|
||||
"sampling_gpu_busy_umc",
|
||||
"Memory controller GPU Utilization (% busy) via AMD SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_mm,
|
||||
"sampling_gpu_busy_mm",
|
||||
"Multimedia engine GPU Utilization (% busy) via AMD SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_memory,
|
||||
"sampling_gpu_memory_usage",
|
||||
"GPU Memory Usage via ROCm-SMI", "Derived from sampling")
|
||||
"GPU Memory Usage via AMD SMI", "Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_power,
|
||||
"sampling_gpu_power", "GPU Power Usage via ROCm-SMI",
|
||||
"sampling_gpu_power", "GPU Power Usage via AMD SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_temp,
|
||||
"sampling_gpu_temp", "GPU Temperature via ROCm-SMI",
|
||||
"sampling_gpu_temp", "GPU Temperature via AMD SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn,
|
||||
"sampling_gpu_vcn",
|
||||
"GPU VCN Utilization (% activity) via ROCm-SMI",
|
||||
"GPU VCN Utilization (% activity) via AMD SMI",
|
||||
"Derived from sampling")
|
||||
|
||||
// statistics type
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_cpu_clock, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_gfx, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_umc, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_mm, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double)
|
||||
@@ -215,7 +245,11 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_timing_units, component::sampling_cpu_cloc
|
||||
true_type)
|
||||
|
||||
// enable percent units
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy,
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_gfx,
|
||||
true_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_umc,
|
||||
true_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_mm,
|
||||
true_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_percent,
|
||||
true_type)
|
||||
@@ -227,7 +261,9 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_memory_units, component::sampling_gpu_memo
|
||||
true_type)
|
||||
|
||||
// reporting categories (sum)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_gfx, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_umc, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_mm, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type)
|
||||
|
||||
@@ -316,9 +316,9 @@ configure_settings(bool _init)
|
||||
"rocm");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
bool, "ROCPROFSYS_USE_ROCM_SMI",
|
||||
bool, "ROCPROFSYS_USE_AMD_SMI",
|
||||
"Enable sampling GPU power, temp, utilization, vcn_activity and memory usage",
|
||||
true, "backend", "rocm_smi", "rocm", "process_sampling");
|
||||
true, "backend", "amd_smi", "rocm", "process_sampling");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING",
|
||||
"Enable statistical sampling of call-stack", false,
|
||||
@@ -478,17 +478,12 @@ configure_settings(bool _init)
|
||||
"'none' suppresses all CPU frequency sampling",
|
||||
std::string{}, "process_sampling");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_DEVICES",
|
||||
"[DEPRECATED] Renamed to ROCPROFSYS_SAMPLING_GPUS",
|
||||
std::string{ "all" }, "rocm_smi", "rocm",
|
||||
"process_sampling", "deprecated", "advanced");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
std::string, "ROCPROFSYS_SAMPLING_GPUS",
|
||||
"Devices to query when ROCPROFSYS_USE_ROCM_SMI=ON. Values should be separated by "
|
||||
"Devices to query when ROCPROFSYS_USE_AMD_SMI=ON. Values should be separated by "
|
||||
"commas and can be explicit or ranges, e.g. 0,1,5-8. An empty value implies "
|
||||
"'all' and 'none' suppresses all GPU sampling",
|
||||
std::string{ "all" }, "rocm_smi", "rocm", "process_sampling");
|
||||
std::string{ "all" }, "amd_smi", "rocm", "process_sampling");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
std::string, "ROCPROFSYS_SAMPLING_TIDS",
|
||||
@@ -627,9 +622,9 @@ configure_settings(bool _init)
|
||||
rocprofiler_sdk::config_settings(_config);
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
|
||||
"rocm-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage",
|
||||
"busy,temp,power,vcn_activity,mem_usage", "backend", "rocm_smi", "rocm",
|
||||
std::string, "ROCPROFSYS_AMD_SMI_METRICS",
|
||||
"amd-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage",
|
||||
"busy,temp,power,vcn_activity,mem_usage", "backend", "amd_smi", "rocm",
|
||||
"process_sampling", "advanced");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB",
|
||||
@@ -1030,7 +1025,7 @@ configure_settings(bool _init)
|
||||
_combine_perfetto_traces->second->set(_config->get<bool>("collapse_processes"));
|
||||
}
|
||||
|
||||
handle_deprecated_setting("ROCPROFSYS_ROCM_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS");
|
||||
handle_deprecated_setting("ROCPROFSYS_AMD_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS");
|
||||
handle_deprecated_setting("ROCPROFSYS_USE_THREAD_SAMPLING",
|
||||
"ROCPROFSYS_USE_PROCESS_SAMPLING");
|
||||
handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE");
|
||||
@@ -1104,7 +1099,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
|
||||
_set("ROCPROFSYS_TRACE", false);
|
||||
_set("ROCPROFSYS_PROFILE", false);
|
||||
_set("ROCPROFSYS_USE_CAUSAL", false);
|
||||
_set("ROCPROFSYS_USE_ROCM_SMI", false);
|
||||
_set("ROCPROFSYS_USE_AMD_SMI", false);
|
||||
_set("ROCPROFSYS_USE_KOKKOSP", false);
|
||||
_set("ROCPROFSYS_USE_RCCLP", false);
|
||||
_set("ROCPROFSYS_USE_OMPT", false);
|
||||
@@ -1129,10 +1124,10 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
|
||||
{
|
||||
#if ROCPROFSYS_ROCM_VERSION > 0
|
||||
ROCPROFSYS_BASIC_VERBOSE(
|
||||
1, "No ROCm devices were found: disabling rocm and rocm_smi...\n");
|
||||
1, "No ROCm devices were found: disabling rocm and amd_smi...\n");
|
||||
#endif
|
||||
_set("ROCPROFSYS_USE_ROCM", false);
|
||||
_set("ROCPROFSYS_USE_ROCM_SMI", false);
|
||||
_set("ROCPROFSYS_USE_AMD_SMI", false);
|
||||
}
|
||||
|
||||
if(_config->get<bool>("ROCPROFSYS_USE_KOKKOSP"))
|
||||
@@ -1165,7 +1160,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
|
||||
_set("ROCPROFSYS_PROFILE", false);
|
||||
_set("ROCPROFSYS_USE_CAUSAL", false);
|
||||
_set("ROCPROFSYS_USE_ROCM", false);
|
||||
_set("ROCPROFSYS_USE_ROCM_SMI", false);
|
||||
_set("ROCPROFSYS_USE_AMD_SMI", false);
|
||||
_set("ROCPROFSYS_USE_KOKKOSP", false);
|
||||
_set("ROCPROFSYS_USE_RCCLP", false);
|
||||
_set("ROCPROFSYS_USE_OMPT", false);
|
||||
@@ -1349,12 +1344,12 @@ configure_disabled_settings(const std::shared_ptr<settings>& _config)
|
||||
_handle_use_option("ROCPROFSYS_PROFILE", "timemory");
|
||||
_handle_use_option("ROCPROFSYS_USE_OMPT", "ompt");
|
||||
_handle_use_option("ROCPROFSYS_USE_RCCLP", "rcclp");
|
||||
_handle_use_option("ROCPROFSYS_USE_ROCM_SMI", "rocm_smi");
|
||||
_handle_use_option("ROCPROFSYS_USE_AMD_SMI", "amd_smi");
|
||||
_handle_use_option("ROCPROFSYS_USE_ROCM", "rocm");
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0
|
||||
_config->find("ROCPROFSYS_USE_ROCM_SMI")->second->set_hidden(true);
|
||||
for(const auto& itr : _config->disable_category("rocm_smi"))
|
||||
_config->find("ROCPROFSYS_USE_AMD_SMI")->second->set_hidden(true);
|
||||
for(const auto& itr : _config->disable_category("amd_smi"))
|
||||
_config->find(itr)->second->set_hidden(true);
|
||||
#endif
|
||||
|
||||
@@ -1813,10 +1808,10 @@ get_use_causal()
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_rocm_smi()
|
||||
get_use_amd_smi()
|
||||
{
|
||||
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
|
||||
static auto _v = get_config()->find("ROCPROFSYS_USE_ROCM_SMI");
|
||||
static auto _v = get_config()->find("ROCPROFSYS_USE_AMD_SMI");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
#else
|
||||
return false;
|
||||
|
||||
@@ -205,7 +205,7 @@ bool&
|
||||
get_use_causal() ROCPROFSYS_HOT;
|
||||
|
||||
bool
|
||||
get_use_rocm_smi() ROCPROFSYS_HOT;
|
||||
get_use_amd_smi() ROCPROFSYS_HOT;
|
||||
|
||||
bool&
|
||||
get_use_sampling() ROCPROFSYS_HOT;
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
#include <timemory/manager.hpp>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocm_smi/rocm_smi.h>
|
||||
# include <amd_smi/amdsmi.h>
|
||||
# include <rocprofiler-sdk/agent.h>
|
||||
# include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
# include <rocprofiler-sdk/fwd.h>
|
||||
@@ -55,40 +55,43 @@ namespace gpu
|
||||
namespace
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# define ROCPROFSYS_ROCM_SMI_CALL(ERROR_CODE) \
|
||||
::rocprofsys::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__)
|
||||
# define ROCPROFSYS_AMD_SMI_CALL(ERROR_CODE) \
|
||||
::rocprofsys::gpu::check_amdsmi_error(ERROR_CODE, __FILE__, __LINE__)
|
||||
|
||||
void
|
||||
check_rsmi_error(rsmi_status_t _code, const char* _file, int _line)
|
||||
check_amdsmi_error(amdsmi_status_t _code, const char* _file, int _line)
|
||||
{
|
||||
if(_code == RSMI_STATUS_SUCCESS) return;
|
||||
if(_code == AMDSMI_STATUS_SUCCESS) return;
|
||||
const char* _msg = nullptr;
|
||||
auto _err = rsmi_status_string(_code, &_msg);
|
||||
if(_err != RSMI_STATUS_SUCCESS)
|
||||
ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. "
|
||||
"Error code %i originated at %s:%i\n",
|
||||
static_cast<int>(_code), _file, _line);
|
||||
auto _err = amdsmi_status_code_to_string(_code, &_msg);
|
||||
if(_err != AMDSMI_STATUS_SUCCESS)
|
||||
ROCPROFSYS_THROW(
|
||||
"amdsmi_status_code_to_string failed. No error message available. "
|
||||
"Error code %i originated at %s:%i\n",
|
||||
static_cast<int>(_code), _file, _line);
|
||||
ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
|
||||
_msg);
|
||||
}
|
||||
|
||||
bool
|
||||
rsmi_init()
|
||||
amdsmi_init()
|
||||
{
|
||||
auto _rsmi_init = []() {
|
||||
auto _amdsmi_init = []() {
|
||||
try
|
||||
{
|
||||
ROCPROFSYS_ROCM_SMI_CALL(::rsmi_init(0));
|
||||
// Currently, only AMDSMI_INIT_AMD_GPUS is supported
|
||||
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
|
||||
get_processor_handles();
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n",
|
||||
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n",
|
||||
_e.what());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}();
|
||||
|
||||
return _rsmi_init;
|
||||
return _amdsmi_init;
|
||||
}
|
||||
#endif // ROCPROFSYS_USE_ROCM > 0
|
||||
|
||||
@@ -126,7 +129,7 @@ query_rocm_gpu_agents()
|
||||
} // namespace
|
||||
|
||||
int
|
||||
rocm_device_count()
|
||||
device_count()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
static int _num_devices = query_rocm_gpu_agents();
|
||||
@@ -136,38 +139,13 @@ rocm_device_count()
|
||||
#endif
|
||||
}
|
||||
|
||||
int
|
||||
rsmi_device_count()
|
||||
bool
|
||||
initialize_amdsmi()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!rsmi_init()) return 0;
|
||||
|
||||
static auto _num_devices = []() {
|
||||
uint32_t _v = 0;
|
||||
try
|
||||
{
|
||||
ROCPROFSYS_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v));
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE(
|
||||
1, "Exception thrown getting the rocm-smi devices: %s\n", _e.what());
|
||||
}
|
||||
return _v;
|
||||
}();
|
||||
|
||||
return _num_devices;
|
||||
return (amdsmi_init()) ? true : false;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int
|
||||
device_count()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
return rocm_device_count();
|
||||
#else
|
||||
return 0;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -217,5 +195,73 @@ add_device_metadata()
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
/*
|
||||
* Required amdsmi methods to get processors and handles
|
||||
*/
|
||||
|
||||
uint32_t processors::total_processor_count = 0;
|
||||
std::vector<amdsmi_processor_handle> processors::processors_list = {};
|
||||
|
||||
void
|
||||
get_processor_handles()
|
||||
{
|
||||
uint32_t socket_count;
|
||||
uint32_t processor_count;
|
||||
|
||||
// Passing nullptr will return us the number of sockets available for read in this
|
||||
// system
|
||||
auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
{
|
||||
return;
|
||||
}
|
||||
std::vector<amdsmi_socket_handle> sockets(socket_count);
|
||||
ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
|
||||
for(auto& socket : sockets)
|
||||
{
|
||||
// Passing nullptr will return us the number of processors available for read for
|
||||
// this socket
|
||||
ret = amdsmi_get_processor_handles(socket, &processor_count, nullptr);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
{
|
||||
return;
|
||||
}
|
||||
std::vector<amdsmi_processor_handle> all_processors(processor_count);
|
||||
ret =
|
||||
amdsmi_get_processor_handles(socket, &processor_count, all_processors.data());
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
for(auto& processor : all_processors)
|
||||
{
|
||||
processor_type_t processor_type = {};
|
||||
ret = amdsmi_get_processor_type(processor, &processor_type);
|
||||
if(processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU)
|
||||
{
|
||||
ROCPROFSYS_THROW("Not AMD_GPU device type!");
|
||||
return;
|
||||
}
|
||||
processors::processors_list.push_back(processor);
|
||||
}
|
||||
}
|
||||
processors::total_processor_count = processors::processors_list.size();
|
||||
}
|
||||
uint32_t
|
||||
get_processor_count()
|
||||
{
|
||||
return processors::total_processor_count;
|
||||
}
|
||||
|
||||
/// Look up the AMD SMI processor handle stored at index `dev_id` of
/// processors::processors_list.
///
/// Returns a value-initialized (null) handle when `dev_id` is not a valid index,
/// e.g. when the list has not been populated yet. Indexing the vector out of range
/// would be undefined behavior, so the bound is checked here.
/// NOTE(review): callers pass the result straight to amdsmi_* functions; confirm
/// those reject a null handle with an error status rather than dereferencing it.
amdsmi_processor_handle
get_handle_from_id(uint32_t dev_id)
{
    const auto& _handles = processors::processors_list;
    if(dev_id >= _handles.size()) return amdsmi_processor_handle{};
    return _handles[dev_id];
}
|
||||
#endif
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace rocprofsys
|
||||
|
||||
@@ -22,18 +22,41 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <amd_smi/amdsmi.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace gpu
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
void
|
||||
get_processor_handles();
|
||||
|
||||
uint32_t
|
||||
get_processor_count();
|
||||
|
||||
amdsmi_processor_handle
|
||||
get_handle_from_id(uint32_t dev_id);
|
||||
|
||||
// Static storage for the AMD SMI processor handles collected by
// get_processor_handles(); read back through get_processor_count() and
// get_handle_from_id().
struct processors
|
||||
{
|
||||
// number of entries in processors_list, written by get_processor_handles()
static uint32_t total_processor_count;
|
||||
// processor handles, indexed by the device id passed to get_handle_from_id()
static std::vector<amdsmi_processor_handle> processors_list;
|
||||
|
||||
// NOTE(review): both data members above are declared before `private:` and are
// therefore public, so these friend declarations grant no additional access.
private:
|
||||
friend void rocprofsys::gpu::get_processor_handles();
|
||||
friend uint32_t rocprofsys::gpu::get_processor_count();
|
||||
friend amdsmi_processor_handle rocprofsys::gpu::get_handle_from_id(uint32_t dev_id);
|
||||
};
|
||||
#endif
|
||||
|
||||
int
|
||||
device_count();
|
||||
|
||||
int
|
||||
rocm_device_count();
|
||||
|
||||
int
|
||||
rsmi_device_count();
|
||||
bool
|
||||
initialize_amdsmi();
|
||||
|
||||
void
|
||||
add_device_metadata();
|
||||
|
||||
@@ -52,12 +52,14 @@ extern "C"
|
||||
ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION,
|
||||
ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION,
|
||||
ROCPROFSYS_CATEGORY_ROCM_MARKER_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_POWER,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_TEMP,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_POWER,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_RCCL,
|
||||
ROCPROFSYS_CATEGORY_SAMPLING,
|
||||
ROCPROFSYS_CATEGORY_PTHREAD,
|
||||
|
||||
@@ -22,7 +22,7 @@ set(library_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/ptl.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocm.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/runtime.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/sampling.hpp
|
||||
@@ -44,7 +44,7 @@ if(ROCPROFSYS_USE_ROCM)
|
||||
rocprofiler-systems-object-library
|
||||
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp)
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp)
|
||||
add_subdirectory(rocprofiler-sdk)
|
||||
endif()
|
||||
|
||||
@@ -58,7 +58,7 @@ set(ndebug_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ompt.cpp)
|
||||
|
||||
set_source_files_properties(
|
||||
|
||||
+116
-80
@@ -30,7 +30,7 @@
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "library/rocm_smi.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
@@ -48,8 +48,6 @@
|
||||
#include <timemory/utility/delimit.hpp>
|
||||
#include <timemory/utility/locking.hpp>
|
||||
|
||||
#include <rocm_smi/rocm_smi.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <ios>
|
||||
@@ -59,22 +57,22 @@
|
||||
#include <sys/resource.h>
|
||||
#include <thread>
|
||||
|
||||
#define ROCPROFSYS_ROCM_SMI_CALL(...) \
|
||||
::rocprofsys::rocm_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
|
||||
#define ROCPROFSYS_AMD_SMI_CALL(...) \
|
||||
::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocm_smi
|
||||
namespace amd_smi
|
||||
{
|
||||
using bundle_t = std::deque<data>;
|
||||
using sampler_instances = thread_data<bundle_t, category::rocm_smi>;
|
||||
using sampler_instances = thread_data<bundle_t, category::amd_smi>;
|
||||
|
||||
namespace
|
||||
{
|
||||
auto&
|
||||
get_settings(uint32_t _dev_id)
|
||||
{
|
||||
static auto _v = std::unordered_map<uint32_t, rocm_smi::settings>{};
|
||||
static auto _v = std::unordered_map<uint32_t, amd_smi::settings>{};
|
||||
return _v[_dev_id];
|
||||
}
|
||||
|
||||
@@ -86,22 +84,23 @@ is_initialized()
|
||||
}
|
||||
|
||||
void
|
||||
check_error(const char* _file, int _line, rsmi_status_t _code, bool* _option = nullptr)
|
||||
check_error(const char* _file, int _line, amdsmi_status_t _code, bool* _option = nullptr)
|
||||
{
|
||||
if(_code == RSMI_STATUS_SUCCESS)
|
||||
if(_code == AMDSMI_STATUS_SUCCESS)
|
||||
return;
|
||||
else if(_code == RSMI_STATUS_NOT_SUPPORTED && _option)
|
||||
else if(_code == AMDSMI_STATUS_NOT_SUPPORTED && _option)
|
||||
{
|
||||
*_option = false;
|
||||
return;
|
||||
}
|
||||
|
||||
const char* _msg = nullptr;
|
||||
auto _err = rsmi_status_string(_code, &_msg);
|
||||
if(_err != RSMI_STATUS_SUCCESS)
|
||||
ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. "
|
||||
"Error code %i originated at %s:%i\n",
|
||||
static_cast<int>(_code), _file, _line);
|
||||
auto _err = amdsmi_status_code_to_string(_code, &_msg);
|
||||
if(_err != AMDSMI_STATUS_SUCCESS)
|
||||
ROCPROFSYS_THROW(
|
||||
"amdsmi_status_code_to_string failed. No error message available. "
|
||||
"Error code %i originated at %s:%i\n",
|
||||
static_cast<int>(_code), _file, _line);
|
||||
ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
|
||||
_msg);
|
||||
}
|
||||
@@ -127,7 +126,7 @@ data::sample(uint32_t _dev_id)
|
||||
{
|
||||
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
|
||||
assert(_ts < std::numeric_limits<int64_t>::max());
|
||||
rsmi_gpu_metrics_t _gpu_metrics;
|
||||
amdsmi_gpu_metrics_t _gpu_metrics;
|
||||
|
||||
auto _state = get_state().load();
|
||||
|
||||
@@ -136,47 +135,55 @@ data::sample(uint32_t _dev_id)
|
||||
m_dev_id = _dev_id;
|
||||
m_ts = _ts;
|
||||
|
||||
#define ROCPROFSYS_RSMI_GET(OPTION, FUNCTION, ...) \
|
||||
#define ROCPROFSYS_AMDSMI_GET(OPTION, FUNCTION, ...) \
|
||||
if(OPTION) \
|
||||
{ \
|
||||
try \
|
||||
{ \
|
||||
ROCPROFSYS_ROCM_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \
|
||||
ROCPROFSYS_AMD_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \
|
||||
} catch(std::runtime_error & _e) \
|
||||
{ \
|
||||
ROCPROFSYS_VERBOSE_F( \
|
||||
0, "[%s] Exception: %s. Disabling future samples from rocm-smi...\n", \
|
||||
0, "[%s] Exception: %s. Disabling future samples from amd-smi...\n", \
|
||||
#FUNCTION, _e.what()); \
|
||||
get_state().store(State::Disabled); \
|
||||
} \
|
||||
}
|
||||
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).busy, rsmi_dev_busy_percent_get, _dev_id,
|
||||
&m_busy_perc);
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).temp, rsmi_dev_temp_metric_get, _dev_id,
|
||||
RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &m_temp);
|
||||
RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER;
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).power, rsmi_dev_power_get, _dev_id,
|
||||
&m_power, &power_type)
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get,
|
||||
_dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage);
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity,
|
||||
rsmi_dev_gpu_metrics_info_get, _dev_id, &_gpu_metrics);
|
||||
amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_dev_id);
|
||||
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity,
|
||||
sample_handle, &m_busy_perc);
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric,
|
||||
sample_handle, AMDSMI_TEMPERATURE_TYPE_JUNCTION,
|
||||
AMDSMI_TEMP_CURRENT, &m_temp);
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).power, amdsmi_get_power_info,
|
||||
sample_handle, &m_power)
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage,
|
||||
sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage);
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).vcn_activity,
|
||||
amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics);
|
||||
|
||||
for(const auto& activity : _gpu_metrics.vcn_activity)
|
||||
{
|
||||
if(activity != UINT16_MAX) m_vcn_metrics.push_back(activity);
|
||||
}
|
||||
|
||||
#undef ROCPROFSYS_RSMI_GET
|
||||
#undef ROCPROFSYS_AMDSMI_GET
|
||||
}
|
||||
|
||||
void
|
||||
data::print(std::ostream& _os) const
|
||||
{
|
||||
std::stringstream _ss{};
|
||||
_ss << "device: " << m_dev_id << ", busy = " << m_busy_perc << "%, temp = " << m_temp
|
||||
<< ", power = " << m_power << ", memory usage = " << m_mem_usage;
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
_ss << "device: " << m_dev_id << ", gpu busy: = " << m_busy_perc.gfx_activity
|
||||
<< "%, mm busy: = " << m_busy_perc.mm_activity
|
||||
<< "%, umc busy: = " << m_busy_perc.umc_activity << "%, temp = " << m_temp
|
||||
<< ", current power = " << m_power.current_socket_power
|
||||
<< ", memory usage = " << m_mem_usage;
|
||||
#endif
|
||||
_os << _ss.str();
|
||||
}
|
||||
|
||||
@@ -209,8 +216,8 @@ sample()
|
||||
{
|
||||
for(auto itr : data::device_list)
|
||||
{
|
||||
if(rocm_smi::get_state() != State::Active) continue;
|
||||
ROCPROFSYS_DEBUG_F("Polling rocm-smi for device %u...\n", itr);
|
||||
if(amd_smi::get_state() != State::Active) continue;
|
||||
ROCPROFSYS_DEBUG_F("Polling amd-smi for device %u...\n", itr);
|
||||
auto& _data = *_bundle_data.at(itr);
|
||||
if(!_data) continue;
|
||||
_data->emplace_back(data{ itr });
|
||||
@@ -221,7 +228,7 @@ sample()
|
||||
void
|
||||
set_state(State _v)
|
||||
{
|
||||
rocm_smi::get_state().store(_v);
|
||||
amd_smi::get_state().store(_v);
|
||||
}
|
||||
|
||||
std::vector<data>&
|
||||
@@ -235,15 +242,15 @@ bool
|
||||
data::setup()
|
||||
{
|
||||
perfetto_counter_track<data>::init();
|
||||
rocm_smi::set_state(State::PreInit);
|
||||
amd_smi::set_state(State::PreInit);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
data::shutdown()
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Shutting down rocm-smi...\n");
|
||||
rocm_smi::set_state(State::Finalized);
|
||||
ROCPROFSYS_DEBUG("Shutting down amd-smi...\n");
|
||||
amd_smi::set_state(State::Finalized);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -261,7 +268,9 @@ data::shutdown()
|
||||
void
|
||||
data::post_process(uint32_t _dev_id)
|
||||
{
|
||||
using component::sampling_gpu_busy;
|
||||
using component::sampling_gpu_busy_gfx;
|
||||
using component::sampling_gpu_busy_mm;
|
||||
using component::sampling_gpu_busy_umc;
|
||||
using component::sampling_gpu_memory;
|
||||
using component::sampling_gpu_power;
|
||||
using component::sampling_gpu_temp;
|
||||
@@ -269,12 +278,12 @@ data::post_process(uint32_t _dev_id)
|
||||
|
||||
if(device_count < _dev_id) return;
|
||||
|
||||
auto& _rocm_smi_v = sampler_instances::get()->at(_dev_id);
|
||||
auto _rocm_smi = (_rocm_smi_v) ? *_rocm_smi_v : std::deque<rocm_smi::data>{};
|
||||
auto& _amd_smi_v = sampler_instances::get()->at(_dev_id);
|
||||
auto _amd_smi = (_amd_smi_v) ? *_amd_smi_v : std::deque<amd_smi::data>{};
|
||||
const auto& _thread_info = thread_info::get(0, InternalTID);
|
||||
|
||||
ROCPROFSYS_VERBOSE(1, "Post-processing %zu rocm-smi samples from device %u\n",
|
||||
_rocm_smi.size(), _dev_id);
|
||||
ROCPROFSYS_VERBOSE(1, "Post-processing %zu amd-smi samples from device %u\n",
|
||||
_amd_smi.size(), _dev_id);
|
||||
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0");
|
||||
if(!_thread_info) return;
|
||||
@@ -282,18 +291,23 @@ data::post_process(uint32_t _dev_id)
|
||||
auto _settings = get_settings(_dev_id);
|
||||
|
||||
auto _process_perfetto = [&]() {
|
||||
auto _idx = std::array<uint64_t, 5>{};
|
||||
auto _idx = std::array<uint64_t, 7>{};
|
||||
{
|
||||
_idx.fill(_idx.size());
|
||||
uint64_t nidx = 0;
|
||||
if(_settings.busy) _idx.at(0) = nidx++;
|
||||
if(_settings.temp) _idx.at(1) = nidx++;
|
||||
if(_settings.power) _idx.at(2) = nidx++;
|
||||
if(_settings.mem_usage) _idx.at(3) = nidx++;
|
||||
if(_settings.vcn_activity) _idx.at(4) = nidx++;
|
||||
if(_settings.busy)
|
||||
{
|
||||
_idx.at(0) = nidx++;
|
||||
_idx.at(1) = nidx++;
|
||||
_idx.at(2) = nidx++;
|
||||
}
|
||||
if(_settings.temp) _idx.at(3) = nidx++;
|
||||
if(_settings.power) _idx.at(4) = nidx++;
|
||||
if(_settings.mem_usage) _idx.at(5) = nidx++;
|
||||
if(_settings.vcn_activity) _idx.at(6) = nidx++;
|
||||
}
|
||||
|
||||
for(auto& itr : _rocm_smi)
|
||||
for(auto& itr : _amd_smi)
|
||||
{
|
||||
using counter_track = perfetto_counter_track<data>;
|
||||
if(itr.m_dev_id != _dev_id) continue;
|
||||
@@ -303,11 +317,16 @@ data::post_process(uint32_t _dev_id)
|
||||
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
|
||||
};
|
||||
|
||||
if(_settings.busy) counter_track::emplace(_dev_id, addendum("Busy"), "%");
|
||||
if(_settings.busy)
|
||||
{
|
||||
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
|
||||
}
|
||||
if(_settings.temp)
|
||||
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
|
||||
if(_settings.power)
|
||||
counter_track::emplace(_dev_id, addendum("Power"), "watts");
|
||||
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
|
||||
if(_settings.mem_usage)
|
||||
counter_track::emplace(_dev_id, addendum("Memory Usage"),
|
||||
"megabytes");
|
||||
@@ -323,26 +342,34 @@ data::post_process(uint32_t _dev_id)
|
||||
uint64_t _ts = itr.m_ts;
|
||||
if(!_thread_info->is_valid_time(_ts)) continue;
|
||||
|
||||
double _busy = itr.m_busy_perc;
|
||||
double _temp = itr.m_temp / 1.0e3;
|
||||
double _power = itr.m_power / 1.0e6;
|
||||
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
|
||||
double _gfxbusy = itr.m_busy_perc.gfx_activity;
|
||||
double _umcbusy = itr.m_busy_perc.umc_activity;
|
||||
double _mmbusy = itr.m_busy_perc.mm_activity;
|
||||
double _temp = itr.m_temp;
|
||||
double _power = itr.m_power.current_socket_power;
|
||||
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
|
||||
|
||||
if(_settings.busy)
|
||||
TRACE_COUNTER("device_busy", counter_track::at(_dev_id, _idx.at(0)), _ts,
|
||||
_busy);
|
||||
{
|
||||
TRACE_COUNTER("device_busy_gfx", counter_track::at(_dev_id, _idx.at(0)),
|
||||
_ts, _gfxbusy);
|
||||
TRACE_COUNTER("device_busy_umc", counter_track::at(_dev_id, _idx.at(1)),
|
||||
_ts, _umcbusy);
|
||||
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, _idx.at(2)),
|
||||
_ts, _mmbusy);
|
||||
}
|
||||
if(_settings.temp)
|
||||
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(1)), _ts,
|
||||
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(3)), _ts,
|
||||
_temp);
|
||||
if(_settings.power)
|
||||
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(2)), _ts,
|
||||
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(4)), _ts,
|
||||
_power);
|
||||
if(_settings.mem_usage)
|
||||
TRACE_COUNTER("device_memory_usage",
|
||||
counter_track::at(_dev_id, _idx.at(3)), _ts, _usage);
|
||||
counter_track::at(_dev_id, _idx.at(5)), _ts, _usage);
|
||||
if(_settings.vcn_activity)
|
||||
{
|
||||
uint64_t idx = _idx.at(4);
|
||||
uint64_t idx = _idx.at(6);
|
||||
for(const auto& temp : itr.m_vcn_metrics)
|
||||
{
|
||||
TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx),
|
||||
@@ -361,14 +388,14 @@ data::post_process(uint32_t _dev_id)
|
||||
void
|
||||
setup()
|
||||
{
|
||||
auto_lock_t _lk{ type_mutex<category::rocm_smi>() };
|
||||
auto_lock_t _lk{ type_mutex<category::amd_smi>() };
|
||||
|
||||
if(is_initialized() || !get_use_rocm_smi()) return;
|
||||
if(is_initialized() || !get_use_amd_smi()) return;
|
||||
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
// assign the device count as determined by amd-smi
|
||||
data::device_count = device_count();
|
||||
if(!gpu::initialize_amdsmi()) return;
|
||||
data::device_count = gpu::get_processor_count();
|
||||
|
||||
auto _devices_v = get_sampling_gpus();
|
||||
for(auto& itr : _devices_v)
|
||||
@@ -421,14 +448,15 @@ setup()
|
||||
|
||||
data::device_list = _devices;
|
||||
|
||||
auto _metrics = get_setting_value<std::string>("ROCPROFSYS_ROCM_SMI_METRICS");
|
||||
auto _metrics = get_setting_value<std::string>("ROCPROFSYS_AMD_SMI_METRICS");
|
||||
|
||||
try
|
||||
{
|
||||
for(auto itr : _devices)
|
||||
{
|
||||
uint16_t dev_id = 0;
|
||||
ROCPROFSYS_ROCM_SMI_CALL(rsmi_dev_id_get(itr, &dev_id));
|
||||
ROCPROFSYS_AMD_SMI_CALL(
|
||||
amdsmi_get_gpu_id(gpu::get_handle_from_id(itr), &dev_id));
|
||||
// dev_id holds the device ID of device i, upon a successful call
|
||||
|
||||
if(_metrics && !_metrics->empty())
|
||||
@@ -447,10 +475,10 @@ setup()
|
||||
{
|
||||
auto iitr = supported.find(metric);
|
||||
if(iitr == supported.end())
|
||||
ROCPROFSYS_FAIL_F("unsupported rocm-smi metric: %s\n",
|
||||
ROCPROFSYS_FAIL_F("unsupported amd-smi metric: %s\n",
|
||||
metric.c_str());
|
||||
|
||||
ROCPROFSYS_VERBOSE_F(1, "Enabling rocm-smi metric '%s'\n",
|
||||
ROCPROFSYS_VERBOSE_F(1, "Enabling amd-smi metric '%s'\n",
|
||||
metric.c_str());
|
||||
iitr->second = true;
|
||||
}
|
||||
@@ -462,7 +490,7 @@ setup()
|
||||
data::setup();
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing rocm-smi: %s\n",
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n",
|
||||
_e.what());
|
||||
data::device_list = {};
|
||||
}
|
||||
@@ -471,7 +499,7 @@ setup()
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
auto_lock_t _lk{ type_mutex<category::rocm_smi>() };
|
||||
auto_lock_t _lk{ type_mutex<category::amd_smi>() };
|
||||
|
||||
if(!is_initialized()) return;
|
||||
|
||||
@@ -479,11 +507,11 @@ shutdown()
|
||||
{
|
||||
if(data::shutdown())
|
||||
{
|
||||
ROCPROFSYS_ROCM_SMI_CALL(rsmi_shut_down());
|
||||
ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down());
|
||||
}
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down rocm-smi: %s\n",
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down amd-smi: %s\n",
|
||||
_e.what());
|
||||
}
|
||||
|
||||
@@ -500,14 +528,22 @@ post_process()
|
||||
uint32_t
|
||||
device_count()
|
||||
{
|
||||
return gpu::rsmi_device_count();
|
||||
return gpu::device_count();
|
||||
}
|
||||
} // namespace rocm_smi
|
||||
} // namespace amd_smi
|
||||
} // namespace rocprofsys
|
||||
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy>), true,
|
||||
double)
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_gfx>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_umc>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_mm>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_temp>), true,
|
||||
+29
-15
@@ -34,6 +34,10 @@
|
||||
#include "core/state.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <amd_smi/amdsmi.h>
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
@@ -47,7 +51,7 @@
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocm_smi
|
||||
namespace amd_smi
|
||||
{
|
||||
void
|
||||
setup();
|
||||
@@ -66,9 +70,6 @@ post_process();
|
||||
|
||||
void set_state(State);
|
||||
|
||||
uint32_t
|
||||
device_count();
|
||||
|
||||
struct settings
|
||||
{
|
||||
bool busy = true;
|
||||
@@ -86,7 +87,7 @@ struct data
|
||||
using promise_t = std::promise<void>;
|
||||
|
||||
using timestamp_t = int64_t;
|
||||
using power_t = uint64_t;
|
||||
using power_t = uint32_t;
|
||||
using busy_perc_t = uint32_t;
|
||||
using mem_usage_t = uint64_t;
|
||||
using temp_t = int64_t;
|
||||
@@ -102,11 +103,16 @@ struct data
|
||||
|
||||
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
|
||||
timestamp_t m_ts = 0;
|
||||
busy_perc_t m_busy_perc = 0;
|
||||
temp_t m_temp = 0;
|
||||
power_t m_power = 0;
|
||||
mem_usage_t m_mem_usage = 0;
|
||||
std::vector<uint16_t> m_vcn_metrics = {};
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
amdsmi_engine_usage_t m_busy_perc = {};
|
||||
amdsmi_power_info_t m_power = {};
|
||||
#else
|
||||
std::vector<busy_perc_t> m_busy_perc = {};
|
||||
std::vector<power_t> m_power = {};
|
||||
#endif
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& _os, const data& _v)
|
||||
{
|
||||
@@ -115,11 +121,11 @@ struct data
|
||||
}
|
||||
|
||||
private:
|
||||
friend void rocprofsys::rocm_smi::setup();
|
||||
friend void rocprofsys::rocm_smi::config();
|
||||
friend void rocprofsys::rocm_smi::sample();
|
||||
friend void rocprofsys::rocm_smi::shutdown();
|
||||
friend void rocprofsys::rocm_smi::post_process();
|
||||
friend void rocprofsys::amd_smi::setup();
|
||||
friend void rocprofsys::amd_smi::config();
|
||||
friend void rocprofsys::amd_smi::sample();
|
||||
friend void rocprofsys::amd_smi::shutdown();
|
||||
friend void rocprofsys::amd_smi::post_process();
|
||||
|
||||
static size_t device_count;
|
||||
static std::set<uint32_t> device_list;
|
||||
@@ -154,7 +160,7 @@ post_process()
|
||||
|
||||
inline void set_state(State) {}
|
||||
#endif
|
||||
} // namespace rocm_smi
|
||||
} // namespace amd_smi
|
||||
} // namespace rocprofsys
|
||||
|
||||
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
|
||||
@@ -166,8 +172,16 @@ inline void set_state(State) {}
|
||||
# include <timemory/operations.hpp>
|
||||
|
||||
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy>), true,
|
||||
double)
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_gfx>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_umc>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_mm>),
|
||||
true, double)
|
||||
|
||||
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_temp>), true,
|
||||
@@ -23,8 +23,8 @@
|
||||
#include "library/process_sampler.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/cpu_freq.hpp"
|
||||
#include "library/rocm_smi.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
|
||||
#include <memory>
|
||||
@@ -140,14 +140,14 @@ sampler::setup()
|
||||
// shutdown if already running
|
||||
shutdown();
|
||||
|
||||
if(get_use_rocm_smi())
|
||||
if(get_use_amd_smi())
|
||||
{
|
||||
auto& _rocm_smi = instances.emplace_back(std::make_unique<instance>());
|
||||
_rocm_smi->setup = []() { rocm_smi::setup(); };
|
||||
_rocm_smi->shutdown = []() { rocm_smi::shutdown(); };
|
||||
_rocm_smi->post_process = []() { rocm_smi::post_process(); };
|
||||
_rocm_smi->config = []() { rocm_smi::config(); };
|
||||
_rocm_smi->sample = []() { rocm_smi::sample(); };
|
||||
auto& _amd_smi = instances.emplace_back(std::make_unique<instance>());
|
||||
_amd_smi->setup = []() { amd_smi::setup(); };
|
||||
_amd_smi->shutdown = []() { amd_smi::shutdown(); };
|
||||
_amd_smi->post_process = []() { amd_smi::post_process(); };
|
||||
_amd_smi->config = []() { amd_smi::config(); };
|
||||
_amd_smi->sample = []() { amd_smi::sample(); };
|
||||
}
|
||||
|
||||
auto& _cpu_freq = instances.emplace_back(std::make_unique<instance>());
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
#include "core/debug.hpp"
|
||||
#include "core/dynamic_library.hpp"
|
||||
#include "core/gpu.hpp"
|
||||
#include "library/rocm_smi.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/rocprofiler-sdk.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
@@ -30,8 +30,8 @@
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocprofiler-sdk.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/rocm_smi.hpp"
|
||||
#include "library/rocprofiler-sdk/counters.hpp"
|
||||
#include "library/rocprofiler-sdk/fwd.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
@@ -1116,10 +1116,10 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
|
||||
gpu::add_device_metadata();
|
||||
|
||||
if(config::get_use_process_sampling() && config::get_use_rocm_smi())
|
||||
if(config::get_use_process_sampling() && config::get_use_amd_smi())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE_F(1, "Setting rocm_smi state to active...\n");
|
||||
rocm_smi::set_state(State::Active);
|
||||
ROCPROFSYS_VERBOSE_F(1, "Setting amd_smi state to active...\n");
|
||||
amd_smi::set_state(State::Active);
|
||||
}
|
||||
|
||||
start();
|
||||
@@ -1137,8 +1137,8 @@ tool_fini(void* callback_data)
|
||||
flush();
|
||||
stop();
|
||||
|
||||
if(config::get_use_process_sampling() && config::get_use_rocm_smi())
|
||||
rocm_smi::shutdown();
|
||||
if(config::get_use_process_sampling() && config::get_use_amd_smi())
|
||||
amd_smi::shutdown();
|
||||
|
||||
if(get_counter_storage())
|
||||
{
|
||||
|
||||
@@ -125,7 +125,9 @@ using component::backtrace_timestamp;
|
||||
using component::backtrace_wall_clock; // NOLINT
|
||||
using component::callchain;
|
||||
using component::sampling_cpu_clock;
|
||||
using component::sampling_gpu_busy;
|
||||
using component::sampling_gpu_busy_gfx;
|
||||
using component::sampling_gpu_busy_mm;
|
||||
using component::sampling_gpu_busy_umc;
|
||||
using component::sampling_gpu_memory;
|
||||
using component::sampling_gpu_power;
|
||||
using component::sampling_gpu_temp;
|
||||
@@ -1551,11 +1553,25 @@ struct sampling_initialization
|
||||
sampling_percent::description() = "Percentage of samples";
|
||||
sampling_percent::set_precision(3);
|
||||
|
||||
sampling_gpu_busy::label() = "sampling_gpu_busy_percent";
|
||||
sampling_gpu_busy::description() = "Utilization of GPU(s)";
|
||||
sampling_gpu_busy::set_precision(0);
|
||||
sampling_gpu_busy::set_format_flags(sampling_gpu_busy::get_format_flags() &
|
||||
std::ios_base::showpoint);
|
||||
sampling_gpu_busy_gfx::label() = "sampling_gpu_busy_gfx_percent";
|
||||
sampling_gpu_busy_gfx::description() = "Utilization of GFX engines on GPU(s)";
|
||||
sampling_gpu_busy_gfx::set_precision(0);
|
||||
sampling_gpu_busy_gfx::set_format_flags(
|
||||
sampling_gpu_busy_gfx::get_format_flags() & std::ios_base::showpoint);
|
||||
|
||||
sampling_gpu_busy_umc::label() = "sampling_gpu_busy_umc_percent";
|
||||
sampling_gpu_busy_umc::description() =
|
||||
"Utilization of memory controller on GPU(s)";
|
||||
sampling_gpu_busy_umc::set_precision(0);
|
||||
sampling_gpu_busy_umc::set_format_flags(
|
||||
sampling_gpu_busy_umc::get_format_flags() & std::ios_base::showpoint);
|
||||
|
||||
sampling_gpu_busy_mm::label() = "sampling_gpu_busy_mm_percent";
|
||||
sampling_gpu_busy_mm::description() =
|
||||
"Utilization of multimedia engines on GPU(s)";
|
||||
sampling_gpu_busy_mm::set_precision(0);
|
||||
sampling_gpu_busy_mm::set_format_flags(sampling_gpu_busy_mm::get_format_flags() &
|
||||
std::ios_base::showpoint);
|
||||
|
||||
sampling_gpu_memory::label() = "sampling_gpu_memory_usage";
|
||||
sampling_gpu_memory::description() = "Memory usage of GPU(s)";
|
||||
|
||||
@@ -229,19 +229,19 @@ set(_VALID_GPU OFF)
|
||||
if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU))
|
||||
set(_VALID_GPU ON)
|
||||
find_program(
|
||||
ROCPROFSYS_ROCM_SMI_EXE
|
||||
NAMES rocm-smi
|
||||
ROCPROFSYS_AMD_SMI_EXE
|
||||
NAMES amd-smi
|
||||
HINTS ${ROCmVersion_DIR}
|
||||
PATHS ${ROCmVersion_DIR}
|
||||
PATH_SUFFIXES bin)
|
||||
if(ROCPROFSYS_ROCM_SMI_EXE)
|
||||
if(ROCPROFSYS_AMD_SMI_EXE)
|
||||
execute_process(
|
||||
COMMAND ${ROCPROFSYS_ROCM_SMI_EXE}
|
||||
OUTPUT_VARIABLE _RSMI_OUT
|
||||
ERROR_VARIABLE _RSMI_ERR
|
||||
RESULT_VARIABLE _RSMI_RET)
|
||||
if(_RSMI_RET EQUAL 0)
|
||||
if("${_RSMI_OUTPUT}" MATCHES "ERROR" OR "${_RSMI_ERR}" MATCHES "ERROR")
|
||||
COMMAND ${ROCPROFSYS_AMD_SMI_EXE}
|
||||
OUTPUT_VARIABLE _AMDSMI_OUT
|
||||
ERROR_VARIABLE _AMDSMI_ERR
|
||||
RESULT_VARIABLE _AMDSMI_RET)
|
||||
if(_AMDSMI_RET EQUAL 0)
|
||||
# amd-smi exited with status 0, but it may still have reported a problem in its
# output. The stdout of the execute_process() call above is captured in
# _AMDSMI_OUT (its OUTPUT_VARIABLE) and stderr in _AMDSMI_ERR, so both of those
# variables are scanned for "ERROR"; any match disables the GPU tests.
if("${_AMDSMI_OUT}" MATCHES "ERROR" OR "${_AMDSMI_ERR}" MATCHES "ERROR")
    set(_VALID_GPU OFF)
endif()
|
||||
else()
|
||||
@@ -250,7 +250,7 @@ if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU))
|
||||
endif()
|
||||
if(NOT _VALID_GPU)
|
||||
rocprofiler_systems_message(
|
||||
AUTHOR_WARNING "rocm-smi did not successfully run. Disabling GPU tests...")
|
||||
AUTHOR_WARNING "amd-smi did not successfully run. Disabling GPU tests...")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -433,7 +433,7 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST)
|
||||
endif()
|
||||
|
||||
if(NOT "ROCPROFSYS_USE_ROCM=OFF" IN_LIST TEST_ENVIRONMENT)
|
||||
list(APPEND TEST_LABELS "rocm-smi")
|
||||
list(APPEND TEST_LABELS "amd-smi")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -442,9 +442,9 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST)
|
||||
list(APPEND TEST_LABELS "rocm")
|
||||
endif()
|
||||
|
||||
if("ROCPROFSYS_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm-smi" IN_LIST
|
||||
TEST_ENVIRONMENT)
|
||||
list(APPEND TEST_LABELS "rocm-smi")
|
||||
# Attach the "amd-smi" label to tests whose environment explicitly enables AMD
# SMI sampling. The label is only appended when it is not already present in
# TEST_LABELS, so a test never ends up carrying the same label twice.
if("ROCPROFSYS_USE_AMD_SMI=ON" IN_LIST TEST_ENVIRONMENT
   AND NOT "amd-smi" IN_LIST TEST_LABELS)
    list(APPEND TEST_LABELS "amd-smi")
endif()
|
||||
|
||||
if(TARGET ${TEST_TARGET})
|
||||
|
||||
Ссылка в новой задаче
Block a user