diff --git a/README.md b/README.md index c48530143f..f0e55ffff4 100755 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ The documentation source files reside in the [`/docs`](/docs) folder of this rep - HIP kernel tracing - HSA API tracing - HSA operation tracing -- System-level sampling (via rocm-smi) +- System-level sampling (via amd-smi) - Memory usage - Power usage - Temperature diff --git a/cmake/ConfigCPack.cmake b/cmake/ConfigCPack.cmake index 27211c5cec..886ecda6b0 100644 --- a/cmake/ConfigCPack.cmake +++ b/cmake/ConfigCPack.cmake @@ -157,11 +157,11 @@ if(NOT ROCPROFSYS_BUILD_DYNINST) endif() endif() if(ROCmVersion_FOUND) - set(_ROCM_SMI_SUFFIX + set(_AMD_SMI_SUFFIX " (>= ${ROCmVersion_MAJOR_VERSION}.0.0.${ROCmVersion_NUMERIC_VERSION})") endif() if(ROCPROFSYS_USE_ROCM) - list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocm-smi-lib${_ROCM_SMI_SUFFIX}") + list(APPEND _DEBIAN_PACKAGE_DEPENDS "amd-smi-lib${_AMD_SMI_SUFFIX}") list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocprofiler-sdk (>= ${rocprofiler-sdk_VERSION})") endif() if(ROCPROFSYS_USE_MPI) diff --git a/cmake/Modules/Findamd-smi.cmake b/cmake/Modules/Findamd-smi.cmake new file mode 100644 index 0000000000..bbf4cea613 --- /dev/null +++ b/cmake/Modules/Findamd-smi.cmake @@ -0,0 +1,79 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file +# Copyright.txt or https://cmake.org/licensing for details. 
+ +include(FindPackageHandleStandardArgs) + +# ----------------------------------------------------------------------------------------# + +if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "") + set(ROCM_PATH "$ENV{ROCM_PATH}") +endif() + +foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/amd_smi) + if(EXISTS ${_DIR}) + get_filename_component(_ABS_DIR "${_DIR}" REALPATH) + list(APPEND _AMD_SMI_PATHS ${_ABS_DIR}) + endif() +endforeach() + +# ----------------------------------------------------------------------------------------# + +find_path( + amd-smi_ROOT_DIR + NAMES include/amd_smi/amdsmi.h + HINTS ${_AMD_SMI_PATHS} + PATHS ${_AMD_SMI_PATHS} + PATH_SUFFIXES amd_smi) + +mark_as_advanced(amd-smi_ROOT_DIR) + +# ----------------------------------------------------------------------------------------# + +find_path( + amd-smi_INCLUDE_DIR + NAMES amd_smi/amdsmi.h + HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS} + PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS} + PATH_SUFFIXES include amd_smi/include) + +mark_as_advanced(amd-smi_INCLUDE_DIR) + +# ----------------------------------------------------------------------------------------# + +find_library( + amd-smi_LIBRARY + NAMES amd_smi + HINTS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS} + PATHS ${amd-smi_ROOT_DIR} ${_AMD_SMI_PATHS} + PATH_SUFFIXES amd_smi/lib amd_smi/lib64 lib lib64) + +if(amd-smi_LIBRARY) + get_filename_component(amd-smi_LIBRARY_DIR "${amd-smi_LIBRARY}" PATH CACHE) +endif() + +mark_as_advanced(amd-smi_LIBRARY) + +# ----------------------------------------------------------------------------------------# + +find_package_handle_standard_args(amd-smi DEFAULT_MSG amd-smi_ROOT_DIR + amd-smi_INCLUDE_DIR amd-smi_LIBRARY) + +# ------------------------------------------------------------------------------# + +if(amd-smi_FOUND) + add_library(amd-smi::amd-smi INTERFACE IMPORTED) + add_library(amd-smi::roctx INTERFACE IMPORTED) + set(amd-smi_INCLUDE_DIRS ${amd-smi_INCLUDE_DIR}) + set(amd-smi_LIBRARIES ${amd-smi_LIBRARY}) +
set(amd-smi_LIBRARY_DIRS ${amd-smi_LIBRARY_DIR}) + + target_include_directories(amd-smi::amd-smi INTERFACE ${amd-smi_INCLUDE_DIR}) + target_link_libraries(amd-smi::amd-smi INTERFACE ${amd-smi_LIBRARY}) + +endif() + +# ------------------------------------------------------------------------------# + +unset(_AMD_SMI_PATHS) + +# ------------------------------------------------------------------------------# diff --git a/cmake/Modules/Findrocm-smi.cmake b/cmake/Modules/Findrocm-smi.cmake deleted file mode 100644 index 65076f5cb7..0000000000 --- a/cmake/Modules/Findrocm-smi.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file -# Copyright.txt or https://cmake.org/licensing for details. - -include(FindPackageHandleStandardArgs) - -# ----------------------------------------------------------------------------------------# - -if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "") - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -foreach(_DIR ${ROCmVersion_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocm_smi) - if(EXISTS ${_DIR}) - get_filename_component(_ABS_DIR "${_DIR}" REALPATH) - list(APPEND _ROCM_SMI_PATHS ${_ABS_DIR}) - endif() -endforeach() - -# ----------------------------------------------------------------------------------------# - -find_path( - rocm-smi_ROOT_DIR - NAMES include/rocm_smi/rocm_smi.h - HINTS ${_ROCM_SMI_PATHS} - PATHS ${_ROCM_SMI_PATHS} - PATH_SUFFIXES rocm_smi) - -mark_as_advanced(rocm-smi_ROOT_DIR) - -# ----------------------------------------------------------------------------------------# - -find_path( - rocm-smi_INCLUDE_DIR - NAMES rocm_smi/rocm_smi.h - HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS} - PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS} - PATH_SUFFIXES include rocm_smi/include) - -mark_as_advanced(rocm-smi_INCLUDE_DIR) - -# ----------------------------------------------------------------------------------------# - -find_library( - rocm-smi_LIBRARY - NAMES rocm_smi64 rocm_smi 
- HINTS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS} - PATHS ${rocm-smi_ROOT_DIR} ${_ROCM_SMI_PATHS} - PATH_SUFFIXES rocm_smi/lib rocm_smi/lib64 lib lib64) - -if(rocm-smi_LIBRARY) - get_filename_component(rocm-smi_LIBRARY_DIR "${rocm-smi_LIBRARY}" PATH CACHE) -endif() - -mark_as_advanced(rocm-smi_LIBRARY) - -# ----------------------------------------------------------------------------------------# - -find_package_handle_standard_args(rocm-smi DEFAULT_MSG rocm-smi_ROOT_DIR - rocm-smi_INCLUDE_DIR rocm-smi_LIBRARY) - -# ------------------------------------------------------------------------------# - -if(rocm-smi_FOUND) - add_library(rocm-smi::rocm-smi INTERFACE IMPORTED) - add_library(rocm-smi::roctx INTERFACE IMPORTED) - set(rocm-smi_INCLUDE_DIRS ${rocm-smi_INCLUDE_DIR}) - set(rocm-smi_LIBRARIES ${rocm-smi_LIBRARY}) - set(rocm-smi_LIBRARY_DIRS ${rocm-smi_LIBRARY_DIR}) - - target_include_directories(rocm-smi::rocm-smi INTERFACE ${rocm-smi_INCLUDE_DIR}) - target_link_libraries(rocm-smi::rocm-smi INTERFACE ${rocm-smi_LIBRARY}) - -endif() - -# ------------------------------------------------------------------------------# - -unset(_ROCM_SMI_PATHS) - -# ------------------------------------------------------------------------------# diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index c508a459d2..7c142c4251 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -192,8 +192,6 @@ if(ROCPROFSYS_USE_ROCM) target_link_libraries(rocprofiler-systems-rocm INTERFACE rocprofiler-sdk::rocprofiler-sdk) - find_package(rocm-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - target_link_libraries(rocprofiler-systems-rocm INTERFACE rocm-smi::rocm-smi) + find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) + target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi) - # find_package(amd-smi ${rocprofiler_systems_FIND_QUIETLY} REQUIRED) - # target_link_libraries(rocprofiler-systems-rocm INTERFACE amd-smi::amd-smi) diff --git
a/docker/Dockerfile.opensuse b/docker/Dockerfile.opensuse index 31594cf2c1..e278c4911a 100644 --- a/docker/Dockerfile.opensuse +++ b/docker/Dockerfile.opensuse @@ -35,7 +35,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \ zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ zypper --non-interactive --gpg-auto-import-keys refresh && \ - zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \ + zypper --non-interactive install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \ zypper --non-interactive clean --all; \ fi diff --git a/docker/Dockerfile.rhel b/docker/Dockerfile.rhel index 97394b8b19..06ac745c4f 100644 --- a/docker/Dockerfile.rhel +++ b/docker/Dockerfile.rhel @@ -29,7 +29,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \ dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \ yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ - yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \ + yum install -y rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \ yum clean all; \ fi diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index a98d692242..b944dae7d2 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -39,7 +39,7 @@ RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \ apt-get install -y ./${AMDGPU_DEB} && \ apt-get update && \ - apt-get install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \ + apt-get install -y 
rocm-dev amd-smi-lib roctracer-dev rocprofiler-dev rccl-dev libpciaccess0 ${EXTRA_PACKAGES} && \ apt-get autoclean; \ fi diff --git a/docs/conceptual/rocprof-sys-feature-set.rst b/docs/conceptual/rocprof-sys-feature-set.rst index fed24b55b3..adc82f32d2 100644 --- a/docs/conceptual/rocprof-sys-feature-set.rst +++ b/docs/conceptual/rocprof-sys-feature-set.rst @@ -52,7 +52,7 @@ GPU metrics * HIP kernel tracing * HSA API tracing * HSA operation tracing -* System-level sampling (via rocm-smi) +* System-level sampling (via amd-smi) * Memory usage * Power usage diff --git a/docs/data/rocprof-sys-perfetto.png b/docs/data/rocprof-sys-perfetto.png index 5bd8da7279..21f1325cac 100644 Binary files a/docs/data/rocprof-sys-perfetto.png and b/docs/data/rocprof-sys-perfetto.png differ diff --git a/docs/how-to/configuring-runtime-options.rst b/docs/how-to/configuring-runtime-options.rst index 58f8a2122c..1470e8b10b 100644 --- a/docs/how-to/configuring-runtime-options.rst +++ b/docs/how-to/configuring-runtime-options.rst @@ -229,7 +229,7 @@ Generating a default configuration file ROCPROFSYS_USE_SAMPLING = false ROCPROFSYS_USE_PROCESS_SAMPLING = true ROCPROFSYS_USE_ROCM = true - ROCPROFSYS_USE_ROCM_SMI = true + ROCPROFSYS_USE_AMD_SMI = true ROCPROFSYS_USE_KOKKOSP = false ROCPROFSYS_USE_CODE_COVERAGE = false ROCPROFSYS_USE_PID = true @@ -384,7 +384,7 @@ Viewing the setting descriptions | ROCPROFSYS_USE_OMPT | Enable support for OpenMP-Tools | | ROCPROFSYS_TRACE | Enable perfetto backend | | ROCPROFSYS_USE_PID | Enable tagging filenames with proces... | - | ROCPROFSYS_USE_ROCM_SMI | Enable sampling GPU power, temp, uti... | + | ROCPROFSYS_USE_AMD_SMI | Enable sampling GPU power, temp, uti... | | ROCPROFSYS_USE_ROCM | Enable ROCM tracing | | ROCPROFSYS_USE_SAMPLING | Enable statistical sampling of call-... | | ROCPROFSYS_USE_PROCESS_SAMPLING | Enable a background thread which sam... | @@ -461,11 +461,11 @@ Viewing components | sampling_wall_clock | Wall-clock timing. 
Derived from statistic... | | sampling_cpu_clock | CPU-clock timing. Derived from statistica... | | sampling_percent | Fraction of wall-clock time spent in func... | - | sampling_gpu_power | GPU Power Usage via ROCm-SMI. Derived fro... | - | sampling_gpu_temp | GPU Temperature via ROCm-SMI. Derived fro... | - | sampling_gpu_busy | GPU Utilization (% busy) via ROCm-SMI. De... | - | sampling_vcn_busy | GPU VCN Utilization (% activity) via ROCm... | - | sampling_gpu_memory_usage | GPU Memory Usage via ROCm-SMI. Derived fr... | + | sampling_gpu_power | GPU Power Usage via AMD-SMI. Derived from... | + | sampling_gpu_temp | GPU Temperature via AMD-SMI. Derived from... | + | sampling_gpu_busy | GPU Utilization (% busy) via AMD-SMI. Der... | + | sampling_vcn_busy | GPU VCN Utilization (% activity) via AMD-... | + | sampling_gpu_memory_usage | GPU Memory Usage via AMD-SMI. Derived fro... | |-----------------------------------|----------------------------------------------| Viewing hardware counters diff --git a/docs/how-to/instrumenting-rewriting-binary-application.rst b/docs/how-to/instrumenting-rewriting-binary-application.rst index 1c7329f602..7545d491f6 100644 --- a/docs/how-to/instrumenting-rewriting-binary-application.rst +++ b/docs/how-to/instrumenting-rewriting-binary-application.rst @@ -246,7 +246,7 @@ view the help menu. 
libprofiler.so libpthread.so.0 libresolv.so.2 - librocm_smi64.so + libamd_smi.so librocmtools.so librocprofiler64.so libroctracer64.so diff --git a/docs/how-to/sampling-call-stack.rst b/docs/how-to/sampling-call-stack.rst index 0f3408704c..2018684659 100644 --- a/docs/how-to/sampling-call-stack.rst +++ b/docs/how-to/sampling-call-stack.rst @@ -213,9 +213,9 @@ View the help menu of ``rocprof-sys-sample`` with the ``-h`` / ``--help`` option [BACKEND OPTIONS] These options control region information captured w/o sampling or instrumentation - -I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] + -I, --include [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] Include data from these backends (count: unlimited) - -E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | rocm-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] + -E, --exclude [ all | kokkosp | mpip | mutex-locks | ompt | rcclp | amd-smi | rocprofiler | roctracer | roctx | rw-locks | spin-locks ] Exclude data from these backends (count: unlimited) [HARDWARE COUNTER OPTIONS] See also: rocprof-sys-avail -H @@ -293,7 +293,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_TRACE=true ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=true - ROCPROFSYS_USE_ROCM_SMI=true + ROCPROFSYS_USE_AMD_SMI=true ROCPROFSYS_USE_ROCM=true ROCPROFSYS_USE_SAMPLING=true ROCPROFSYS_PROFILE=true @@ -323,7 +323,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_TRACE=true ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=false - ROCPROFSYS_USE_ROCM_SMI=false + ROCPROFSYS_USE_AMD_SMI=false ROCPROFSYS_USE_ROCM=false ROCPROFSYS_USE_SAMPLING=true ROCPROFSYS_PROFILE=true @@ -354,7 +354,7 @@ Here is the full output from the previous 
ROCPROFSYS_USE_OMPT=false ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=false - ROCPROFSYS_USE_ROCM_SMI=false + ROCPROFSYS_USE_AMD_SMI=false ROCPROFSYS_USE_ROCM=false ROCPROFSYS_USE_SAMPLING=true [rocprof-sys][dl][1785877] rocprofsys_main diff --git a/source/bin/rocprof-sys-avail/generate_config.cpp b/source/bin/rocprof-sys-avail/generate_config.cpp index 5bc6deebcc..38c127fc8d 100644 --- a/source/bin/rocprof-sys-avail/generate_config.cpp +++ b/source/bin/rocprof-sys-avail/generate_config.cpp @@ -340,7 +340,7 @@ generate_config(std::string _config_file, const std::set& _config_f { "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE", "ROCPROFSYS_PROFILE", "ROCPROFSYS_USE_SAMPLING", "ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCM", - "ROCPROFSYS_USE_ROCM_SMI", "ROCPROFSYS_USE_KOKKOSP", + "ROCPROFSYS_USE_AMD_SMI", "ROCPROFSYS_USE_KOKKOSP", "ROCPROFSYS_USE_OMPT", "ROCPROFSYS_USE", "ROCPROFSYS_OUTPUT" }) { if(_lhs->get_env_name().find(itr) == 0 && diff --git a/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp b/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp index 58c9965497..7dbf2a7475 100644 --- a/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp +++ b/source/bin/rocprof-sys-instrument/rocprof-sys-instrument.cpp @@ -361,7 +361,7 @@ main(int argc, char** argv) "lib(dyninstAPI|stackwalk|pcontrol|patchAPI|parseAPI|" "instructionAPI|symtabAPI|dynDwarf|common|dynElf|tbb|tbbmalloc|" "tbbmalloc_proxy|gotcha|libunwind|roctracer64|hsa-runtime|amdhip|" - "amd_comgr|rocm_smi64|rocprofiler64|rocprofiler-register|" + "amd_comgr|rocprofiler64|rocprofiler-register|" "rocprofiler-sdk|rocprofiler-sdk-roctx|amd_smi)\\.(so|a)" })) { if(!find(filepath::dirname(itr), lib_search_paths)) diff --git a/source/bin/rocprof-sys-sample/impl.cpp b/source/bin/rocprof-sys-sample/impl.cpp index 28b083b28c..eb5815d215 100644 --- a/source/bin/rocprof-sys-sample/impl.cpp +++ b/source/bin/rocprof-sys-sample/impl.cpp @@ -441,7 +441,7 @@
parse_args(int argc, char** argv, std::vector& _env) auto _h = p.get("host"); auto _d = p.get("device"); update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); - update_env(_env, "ROCPROFSYS_USE_ROCM_SMI", _d); + update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d); }); parser .add_argument({ "-w", "--wait" }, @@ -718,7 +718,7 @@ parse_args(int argc, char** argv, std::vector& _env) "mpip", "ompt", "rcclp", - "rocm-smi", + "amd-smi", "roctracer", "rocprofiler", "roctx", @@ -742,7 +742,7 @@ parse_args(int argc, char** argv, std::vector& _env) #if !defined(ROCPROFSYS_USE_ROCM) _backend_choices.erase("rocm"); - _backend_choices.erase("rocm-smi"); + _backend_choices.erase("amd-smi"); _backend_choices.erase("rocprofiler-sdk"); #endif @@ -761,7 +761,7 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); @@ -785,7 +785,7 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); diff --git a/source/lib/core/argparse.cpp b/source/lib/core/argparse.cpp index 
f2c274749b..9eaae6eeec 100644 --- a/source/lib/core/argparse.cpp +++ b/source/lib/core/argparse.cpp @@ -475,11 +475,11 @@ add_core_arguments(parser_t& _parser, parser_data& _data) auto _h = p.get("host"); auto _d = p.get("device"); update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); - update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", _d); + update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d); }); _data.processed_environs.emplace("device"); - _data.processed_environs.emplace("rocm_smi"); + _data.processed_environs.emplace("amd_smi"); } if(_data.environ_filter("wait", _data)) @@ -544,7 +544,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) } strset_t _backend_choices = { "all", "kokkosp", "mpip", "ompt", - "rcclp", "rocm-smi", "roctracer", "rocprofiler", + "rcclp", "amd-smi", "roctracer", "rocprofiler", "roctx", "mutex-locks", "spin-locks", "rw-locks" }; #if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS) @@ -561,7 +561,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) #if !defined(ROCPROFSYS_USE_ROCM) _backend_choices.erase("amd-smi"); - _backend_choices.erase("rocm-smi"); _backend_choices.erase("rocprofiler-sdk"); _backend_choices.erase("rocm"); #endif @@ -571,7 +570,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) // remove GPU-specific backends _backend_choices.erase("rcclp"); _backend_choices.erase("amd-smi"); - _backend_choices.erase("rocm-smi"); _backend_choices.erase("rocprofiler-sdk"); _backend_choices.erase("rocm"); @@ -580,7 +578,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) #endif #if defined(ROCPROFSYS_USE_ROCM) - update_env(_data, "ROCPROFSYS_USE_ROCM_SMI", false); + update_env(_data, "ROCPROFSYS_USE_AMD_SMI", false); update_env(_data, "ROCPROFSYS_USE_ROCM", false); #endif } @@ -606,7 +604,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); 
_update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); @@ -640,7 +638,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _update("ROCPROFSYS_USE_OMPT", _v.count("ompt") > 0); _update("ROCPROFSYS_USE_ROCM", _v.count("rocm") > 0); _update("ROCPROFSYS_USE_RCCLP", _v.count("rcclp") > 0); - _update("ROCPROFSYS_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("ROCPROFSYS_USE_AMD_SMI", _v.count("amd-smi") > 0); _update("ROCPROFSYS_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); diff --git a/source/lib/core/categories.hpp b/source/lib/core/categories.hpp index aa6026fa27..05da163870 100644 --- a/source/lib/core/categories.hpp +++ b/source/lib/core/categories.hpp @@ -100,12 +100,14 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_RO ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_counter_collection, ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, "rocm_counter_collection", "ROCm device counter collection") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_MARKER_API, "rocm_marker_api", "ROCTx labels") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi, ROCPROFSYS_CATEGORY_ROCM_SMI, "rocm_smi", "rocm-smi data") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, "device_busy", "Busy percentage of a GPU device") 
-ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device") -ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi, ROCPROFSYS_CATEGORY_AMD_SMI, "amd_smi", "amd-smi data") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_gfxbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, "device_busy_gfx", "Busy percentage of GFX engine on a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_umcbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, "device_busy_umc", "Busy percentage of UMC on a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_mmbusy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM, "device_busy_mm", "Busy percentage of a MM engine on a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_temp, ROCPROFSYS_CATEGORY_AMD_SMI_TEMP, "device_temp", "Temperature of a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_power, ROCPROFSYS_CATEGORY_AMD_SMI_POWER, "device_power", "Power consumption of a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_memory_usage, ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device") +ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_vcn_activity, ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions") ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX 
threading functions") ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions") @@ -163,12 +165,14 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_counter_collection), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_marker_api), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_busy), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \ - ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_gfxbusy), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_umcbusy), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_mmbusy), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_temp), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_power), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_memory_usage), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_vcn_activity), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \ ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \ ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \ diff --git a/source/lib/core/components/fwd.hpp b/source/lib/core/components/fwd.hpp index 5d16fa5812..2552aaa6a1 100644 --- a/source/lib/core/components/fwd.hpp +++ b/source/lib/core/components/fwd.hpp @@ -74,7 +74,11 @@ struct backtrace_cpu_clock {}; struct backtrace_fraction {}; -struct backtrace_gpu_busy +struct backtrace_gpu_busy_gfx +{}; +struct backtrace_gpu_busy_umc +{}; +struct backtrace_gpu_busy_mm {}; struct backtrace_gpu_temp {}; @@ -84,14 +88,16 @@ struct backtrace_gpu_memory {}; struct backtrace_gpu_vcn {}; -using sampling_wall_clock = data_tracker; -using sampling_cpu_clock = data_tracker; -using 
sampling_percent = data_tracker; -using sampling_gpu_busy = data_tracker; -using sampling_gpu_temp = data_tracker; -using sampling_gpu_power = data_tracker; -using sampling_gpu_memory = data_tracker; -using sampling_gpu_vcn = data_tracker; +using sampling_wall_clock = data_tracker; +using sampling_cpu_clock = data_tracker; +using sampling_percent = data_tracker; +using sampling_gpu_busy_gfx = data_tracker; +using sampling_gpu_busy_umc = data_tracker; +using sampling_gpu_busy_mm = data_tracker; +using sampling_gpu_temp = data_tracker; +using sampling_gpu_power = data_tracker; +using sampling_gpu_memory = data_tracker; +using sampling_gpu_vcn = data_tracker; template @@ -120,7 +126,12 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_percent, fals #endif #if !defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_gfx, + false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_umc, + false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_mm, + false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type) @@ -142,9 +153,18 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_cpu_clock, project::r TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_percent, project::rocprofsys, category::timing, os::supports_unix, category::sampling, category::interrupt_sampling) -TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy, project::rocprofsys, - tpls::rocm, device::gpu, os::supports_linux, - category::sampling, category::process_sampling) 
+TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_gfx, + project::rocprofsys, tpls::rocm, device::gpu, + os::supports_linux, category::sampling, + category::process_sampling) +TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_umc, + project::rocprofsys, tpls::rocm, device::gpu, + os::supports_linux, category::sampling, + category::process_sampling) +TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_busy_mm, + project::rocprofsys, tpls::rocm, device::gpu, + os::supports_linux, category::sampling, + category::process_sampling) TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_memory, project::rocprofsys, tpls::rocm, device::gpu, os::supports_linux, category::memory, category::sampling, @@ -174,28 +194,38 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_percent, "sampling_percent", "Fraction of wall-clock time spent in functions", "Derived from statistical sampling") -TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy, - "sampling_gpu_busy", - "GPU Utilization (% busy) via ROCm-SMI", +TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_gfx, + "sampling_gpu_busy_gfx", + "GFX engine GPU Utilization (% busy) via AMD SMI", + "Derived from sampling") +TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_umc, + "sampling_gpu_busy_umc", + "Memory controller GPU Utilization (% busy) via AMD SMI", + "Derived from sampling") +TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_busy_mm, + "sampling_gpu_busy_mm", + "Multimedia engine GPU Utilization (% busy) via AMD SMI", "Derived from sampling") TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_memory, "sampling_gpu_memory_usage", - "GPU Memory Usage via ROCm-SMI", "Derived from sampling") + "GPU Memory Usage via AMD SMI", "Derived from sampling") TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_power, - "sampling_gpu_power", 
"GPU Power Usage via ROCm-SMI", + "sampling_gpu_power", "GPU Power Usage via AMD SMI", "Derived from sampling") TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_temp, - "sampling_gpu_temp", "GPU Temperature via ROCm-SMI", + "sampling_gpu_temp", "GPU Temperature via AMD SMI", "Derived from sampling") TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn, "sampling_gpu_vcn", - "GPU VCN Utilization (% activity) via ROCm-SMI", + "GPU VCN Utilization (% activity) via AMD SMI", "Derived from sampling") // statistics type TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_cpu_clock, double) -TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy, double) +TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_gfx, double) +TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_umc, double) +TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy_mm, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double) TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double) @@ -215,7 +245,11 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_timing_units, component::sampling_cpu_cloc true_type) // enable percent units -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy, +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_gfx, + true_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_umc, + true_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_gpu_busy_mm, true_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_percent_units, component::sampling_percent, true_type) @@ -227,7 +261,9 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(uses_memory_units, component::sampling_gpu_memo true_type) // reporting 
categories (sum) -ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_gfx, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_umc, false_type) +ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy_mm, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type) ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type) diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index 1967b9e1e0..acbd01de54 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -316,9 +316,9 @@ configure_settings(bool _init) "rocm"); ROCPROFSYS_CONFIG_SETTING( - bool, "ROCPROFSYS_USE_ROCM_SMI", + bool, "ROCPROFSYS_USE_AMD_SMI", "Enable sampling GPU power, temp, utilization, vcn_activity and memory usage", - true, "backend", "rocm_smi", "rocm", "process_sampling"); + true, "backend", "amd_smi", "rocm", "process_sampling"); ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING", "Enable statistical sampling of call-stack", false, @@ -478,17 +478,12 @@ configure_settings(bool _init) "'none' suppresses all CPU frequency sampling", std::string{}, "process_sampling"); - ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_DEVICES", - "[DEPRECATED] Renamed to ROCPROFSYS_SAMPLING_GPUS", - std::string{ "all" }, "rocm_smi", "rocm", - "process_sampling", "deprecated", "advanced"); - ROCPROFSYS_CONFIG_SETTING( std::string, "ROCPROFSYS_SAMPLING_GPUS", - "Devices to query when ROCPROFSYS_USE_ROCM_SMI=ON. Values should be separated by " + "Devices to query when ROCPROFSYS_USE_AMD_SMI=ON. Values should be separated by " "commas and can be explicit or ranges, e.g. 0,1,5-8. 
An empty value implies " "'all' and 'none' suppresses all GPU sampling", - std::string{ "all" }, "rocm_smi", "rocm", "process_sampling"); + std::string{ "all" }, "amd_smi", "rocm", "process_sampling"); ROCPROFSYS_CONFIG_SETTING( std::string, "ROCPROFSYS_SAMPLING_TIDS", @@ -627,9 +622,9 @@ configure_settings(bool _init) rocprofiler_sdk::config_settings(_config); ROCPROFSYS_CONFIG_SETTING( - std::string, "ROCPROFSYS_ROCM_SMI_METRICS", - "rocm-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage", - "busy,temp,power,vcn_activity,mem_usage", "backend", "rocm_smi", "rocm", + std::string, "ROCPROFSYS_AMD_SMI_METRICS", + "amd-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage", + "busy,temp,power,vcn_activity,mem_usage", "backend", "amd_smi", "rocm", "process_sampling", "advanced"); ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB", @@ -1030,7 +1025,7 @@ configure_settings(bool _init) _combine_perfetto_traces->second->set(_config->get("collapse_processes")); } - handle_deprecated_setting("ROCPROFSYS_ROCM_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS"); + handle_deprecated_setting("ROCPROFSYS_AMD_SMI_DEVICES", "ROCPROFSYS_SAMPLING_GPUS"); handle_deprecated_setting("ROCPROFSYS_USE_THREAD_SAMPLING", "ROCPROFSYS_USE_PROCESS_SAMPLING"); handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE"); @@ -1104,7 +1099,7 @@ configure_mode_settings(const std::shared_ptr& _config) _set("ROCPROFSYS_TRACE", false); _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_CAUSAL", false); - _set("ROCPROFSYS_USE_ROCM_SMI", false); + _set("ROCPROFSYS_USE_AMD_SMI", false); _set("ROCPROFSYS_USE_KOKKOSP", false); _set("ROCPROFSYS_USE_RCCLP", false); _set("ROCPROFSYS_USE_OMPT", false); @@ -1129,10 +1124,10 @@ configure_mode_settings(const std::shared_ptr& _config) { #if ROCPROFSYS_ROCM_VERSION > 0 ROCPROFSYS_BASIC_VERBOSE( - 1, "No ROCm devices were found: disabling rocm and rocm_smi...\n"); + 1, "No ROCm devices were found: 
disabling rocm and amd_smi...\n"); #endif _set("ROCPROFSYS_USE_ROCM", false); - _set("ROCPROFSYS_USE_ROCM_SMI", false); + _set("ROCPROFSYS_USE_AMD_SMI", false); } if(_config->get("ROCPROFSYS_USE_KOKKOSP")) @@ -1165,7 +1160,7 @@ configure_mode_settings(const std::shared_ptr& _config) _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_CAUSAL", false); _set("ROCPROFSYS_USE_ROCM", false); - _set("ROCPROFSYS_USE_ROCM_SMI", false); + _set("ROCPROFSYS_USE_AMD_SMI", false); _set("ROCPROFSYS_USE_KOKKOSP", false); _set("ROCPROFSYS_USE_RCCLP", false); _set("ROCPROFSYS_USE_OMPT", false); @@ -1349,12 +1344,12 @@ configure_disabled_settings(const std::shared_ptr& _config) _handle_use_option("ROCPROFSYS_PROFILE", "timemory"); _handle_use_option("ROCPROFSYS_USE_OMPT", "ompt"); _handle_use_option("ROCPROFSYS_USE_RCCLP", "rcclp"); - _handle_use_option("ROCPROFSYS_USE_ROCM_SMI", "rocm_smi"); + _handle_use_option("ROCPROFSYS_USE_AMD_SMI", "amd_smi"); _handle_use_option("ROCPROFSYS_USE_ROCM", "rocm"); #if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0 - _config->find("ROCPROFSYS_USE_ROCM_SMI")->second->set_hidden(true); - for(const auto& itr : _config->disable_category("rocm_smi")) + _config->find("ROCPROFSYS_USE_AMD_SMI")->second->set_hidden(true); + for(const auto& itr : _config->disable_category("amd_smi")) _config->find(itr)->second->set_hidden(true); #endif @@ -1813,10 +1808,10 @@ get_use_causal() } bool -get_use_rocm_smi() +get_use_amd_smi() { #if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 - static auto _v = get_config()->find("ROCPROFSYS_USE_ROCM_SMI"); + static auto _v = get_config()->find("ROCPROFSYS_USE_AMD_SMI"); return static_cast&>(*_v->second).get(); #else return false; diff --git a/source/lib/core/config.hpp b/source/lib/core/config.hpp index 1037488ec6..03fb8d2849 100644 --- a/source/lib/core/config.hpp +++ b/source/lib/core/config.hpp @@ -205,7 +205,7 @@ bool& get_use_causal() ROCPROFSYS_HOT; bool -get_use_rocm_smi() ROCPROFSYS_HOT; 
+get_use_amd_smi() ROCPROFSYS_HOT; bool& get_use_sampling() ROCPROFSYS_HOT; diff --git a/source/lib/core/gpu.cpp b/source/lib/core/gpu.cpp index e4b49e4ea1..91b3aad636 100644 --- a/source/lib/core/gpu.cpp +++ b/source/lib/core/gpu.cpp @@ -42,7 +42,7 @@ #include #if ROCPROFSYS_USE_ROCM > 0 -# include +# include # include # include # include @@ -55,40 +55,43 @@ namespace gpu namespace { #if ROCPROFSYS_USE_ROCM > 0 -# define ROCPROFSYS_ROCM_SMI_CALL(ERROR_CODE) \ - ::rocprofsys::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__) +# define ROCPROFSYS_AMD_SMI_CALL(ERROR_CODE) \ + ::rocprofsys::gpu::check_amdsmi_error(ERROR_CODE, __FILE__, __LINE__) void -check_rsmi_error(rsmi_status_t _code, const char* _file, int _line) +check_amdsmi_error(amdsmi_status_t _code, const char* _file, int _line) { - if(_code == RSMI_STATUS_SUCCESS) return; + if(_code == AMDSMI_STATUS_SUCCESS) return; const char* _msg = nullptr; - auto _err = rsmi_status_string(_code, &_msg); - if(_err != RSMI_STATUS_SUCCESS) - ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. " - "Error code %i originated at %s:%i\n", - static_cast(_code), _file, _line); + auto _err = amdsmi_status_code_to_string(_code, &_msg); + if(_err != AMDSMI_STATUS_SUCCESS) + ROCPROFSYS_THROW( + "amdsmi_status_code_to_string failed. No error message available. 
" + "Error code %i originated at %s:%i\n", + static_cast(_code), _file, _line); ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast(_code), _msg); } bool -rsmi_init() +amdsmi_init() { - auto _rsmi_init = []() { + auto _amdsmi_init = []() { try { - ROCPROFSYS_ROCM_SMI_CALL(::rsmi_init(0)); + // Currently, only AMDSMI_INIT_AMD_GPUS is supported + ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS)); + get_processor_handles(); } catch(std::exception& _e) { - ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n", + ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n", _e.what()); return false; } return true; }(); - return _rsmi_init; + return _amdsmi_init; } #endif // ROCPROFSYS_USE_ROCM > 0 @@ -126,7 +129,7 @@ query_rocm_gpu_agents() } // namespace int -rocm_device_count() +device_count() { #if ROCPROFSYS_USE_ROCM > 0 static int _num_devices = query_rocm_gpu_agents(); @@ -136,38 +139,13 @@ rocm_device_count() #endif } -int -rsmi_device_count() +bool +initialize_amdsmi() { #if ROCPROFSYS_USE_ROCM > 0 - if(!rsmi_init()) return 0; - - static auto _num_devices = []() { - uint32_t _v = 0; - try - { - ROCPROFSYS_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v)); - } catch(std::exception& _e) - { - ROCPROFSYS_BASIC_VERBOSE( - 1, "Exception thrown getting the rocm-smi devices: %s\n", _e.what()); - } - return _v; - }(); - - return _num_devices; + return (amdsmi_init()) ? 
true : false; #else - return 0; -#endif -} - -int -device_count() -{ -#if ROCPROFSYS_USE_ROCM > 0 - return rocm_device_count(); -#else - return 0; + return false; #endif } @@ -217,5 +195,73 @@ add_device_metadata() } }); } + +#if ROCPROFSYS_USE_ROCM > 0 +/* + * Required amdsmi methods to get processors and handles + */ + +uint32_t processors::total_processor_count = 0; +std::vector processors::processors_list = {}; + +void +get_processor_handles() +{ + uint32_t socket_count; + uint32_t processor_count; + + // Passing nullptr will return us the number of sockets available for read in this + // system + auto ret = amdsmi_get_socket_handles(&socket_count, nullptr); + if(ret != AMDSMI_STATUS_SUCCESS) + { + return; + } + std::vector sockets(socket_count); + ret = amdsmi_get_socket_handles(&socket_count, sockets.data()); + for(auto& socket : sockets) + { + // Passing nullptr will return us the number of processors available for read for + // this socket + ret = amdsmi_get_processor_handles(socket, &processor_count, nullptr); + if(ret != AMDSMI_STATUS_SUCCESS) + { + return; + } + std::vector all_processors(processor_count); + ret = + amdsmi_get_processor_handles(socket, &processor_count, all_processors.data()); + if(ret != AMDSMI_STATUS_SUCCESS) + { + return; + } + + for(auto& processor : all_processors) + { + processor_type_t processor_type = {}; + ret = amdsmi_get_processor_type(processor, &processor_type); + if(processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) + { + ROCPROFSYS_THROW("Not AMD_GPU device type!"); + return; + } + processors::processors_list.push_back(processor); + } + } + processors::total_processor_count = processors::processors_list.size(); +} +uint32_t +get_processor_count() +{ + return processors::total_processor_count; +} + +amdsmi_processor_handle +get_handle_from_id(uint32_t dev_id) +{ + return processors::processors_list[dev_id]; +} +#endif + } // namespace gpu } // namespace rocprofsys diff --git a/source/lib/core/gpu.hpp b/source/lib/core/gpu.hpp 
index 9085221dd9..42b693304c 100644 --- a/source/lib/core/gpu.hpp +++ b/source/lib/core/gpu.hpp @@ -22,18 +22,41 @@ #pragma once +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + namespace rocprofsys { namespace gpu { +#if ROCPROFSYS_USE_ROCM > 0 +void +get_processor_handles(); + +uint32_t +get_processor_count(); + +amdsmi_processor_handle +get_handle_from_id(uint32_t dev_id); + +struct processors +{ + static uint32_t total_processor_count; + static std::vector processors_list; + +private: + friend void rocprofsys::gpu::get_processor_handles(); + friend uint32_t rocprofsys::gpu::get_processor_count(); + friend amdsmi_processor_handle rocprofsys::gpu::get_handle_from_id(uint32_t dev_id); +}; +#endif + int device_count(); -int -rocm_device_count(); - -int -rsmi_device_count(); +bool +initialize_amdsmi(); void add_device_metadata(); diff --git a/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index cc76d0d5b9..5d5230a22e 100644 --- a/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -52,12 +52,14 @@ extern "C" ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, ROCPROFSYS_CATEGORY_ROCM_COUNTER_COLLECTION, ROCPROFSYS_CATEGORY_ROCM_MARKER_API, - ROCPROFSYS_CATEGORY_ROCM_SMI, - ROCPROFSYS_CATEGORY_ROCM_SMI_BUSY, - ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, - ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, - ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, - ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, + ROCPROFSYS_CATEGORY_AMD_SMI, + ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, + ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, + ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_MM, + ROCPROFSYS_CATEGORY_AMD_SMI_TEMP, + ROCPROFSYS_CATEGORY_AMD_SMI_POWER, + ROCPROFSYS_CATEGORY_AMD_SMI_MEMORY_USAGE, + ROCPROFSYS_CATEGORY_AMD_SMI_VCN_ACTIVITY, ROCPROFSYS_CATEGORY_ROCM_RCCL, ROCPROFSYS_CATEGORY_SAMPLING, ROCPROFSYS_CATEGORY_PTHREAD, diff --git a/source/lib/rocprof-sys/library/CMakeLists.txt 
b/source/lib/rocprof-sys/library/CMakeLists.txt index 5084c43958..104ee1fa66 100644 --- a/source/lib/rocprof-sys/library/CMakeLists.txt +++ b/source/lib/rocprof-sys/library/CMakeLists.txt @@ -22,7 +22,7 @@ set(library_headers ${CMAKE_CURRENT_LIST_DIR}/ptl.hpp ${CMAKE_CURRENT_LIST_DIR}/rcclp.hpp ${CMAKE_CURRENT_LIST_DIR}/rocm.hpp - ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.hpp + ${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp ${CMAKE_CURRENT_LIST_DIR}/runtime.hpp ${CMAKE_CURRENT_LIST_DIR}/sampling.hpp @@ -44,7 +44,7 @@ if(ROCPROFSYS_USE_ROCM) rocprofiler-systems-object-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rocm.cpp ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp - ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp) + ${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp) add_subdirectory(rocprofiler-sdk) endif() @@ -58,7 +58,7 @@ set(ndebug_sources ${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp ${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp - ${CMAKE_CURRENT_LIST_DIR}/rocm_smi.cpp + ${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp) set_source_files_properties( diff --git a/source/lib/rocprof-sys/library/rocm_smi.cpp b/source/lib/rocprof-sys/library/amd_smi.cpp similarity index 70% rename from source/lib/rocprof-sys/library/rocm_smi.cpp rename to source/lib/rocprof-sys/library/amd_smi.cpp index bdf5fc46c6..160104b058 100644 --- a/source/lib/rocprof-sys/library/rocm_smi.cpp +++ b/source/lib/rocprof-sys/library/amd_smi.cpp @@ -30,7 +30,7 @@ # undef NDEBUG #endif -#include "library/rocm_smi.hpp" +#include "library/amd_smi.hpp" #include "core/common.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" @@ -48,8 +48,6 @@ #include #include -#include - #include #include #include @@ -59,22 +57,22 @@ #include #include -#define ROCPROFSYS_ROCM_SMI_CALL(...) \ - ::rocprofsys::rocm_smi::check_error(__FILE__, __LINE__, __VA_ARGS__) +#define ROCPROFSYS_AMD_SMI_CALL(...) 
\ + ::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__) namespace rocprofsys { -namespace rocm_smi +namespace amd_smi { using bundle_t = std::deque; -using sampler_instances = thread_data; +using sampler_instances = thread_data; namespace { auto& get_settings(uint32_t _dev_id) { - static auto _v = std::unordered_map{}; + static auto _v = std::unordered_map{}; return _v[_dev_id]; } @@ -86,22 +84,23 @@ is_initialized() } void -check_error(const char* _file, int _line, rsmi_status_t _code, bool* _option = nullptr) +check_error(const char* _file, int _line, amdsmi_status_t _code, bool* _option = nullptr) { - if(_code == RSMI_STATUS_SUCCESS) + if(_code == AMDSMI_STATUS_SUCCESS) return; - else if(_code == RSMI_STATUS_NOT_SUPPORTED && _option) + else if(_code == AMDSMI_STATUS_NOT_SUPPORTED && _option) { *_option = false; return; } const char* _msg = nullptr; - auto _err = rsmi_status_string(_code, &_msg); - if(_err != RSMI_STATUS_SUCCESS) - ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. " - "Error code %i originated at %s:%i\n", - static_cast(_code), _file, _line); + auto _err = amdsmi_status_code_to_string(_code, &_msg); + if(_err != AMDSMI_STATUS_SUCCESS) + ROCPROFSYS_THROW( + "amdsmi_status_code_to_string failed. No error message available. " + "Error code %i originated at %s:%i\n", + static_cast(_code), _file, _line); ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast(_code), _msg); } @@ -127,7 +126,7 @@ data::sample(uint32_t _dev_id) { auto _ts = tim::get_clock_real_now(); assert(_ts < std::numeric_limits::max()); - rsmi_gpu_metrics_t _gpu_metrics; + amdsmi_gpu_metrics_t _gpu_metrics; auto _state = get_state().load(); @@ -136,47 +135,55 @@ data::sample(uint32_t _dev_id) m_dev_id = _dev_id; m_ts = _ts; -#define ROCPROFSYS_RSMI_GET(OPTION, FUNCTION, ...) \ +#define ROCPROFSYS_AMDSMI_GET(OPTION, FUNCTION, ...) 
\ if(OPTION) \ { \ try \ { \ - ROCPROFSYS_ROCM_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \ + ROCPROFSYS_AMD_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION); \ } catch(std::runtime_error & _e) \ { \ ROCPROFSYS_VERBOSE_F( \ - 0, "[%s] Exception: %s. Disabling future samples from rocm-smi...\n", \ + 0, "[%s] Exception: %s. Disabling future samples from amd-smi...\n", \ #FUNCTION, _e.what()); \ get_state().store(State::Disabled); \ } \ } - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).busy, rsmi_dev_busy_percent_get, _dev_id, - &m_busy_perc); - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).temp, rsmi_dev_temp_metric_get, _dev_id, - RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &m_temp); - RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER; - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).power, rsmi_dev_power_get, _dev_id, - &m_power, &power_type) - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get, - _dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage); - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity, - rsmi_dev_gpu_metrics_info_get, _dev_id, &_gpu_metrics); + amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_dev_id); + + ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity, + sample_handle, &m_busy_perc); + ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric, + sample_handle, AMDSMI_TEMPERATURE_TYPE_JUNCTION, + AMDSMI_TEMP_CURRENT, &m_temp); + ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).power, amdsmi_get_power_info, + sample_handle, &m_power) + ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage, + sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage); + ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).vcn_activity, + amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics); for(const auto& activity : _gpu_metrics.vcn_activity) { if(activity != UINT16_MAX) m_vcn_metrics.push_back(activity); } -#undef ROCPROFSYS_RSMI_GET +#undef ROCPROFSYS_AMDSMI_GET } void 
data::print(std::ostream& _os) const { std::stringstream _ss{}; - _ss << "device: " << m_dev_id << ", busy = " << m_busy_perc << "%, temp = " << m_temp - << ", power = " << m_power << ", memory usage = " << m_mem_usage; + +#if ROCPROFSYS_USE_ROCM > 0 + _ss << "device: " << m_dev_id << ", gpu busy: = " << m_busy_perc.gfx_activity + << "%, mm busy: = " << m_busy_perc.mm_activity + << "%, umc busy: = " << m_busy_perc.umc_activity << "%, temp = " << m_temp + << ", current power = " << m_power.current_socket_power + << ", memory usage = " << m_mem_usage; +#endif _os << _ss.str(); } @@ -209,8 +216,8 @@ sample() { for(auto itr : data::device_list) { - if(rocm_smi::get_state() != State::Active) continue; - ROCPROFSYS_DEBUG_F("Polling rocm-smi for device %u...\n", itr); + if(amd_smi::get_state() != State::Active) continue; + ROCPROFSYS_DEBUG_F("Polling amd-smi for device %u...\n", itr); auto& _data = *_bundle_data.at(itr); if(!_data) continue; _data->emplace_back(data{ itr }); @@ -221,7 +228,7 @@ sample() void set_state(State _v) { - rocm_smi::get_state().store(_v); + amd_smi::get_state().store(_v); } std::vector& @@ -235,15 +242,15 @@ bool data::setup() { perfetto_counter_track::init(); - rocm_smi::set_state(State::PreInit); + amd_smi::set_state(State::PreInit); return true; } bool data::shutdown() { - ROCPROFSYS_DEBUG("Shutting down rocm-smi...\n"); - rocm_smi::set_state(State::Finalized); + ROCPROFSYS_DEBUG("Shutting down amd-smi...\n"); + amd_smi::set_state(State::Finalized); return true; } @@ -261,7 +268,9 @@ data::shutdown() void data::post_process(uint32_t _dev_id) { - using component::sampling_gpu_busy; + using component::sampling_gpu_busy_gfx; + using component::sampling_gpu_busy_mm; + using component::sampling_gpu_busy_umc; using component::sampling_gpu_memory; using component::sampling_gpu_power; using component::sampling_gpu_temp; @@ -269,12 +278,12 @@ data::post_process(uint32_t _dev_id) if(device_count < _dev_id) return; - auto& _rocm_smi_v = 
sampler_instances::get()->at(_dev_id); - auto _rocm_smi = (_rocm_smi_v) ? *_rocm_smi_v : std::deque{}; + auto& _amd_smi_v = sampler_instances::get()->at(_dev_id); + auto _amd_smi = (_amd_smi_v) ? *_amd_smi_v : std::deque{}; const auto& _thread_info = thread_info::get(0, InternalTID); - ROCPROFSYS_VERBOSE(1, "Post-processing %zu rocm-smi samples from device %u\n", - _rocm_smi.size(), _dev_id); + ROCPROFSYS_VERBOSE(1, "Post-processing %zu amd-smi samples from device %u\n", + _amd_smi.size(), _dev_id); ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; @@ -282,18 +291,23 @@ data::post_process(uint32_t _dev_id) auto _settings = get_settings(_dev_id); auto _process_perfetto = [&]() { - auto _idx = std::array{}; + auto _idx = std::array{}; { _idx.fill(_idx.size()); uint64_t nidx = 0; - if(_settings.busy) _idx.at(0) = nidx++; - if(_settings.temp) _idx.at(1) = nidx++; - if(_settings.power) _idx.at(2) = nidx++; - if(_settings.mem_usage) _idx.at(3) = nidx++; - if(_settings.vcn_activity) _idx.at(4) = nidx++; + if(_settings.busy) + { + _idx.at(0) = nidx++; + _idx.at(1) = nidx++; + _idx.at(2) = nidx++; + } + if(_settings.temp) _idx.at(3) = nidx++; + if(_settings.power) _idx.at(4) = nidx++; + if(_settings.mem_usage) _idx.at(5) = nidx++; + if(_settings.vcn_activity) _idx.at(6) = nidx++; } - for(auto& itr : _rocm_smi) + for(auto& itr : _amd_smi) { using counter_track = perfetto_counter_track; if(itr.m_dev_id != _dev_id) continue; @@ -303,11 +317,16 @@ data::post_process(uint32_t _dev_id) return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)"); }; - if(_settings.busy) counter_track::emplace(_dev_id, addendum("Busy"), "%"); + if(_settings.busy) + { + counter_track::emplace(_dev_id, addendum("GFX Busy"), "%"); + counter_track::emplace(_dev_id, addendum("UMC Busy"), "%"); + counter_track::emplace(_dev_id, addendum("MM Busy"), "%"); + } if(_settings.temp) counter_track::emplace(_dev_id, addendum("Temperature"), "deg C"); 
if(_settings.power) - counter_track::emplace(_dev_id, addendum("Power"), "watts"); + counter_track::emplace(_dev_id, addendum("Current Power"), "watts"); if(_settings.mem_usage) counter_track::emplace(_dev_id, addendum("Memory Usage"), "megabytes"); @@ -323,26 +342,34 @@ data::post_process(uint32_t _dev_id) uint64_t _ts = itr.m_ts; if(!_thread_info->is_valid_time(_ts)) continue; - double _busy = itr.m_busy_perc; - double _temp = itr.m_temp / 1.0e3; - double _power = itr.m_power / 1.0e6; - double _usage = itr.m_mem_usage / static_cast(units::megabyte); + double _gfxbusy = itr.m_busy_perc.gfx_activity; + double _umcbusy = itr.m_busy_perc.umc_activity; + double _mmbusy = itr.m_busy_perc.mm_activity; + double _temp = itr.m_temp; + double _power = itr.m_power.current_socket_power; + double _usage = itr.m_mem_usage / static_cast(units::megabyte); if(_settings.busy) - TRACE_COUNTER("device_busy", counter_track::at(_dev_id, _idx.at(0)), _ts, - _busy); + { + TRACE_COUNTER("device_busy_gfx", counter_track::at(_dev_id, _idx.at(0)), + _ts, _gfxbusy); + TRACE_COUNTER("device_busy_umc", counter_track::at(_dev_id, _idx.at(1)), + _ts, _umcbusy); + TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, _idx.at(2)), + _ts, _mmbusy); + } if(_settings.temp) - TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(1)), _ts, + TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(3)), _ts, _temp); if(_settings.power) - TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(2)), _ts, + TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(4)), _ts, _power); if(_settings.mem_usage) TRACE_COUNTER("device_memory_usage", - counter_track::at(_dev_id, _idx.at(3)), _ts, _usage); + counter_track::at(_dev_id, _idx.at(5)), _ts, _usage); if(_settings.vcn_activity) { - uint64_t idx = _idx.at(4); + uint64_t idx = _idx.at(6); for(const auto& temp : itr.m_vcn_metrics) { TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx), @@ -361,14 +388,14 
@@ data::post_process(uint32_t _dev_id) void setup() { - auto_lock_t _lk{ type_mutex() }; + auto_lock_t _lk{ type_mutex() }; - if(is_initialized() || !get_use_rocm_smi()) return; + if(is_initialized() || !get_use_amd_smi()) return; ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - // assign the data value to determined by rocm-smi - data::device_count = device_count(); + if(!gpu::initialize_amdsmi()) return; + data::device_count = gpu::get_processor_count(); auto _devices_v = get_sampling_gpus(); for(auto& itr : _devices_v) @@ -421,14 +448,15 @@ setup() data::device_list = _devices; - auto _metrics = get_setting_value("ROCPROFSYS_ROCM_SMI_METRICS"); + auto _metrics = get_setting_value("ROCPROFSYS_AMD_SMI_METRICS"); try { for(auto itr : _devices) { uint16_t dev_id = 0; - ROCPROFSYS_ROCM_SMI_CALL(rsmi_dev_id_get(itr, &dev_id)); + ROCPROFSYS_AMD_SMI_CALL( + amdsmi_get_gpu_id(gpu::get_handle_from_id(itr), &dev_id)); // dev_id holds the device ID of device i, upon a successful call if(_metrics && !_metrics->empty()) @@ -447,10 +475,10 @@ setup() { auto iitr = supported.find(metric); if(iitr == supported.end()) - ROCPROFSYS_FAIL_F("unsupported rocm-smi metric: %s\n", + ROCPROFSYS_FAIL_F("unsupported amd-smi metric: %s\n", metric.c_str()); - ROCPROFSYS_VERBOSE_F(1, "Enabling rocm-smi metric '%s'\n", + ROCPROFSYS_VERBOSE_F(1, "Enabling amd-smi metric '%s'\n", metric.c_str()); iitr->second = true; } @@ -462,7 +490,7 @@ setup() data::setup(); } catch(std::runtime_error& _e) { - ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing rocm-smi: %s\n", + ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n", _e.what()); data::device_list = {}; } @@ -471,7 +499,7 @@ setup() void shutdown() { - auto_lock_t _lk{ type_mutex() }; + auto_lock_t _lk{ type_mutex() }; if(!is_initialized()) return; @@ -479,11 +507,11 @@ shutdown() { if(data::shutdown()) { - ROCPROFSYS_ROCM_SMI_CALL(rsmi_shut_down()); + ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down()); } } 
catch(std::runtime_error& _e) { - ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down rocm-smi: %s\n", + ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down amd-smi: %s\n", _e.what()); } @@ -500,14 +528,22 @@ post_process() uint32_t device_count() { - return gpu::rsmi_device_count(); + return gpu::device_count(); } -} // namespace rocm_smi +} // namespace amd_smi } // namespace rocprofsys ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( - TIMEMORY_ESC(data_tracker), true, - double) + TIMEMORY_ESC(data_tracker), + true, double) + +ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), + true, double) + +ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), + true, double) ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, diff --git a/source/lib/rocprof-sys/library/rocm_smi.hpp b/source/lib/rocprof-sys/library/amd_smi.hpp similarity index 84% rename from source/lib/rocprof-sys/library/rocm_smi.hpp rename to source/lib/rocprof-sys/library/amd_smi.hpp index 133e60fe51..c020e8b506 100644 --- a/source/lib/rocprof-sys/library/rocm_smi.hpp +++ b/source/lib/rocprof-sys/library/amd_smi.hpp @@ -34,6 +34,10 @@ #include "core/state.hpp" #include "library/thread_data.hpp" +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + #include #include #include @@ -47,7 +51,7 @@ namespace rocprofsys { -namespace rocm_smi +namespace amd_smi { void setup(); @@ -66,9 +70,6 @@ post_process(); void set_state(State); -uint32_t -device_count(); - struct settings { bool busy = true; @@ -86,7 +87,7 @@ struct data using promise_t = std::promise; using timestamp_t = int64_t; - using power_t = uint64_t; + using power_t = uint32_t; using busy_perc_t = uint32_t; using mem_usage_t = uint64_t; using temp_t = int64_t; @@ -102,11 +103,16 @@ struct data uint32_t m_dev_id = std::numeric_limits::max(); timestamp_t m_ts = 0; - busy_perc_t m_busy_perc = 0; temp_t m_temp = 0; - power_t m_power = 0; mem_usage_t m_mem_usage = 0; std::vector m_vcn_metrics 
= {}; +#if ROCPROFSYS_USE_ROCM > 0 + amdsmi_engine_usage_t m_busy_perc = {}; + amdsmi_power_info_t m_power = {}; +#else + std::vector m_busy_perc = {}; + std::vector m_power = {}; +#endif friend std::ostream& operator<<(std::ostream& _os, const data& _v) { @@ -115,11 +121,11 @@ struct data } private: - friend void rocprofsys::rocm_smi::setup(); - friend void rocprofsys::rocm_smi::config(); - friend void rocprofsys::rocm_smi::sample(); - friend void rocprofsys::rocm_smi::shutdown(); - friend void rocprofsys::rocm_smi::post_process(); + friend void rocprofsys::amd_smi::setup(); + friend void rocprofsys::amd_smi::config(); + friend void rocprofsys::amd_smi::sample(); + friend void rocprofsys::amd_smi::shutdown(); + friend void rocprofsys::amd_smi::post_process(); static size_t device_count; static std::set device_list; @@ -154,7 +160,7 @@ post_process() inline void set_state(State) {} #endif -} // namespace rocm_smi +} // namespace amd_smi } // namespace rocprofsys #if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 @@ -166,8 +172,16 @@ inline void set_state(State) {} # include ROCPROFSYS_DECLARE_EXTERN_COMPONENT( - TIMEMORY_ESC(data_tracker), true, - double) + TIMEMORY_ESC(data_tracker), + true, double) + +ROCPROFSYS_DECLARE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), + true, double) + +ROCPROFSYS_DECLARE_EXTERN_COMPONENT( + TIMEMORY_ESC(data_tracker), + true, double) ROCPROFSYS_DECLARE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, diff --git a/source/lib/rocprof-sys/library/process_sampler.cpp b/source/lib/rocprof-sys/library/process_sampler.cpp index c14846af83..66c7d0495c 100644 --- a/source/lib/rocprof-sys/library/process_sampler.cpp +++ b/source/lib/rocprof-sys/library/process_sampler.cpp @@ -23,8 +23,8 @@ #include "library/process_sampler.hpp" #include "core/config.hpp" #include "core/debug.hpp" +#include "library/amd_smi.hpp" #include "library/cpu_freq.hpp" -#include "library/rocm_smi.hpp" #include "library/runtime.hpp" #include @@ -140,14 
+140,14 @@ sampler::setup() // shutdown if already running shutdown(); - if(get_use_rocm_smi()) + if(get_use_amd_smi()) { - auto& _rocm_smi = instances.emplace_back(std::make_unique()); - _rocm_smi->setup = []() { rocm_smi::setup(); }; - _rocm_smi->shutdown = []() { rocm_smi::shutdown(); }; - _rocm_smi->post_process = []() { rocm_smi::post_process(); }; - _rocm_smi->config = []() { rocm_smi::config(); }; - _rocm_smi->sample = []() { rocm_smi::sample(); }; + auto& _amd_smi = instances.emplace_back(std::make_unique()); + _amd_smi->setup = []() { amd_smi::setup(); }; + _amd_smi->shutdown = []() { amd_smi::shutdown(); }; + _amd_smi->post_process = []() { amd_smi::post_process(); }; + _amd_smi->config = []() { amd_smi::config(); }; + _amd_smi->sample = []() { amd_smi::sample(); }; } auto& _cpu_freq = instances.emplace_back(std::make_unique()); diff --git a/source/lib/rocprof-sys/library/rocm.cpp b/source/lib/rocprof-sys/library/rocm.cpp index ca390a173c..f610f9bf71 100644 --- a/source/lib/rocprof-sys/library/rocm.cpp +++ b/source/lib/rocprof-sys/library/rocm.cpp @@ -25,7 +25,7 @@ #include "core/debug.hpp" #include "core/dynamic_library.hpp" #include "core/gpu.hpp" -#include "library/rocm_smi.hpp" +#include "library/amd_smi.hpp" #include "library/rocprofiler-sdk.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" diff --git a/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index cb1d5627a4..281c22d22c 100644 --- a/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -30,8 +30,8 @@ #include "core/perfetto.hpp" #include "core/rocprofiler-sdk.hpp" #include "core/state.hpp" +#include "library/amd_smi.hpp" #include "library/components/category_region.hpp" -#include "library/rocm_smi.hpp" #include "library/rocprofiler-sdk/counters.hpp" #include "library/rocprofiler-sdk/fwd.hpp" #include "library/thread_info.hpp" @@ -1116,10 +1116,10 @@ 
tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) gpu::add_device_metadata(); - if(config::get_use_process_sampling() && config::get_use_rocm_smi()) + if(config::get_use_process_sampling() && config::get_use_amd_smi()) { - ROCPROFSYS_VERBOSE_F(1, "Setting rocm_smi state to active...\n"); - rocm_smi::set_state(State::Active); + ROCPROFSYS_VERBOSE_F(1, "Setting amd_smi state to active...\n"); + amd_smi::set_state(State::Active); } start(); @@ -1137,8 +1137,8 @@ tool_fini(void* callback_data) flush(); stop(); - if(config::get_use_process_sampling() && config::get_use_rocm_smi()) - rocm_smi::shutdown(); + if(config::get_use_process_sampling() && config::get_use_amd_smi()) + amd_smi::shutdown(); if(get_counter_storage()) { diff --git a/source/lib/rocprof-sys/library/sampling.cpp b/source/lib/rocprof-sys/library/sampling.cpp index 0701e5fe42..67d20af238 100644 --- a/source/lib/rocprof-sys/library/sampling.cpp +++ b/source/lib/rocprof-sys/library/sampling.cpp @@ -125,7 +125,9 @@ using component::backtrace_timestamp; using component::backtrace_wall_clock; // NOLINT using component::callchain; using component::sampling_cpu_clock; -using component::sampling_gpu_busy; +using component::sampling_gpu_busy_gfx; +using component::sampling_gpu_busy_mm; +using component::sampling_gpu_busy_umc; using component::sampling_gpu_memory; using component::sampling_gpu_power; using component::sampling_gpu_temp; @@ -1551,11 +1553,25 @@ struct sampling_initialization sampling_percent::description() = "Percentage of samples"; sampling_percent::set_precision(3); - sampling_gpu_busy::label() = "sampling_gpu_busy_percent"; - sampling_gpu_busy::description() = "Utilization of GPU(s)"; - sampling_gpu_busy::set_precision(0); - sampling_gpu_busy::set_format_flags(sampling_gpu_busy::get_format_flags() & - std::ios_base::showpoint); + sampling_gpu_busy_gfx::label() = "sampling_gpu_busy_gfx_percent"; + sampling_gpu_busy_gfx::description() = "Utilization of GFX engines on GPU(s)"; + 
sampling_gpu_busy_gfx::set_precision(0); + sampling_gpu_busy_gfx::set_format_flags( + sampling_gpu_busy_gfx::get_format_flags() & std::ios_base::showpoint); + + sampling_gpu_busy_umc::label() = "sampling_gpu_busy_umc_percent"; + sampling_gpu_busy_umc::description() = + "Utilization of memory controller on GPU(s)"; + sampling_gpu_busy_umc::set_precision(0); + sampling_gpu_busy_umc::set_format_flags( + sampling_gpu_busy_umc::get_format_flags() & std::ios_base::showpoint); + + sampling_gpu_busy_mm::label() = "sampling_gpu_busy_mm_percent"; + sampling_gpu_busy_mm::description() = + "Utilization of multimedia engines on GPU(s)"; + sampling_gpu_busy_mm::set_precision(0); + sampling_gpu_busy_mm::set_format_flags(sampling_gpu_busy_mm::get_format_flags() & + std::ios_base::showpoint); sampling_gpu_memory::label() = "sampling_gpu_memory_usage"; sampling_gpu_memory::description() = "Memory usage of GPU(s)"; diff --git a/tests/rocprof-sys-testing.cmake b/tests/rocprof-sys-testing.cmake index fafafec9dc..b5c25a7931 100644 --- a/tests/rocprof-sys-testing.cmake +++ b/tests/rocprof-sys-testing.cmake @@ -229,19 +229,19 @@ set(_VALID_GPU OFF) if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU)) set(_VALID_GPU ON) find_program( - ROCPROFSYS_ROCM_SMI_EXE - NAMES rocm-smi + ROCPROFSYS_AMD_SMI_EXE + NAMES amd-smi HINTS ${ROCmVersion_DIR} PATHS ${ROCmVersion_DIR} PATH_SUFFIXES bin) - if(ROCPROFSYS_ROCM_SMI_EXE) + if(ROCPROFSYS_AMD_SMI_EXE) execute_process( - COMMAND ${ROCPROFSYS_ROCM_SMI_EXE} - OUTPUT_VARIABLE _RSMI_OUT - ERROR_VARIABLE _RSMI_ERR - RESULT_VARIABLE _RSMI_RET) - if(_RSMI_RET EQUAL 0) - if("${_RSMI_OUTPUT}" MATCHES "ERROR" OR "${_RSMI_ERR}" MATCHES "ERROR") + COMMAND ${ROCPROFSYS_AMD_SMI_EXE} + OUTPUT_VARIABLE _AMDSMI_OUT + ERROR_VARIABLE _AMDSMI_ERR + RESULT_VARIABLE _AMDSMI_RET) + if(_AMDSMI_RET EQUAL 0) + if("${_AMDSMI_OUT}" MATCHES "ERROR" OR "${_AMDSMI_ERR}" MATCHES "ERROR") set(_VALID_GPU OFF) endif() else() @@ -250,7 +250,7 @@
if(ROCPROFSYS_USE_ROCM AND (NOT DEFINED ROCPROFSYS_CI_GPU OR ROCPROFSYS_CI_GPU)) endif() if(NOT _VALID_GPU) rocprofiler_systems_message( - AUTHOR_WARNING "rocm-smi did not successfully run. Disabling GPU tests...") + AUTHOR_WARNING "amd-smi did not successfully run. Disabling GPU tests...") endif() endif() @@ -433,7 +433,7 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST) endif() if(NOT "ROCPROFSYS_USE_ROCM=OFF" IN_LIST TEST_ENVIRONMENT) - list(APPEND TEST_LABELS "rocm-smi") + list(APPEND TEST_LABELS "amd-smi") endif() endif() @@ -442,9 +442,9 @@ function(ROCPROFILER_SYSTEMS_ADD_TEST) list(APPEND TEST_LABELS "rocm") endif() - if("ROCPROFSYS_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm-smi" IN_LIST - TEST_ENVIRONMENT) - list(APPEND TEST_LABELS "rocm-smi") + if("ROCPROFSYS_USE_AMD_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "amd-smi" IN_LIST + TEST_LABELS) + list(APPEND TEST_LABELS "amd-smi") endif() if(TARGET ${TEST_TARGET})