diff --git a/source/lib/core/CMakeLists.txt b/source/lib/core/CMakeLists.txt index a0363a16f6..5c0cbad990 100644 --- a/source/lib/core/CMakeLists.txt +++ b/source/lib/core/CMakeLists.txt @@ -15,6 +15,7 @@ set(core_sources ${CMAKE_CURRENT_LIST_DIR}/perf.cpp ${CMAKE_CURRENT_LIST_DIR}/perfetto.cpp ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp + ${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp ${CMAKE_CURRENT_LIST_DIR}/state.cpp ${CMAKE_CURRENT_LIST_DIR}/timemory.cpp ${CMAKE_CURRENT_LIST_DIR}/utility.cpp) @@ -37,6 +38,7 @@ set(core_headers ${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp ${CMAKE_CURRENT_LIST_DIR}/redirect.hpp ${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp + ${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp ${CMAKE_CURRENT_LIST_DIR}/state.hpp ${CMAKE_CURRENT_LIST_DIR}/timemory.hpp ${CMAKE_CURRENT_LIST_DIR}/utility.hpp) diff --git a/source/lib/core/amd_smi.cpp b/source/lib/core/amd_smi.cpp new file mode 100644 index 0000000000..842c7f53e8 --- /dev/null +++ b/source/lib/core/amd_smi.cpp @@ -0,0 +1,112 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "core/amd_smi.hpp" +#include "core/common.hpp" +#include "core/config.hpp" +#include "core/debug.hpp" +#include "core/gpu.hpp" +#include "timemory.hpp" + +#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 +namespace rocprofsys +{ +namespace amd_smi +{ +namespace +{ +std::string +get_setting_name(std::string _v) +{ + constexpr auto _prefix = tim::string_view_t{ "rocprofsys_" }; + for(auto& itr : _v) + itr = tolower(itr); + auto _pos = _v.find(_prefix); + if(_pos == 0) return _v.substr(_prefix.length()); + return _v; +} + +# define ROCPROFSYS_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE, ...) \ + [&]() { \ + auto _ret = _config->insert( \ + ENV_NAME, get_setting_name(ENV_NAME), DESCRIPTION, \ + TYPE{ INITIAL_VALUE }, \ + std::set{ "custom", "rocprofsys", "librocprof-sys", \ + __VA_ARGS__ }); \ + if(!_ret.second) \ + { \ + ROCPROFSYS_PRINT("Warning! Duplicate setting: %s / %s\n", \ + get_setting_name(ENV_NAME).c_str(), ENV_NAME); \ + } \ + return _config->find(ENV_NAME)->second; \ + }() +} // namespace + +void +config_settings(const std::shared_ptr& _config) +{ + if(!get_use_amd_smi() || !gpu::initialize_amdsmi()) return; + + std::string default_metrics = "busy, temp, power, mem_usage"; + // No distinction between busy and activity shown in description + std::string jpeg_activity_support = ""; + std::string vcn_activity_support = ""; + + size_t device_count = gpu::get_processor_count(); + for(size_t i = 0; i < device_count; i++) + { + if(gpu::is_vcn_activity_supported(i) || gpu::is_vcn_busy_supported(i)) + { + vcn_activity_support += ", vcn_activity"; + break; + } + } + for(size_t i = 0; i < device_count; i++) + { + if(gpu::is_jpeg_activity_supported(i) || gpu::is_jpeg_busy_supported(i)) + { + jpeg_activity_support += ", jpeg_activity"; + break; + } + } + + ROCPROFSYS_CONFIG_SETTING( + std::string, "ROCPROFSYS_AMD_SMI_METRICS", + "amd-smi metrics to collect: " + default_metrics + jpeg_activity_support + + vcn_activity_support + ". " + + "An empty value implies 'all' and 'none' suppresses all.", + "busy, temp, power, mem_usage", "backend", "amd_smi", "rocm", "process_sampling"); +} +} // namespace amd_smi +} // namespace rocprofsys + +#else +namespace rocprofsys +{ +namespace amd_smi +{ +void +config_settings(const std::shared_ptr&) +{} +} // namespace amd_smi +} // namespace rocprofsys +#endif diff --git a/source/lib/core/amd_smi.hpp b/source/lib/core/amd_smi.hpp new file mode 100644 index 0000000000..0f95e82a56 --- /dev/null +++ b/source/lib/core/amd_smi.hpp @@ -0,0 +1,36 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "core/timemory.hpp" + +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + +namespace rocprofsys +{ +namespace amd_smi +{ +void +config_settings(const std::shared_ptr&); +} // namespace amd_smi +} // namespace rocprofsys diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index 2bc3a77a3b..4eabcd0157 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "config.hpp" +#include "amd_smi.hpp" #include "common/defines.h" #include "common/static_object.hpp" #include "constraint.hpp" @@ -626,13 +627,7 @@ configure_settings(bool _init) ->set_choices(perf::get_config_choices()); rocprofiler_sdk::config_settings(_config); - - ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_AMD_SMI_METRICS", - "amd-smi metrics to collect: busy, temp, power, " - "vcn_activity, jpeg_activity, mem_usage. " - "An empty value implies 'all' and 'none' suppresses all.", - "busy, temp, power, mem_usage", "backend", "amd_smi", - "rocm", "process_sampling"); + amd_smi::config_settings(_config); ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB", "Hint for shared-memory buffer size in perfetto (in KB)", diff --git a/source/lib/core/gpu.cpp b/source/lib/core/gpu.cpp index 91b3aad636..c46a9db40c 100644 --- a/source/lib/core/gpu.cpp +++ b/source/lib/core/gpu.cpp @@ -201,8 +201,12 @@ add_device_metadata() * Required amdsmi methods to get processors and handles */ -uint32_t processors::total_processor_count = 0; -std::vector processors::processors_list = {}; +uint32_t processors::total_processor_count = 0; +std::vector processors::processors_list = {}; +std::vector processors::vcn_activity_supported = {}; +std::vector processors::jpeg_activity_supported = {}; +std::vector processors::vcn_busy_supported = {}; +std::vector processors::jpeg_busy_supported = {}; void get_processor_handles() @@ -246,10 +250,87 @@ get_processor_handles() return; } processors::processors_list.push_back(processor); + + amdsmi_gpu_metrics_t gpu_metrics; + bool vcn_supported = false; + bool jpeg_supported = false; + bool v_busy_supported = false; + bool j_busy_supported = false; + ret = amdsmi_get_gpu_metrics_info(processor, &gpu_metrics); + if(ret == AMDSMI_STATUS_SUCCESS) + { + for(const auto& vcn_activity : gpu_metrics.vcn_activity) + { + if(vcn_activity != UINT16_MAX) + { + vcn_supported = true; + break; + } + } + for(const auto& jpeg_activity : gpu_metrics.jpeg_activity) + { + if(jpeg_activity != UINT16_MAX) + { + jpeg_supported = true; + break; + } + } + for(const auto& xcp : gpu_metrics.xcp_stats) + { + if(!v_busy_supported) + { + v_busy_supported = + std::any_of(std::begin(xcp.vcn_busy), std::end(xcp.vcn_busy), + [](uint16_t val) { return val != UINT16_MAX; }); + } + + if(!j_busy_supported) + { + j_busy_supported = std::any_of( + std::begin(xcp.jpeg_busy), std::end(xcp.jpeg_busy), + [](uint16_t val) { return val != UINT16_MAX; }); + } + + if(v_busy_supported && j_busy_supported) break; + } + } + processors::vcn_activity_supported.push_back(vcn_supported); + processors::jpeg_activity_supported.push_back(jpeg_supported); + processors::vcn_busy_supported.push_back(v_busy_supported); + processors::jpeg_busy_supported.push_back(j_busy_supported); } } processors::total_processor_count = processors::processors_list.size(); } + +bool +is_vcn_activity_supported(uint32_t dev_id) +{ + if(dev_id >= processors::vcn_activity_supported.size()) return false; + return processors::vcn_activity_supported[dev_id]; +} + +bool +is_jpeg_activity_supported(uint32_t dev_id) +{ + if(dev_id >= processors::jpeg_activity_supported.size()) return false; + return processors::jpeg_activity_supported[dev_id]; +} + +bool +is_vcn_busy_supported(uint32_t dev_id) +{ + if(dev_id >= processors::vcn_busy_supported.size()) return false; + return processors::vcn_busy_supported[dev_id]; +} + +bool +is_jpeg_busy_supported(uint32_t dev_id) +{ + if(dev_id >= processors::jpeg_busy_supported.size()) return false; + return processors::jpeg_busy_supported[dev_id]; +} + uint32_t get_processor_count() { diff --git a/source/lib/core/gpu.hpp b/source/lib/core/gpu.hpp index 42b693304c..f883630c2b 100644 --- a/source/lib/core/gpu.hpp +++ b/source/lib/core/gpu.hpp @@ -40,15 +40,35 @@ get_processor_count(); amdsmi_processor_handle get_handle_from_id(uint32_t dev_id); +bool +is_vcn_activity_supported(uint32_t dev_id); + +bool +is_jpeg_activity_supported(uint32_t dev_id); + +bool +is_vcn_busy_supported(uint32_t dev_id); + +bool +is_jpeg_busy_supported(uint32_t dev_id); + struct processors { static uint32_t total_processor_count; static std::vector processors_list; + static std::vector vcn_activity_supported; + static std::vector jpeg_activity_supported; + static std::vector vcn_busy_supported; + static std::vector jpeg_busy_supported; private: friend void rocprofsys::gpu::get_processor_handles(); friend uint32_t rocprofsys::gpu::get_processor_count(); friend amdsmi_processor_handle rocprofsys::gpu::get_handle_from_id(uint32_t dev_id); + friend bool rocprofsys::gpu::is_vcn_activity_supported(uint32_t dev_id); + friend bool rocprofsys::gpu::is_jpeg_activity_supported(uint32_t dev_id); + friend bool rocprofsys::gpu::is_vcn_busy_supported(uint32_t dev_id); + friend bool rocprofsys::gpu::is_jpeg_busy_supported(uint32_t dev_id); }; #endif