Files
marantic-amd 7af2dba741 [rocprof-sys] Align rocpd symbols of the same counter types (#2675)
## Motivation

In order for Optiq to be able to detect that counter tracks are of the same type, we aligned `info_pmc` symbol naming across the tracks of the same type. Being able to know this will be useful for grouping and categorizing similar types of counter tracks and for setting up a consistent y-axis scale when plotting the values on charts.

## Technical Details

Replace unique and/or ordered symbol names with a counter-common symbol name that is the same for all counters of the same type; the counter track name remains the unique identifier for each counter track. For example, the "symbol" field was "JpegAct_0" but is now "JpegAct".
2026-01-23 00:36:08 -05:00

1351 строка
51 KiB
C++

// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "core/agent.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/cacheable.hpp"
#include "core/trace_cache/sample_type.hpp"
#include <amd_smi/amdsmi.h>
#include <cstdint>
#if defined(NDEBUG)
# undef NDEBUG
#endif
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/gpu.hpp"
#include "core/gpu_metrics.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/state.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "library/amd_smi.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"
#include <timemory/backends/threading.hpp>
#include <timemory/components/timing/backends.hpp>
#include <timemory/mpl/type_traits.hpp>
#include <timemory/units.hpp>
#include <timemory/utility/delimit.hpp>
#include <timemory/utility/locking.hpp>
#include "logger/debug.hpp"
#include <cassert>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <sys/resource.h>
// Forwards its arguments (an AMD SMI status code and, optionally, a pointer to
// an "option" flag) to check_error(), prepending the call site's file and line.
#define ROCPROFSYS_AMD_SMI_CALL(...) \
::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
namespace rocprofsys
{
namespace amd_smi
{
// Container of collected samples (one deque of amd_smi::data).
using bundle_t = std::deque<data>;
// thread_data storage of sample bundles; in this file it is indexed by device id.
using sampler_instances = thread_data<bundle_t, category::amd_smi>;
namespace
{
// Adds the AMD SMI category name to the trace-cache metadata registry's strings.
void
metadata_initialize_category()
{
    auto&& registry = trace_cache::get_metadata_registry();
    registry.add_string(trait::name<category::amd_smi>::value);
}
// Registers one metadata track per AMD SMI metric for the given GPU.
//
// Registration order: busy (gfx/umc/mm), power, temperature, memory usage,
// VCN engines, JPEG engines, XGMI (link width/speed, per-link read/write) and
// finally PCIe. All tracks are registered without a thread id and with "{}" as
// the trailing field.
void
metadata_initialize_smi_tracks(size_t gpu_id)
{
    auto&&     registry  = trace_cache::get_metadata_registry();
    const auto no_thread = std::nullopt;

    // Scalar per-device tracks.
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_gfx_busy>(gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_umc_busy>(gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_mm_busy>(gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_power>(gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_temp>(gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_memory_usage>(
              gpu_id),
          no_thread, "{}" });

    // One track per VCN engine, optionally qualified by an XCP index.
    const auto register_vcn = [&](std::optional<int> xcp_idx) {
        for(auto engine = 0; engine < AMDSMI_MAX_NUM_VCN; ++engine)
        {
            auto track_name = trace_cache::info::annotate_with_device_id<
                category::amd_smi_vcn_activity>(gpu_id, xcp_idx, engine);
            registry.add_track({ track_name.c_str(), no_thread, "{}" });
        }
    };

    // One track per JPEG engine, optionally qualified by an XCP index.
    const auto register_jpeg = [&](std::optional<int> xcp_idx) {
        for(auto engine = 0; engine < AMDSMI_MAX_NUM_JPEG; ++engine)
        {
            auto track_name = trace_cache::info::annotate_with_device_id<
                category::amd_smi_jpeg_activity>(gpu_id, xcp_idx, engine);
            registry.add_track({ track_name.c_str(), no_thread, "{}" });
        }
    };

    // Device-level metrics are registered once (no XCP index); otherwise the
    // registration is repeated for every possible XCP.
    const auto for_each_scope = [](bool device_level_only, const auto& register_fn) {
        if(device_level_only)
        {
            register_fn(std::nullopt);
            return;
        }
        for(int xcp = 0; xcp < AMDSMI_MAX_NUM_XCP; ++xcp)
            register_fn(xcp);
    };

    for_each_scope(gpu::vcn_is_device_level_only(gpu_id), register_vcn);
    for_each_scope(gpu::jpeg_is_device_level_only(gpu_id), register_jpeg);

    // XGMI: two scalar tracks followed by a read/write pair for every link.
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_xgmi_link_width>(
              gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_xgmi_link_speed>(
              gpu_id),
          no_thread, "{}" });
    for(size_t link = 0; link < AMDSMI_MAX_NUM_XGMI_LINKS; ++link)
    {
        auto read_track = trace_cache::info::annotate_with_device_id<
            category::amd_smi_xgmi_read_data>(gpu_id, std::nullopt, link);
        registry.add_track({ read_track.c_str(), no_thread, "{}" });

        auto write_track = trace_cache::info::annotate_with_device_id<
            category::amd_smi_xgmi_write_data>(gpu_id, std::nullopt, link);
        registry.add_track({ write_track.c_str(), no_thread, "{}" });
    }

    // PCIe: four scalar tracks.
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_pcie_link_width>(
              gpu_id),
          no_thread, "{}" });
    registry.add_track(
        { trace_cache::info::annotate_with_device_id<category::amd_smi_pcie_link_speed>(
              gpu_id),
          no_thread, "{}" });
    registry.add_track({ trace_cache::info::annotate_with_device_id<
                             category::amd_smi_pcie_bandwidth_acc>(gpu_id),
                         no_thread, "{}" });
    registry.add_track({ trace_cache::info::annotate_with_device_id<
                             category::amd_smi_pcie_bandwidth_inst>(gpu_id),
                         no_thread, "{}" });
}
// Registers the PMC (counter) description of every AMD SMI metric for one GPU.
//
// Each entry pairs a counter name (unique per track; VCN/JPEG/XGMI names carry
// an "_<xcp>" and/or "_<index>" suffix) with a symbol that is shared by all
// counters of the same type (e.g. "VcnAct", "JpegAct", "XgmiRead").
// The registration order mirrors the track registration order.
void
metadata_initialize_smi_pmc(size_t gpu_id)
{
    // TODO: Find the proper values for a following definitions
    size_t      EVENT_CODE       = 0;
    size_t      INSTANCE_ID      = 0;
    const char* LONG_DESCRIPTION = "";
    const char* COMPONENT        = "";
    const char* BLOCK            = "";
    const char* EXPRESSION       = "";
    const char* CELSIUS_DEGREES  = "\u00B0C";
    // NOTE(review): `ni` is not used below. The call is kept because
    // node_info::get_instance() may initialize state on first use -- confirm
    // and drop it if that is not the case.
    auto        ni          = node_info::get_instance();
    const char* TARGET_ARCH = "GPU";

    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_gfx_busy>::value, "GFX Busy",
          trait::name<category::amd_smi_gfx_busy>::description, LONG_DESCRIPTION,
          COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK,
          EXPRESSION, 0, 0, "{}" });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_umc_busy>::value, "UMC Busy",
          trait::name<category::amd_smi_umc_busy>::description, LONG_DESCRIPTION,
          COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK,
          EXPRESSION, 0, 0, "{}" });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_mm_busy>::value, "MM Busy",
          trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION,
          COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK,
          EXPRESSION, 0, 0, "{}" });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_temp>::value, "Temp",
          trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
          CELSIUS_DEGREES, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_power>::value, "Pow",
          trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
          "W", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
          trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
          COMPONENT, tim::units::mem_repr(tim::units::megabyte),
          rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });

    // VCN: name is "<category>[_<xcp>]_<engine>", symbol is always "VcnAct".
    auto add_vcn_pmc = [&](std::optional<int> xcp_idx) {
        for(int clk = 0; clk < AMDSMI_MAX_NUM_VCN; ++clk)
        {
            std::stringstream name_ss;
            name_ss << trait::name<category::amd_smi_vcn_activity>::value;
            if(xcp_idx) name_ss << "_" << *xcp_idx;
            name_ss << "_" << clk;
            trace_cache::get_metadata_registry().add_pmc_info(
                { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
                  name_ss.str(), "VcnAct",
                  trait::name<category::amd_smi_vcn_activity>::description,
                  LONG_DESCRIPTION, COMPONENT, trace_cache::PERCENTAGE,
                  rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
        }
    };

    // JPEG: name is "<category>[_<xcp>]_<engine>", symbol is always "JpegAct".
    auto add_jpeg_pmc = [&](std::optional<int> xcp_idx) {
        for(auto clk = 0; clk < AMDSMI_MAX_NUM_JPEG; ++clk)
        {
            std::stringstream name_ss;
            name_ss << trait::name<category::amd_smi_jpeg_activity>::value;
            if(xcp_idx) name_ss << "_" << *xcp_idx;
            // Streaming the integer directly yields the same text as
            // std::to_string() without the temporary string.
            name_ss << "_" << clk;
            trace_cache::get_metadata_registry().add_pmc_info(
                { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
                  name_ss.str(), "JpegAct",
                  trait::name<category::amd_smi_jpeg_activity>::description,
                  LONG_DESCRIPTION, COMPONENT, trace_cache::PERCENTAGE,
                  rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
        }
    };

    if(gpu::vcn_is_device_level_only(gpu_id))
    {
        add_vcn_pmc(std::nullopt);
    }
    else
    {
        for(int xcp = 0; xcp < AMDSMI_MAX_NUM_XCP; ++xcp)
        {
            add_vcn_pmc(xcp);
        }
    }
    if(gpu::jpeg_is_device_level_only(gpu_id))
    {
        add_jpeg_pmc(std::nullopt);
    }
    else
    {
        for(auto xcp = 0; xcp < AMDSMI_MAX_NUM_XCP; ++xcp)
        {
            add_jpeg_pmc(xcp);
        }
    }

    // Add XGMI PMC info using specific categories for each metric type
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_xgmi_link_width>::value, "XgmiLinkWidth",
          trait::name<category::amd_smi_xgmi_link_width>::description, LONG_DESCRIPTION,
          COMPONENT, "bits", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0,
          0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_xgmi_link_speed>::value, "XgmiLinkSpeed",
          trait::name<category::amd_smi_xgmi_link_speed>::description, LONG_DESCRIPTION,
          COMPONENT, "GT/s", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0,
          0 });
    for(size_t i = 0; i < AMDSMI_MAX_NUM_XGMI_LINKS; ++i)
    {
        // Only the counter name is link-specific; the symbol is a fixed literal,
        // so no separate symbol stream is required.
        std::stringstream read_name_ss;
        read_name_ss << trait::name<category::amd_smi_xgmi_read_data>::value << "_" << i;
        trace_cache::get_metadata_registry().add_pmc_info(
            { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
              read_name_ss.str(), "XgmiRead",
              trait::name<category::amd_smi_xgmi_read_data>::description,
              LONG_DESCRIPTION, COMPONENT, "KB", rocprofsys::trace_cache::ABSOLUTE, BLOCK,
              EXPRESSION, 0, 0 });

        std::stringstream write_name_ss;
        write_name_ss << trait::name<category::amd_smi_xgmi_write_data>::value << "_"
                      << i;
        trace_cache::get_metadata_registry().add_pmc_info(
            { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
              write_name_ss.str(), "XgmiWrite",
              trait::name<category::amd_smi_xgmi_write_data>::description,
              LONG_DESCRIPTION, COMPONENT, "KB", rocprofsys::trace_cache::ABSOLUTE, BLOCK,
              EXPRESSION, 0, 0 });
    }

    // Add PCIe PMC info using specific categories for each metric
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_pcie_link_width>::value, "PcieLinkWidth",
          trait::name<category::amd_smi_pcie_link_width>::description, LONG_DESCRIPTION,
          COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_pcie_link_speed>::value, "PcieLinkSpeed",
          trait::name<category::amd_smi_pcie_link_speed>::description, LONG_DESCRIPTION,
          COMPONENT, "GT/s", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0,
          0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_pcie_bandwidth_acc>::value, "PcieBwAcc",
          trait::name<category::amd_smi_pcie_bandwidth_acc>::description,
          LONG_DESCRIPTION, COMPONENT, "MB", rocprofsys::trace_cache::ABSOLUTE, BLOCK,
          EXPRESSION, 0, 0 });
    trace_cache::get_metadata_registry().add_pmc_info(
        { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
          trait::name<category::amd_smi_pcie_bandwidth_inst>::value, "PcieBwInst",
          trait::name<category::amd_smi_pcie_bandwidth_inst>::description,
          LONG_DESCRIPTION, COMPONENT, "MB/s", rocprofsys::trace_cache::ABSOLUTE, BLOCK,
          EXPRESSION, 0, 0 });
}
auto&
get_settings(uint32_t _dev_id)
{
static auto _v = std::unordered_map<uint32_t, amd_smi::settings>{};
return _v[_dev_id];
}
// Returns a mutable reference to the function-local "initialized" flag,
// which starts out as false.
bool&
is_initialized()
{
    static bool _flag{ false };
    return _flag;
}
// Returns the cached AMD SMI library version.
//
// The library is queried via amdsmi_get_lib_version() whenever the cached
// major and minor numbers are both zero (i.e. nothing has been cached yet).
// Throws std::runtime_error if that query does not return
// AMDSMI_STATUS_SUCCESS.
amdsmi_version_t&
get_version()
{
    static amdsmi_version_t _v = {};
    if(_v.major == 0 && _v.minor == 0)
    {
        auto _err = amdsmi_get_lib_version(&_v);
        if(_err != AMDSMI_STATUS_SUCCESS)
        {
            // Name the function that actually failed so the message can be
            // matched against the AMD SMI API.
            throw std::runtime_error(
                "amdsmi_get_lib_version failed. No version information available.");
        }
    }
    return _v;
}
void
check_error(const char* _file, int _line, amdsmi_status_t _code, bool* _option = nullptr)
{
if(_code == AMDSMI_STATUS_SUCCESS)
return;
else if(_code == AMDSMI_STATUS_NOT_SUPPORTED && _option)
{
*_option = false;
return;
}
constexpr const char* _unknown_error_message =
"amdsmi_status_code_to_string failed. No error message available.";
const char* _msg = nullptr;
auto _error_code_is_known =
amdsmi_status_code_to_string(_code, &_msg) == AMDSMI_STATUS_SUCCESS;
throw std::runtime_error(
fmt::format("[{}:{}] Error code {} :: {}", _file, _line, static_cast<int>(_code),
_error_code_is_known ? _msg : _unknown_error_message));
}
// Returns the atomic sampler state shared by this file; it starts as
// State::PreInit.
std::atomic<State>&
get_state()
{
    static std::atomic<State> _current{ State::PreInit };
    return _current;
}
// Serializes the GPU metrics of one device into a byte buffer.
//
// Copies the vcn/jpeg/xgmi/pcie switches from the device's amd_smi settings
// into a gpu::gpu_metrics_settings_t and delegates to the shared
// gpu::serialize_gpu_metrics().
std::vector<uint8_t>
serialize_gpu_metrics(uint32_t device_id, const data::gpu_metrics_t& metrics,
                      const gpu::gpu_metrics_capabilities_t& capabilities)
{
    const auto device_settings = get_settings(device_id);

    // Translate amd_smi::settings into the settings type the shared
    // serializer expects.
    gpu::gpu_metrics_settings_t shared_settings;
    shared_settings.pcie          = device_settings.pcie;
    shared_settings.xgmi          = device_settings.xgmi;
    shared_settings.jpeg_activity = device_settings.jpeg_activity;
    shared_settings.vcn_activity  = device_settings.vcn_activity;

    return gpu::serialize_gpu_metrics(metrics, capabilities, shared_settings);
}
size_t
serialize_settings(uint32_t _device_id)
{
auto settings = get_settings(_device_id);
std::bitset<8> settings_bits;
settings_bits.reset();
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::busy),
settings.busy);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::temp),
settings.temp);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::power),
settings.power);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::mem_usage),
settings.mem_usage);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::vcn_activity),
settings.vcn_activity);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::jpeg_activity),
settings.jpeg_activity);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::xgmi),
settings.xgmi);
settings_bits.set(
static_cast<int>(trace_cache::amd_smi_sample::settings_positions::pcie),
settings.pcie);
return settings_bits.to_ulong();
}
} // namespace
//--------------------------------------------------------------------------------------//
// Definitions of the static data members of amd_smi::data.
// Number of devices; used in this file to size the per-device containers.
size_t data::device_count = 0;
// Ids of the devices that are sampled.
std::set<uint32_t> data::device_list = {};
// Promise handle; not referenced in the visible part of this file.
std::unique_ptr<data::promise_t> data::polling_finished = {};
// Constructing a data object immediately takes one sample of the given device.
data::data(uint32_t _dev_id) { sample(_dev_id); }
// Takes one AMD SMI sample of the given device and stores it.
//
// Does nothing in a child process or when the sampler state is not
// State::Active. Otherwise every enabled basic metric (busy, temperature,
// power, memory usage) is queried, and -- if VCN, JPEG, XGMI or PCIe sampling
// is enabled -- the GPU metrics structure as well. Values equal to the
// maximum of their integer type are treated as "unsupported" and filtered out.
//
// A query reporting AMDSMI_STATUS_NOT_SUPPORTED switches its option off; any
// other failure is logged and sets the state to State::Disabled, but the
// remainder of this call still runs.
void
data::sample(uint32_t _device_id)
{
    if(is_child_process()) return;

    auto _timestamp = tim::get_clock_real_now<size_t, std::nano>();
    assert(_timestamp < std::numeric_limits<int64_t>::max());

    // Value-initialized on purpose: when amdsmi_get_gpu_metrics_info() fails
    // with an error other than NOT_SUPPORTED, the exception is swallowed by
    // ROCPROFSYS_AMDSMI_GET while _gpu_metrics_needed stays true, so the
    // structure is read below. Without initialization that read would access
    // indeterminate memory.
    amdsmi_gpu_metrics_t _gpu_metrics{};
    bool                 _gpu_metrics_needed = false;

    auto _state = get_state().load();
    if(_state != State::Active) return;

    m_dev_id = _device_id;
    m_ts     = _timestamp;

// Calls FUNCTION(...) only when OPTION is set. NOT_SUPPORTED clears OPTION
// (via check_error); any other error is logged and disables future sampling.
#define ROCPROFSYS_AMDSMI_GET(OPTION, FUNCTION, ...)                                     \
    if(OPTION)                                                                           \
    {                                                                                    \
        try                                                                              \
        {                                                                                \
            ROCPROFSYS_AMD_SMI_CALL(FUNCTION(__VA_ARGS__), &OPTION);                     \
        } catch(std::runtime_error & _e)                                                 \
        {                                                                                \
            LOG_ERROR("Exception: {}. Disabling future samples from amd-smi...",         \
                      _e.what());                                                        \
            get_state().store(State::Disabled);                                          \
        }                                                                                \
    }

    amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_device_id);

    ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity,
                          sample_handle, &m_busy_perc);
    ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric,
                          sample_handle, AMDSMI_TEMPERATURE_TYPE_JUNCTION,
                          AMDSMI_TEMP_CURRENT, &m_temp);
#if(AMDSMI_LIB_VERSION_MAJOR == 2 && AMDSMI_LIB_VERSION_MINOR == 0) ||                   \
    (AMDSMI_LIB_VERSION_MAJOR == 25 && AMDSMI_LIB_VERSION_MINOR == 2)
    // This was a transient change in the AMD SMI API. It was never officially released.
    ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).power, amdsmi_get_power_info,
                          sample_handle, 0, &m_power)
#else
    ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).power, amdsmi_get_power_info,
                          sample_handle, &m_power)
#endif
    ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage,
                          sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage);

    // Check if GPU metrics are needed for VCN, JPEG, XGMI, or PCIe
    _gpu_metrics_needed = get_settings(m_dev_id).vcn_activity ||
                          get_settings(m_dev_id).jpeg_activity ||
                          get_settings(m_dev_id).xgmi || get_settings(m_dev_id).pcie;
    ROCPROFSYS_AMDSMI_GET(_gpu_metrics_needed, amdsmi_get_gpu_metrics_info, sample_handle,
                          &_gpu_metrics);

    // Determine if basic metrics are enabled
    bool _basic_metrics_enabled =
        get_settings(m_dev_id).busy || get_settings(m_dev_id).temp ||
        get_settings(m_dev_id).power || get_settings(m_dev_id).mem_usage;

    // Process GPU metrics if needed
    if(_gpu_metrics_needed || _basic_metrics_enabled)
    {
        gpu_metrics_t                   metrics;
        bool                            has_data = false;
        gpu::gpu_metrics_capabilities_t capabilities;

        if(_gpu_metrics_needed)
        {
            capabilities.flags.vcn_is_device_level_only =
                gpu::vcn_is_device_level_only(m_dev_id);
            capabilities.flags.jpeg_is_device_level_only =
                gpu::jpeg_is_device_level_only(m_dev_id);

            // Helper lambda to filter max uint values (unsupported) - returns 0 if max,
            // otherwise the value
            auto filter_max_uint_value = [](const auto& value) {
                using ValueType = std::decay_t<decltype(value)>;
                return (value == std::numeric_limits<ValueType>::max()) ? ValueType{ 0 }
                                                                        : value;
            };

            // Appends every element of src that differs from max_val to dest.
            auto fill_gpu_metrics = [](auto& dest, const auto& src, auto max_val) {
                for(const auto& val : src)
                {
                    if(val != max_val) dest.push_back(val);
                }
            };

            if(get_settings(m_dev_id).vcn_activity)
            {
                if(capabilities.flags.vcn_is_device_level_only)
                {
                    fill_gpu_metrics(metrics.vcn_activity, _gpu_metrics.vcn_activity,
                                     UINT16_MAX);
                    if(!metrics.vcn_activity.empty()) has_data = true;
                }
                else
                {
                    // Per-XCP values; XCPs without any supported value are skipped.
                    for(const auto& xcp : _gpu_metrics.xcp_stats)
                    {
                        std::vector<uint16_t> xcp_vcn_data;
                        fill_gpu_metrics(xcp_vcn_data, xcp.vcn_busy, UINT16_MAX);
                        if(!xcp_vcn_data.empty())
                        {
                            metrics.vcn_busy.push_back(std::move(xcp_vcn_data));
                            has_data = true;
                        }
                    }
                }
            }

            if(get_settings(m_dev_id).jpeg_activity)
            {
                if(capabilities.flags.jpeg_is_device_level_only)
                {
                    fill_gpu_metrics(metrics.jpeg_activity, _gpu_metrics.jpeg_activity,
                                     UINT16_MAX);
                    if(!metrics.jpeg_activity.empty()) has_data = true;
                }
                else
                {
                    // Per-XCP values; XCPs without any supported value are skipped.
                    for(const auto& xcp : _gpu_metrics.xcp_stats)
                    {
                        std::vector<uint16_t> xcp_jpeg_data;
                        fill_gpu_metrics(xcp_jpeg_data, xcp.jpeg_busy, UINT16_MAX);
                        if(!xcp_jpeg_data.empty())
                        {
                            metrics.jpeg_busy.push_back(std::move(xcp_jpeg_data));
                            has_data = true;
                        }
                    }
                }
            }

            // Process XGMI metrics if enabled
            if(get_settings(m_dev_id).xgmi)
            {
                // Filter scalar values - returns 0 if unsupported (max value)
                metrics.xgmi_link_width =
                    filter_max_uint_value(_gpu_metrics.xgmi_link_width);
                metrics.xgmi_link_speed =
                    filter_max_uint_value(_gpu_metrics.xgmi_link_speed);
                // Vector values filtered by fill_gpu_metrics
                fill_gpu_metrics(metrics.xgmi_read_data_acc,
                                 _gpu_metrics.xgmi_read_data_acc, UINT64_MAX);
                fill_gpu_metrics(metrics.xgmi_write_data_acc,
                                 _gpu_metrics.xgmi_write_data_acc, UINT64_MAX);
                if(metrics.xgmi_link_width != 0 || metrics.xgmi_link_speed != 0 ||
                   !metrics.xgmi_read_data_acc.empty() ||
                   !metrics.xgmi_write_data_acc.empty())
                {
                    has_data = true;
                }
            }

            // Process PCIe metrics if enabled
            if(get_settings(m_dev_id).pcie)
            {
                // Filter scalar values - returns 0 if unsupported (max value)
                metrics.pcie_link_width =
                    filter_max_uint_value(_gpu_metrics.pcie_link_width);
                metrics.pcie_link_speed =
                    filter_max_uint_value(_gpu_metrics.pcie_link_speed);
                metrics.pcie_bandwidth_acc =
                    filter_max_uint_value(_gpu_metrics.pcie_bandwidth_acc);
                metrics.pcie_bandwidth_inst =
                    filter_max_uint_value(_gpu_metrics.pcie_bandwidth_inst);
                if(metrics.pcie_link_width != 0 || metrics.pcie_link_speed != 0 ||
                   metrics.pcie_bandwidth_acc != 0 || metrics.pcie_bandwidth_inst != 0)
                {
                    has_data = true;
                }
            }
        }

        // Store samples if basic metrics are enabled OR if there's advanced metric data
        if(_basic_metrics_enabled || has_data)
        {
            trace_cache::get_buffer_storage().store(trace_cache::amd_smi_sample{
                serialize_settings(m_dev_id), _device_id, _timestamp,
                m_busy_perc.gfx_activity, m_busy_perc.umc_activity,
                m_busy_perc.mm_activity, m_power.current_socket_power, m_temp,
                (m_mem_usage / units::megabyte),
                serialize_gpu_metrics(m_dev_id, metrics, capabilities) });
            if(has_data) m_gpu_metrics.push_back(metrics);
        }
    }
#undef ROCPROFSYS_AMDSMI_GET
}
// Writes a one-line, human-readable summary of this sample to _os.
// The summary is only populated when ROCPROFSYS_USE_ROCM > 0; otherwise an
// empty string is written.
void
data::print(std::ostream& _os) const
{
    std::stringstream _line{};
#if ROCPROFSYS_USE_ROCM > 0
    _line << "device: " << m_dev_id;
    _line << ", gpu busy: = " << m_busy_perc.gfx_activity;
    _line << "%, mm busy: = " << m_busy_perc.mm_activity;
    _line << "%, umc busy: = " << m_busy_perc.umc_activity;
    _line << "%, temp = " << m_temp;
    _line << ", current power = " << m_power.current_socket_power;
    _line << ", memory usage = " << m_mem_usage;
#endif
    _os << _line.str();
}
namespace
{
// Per-device pointers to the sample bundles owned by sampler_instances.
// Sized and filled in config(); entries for devices that are not in
// data::device_list stay nullptr.
std::vector<unique_ptr_t<bundle_t>*> _bundle_data{};
}
// Prepares the sampler for all configured devices:
//  1. binds (and, if missing, allocates) the sample bundle of every device in
//     data::device_list,
//  2. takes an initial sample per device,
//  3. registers the category, track and PMC metadata.
void
config()
{
    const auto num_devices = data::device_count;

    _bundle_data.resize(num_devices, nullptr);
    for(size_t dev = 0; dev < num_devices; ++dev)
    {
        // Devices that are not sampled keep a null slot.
        if(data::device_list.count(dev) == 0) continue;

        auto*& slot = _bundle_data.at(dev);
        slot        = &sampler_instances::get()->at(dev);
        if(!*slot) *slot = unique_ptr_t<bundle_t>{ new bundle_t{} };
    }

    auto& initial_samples = data::get_initial();
    initial_samples.resize(num_devices);
    for(auto dev : data::device_list)
    {
        initial_samples.at(dev).sample(dev);
    }

    metadata_initialize_category();
    for(const auto& dev : data::device_list)
    {
        metadata_initialize_smi_tracks(dev);
        metadata_initialize_smi_pmc(dev);
    }
}
// Appends one new sample per configured device to that device's bundle.
// Runs under the amd_smi category mutex; devices are skipped while the
// sampler state is not State::Active or when their bundle is not allocated.
void
sample()
{
    auto_lock_t _lk{ type_mutex<category::amd_smi>() };
    // TODO: Reorganize amd_smi::data and sampling mechanism not to store same data in
    // bundle_data and in trace_cache
    for(auto dev : data::device_list)
    {
        if(amd_smi::get_state() == State::Active)
        {
            LOG_TRACE("Polling amd-smi for device {}", dev);
            auto& bundle = *_bundle_data.at(dev);
            if(bundle)
            {
                bundle->emplace_back(data{ dev });
            }
        }
    }
}
// Atomically replaces the sampler state with _v.
void
set_state(State _v)
{
    auto& _current = amd_smi::get_state();
    _current.store(_v);
}
// Returns the function-local vector holding the initial sample of each device.
std::vector<data>&
data::get_initial()
{
    static std::vector<data> _initial_samples{};
    return _initial_samples;
}
bool
data::setup()
{
perfetto_counter_track<data>::init();
amd_smi::set_state(State::PreInit);
return true;
}
// Marks the sampler as finalized. Always returns true.
bool
data::shutdown()
{
    constexpr auto _final_state = State::Finalized;
    amd_smi::set_state(_final_state);
    return true;
}
// Helper macro: when COMPONENT is available (tim::trait::is_available), fetches
// it from a bundle named `_v` and, if present, sets both its value and its
// accumulated value to `itr.<__VA_ARGS__>`. It expects `_v` and `itr` to be in
// scope at the expansion site.
// NOTE(review): no use of this macro is visible in this part of the file.
#define GPU_METRIC(COMPONENT, ...) \
if constexpr(tim::trait::is_available<COMPONENT>::value) \
{ \
auto* _val = _v.get<COMPONENT>(); \
if(_val) \
{ \
_val->set_value(itr.__VA_ARGS__); \
_val->set_accum(itr.__VA_ARGS__); \
} \
}
// Emits the collected AMD SMI samples of one device as perfetto counters.
//
// For every stored sample of _dev_id whose timestamp is valid for thread 0,
// the perfetto counter tracks are created on first use and one counter value
// per enabled metric is written. Nothing is emitted unless perfetto output is
// enabled.
//
// Throws std::runtime_error when running in continuous integration and the
// thread info of thread 0 is missing; outside of CI the function just returns.
void
data::post_process(uint32_t _dev_id)
{
    using component::sampling_gpu_busy_gfx;
    using component::sampling_gpu_busy_mm;
    using component::sampling_gpu_busy_umc;
    using component::sampling_gpu_jpeg;
    using component::sampling_gpu_memory;
    using component::sampling_gpu_power;
    using component::sampling_gpu_temp;
    using component::sampling_gpu_vcn;

    // Valid device ids are [0, device_count): config() sizes the per-device
    // containers to device_count and indexes them by device id.
    if(device_count <= _dev_id) return;

    auto&       _amd_smi_v   = sampler_instances::get()->at(_dev_id);
    auto        _amd_smi     = (_amd_smi_v) ? *_amd_smi_v : std::deque<amd_smi::data>{};
    const auto& _thread_info = thread_info::get(0, InternalTID);

    LOG_DEBUG("Post-processing {} amd-smi samples from device {}", _amd_smi.size(),
              _dev_id);

    if(get_is_continuous_integration() && !_thread_info)
    {
        throw std::runtime_error("Missing thread info for thread 0");
    }
    if(!_thread_info) return;

    auto _settings    = get_settings(_dev_id);
    auto use_perfetto = get_use_perfetto();

    for(auto& itr : _amd_smi)
    {
        using counter_track = perfetto_counter_track<data>;

        if(itr.m_dev_id != _dev_id) continue;
        uint64_t _ts = itr.m_ts;
        if(!_thread_info->is_valid_time(_ts)) continue;

        double _gfxbusy = itr.m_busy_perc.gfx_activity;
        double _umcbusy = itr.m_busy_perc.umc_activity;
        double _mmbusy  = itr.m_busy_perc.mm_activity;
        double _temp    = itr.m_temp;
        double _power   = itr.m_power.current_socket_power;
        double _usage   = itr.m_mem_usage / static_cast<double>(units::megabyte);

        // Creates the counter tracks of this device once. The order of the
        // emplace() calls must match the order in which write_perfetto_metrics
        // consumes track indices.
        // NOTE(review): the VCN/JPEG/XGMI/PCIe tracks are derived from the
        // first processed sample only; if that sample has no GPU metrics but a
        // later one does, the later sample indexes tracks that were never
        // created -- confirm how counter_track::at() handles that.
        auto setup_perfetto_counter_tracks = [&]() {
            if(counter_track::exists(_dev_id)) return;

            auto addendum = [&](const char* _v) {
                return fmt::format("GPU {} [{}] (S)", _v, _dev_id);
            };
            // Label of an indexed block (engine/link). The index is zero-padded
            // to two digits; each placeholder consumes exactly one argument.
            auto addendum_blk = [&](std::size_t _i, const char* _metric,
                                    std::size_t xcp_idx = SIZE_MAX) {
                if(xcp_idx != SIZE_MAX)
                {
                    return fmt::format("GPU [{}] {} XCP_{}: [{:02}] (S)", _dev_id,
                                       _metric, xcp_idx, _i);
                }
                else
                {
                    return fmt::format("GPU [{}] {} [{:02}] (S)", _dev_id, _metric, _i);
                }
            };

            if(_settings.busy)
            {
                counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
                counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
                counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
            }
            if(_settings.temp)
            {
                counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
            }
            if(_settings.power)
            {
                counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
            }
            if(_settings.mem_usage)
            {
                counter_track::emplace(_dev_id, addendum("Memory Usage"), "megabytes");
            }
            if(_settings.vcn_activity)
            {
                if(itr.m_gpu_metrics.empty())
                {
                    LOG_DEBUG("No VCN activity data collected from device {}", _dev_id);
                }
                else if(gpu::vcn_is_device_level_only(_dev_id))
                {
                    // For VCN activity supported: use vcn_activity vector
                    for(std::size_t i = 0;
                        i < std::size(itr.m_gpu_metrics[0].vcn_activity); ++i)
                        counter_track::emplace(_dev_id, addendum_blk(i, "VCN Activity"),
                                               "%");
                }
                else
                {
                    // For VCN activity NOT supported: use vcn_busy vector with per-XCP
                    // organization
                    for(size_t xcp = 0; xcp < itr.m_gpu_metrics[0].vcn_busy.size(); ++xcp)
                    {
                        // Loop through each XCP's VCN busy values
                        for(size_t i = 0; i < itr.m_gpu_metrics[0].vcn_busy[xcp].size();
                            ++i)
                        {
                            counter_track::emplace(
                                _dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
                        }
                    }
                }
            }
            if(_settings.jpeg_activity)
            {
                if(itr.m_gpu_metrics.empty())
                {
                    LOG_DEBUG("No JPEG activity data collected from device {}", _dev_id);
                }
                else if(gpu::jpeg_is_device_level_only(_dev_id))
                {
                    // For JPEG activity supported: use jpeg_activity vector
                    for(std::size_t i = 0;
                        i < std::size(itr.m_gpu_metrics[0].jpeg_activity); ++i)
                        counter_track::emplace(_dev_id, addendum_blk(i, "JPEG Activity"),
                                               "%");
                }
                else
                {
                    // For JPEG activity NOT supported: use jpeg_busy vector with per-XCP
                    // organization
                    for(size_t xcp = 0; xcp < itr.m_gpu_metrics[0].jpeg_busy.size();
                        ++xcp)
                    {
                        // Loop through each XCP's JPEG busy values
                        for(size_t i = 0; i < itr.m_gpu_metrics[0].jpeg_busy[xcp].size();
                            ++i)
                        {
                            counter_track::emplace(
                                _dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
                        }
                    }
                }
            }
            if(_settings.xgmi)
            {
                if(itr.m_gpu_metrics.empty())
                {
                    LOG_DEBUG("No XGMI activity data collected from device {}", _dev_id);
                }
                else
                {
                    counter_track::emplace(_dev_id, addendum("XGMI Link Width"), "bits");
                    counter_track::emplace(_dev_id, addendum("XGMI Link Speed"), "GT/s");
                    for(std::size_t i = 0;
                        i < std::size(itr.m_gpu_metrics[0].xgmi_read_data_acc); ++i)
                        counter_track::emplace(_dev_id, addendum_blk(i, "XGMI Read Data"),
                                               "KB");
                    for(std::size_t i = 0;
                        i < std::size(itr.m_gpu_metrics[0].xgmi_write_data_acc); ++i)
                        counter_track::emplace(_dev_id,
                                               addendum_blk(i, "XGMI Write Data"), "KB");
                }
            }
            if(_settings.pcie)
            {
                if(itr.m_gpu_metrics.empty())
                {
                    LOG_DEBUG("No PCIe activity data collected from device {}", _dev_id);
                }
                else
                {
                    counter_track::emplace(_dev_id, addendum("PCIe Link Width"), "");
                    counter_track::emplace(_dev_id, addendum("PCIe Link Speed"), "GT/s");
                    counter_track::emplace(_dev_id, addendum("PCIe Bandwidth Acc"), "MB");
                    counter_track::emplace(_dev_id, addendum("PCIe Bandwidth Inst"),
                                           "MB/s");
                }
            }
        };

        // Writes one counter value per enabled metric of this sample.
        // track_index walks the tracks in the order they were emplaced above.
        auto write_perfetto_metrics = [&]() {
            size_t track_index = 0;
            if(_settings.busy)
            {
                TRACE_COUNTER("device_busy_gfx",
                              counter_track::at(_dev_id, track_index++), _ts, _gfxbusy);
                TRACE_COUNTER("device_busy_umc",
                              counter_track::at(_dev_id, track_index++), _ts, _umcbusy);
                TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, track_index++),
                              _ts, _mmbusy);
            }
            if(_settings.temp)
            {
                TRACE_COUNTER("device_temp", counter_track::at(_dev_id, track_index++),
                              _ts, _temp);
            }
            if(_settings.power)
            {
                TRACE_COUNTER("device_power", counter_track::at(_dev_id, track_index++),
                              _ts, _power);
            }
            if(_settings.mem_usage)
            {
                TRACE_COUNTER("device_memory_usage",
                              counter_track::at(_dev_id, track_index++), _ts, _usage);
            }
            if(_settings.vcn_activity && !itr.m_gpu_metrics.empty())
            {
                if(gpu::vcn_is_device_level_only(_dev_id))
                {
                    // Device-level VCN activity
                    for(const auto& vcn_val : itr.m_gpu_metrics[0].vcn_activity)
                    {
                        TRACE_COUNTER("device_vcn_activity",
                                      counter_track::at(_dev_id, track_index++), _ts,
                                      vcn_val);
                    }
                }
                else
                {
                    // XCP-level VCN busy (per-XCP organization)
                    for(const auto& xcp_data : itr.m_gpu_metrics[0].vcn_busy)
                    {
                        for(const auto& vcn_val : xcp_data)
                        {
                            TRACE_COUNTER("device_vcn_activity",
                                          counter_track::at(_dev_id, track_index++), _ts,
                                          vcn_val);
                        }
                    }
                }
            }
            if(_settings.jpeg_activity && !itr.m_gpu_metrics.empty())
            {
                if(gpu::jpeg_is_device_level_only(_dev_id))
                {
                    // Device-level JPEG activity
                    for(const auto& jpeg_val : itr.m_gpu_metrics[0].jpeg_activity)
                    {
                        TRACE_COUNTER("device_jpeg_activity",
                                      counter_track::at(_dev_id, track_index++), _ts,
                                      jpeg_val);
                    }
                }
                else
                {
                    // XCP-level JPEG busy (per-XCP organization)
                    for(const auto& xcp_data : itr.m_gpu_metrics[0].jpeg_busy)
                    {
                        for(const auto& jpeg_val : xcp_data)
                        {
                            TRACE_COUNTER("device_jpeg_activity",
                                          counter_track::at(_dev_id, track_index++), _ts,
                                          jpeg_val);
                        }
                    }
                }
            }
            if(_settings.xgmi && !itr.m_gpu_metrics.empty())
            {
                TRACE_COUNTER("device_xgmi_link_width",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].xgmi_link_width);
                TRACE_COUNTER("device_xgmi_link_speed",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].xgmi_link_speed);
                for(const auto& read_val : itr.m_gpu_metrics[0].xgmi_read_data_acc)
                {
                    TRACE_COUNTER("device_xgmi_read_data",
                                  counter_track::at(_dev_id, track_index++), _ts,
                                  read_val);
                }
                for(const auto& write_val : itr.m_gpu_metrics[0].xgmi_write_data_acc)
                {
                    TRACE_COUNTER("device_xgmi_write_data",
                                  counter_track::at(_dev_id, track_index++), _ts,
                                  write_val);
                }
            }
            if(_settings.pcie && !itr.m_gpu_metrics.empty())
            {
                TRACE_COUNTER("device_pcie_link_width",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].pcie_link_width);
                TRACE_COUNTER("device_pcie_link_speed",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].pcie_link_speed);
                TRACE_COUNTER("device_pcie_bandwidth_acc",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].pcie_bandwidth_acc);
                TRACE_COUNTER("device_pcie_bandwidth_inst",
                              counter_track::at(_dev_id, track_index++), _ts,
                              itr.m_gpu_metrics[0].pcie_bandwidth_inst);
            }
        };

        if(use_perfetto)
        {
            setup_perfetto_counter_tracks();
            write_perfetto_metrics();
        }
    }
}
//--------------------------------------------------------------------------------------//
// Initializes AMD SMI sampling.
//
// Steps performed while holding the amd_smi category mutex:
//   1. Return early if already initialized, if AMD SMI use is disabled, or if
//      the AMD SMI library cannot be initialized.
//   2. Resolve the set of GPUs to sample from get_sampling_gpus(). The value is
//      lower-cased; "on"/"all"/empty selects every device, "off"/"none" selects
//      no device, otherwise it is parsed as a list of indices and inclusive
//      ranges (e.g. "0,2,4-7"). Indices >= data::device_count are ignored.
//   3. If ROCPROFSYS_AMD_SMI_METRICS names a subset of metrics, enable only
//      those metrics on every selected device.
//   4. Mark the component initialized and call data::setup().
//
// Throws std::runtime_error for a malformed GPU specification; that check runs
// before the try-block below, so the exception propagates to the caller.
// std::stoul may additionally throw for values it cannot represent.
void
setup()
{
    auto_lock_t _lk{ type_mutex<category::amd_smi>() };

    if(is_initialized() || !get_use_amd_smi()) return;

    ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);

    if(!gpu::initialize_amdsmi())
    {
        LOG_WARNING("AMD SMI is not available. Disabling AMD SMI sampling...");
        return;
    }

    // One placeholder per argument: major.minor.release followed by the build
    // string (previously only two of the four values were printed).
    amdsmi_version_t _version = get_version();
    LOG_INFO("AMD SMI version: {}.{}.{} - str: {}.", _version.major, _version.minor,
             _version.release, _version.build);

    data::device_count = gpu::device_count();

    auto _devices_v = get_sampling_gpus();
    // tolower() has undefined behaviour for negative char values, so route the
    // character through unsigned char first.
    for(auto& itr : _devices_v)
        itr = static_cast<char>(tolower(static_cast<unsigned char>(itr)));

    if(_devices_v == "off")
        _devices_v = "none";
    else if(_devices_v == "on")
        _devices_v = "all";

    bool _all_devices = _devices_v.find("all") != std::string::npos || _devices_v.empty();
    bool _no_devices  = _devices_v.find("none") != std::string::npos;

    std::set<uint32_t> _devices = {};
    // Only indices that refer to an existing device are recorded.
    auto _emplace = [&_devices](auto idx) {
        if(idx < data::device_count) _devices.emplace(idx);
    };

    if(_all_devices)
    {
        for(uint32_t i = 0; i < data::device_count; ++i)
            _emplace(i);
    }
    else if(!_no_devices)
    {
        auto _enabled = tim::delimit(_devices_v, ",; \t");
        for(auto&& itr : _enabled)
        {
            if(itr.find_first_not_of("0123456789-") != std::string::npos)
            {
                throw std::runtime_error(
                    fmt::format("Invalid GPU specification: '{}'. Only numerical values "
                                "(e.g., 0) or ranges (e.g., 0-7) are permitted.",
                                itr));
            }

            if(itr.find('-') != std::string::npos)
            {
                auto _v = tim::delimit(itr, "-");
                if(_v.size() != 2)
                {
                    throw std::runtime_error(
                        fmt::format("Invalid GPU range specification: '{}'. "
                                    "Required format N-M, e.g. 0-4",
                                    itr));
                }

                // Parse both bounds once. The range N-M is inclusive, so "0-7"
                // selects devices 0 through 7 and "3-3" selects device 3.
                // Bounding the loop by the device count keeps it finite for
                // very large upper bounds; such indices would be rejected by
                // _emplace anyway.
                const auto _first = std::stoul(_v.at(0));
                const auto _last  = std::stoul(_v.at(1));
                for(auto i = _first; i <= _last && i < data::device_count; ++i)
                    _emplace(i);
            }
            else
            {
                _emplace(std::stoul(itr));
            }
        }
    }

    data::device_list = _devices;

    auto _metrics = get_setting_value<std::string>("ROCPROFSYS_AMD_SMI_METRICS");

    try
    {
        for(auto itr : _devices)
        {
            // Enable selected metrics only
            if((_metrics && !_metrics->empty()) && (*_metrics != "all"))
            {
                // Maps each metric keyword to the per-device flag it toggles.
                using key_pair_t     = std::pair<std::string_view, bool&>;
                const auto supported = std::unordered_map<std::string_view, bool&>{
                    key_pair_t{ "busy", get_settings(itr).busy },
                    key_pair_t{ "temp", get_settings(itr).temp },
                    key_pair_t{ "power", get_settings(itr).power },
                    key_pair_t{ "mem_usage", get_settings(itr).mem_usage },
                    key_pair_t{ "vcn_activity", get_settings(itr).vcn_activity },
                    key_pair_t{ "jpeg_activity", get_settings(itr).jpeg_activity },
                    key_pair_t{ "xgmi", get_settings(itr).xgmi },
                    key_pair_t{ "pcie", get_settings(itr).pcie },
                };

                // Initialize all metrics to false
                for(auto& it : supported)
                    it.second = false;

                // Parse list of metrics enabled by the user
                if(*_metrics != "none")
                {
                    for(const auto& metric : tim::delimit(*_metrics, ",;:\t\n "))
                    {
                        auto iitr = supported.find(metric);
                        if(iitr == supported.end())
                        {
                            // An unknown metric name is fatal: finalize and exit.
                            LOG_CRITICAL("Unsupported amd-smi metric: {}", metric);
                            ::rocprofsys::set_state(::rocprofsys::State::Finalized);
                            std::exit(1);
                        }

                        LOG_DEBUG("Enabling amd-smi metric '{}' on device [{}]", metric,
                                  itr);
                        iitr->second = true;
                    }
                }
            }
        }

        is_initialized() = true;

        data::setup();
    } catch(const std::runtime_error& _e)
    {
        LOG_WARNING("Exception thrown when initializing amd-smi: {}", _e.what());
        data::device_list = {};
    }
}
// Tears down AMD SMI sampling under the amd_smi category mutex. Does nothing
// unless the component is currently initialized. amdsmi_shut_down() is invoked
// only when data::shutdown() evaluates to true; a std::runtime_error raised
// along the way is logged as a warning. The initialized flag is cleared in
// either case.
void
shutdown()
{
    auto_lock_t _guard{ type_mutex<category::amd_smi>() };

    if(is_initialized())
    {
        LOG_DEBUG("Shutting down amd-smi...");

        try
        {
            if(data::shutdown())
            {
                ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down());
            }
        } catch(std::runtime_error& _err)
        {
            LOG_WARNING("Exception thrown when shutting down amd-smi: {}", _err.what());
        }

        is_initialized() = false;
    }
}
// Runs data::post_process() once for every device id stored in
// data::device_list, logging each id before it is processed.
void
post_process()
{
    for(const auto _device_id : data::device_list)
    {
        LOG_DEBUG("Post-processing amd-smi data for device: {}", _device_id);
        data::post_process(_device_id);
    }
}
// Forwards to gpu::device_count() and returns its result.
uint32_t
device_count()
{
    const auto _num_devices = gpu::device_count();
    return _num_devices;
}
// Post-fork hook for the child process: turns AMD SMI handling off so the
// child neither samples nor attempts to shut the library down.
void
postfork_child_cleanup()
{
    LOG_DEBUG("Disabling AMD SMI in child process after fork...");

    // A Finalized state blocks further sampling attempts. (The original
    // author notes that sample() is also expected to bail out through an
    // is_child_process() check; that code is outside this section.)
    get_state().store(State::Finalized);

    // With the flag cleared, shutdown() returns early and never reaches the
    // AMD SMI library cleanup.
    is_initialized() = false;

    // No devices left to operate on.
    data::device_list.clear();
}
// Post-fork hook for the parent process. According to the original author,
// AMD SMI device handles may be corrupted after a fork, so the component is
// cycled through shutdown() and setup() to obtain fresh handles.
void
postfork_parent_reinit()
{
    LOG_DEBUG("Reinitializing AMD SMI in parent process after fork...");

    // Full teardown followed by a fresh initialization.
    shutdown();
    setup();
}
} // namespace amd_smi
} // namespace rocprofsys
// Component instantiations for the data_tracker<double, ...> types used by the
// GPU metrics in this file: gfx/umc/mm busy, temperature, power, memory, VCN
// and JPEG. Each invocation passes the tracker type followed by `true` and
// `double`.
// NOTE(review): the meaning of the trailing macro arguments is defined by
// ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT elsewhere -- confirm there before
// changing them.
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_gfx>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_umc>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_busy_mm>),
true, double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_temp>), true,
double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_power>), true,
double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_memory>), true,
double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_vcn>), true,
double)
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_jpeg>), true,
double)