diff --git a/rocm_smi/include/rocm_smi/rocm_smi_gpu_metrics.h b/rocm_smi/include/rocm_smi/rocm_smi_gpu_metrics.h index b47e4c8b44..70d31c5347 100644 --- a/rocm_smi/include/rocm_smi/rocm_smi_gpu_metrics.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_gpu_metrics.h @@ -31,9 +31,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -938,20 +941,18 @@ class GpuMetricsBase_t { virtual void set_device_id(uint32_t device_id) { m_device_id = device_id; } virtual void set_partition_id(uint32_t partition_id) { m_partition_id = partition_id; } virtual AMDGpuDynamicMetricsTbl_t get_metrics_dynamic_tbl() { - return m_metrics_dynamic_tbl; + return m_base_metrics_dynamic_tbl; } protected: - AMDGpuDynamicMetricsTbl_t m_metrics_dynamic_tbl; + AMDGpuDynamicMetricsTbl_t m_base_metrics_dynamic_tbl; uint64_t m_metrics_timestamp; uint32_t m_device_id; uint32_t m_partition_id; - }; using GpuMetricsBasePtr = std::shared_ptr; using AMDGpuMetricFactories_t = const std::map; - class GpuMetricsBase_v11_t final : public GpuMetricsBase_t { public: virtual ~GpuMetricsBase_v11_t() = default; diff --git a/rocm_smi/src/rocm_smi_gpu_metrics.cc b/rocm_smi/src/rocm_smi_gpu_metrics.cc index d05bd77aed..7383e92c2a 100644 --- a/rocm_smi/src/rocm_smi_gpu_metrics.cc +++ b/rocm_smi/src/rocm_smi_gpu_metrics.cc @@ -869,10 +869,7 @@ rsmi_status_t GpuMetricsBase_v17_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -1117,6 +1114,12 @@ rsmi_status_t GpuMetricsBase_v17_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -1126,10 +1129,7 @@ rsmi_status_t GpuMetricsBase_v16_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -1357,6 +1357,12 @@ rsmi_status_t GpuMetricsBase_v16_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -1474,10 +1480,7 @@ rsmi_status_t GpuMetricsBase_v15_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -1694,6 +1697,12 @@ rsmi_status_t GpuMetricsBase_v15_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -1803,10 +1812,7 @@ rsmi_status_t GpuMetricsBase_v14_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -2008,6 +2014,12 @@ rsmi_status_t GpuMetricsBase_v14_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -3014,10 +3026,7 @@ rsmi_status_t GpuMetricsBase_v13_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -3263,6 +3272,12 @@ rsmi_status_t GpuMetricsBase_v13_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -3397,10 +3412,7 @@ rsmi_status_t GpuMetricsBase_v12_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -3624,6 +3636,12 @@ rsmi_status_t GpuMetricsBase_v12_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -3734,10 +3752,7 @@ rsmi_status_t GpuMetricsBase_v11_t::populate_metrics_dynamic_tbl() { ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - if (!m_metrics_dynamic_tbl.empty()) { - m_metrics_dynamic_tbl.clear(); - } - + auto m_metrics_dynamic_tbl = AMDGpuDynamicMetricsTbl_t{}; // // Note: Any metric treatment/changes (if any) should happen before they // get written to internal/external tables. @@ -3948,6 +3963,12 @@ rsmi_status_t GpuMetricsBase_v11_t::populate_metrics_dynamic_tbl() { << " |"; LOG_TRACE(ss); + // Copy to base class + std::copy(m_metrics_dynamic_tbl.begin(), + m_metrics_dynamic_tbl.end(), + std::inserter(GpuMetricsBase_t::m_base_metrics_dynamic_tbl, + GpuMetricsBase_t::m_base_metrics_dynamic_tbl.end())); + return status_code; } @@ -4692,8 +4713,8 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t* smu) { CHK_SUPPORT_NAME_ONLY(smu) auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS); - std::ostringstream ostrstream; - std::ostringstream ss; + thread_local std::ostringstream ostrstream; + thread_local std::ostringstream ss; ss << __PRETTY_FUNCTION__ << "| ======= start ======="; LOG_TRACE(ss); @@ -4717,6 +4738,7 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t* smu) { rsmi_dev_partition_id_get(dv_ind, &partition_id); dev->set_smi_partition_id(partition_id); dev->dev_log_gpu_metrics(ostrstream); + const auto [error_code, external_metrics] = dev->dev_copy_internal_to_external_metrics(); if (error_code != rsmi_status_t::RSMI_STATUS_SUCCESS) { ss << __PRETTY_FUNCTION__