Merge rocm-smi/amd-staging into amd-dev 20240119
Change-Id: Ie706473ff92a91b19e95d2d58f64904cad73a89a
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/amdsmi commit: 6132074089]
Цей коміт міститься в:
@@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
BasedOnStyle: Google
|
||||||
|
ColumnLimit: 100
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# THIS FILE IS GENERATED FROM .clangd!
|
||||||
|
# Run ./.update-clang-tidy.sh to regenerate.
|
||||||
|
Checks:
|
||||||
|
bugprone*,
|
||||||
|
clang-analyzer*,
|
||||||
|
google*,
|
||||||
|
misc*,
|
||||||
|
modernize*,
|
||||||
|
-abseil*,
|
||||||
|
-bugprone-easily-swappable-parameters,
|
||||||
|
-bugprone-reserved-identifier,
|
||||||
|
-clang-analyzer-security.insecureAPI.strcpy,
|
||||||
|
-cppcoreguidelines*,
|
||||||
|
-cppcoreguidelines-pro*,
|
||||||
|
-misc-non-copyable-objects,
|
||||||
|
-misc-use-anonymous-namespace,
|
||||||
|
-modernize-avoid-c-arrays,
|
||||||
|
-modernize-redundant-void-arg,
|
||||||
|
-modernize-use-auto,
|
||||||
|
-modernize-use-nodiscard,
|
||||||
|
-modernize-use-noexcept,
|
||||||
|
-modernize-use-trailing-return-type,
|
||||||
|
-modernize-use-using,
|
||||||
|
-performance*,
|
||||||
|
-readability*,
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
CompileFlags:
|
||||||
|
Remove: -W*
|
||||||
|
Add: [-Wall, -pedantic, -I/opt/rocm/include, -I/opt/rocm/include/hsa, -I/opt/rocm/include/rocprofiler]
|
||||||
|
Compiler: clang++
|
||||||
|
|
||||||
|
# list here: https://clang.llvm.org/extra/clang-tidy/checks/list.html
|
||||||
|
Diagnostics:
|
||||||
|
UnusedIncludes: Strict
|
||||||
|
# rules below are copied into .clang-tidy using ./.update-clang-tidy.sh
|
||||||
|
# please keep the rules sorted alphabetically
|
||||||
|
ClangTidy:
|
||||||
|
Add: [
|
||||||
|
bugprone*,
|
||||||
|
clang-analyzer*,
|
||||||
|
google*,
|
||||||
|
misc*,
|
||||||
|
modernize*,
|
||||||
|
]
|
||||||
|
Remove: [
|
||||||
|
abseil*,
|
||||||
|
bugprone-easily-swappable-parameters,
|
||||||
|
bugprone-reserved-identifier,
|
||||||
|
clang-analyzer-security.insecureAPI.strcpy,
|
||||||
|
cppcoreguidelines*,
|
||||||
|
cppcoreguidelines-pro*,
|
||||||
|
misc-non-copyable-objects,
|
||||||
|
misc-use-anonymous-namespace,
|
||||||
|
modernize-avoid-c-arrays,
|
||||||
|
modernize-redundant-void-arg,
|
||||||
|
modernize-use-auto,
|
||||||
|
modernize-use-nodiscard,
|
||||||
|
modernize-use-noexcept,
|
||||||
|
modernize-use-trailing-return-type,
|
||||||
|
modernize-use-using,
|
||||||
|
performance*,
|
||||||
|
readability*,
|
||||||
|
]
|
||||||
@@ -13,3 +13,4 @@ indent_style = space
|
|||||||
charset = utf-8
|
charset = utf-8
|
||||||
indent_style = space
|
indent_style = space
|
||||||
indent_size = 2
|
indent_size = 2
|
||||||
|
max_line_length = 100
|
||||||
|
|||||||
@@ -34,3 +34,8 @@ device/
|
|||||||
|
|
||||||
# misc
|
# misc
|
||||||
esmi_ib_library/
|
esmi_ib_library/
|
||||||
|
|
||||||
|
# do NOT ignore these files
|
||||||
|
!.clang-format
|
||||||
|
!.clang-tidy
|
||||||
|
!.clangd
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# - How to use:
|
||||||
|
# python3 -m pip install pre-commit
|
||||||
|
# pre-commit install --install-hooks
|
||||||
|
# Upon a new commit - the hooks should automagically run
|
||||||
|
#
|
||||||
|
# - How to skip:
|
||||||
|
# git commit --no-verify
|
||||||
|
# or
|
||||||
|
# SKIP=clang-format-docker git commit
|
||||||
|
# SKIP=cpplint-docker git commit
|
||||||
|
|
||||||
|
fail_fast: false
|
||||||
|
repos:
|
||||||
|
# For portability I decided to use Docker containers
|
||||||
|
- repo: https://github.com/dmitrii-galantsev/pre-commit-docker-cpplint
|
||||||
|
rev: 0.0.3
|
||||||
|
hooks:
|
||||||
|
- id: clang-format-docker
|
||||||
|
- id: cpplint-docker
|
||||||
|
# Below is a local way of running formatters and linters
|
||||||
|
# NOTE: clang-tidy is not used in the above tests
|
||||||
|
# - repo: https://github.com/pocc/pre-commit-hooks
|
||||||
|
# rev: v1.3.5
|
||||||
|
# hooks:
|
||||||
|
# - id: clang-format
|
||||||
|
# args: [--no-diff, -i]
|
||||||
|
# - id: clang-tidy
|
||||||
|
# args: [-p=build, --quiet]
|
||||||
|
# - id: cpplint
|
||||||
|
# args: [--verbose=5]
|
||||||
Виконуваний файл
+36
@@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -x # trace
|
||||||
|
set -e # exit immediately if command fails
|
||||||
|
set -u # exit if an undefined variable is found
|
||||||
|
|
||||||
|
awk '
|
||||||
|
BEGIN {
|
||||||
|
print "# THIS FILE IS GENERATED FROM .clangd!"
|
||||||
|
print "# Run ./.update-clang-tidy.sh to regenerate."
|
||||||
|
print "Checks:"
|
||||||
|
}
|
||||||
|
/Add: \[$/{
|
||||||
|
a=1
|
||||||
|
next
|
||||||
|
}
|
||||||
|
/]/{
|
||||||
|
a=0
|
||||||
|
}
|
||||||
|
a{
|
||||||
|
gsub(/^\s+/," ")
|
||||||
|
print
|
||||||
|
}
|
||||||
|
|
||||||
|
/Remove: \[$/{
|
||||||
|
r=1
|
||||||
|
next
|
||||||
|
}
|
||||||
|
/]/{
|
||||||
|
r=0
|
||||||
|
}
|
||||||
|
r{
|
||||||
|
gsub(/^\s+/," -")
|
||||||
|
print
|
||||||
|
}
|
||||||
|
' .clangd | tee .clang-tidy
|
||||||
@@ -1,21 +1,25 @@
|
|||||||
# Change Log for ROCm SMI Library
|
# Change Log for ROCm SMI Library
|
||||||
|
|
||||||
Full documentation for rocm_smi_lib is available at [https://docs.amd.com/](https://docs.amd.com/category/SMI%20API%20Guides).
|
Full documentation for rocm_smi_lib is available at [https://docs.amd.com/](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/).
|
||||||
|
|
||||||
## rocm_smi_lib for ROCm 5.5.0
|
## rocm_smi_lib for ROCm 5.5.0
|
||||||
|
|
||||||
### Optimizations
|
### Optimizations
|
||||||
|
|
||||||
- Add new test to measure api execution time.
|
- Add new test to measure api execution time.
|
||||||
- Remove the shared mutex if no process is using it.
|
- Remove the shared mutex if no process is using it.
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
- ROCm SMI CLI: Add --showtempgraph Feature.
|
- ROCm SMI CLI: Add --showtempgraph Feature.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
- Relying on vendor ID to detect AMDGPU.
|
- Relying on vendor ID to detect AMDGPU.
|
||||||
- Change pragma message to warning for backward compatibility.
|
- Change pragma message to warning for backward compatibility.
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
- Fix --showproductname when device's SKU cannot be parsed out of the VBIOS string.
|
- Fix --showproductname when device's SKU cannot be parsed out of the VBIOS string.
|
||||||
- Fix compile error: ‘memcpy’ was not declared.
|
- Fix compile error: ‘memcpy’ was not declared.
|
||||||
- Fix order of CE and UE reporting in ROCm SMI CLI.
|
- Fix order of CE and UE reporting in ROCm SMI CLI.
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
set noparent
|
||||||
|
linelength=100
|
||||||
|
filter=-build/include_subdir,-legal/copyright,-runtime/printf,-build/c++11,-runtime/int,-build/header_guard
|
||||||
@@ -919,16 +919,6 @@ amdsmi_process_handle_t = ctypes.c_uint32
|
|||||||
class struct_amdsmi_proc_info_t(Structure):
|
class struct_amdsmi_proc_info_t(Structure):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class struct_engine_usage_(Structure):
|
|
||||||
pass
|
|
||||||
|
|
||||||
struct_engine_usage_._pack_ = 1 # source:False
|
|
||||||
struct_engine_usage_._fields_ = [
|
|
||||||
('gfx', ctypes.c_uint64),
|
|
||||||
('enc', ctypes.c_uint64),
|
|
||||||
('reserved', ctypes.c_uint32 * 12),
|
|
||||||
]
|
|
||||||
|
|
||||||
class struct_memory_usage_(Structure):
|
class struct_memory_usage_(Structure):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -940,6 +930,16 @@ struct_memory_usage_._fields_ = [
|
|||||||
('reserved', ctypes.c_uint32 * 10),
|
('reserved', ctypes.c_uint32 * 10),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
class struct_engine_usage_(Structure):
|
||||||
|
pass
|
||||||
|
|
||||||
|
struct_engine_usage_._pack_ = 1 # source:False
|
||||||
|
struct_engine_usage_._fields_ = [
|
||||||
|
('gfx', ctypes.c_uint64),
|
||||||
|
('enc', ctypes.c_uint64),
|
||||||
|
('reserved', ctypes.c_uint32 * 12),
|
||||||
|
]
|
||||||
|
|
||||||
struct_amdsmi_proc_info_t._pack_ = 1 # source:False
|
struct_amdsmi_proc_info_t._pack_ = 1 # source:False
|
||||||
struct_amdsmi_proc_info_t._fields_ = [
|
struct_amdsmi_proc_info_t._fields_ = [
|
||||||
('name', ctypes.c_char * 32),
|
('name', ctypes.c_char * 32),
|
||||||
|
|||||||
@@ -40,11 +40,12 @@ if(${ROCM_PATCH_VERSION})
|
|||||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
|
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
|
||||||
else()
|
else()
|
||||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
|
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
|
||||||
endif()
|
endif ()
|
||||||
set(${ROCM_SMI}_VERSION_MAJOR "${VERSION_MAJOR}")
|
set(${ROCM_SMI}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
|
||||||
set(${ROCM_SMI}_VERSION_MINOR "${VERSION_MINOR}")
|
set(${ROCM_SMI}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
|
||||||
set(${ROCM_SMI}_VERSION_PATCH "0")
|
set(${ROCM_SMI}_VERSION_PATCH "${CPACK_PACKAGE_VERSION_PATCH}")
|
||||||
set(${ROCM_SMI}_VERSION_BUILD "0")
|
set(${ROCM_SMI}_VERSION_BUILD "0")
|
||||||
|
set(${ROCM_SMI}_VERSION_HASH "${PKG_VERSION_HASH}")
|
||||||
message("SOVERSION: ${SO_VERSION_STRING}")
|
message("SOVERSION: ${SO_VERSION_STRING}")
|
||||||
|
|
||||||
# Create a configure file to get version info from within library
|
# Create a configure file to get version info from within library
|
||||||
|
|||||||
@@ -745,8 +745,8 @@ auto print_error_or_value(rsmi_status_t status_code, const T& metric) {
|
|||||||
return str_values;
|
return str_values;
|
||||||
}
|
}
|
||||||
else if constexpr ((std::is_same_v<T, std::uint16_t>) ||
|
else if constexpr ((std::is_same_v<T, std::uint16_t>) ||
|
||||||
(std::is_same_v<T, std::uint32_t>) ||
|
(std::is_same_v<T, std::uint32_t>) ||
|
||||||
(std::is_same_v<T, std::uint64_t>)) {
|
(std::is_same_v<T, std::uint64_t>)) {
|
||||||
return std::to_string(metric);
|
return std::to_string(metric);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ extern "C" {
|
|||||||
//! The number of points that make up a voltage-frequency curve definition
|
//! The number of points that make up a voltage-frequency curve definition
|
||||||
#define RSMI_NUM_VOLTAGE_CURVE_POINTS 3
|
#define RSMI_NUM_VOLTAGE_CURVE_POINTS 3
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Error codes returned by rocm_smi_lib functions
|
* @brief Error codes returned by rocm_smi_lib functions
|
||||||
*/
|
*/
|
||||||
@@ -353,7 +354,7 @@ typedef struct {
|
|||||||
* Clock types
|
* Clock types
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
RSMI_CLK_TYPE_SYS = 0x0, //!< System clock
|
RSMI_CLK_TYPE_SYS = 0x0, //!< System clock
|
||||||
RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS,
|
RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS,
|
||||||
RSMI_CLK_TYPE_DF, //!< Data Fabric clock (for ASICs
|
RSMI_CLK_TYPE_DF, //!< Data Fabric clock (for ASICs
|
||||||
//!< running on a separate clock)
|
//!< running on a separate clock)
|
||||||
@@ -970,6 +971,9 @@ struct metrics_table_header_t {
|
|||||||
uint8_t content_revision;
|
uint8_t content_revision;
|
||||||
/// \endcond
|
/// \endcond
|
||||||
};
|
};
|
||||||
|
/// \cond Ignore in docs.
|
||||||
|
typedef struct metrics_table_header_t metrics_table_header_t;
|
||||||
|
/// \endcond
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The following structure holds the gpu metrics values for a device.
|
* @brief The following structure holds the gpu metrics values for a device.
|
||||||
@@ -986,9 +990,14 @@ struct metrics_table_header_t {
|
|||||||
#define RSMI_NUM_HBM_INSTANCES 4
|
#define RSMI_NUM_HBM_INSTANCES 4
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief This should match kRSMI_MAX_NUM_VCN
|
* @brief This should match kRSMI_MAX_NUM_VCNS
|
||||||
*/
|
*/
|
||||||
#define RSMI_MAX_NUM_VCN 4
|
#define RSMI_MAX_NUM_VCNS 4
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief This should match kRSMI_MAX_JPEG_ENGINES
|
||||||
|
*/
|
||||||
|
#define RSMI_MAX_NUM_JPEG_ENGS 32
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief This should match kRSMI_MAX_NUM_CLKS
|
* @brief This should match kRSMI_MAX_NUM_CLKS
|
||||||
@@ -1109,7 +1118,7 @@ typedef struct {
|
|||||||
uint16_t current_socket_power;
|
uint16_t current_socket_power;
|
||||||
|
|
||||||
// Utilization (%)
|
// Utilization (%)
|
||||||
uint16_t vcn_activity[RSMI_MAX_NUM_VCN]; // VCN instances activity percent (encode/decode)
|
uint16_t vcn_activity[RSMI_MAX_NUM_VCNS]; // VCN instances activity percent (encode/decode)
|
||||||
|
|
||||||
// Clock Lock Status. Each bit corresponds to clock instance
|
// Clock Lock Status. Each bit corresponds to clock instance
|
||||||
uint32_t gfxclk_lock_status;
|
uint32_t gfxclk_lock_status;
|
||||||
@@ -1143,6 +1152,19 @@ typedef struct {
|
|||||||
uint16_t current_vclk0s[RSMI_MAX_NUM_CLKS];
|
uint16_t current_vclk0s[RSMI_MAX_NUM_CLKS];
|
||||||
uint16_t current_dclk0s[RSMI_MAX_NUM_CLKS];
|
uint16_t current_dclk0s[RSMI_MAX_NUM_CLKS];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* v1.5 additions
|
||||||
|
*/
|
||||||
|
// JPEG activity percent (encode/decode)
|
||||||
|
uint16_t jpeg_activity[RSMI_MAX_NUM_JPEG_ENGS];
|
||||||
|
|
||||||
|
// PCIE NAK sent accumulated count
|
||||||
|
uint32_t pcie_nak_sent_count_acc;
|
||||||
|
|
||||||
|
// PCIE NAK received accumulated count
|
||||||
|
uint32_t pcie_nak_rcvd_count_acc;
|
||||||
|
|
||||||
|
|
||||||
/// \endcond
|
/// \endcond
|
||||||
} rsmi_gpu_metrics_t;
|
} rsmi_gpu_metrics_t;
|
||||||
|
|
||||||
@@ -1358,7 +1380,7 @@ rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision);
|
|||||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *sku);
|
rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, char *sku);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Get the device vendor id associated with the device with provided
|
* @brief Get the device vendor id associated with the device with provided
|
||||||
@@ -1733,7 +1755,6 @@ rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id);
|
|||||||
*/
|
*/
|
||||||
rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id);
|
rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Get the XGMI physical id associated with the device
|
* @brief Get the XGMI physical id associated with the device
|
||||||
*
|
*
|
||||||
@@ -4097,7 +4118,7 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
|||||||
/** @} */ // end of HWTopo
|
/** @} */ // end of HWTopo
|
||||||
|
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
/** @defgroup compute_partition Compute Partition Functions
|
/** @defgroup ComputePartition Compute Partition Functions
|
||||||
* These functions are used to configure and query the device's
|
* These functions are used to configure and query the device's
|
||||||
* compute partition setting.
|
* compute partition setting.
|
||||||
* @{
|
* @{
|
||||||
@@ -4182,10 +4203,10 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
|||||||
*/
|
*/
|
||||||
rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
|
rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
|
||||||
|
|
||||||
/** @} */ // end of compute_partition
|
/** @} */ // end of ComputePartition
|
||||||
|
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
/** @defgroup memory_partition Memory Partition Functions
|
/** @defgroup memory_partition The Memory Partition Functions
|
||||||
* These functions are used to query and set the device's current memory
|
* These functions are used to query and set the device's current memory
|
||||||
* partition.
|
* partition.
|
||||||
* @{
|
* @{
|
||||||
@@ -4627,7 +4648,8 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind);
|
|||||||
* Metric multi-valued counter types
|
* Metric multi-valued counter types
|
||||||
*/
|
*/
|
||||||
typedef uint16_t GPUMetricTempHbm_t[RSMI_NUM_HBM_INSTANCES];
|
typedef uint16_t GPUMetricTempHbm_t[RSMI_NUM_HBM_INSTANCES];
|
||||||
typedef uint16_t GPUMetricVcnActivity_t[RSMI_MAX_NUM_VCN];
|
typedef uint16_t GPUMetricVcnActivity_t[RSMI_MAX_NUM_VCNS];
|
||||||
|
typedef uint16_t GPUMetricJpegActivity_t[RSMI_MAX_NUM_JPEG_ENGS];
|
||||||
typedef uint64_t GPUMetricXgmiReadDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS];
|
typedef uint64_t GPUMetricXgmiReadDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS];
|
||||||
typedef uint64_t GPUMetricXgmiWriteDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS];
|
typedef uint64_t GPUMetricXgmiWriteDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS];
|
||||||
typedef uint16_t GPUMetricCurrGfxClk_t[RSMI_MAX_NUM_GFX_CLKS];
|
typedef uint16_t GPUMetricCurrGfxClk_t[RSMI_MAX_NUM_GFX_CLKS];
|
||||||
@@ -5113,7 +5135,7 @@ rsmi_dev_metrics_temp_hbm_get(uint32_t dv_ind, GPUMetricTempHbm_t* temp_hbm_valu
|
|||||||
*
|
*
|
||||||
* @param[inout] vcn_activity_value a pointer to uint16_t to which the device gpu
|
* @param[inout] vcn_activity_value a pointer to uint16_t to which the device gpu
|
||||||
* metric unit will be stored
|
* metric unit will be stored
|
||||||
* - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_VCN)
|
* - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_VCNS)
|
||||||
* element array (GPUMetricVcnActivity_t)
|
* element array (GPUMetricVcnActivity_t)
|
||||||
*
|
*
|
||||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
|
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
|
||||||
|
|||||||
@@ -255,6 +255,7 @@ class Device {
|
|||||||
rsmi_status_t dev_log_gpu_metrics(std::ostringstream& outstream_metrics);
|
rsmi_status_t dev_log_gpu_metrics(std::ostringstream& outstream_metrics);
|
||||||
AMGpuMetricsPublicLatestTupl_t dev_copy_internal_to_external_metrics();
|
AMGpuMetricsPublicLatestTupl_t dev_copy_internal_to_external_metrics();
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<Monitor> monitor_;
|
std::shared_ptr<Monitor> monitor_;
|
||||||
std::shared_ptr<PowerMon> power_monitor_;
|
std::shared_ptr<PowerMon> power_monitor_;
|
||||||
@@ -277,7 +278,6 @@ class Device {
|
|||||||
bool returnWriteErr = false);
|
bool returnWriteErr = false);
|
||||||
rsmi_status_t run_amdgpu_property_reinforcement_query(const AMDGpuPropertyQuery_t& amdgpu_property_query);
|
rsmi_status_t run_amdgpu_property_reinforcement_query(const AMDGpuPropertyQuery_t& amdgpu_property_query);
|
||||||
|
|
||||||
|
|
||||||
uint64_t bdfid_;
|
uint64_t bdfid_;
|
||||||
uint64_t kfd_gpu_id_;
|
uint64_t kfd_gpu_id_;
|
||||||
std::unordered_set<rsmi_event_group_t,
|
std::unordered_set<rsmi_event_group_t,
|
||||||
|
|||||||
@@ -92,7 +92,10 @@ constexpr uint32_t kRSMI_MAX_NUM_GFX_CLKS = 8;
|
|||||||
constexpr uint32_t kRSMI_MAX_NUM_CLKS = 4;
|
constexpr uint32_t kRSMI_MAX_NUM_CLKS = 4;
|
||||||
|
|
||||||
// Note: This *must* match NUM_VCN
|
// Note: This *must* match NUM_VCN
|
||||||
constexpr uint32_t kRSMI_MAX_NUM_VCN = 4;
|
constexpr uint32_t kRSMI_MAX_NUM_VCNS = 4;
|
||||||
|
|
||||||
|
// Note: This *must* match NUM_JPEG_ENG
|
||||||
|
constexpr uint32_t kRSMI_MAX_JPEG_ENGINES = 32;
|
||||||
|
|
||||||
|
|
||||||
struct AMDGpuMetricsHeader_v1_t
|
struct AMDGpuMetricsHeader_v1_t
|
||||||
@@ -326,7 +329,7 @@ struct AMDGpuMetrics_v14_t
|
|||||||
// Utilization (%)
|
// Utilization (%)
|
||||||
uint16_t m_average_gfx_activity;
|
uint16_t m_average_gfx_activity;
|
||||||
uint16_t m_average_umc_activity; // memory controller
|
uint16_t m_average_umc_activity; // memory controller
|
||||||
uint16_t m_vcn_activity[kRSMI_MAX_NUM_VCN]; // VCN instances activity percent (encode/decode)
|
uint16_t m_vcn_activity[kRSMI_MAX_NUM_VCNS]; // VCN instances activity percent (encode/decode)
|
||||||
|
|
||||||
// Energy (15.259uJ (2^-16) units)
|
// Energy (15.259uJ (2^-16) units)
|
||||||
uint64_t m_energy_accumulator;
|
uint64_t m_energy_accumulator;
|
||||||
@@ -383,7 +386,89 @@ struct AMDGpuMetrics_v14_t
|
|||||||
|
|
||||||
uint16_t m_padding;
|
uint16_t m_padding;
|
||||||
};
|
};
|
||||||
using AMGpuMetricsLatest_t = AMDGpuMetrics_v14_t;
|
|
||||||
|
struct AMDGpuMetrics_v15_t
|
||||||
|
{
|
||||||
|
~AMDGpuMetrics_v15_t() = default;
|
||||||
|
|
||||||
|
struct AMDGpuMetricsHeader_v1_t m_common_header;
|
||||||
|
|
||||||
|
// Temperature (Celsius). It will be zero (0) if unsupported.
|
||||||
|
uint16_t m_temperature_hotspot;
|
||||||
|
uint16_t m_temperature_mem;
|
||||||
|
uint16_t m_temperature_vrsoc;
|
||||||
|
|
||||||
|
// Power (Watts)
|
||||||
|
uint16_t m_current_socket_power;
|
||||||
|
|
||||||
|
// Utilization (%)
|
||||||
|
uint16_t m_average_gfx_activity;
|
||||||
|
uint16_t m_average_umc_activity; // memory controller
|
||||||
|
uint16_t m_vcn_activity[kRSMI_MAX_NUM_VCNS]; // VCN instances activity percent (encode/decode)
|
||||||
|
uint16_t m_jpeg_activity[kRSMI_MAX_JPEG_ENGINES]; // JPEG activity percent (encode/decode)
|
||||||
|
|
||||||
|
// Energy (15.259uJ (2^-16) units)
|
||||||
|
uint64_t m_energy_accumulator;
|
||||||
|
|
||||||
|
// Driver attached timestamp (in ns)
|
||||||
|
uint64_t m_system_clock_counter;
|
||||||
|
|
||||||
|
// Throttle status
|
||||||
|
uint32_t m_throttle_status;
|
||||||
|
|
||||||
|
// Clock Lock Status. Each bit corresponds to clock instance
|
||||||
|
uint32_t m_gfxclk_lock_status;
|
||||||
|
|
||||||
|
// Link width (number of lanes) and speed (in 0.1 GT/s)
|
||||||
|
uint16_t m_pcie_link_width;
|
||||||
|
uint16_t m_pcie_link_speed; // in 0.1 GT/s
|
||||||
|
|
||||||
|
// XGMI bus width and bitrate (in Gbps)
|
||||||
|
uint16_t m_xgmi_link_width;
|
||||||
|
uint16_t m_xgmi_link_speed;
|
||||||
|
|
||||||
|
// Utilization Accumulated (%)
|
||||||
|
uint32_t m_gfx_activity_acc;
|
||||||
|
uint32_t m_mem_activity_acc;
|
||||||
|
|
||||||
|
// PCIE accumulated bandwidth (GB/sec)
|
||||||
|
uint64_t m_pcie_bandwidth_acc;
|
||||||
|
|
||||||
|
// PCIE instantaneous bandwidth (GB/sec)
|
||||||
|
uint64_t m_pcie_bandwidth_inst;
|
||||||
|
|
||||||
|
// PCIE L0 to recovery state transition accumulated count
|
||||||
|
uint64_t m_pcie_l0_to_recov_count_acc;
|
||||||
|
|
||||||
|
// PCIE replay accumulated count
|
||||||
|
uint64_t m_pcie_replay_count_acc;
|
||||||
|
|
||||||
|
// PCIE replay rollover accumulated count
|
||||||
|
uint64_t m_pcie_replay_rover_count_acc;
|
||||||
|
|
||||||
|
// PCIE NAK sent accumulated count
|
||||||
|
uint32_t m_pcie_nak_sent_count_acc;
|
||||||
|
|
||||||
|
// PCIE NAK received accumulated count
|
||||||
|
uint32_t m_pcie_nak_rcvd_count_acc;
|
||||||
|
|
||||||
|
// XGMI accumulated data transfer size(KiloBytes)
|
||||||
|
uint64_t m_xgmi_read_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
|
||||||
|
uint64_t m_xgmi_write_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
|
||||||
|
|
||||||
|
// PMFW attached timestamp (10ns resolution)
|
||||||
|
uint64_t m_firmware_timestamp;
|
||||||
|
|
||||||
|
// Current clocks (MHz)
|
||||||
|
uint16_t m_current_gfxclk[kRSMI_MAX_NUM_GFX_CLKS];
|
||||||
|
uint16_t m_current_socclk[kRSMI_MAX_NUM_CLKS];
|
||||||
|
uint16_t m_current_vclk0[kRSMI_MAX_NUM_CLKS];
|
||||||
|
uint16_t m_current_dclk0[kRSMI_MAX_NUM_CLKS];
|
||||||
|
uint16_t m_current_uclk;
|
||||||
|
|
||||||
|
uint16_t m_padding;
|
||||||
|
};
|
||||||
|
using AMGpuMetricsLatest_t = AMDGpuMetrics_v15_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is GPU Metrics version that gets to public access.
|
* This is GPU Metrics version that gets to public access.
|
||||||
@@ -410,6 +495,9 @@ using GPUMetricTempHbmTbl_t = GpuMetricU16Tbl_t;
|
|||||||
using GPUMetricVcnActivity_t = decltype(AMDGpuMetrics_v14_t::m_vcn_activity);
|
using GPUMetricVcnActivity_t = decltype(AMDGpuMetrics_v14_t::m_vcn_activity);
|
||||||
using GPUMetricVcnActivityTbl_t = GpuMetricU16Tbl_t;
|
using GPUMetricVcnActivityTbl_t = GpuMetricU16Tbl_t;
|
||||||
|
|
||||||
|
using GPUMetricJpegActivity_t = decltype(AMDGpuMetrics_v15_t::m_jpeg_activity);
|
||||||
|
using GPUMetricJpegActivityTbl_t = GpuMetricU16Tbl_t;
|
||||||
|
|
||||||
using GPUMetricXgmiReadDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_read_data_acc);
|
using GPUMetricXgmiReadDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_read_data_acc);
|
||||||
using GPUMetricXgmiWriteDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_write_data_acc);
|
using GPUMetricXgmiWriteDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_write_data_acc);
|
||||||
using GPUMetricXgmiAccTbl_t = GpuMetricU64Tbl_t;
|
using GPUMetricXgmiAccTbl_t = GpuMetricU64Tbl_t;
|
||||||
@@ -518,6 +606,7 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
|
|||||||
kMetricGfxActivityAccumulator,
|
kMetricGfxActivityAccumulator,
|
||||||
kMetricMemActivityAccumulator,
|
kMetricMemActivityAccumulator,
|
||||||
kMetricVcnActivity, //v1.4
|
kMetricVcnActivity, //v1.4
|
||||||
|
kMetricJpegActivity, //v1.5
|
||||||
|
|
||||||
// kGpuMetricAverageClock counters
|
// kGpuMetricAverageClock counters
|
||||||
kMetricAvgGfxClockFrequency,
|
kMetricAvgGfxClockFrequency,
|
||||||
@@ -559,6 +648,8 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
|
|||||||
kMetricPcieL0RecovCountAccumulator, //v1.4
|
kMetricPcieL0RecovCountAccumulator, //v1.4
|
||||||
kMetricPcieReplayCountAccumulator, //v1.4
|
kMetricPcieReplayCountAccumulator, //v1.4
|
||||||
kMetricPcieReplayRollOverCountAccumulator, //v1.4
|
kMetricPcieReplayRollOverCountAccumulator, //v1.4
|
||||||
|
kMetricPcieNakSentCountAccumulator, //v1.5
|
||||||
|
kMetricPcieNakReceivedCountAccumulator, //v1.5
|
||||||
|
|
||||||
// kGpuMetricPowerEnergy counters
|
// kGpuMetricPowerEnergy counters
|
||||||
kMetricAvgSocketPower,
|
kMetricAvgSocketPower,
|
||||||
@@ -608,6 +699,7 @@ enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
|
|||||||
kGpuMetricV12 = (0x1 << 2),
|
kGpuMetricV12 = (0x1 << 2),
|
||||||
kGpuMetricV13 = (0x1 << 3),
|
kGpuMetricV13 = (0x1 << 3),
|
||||||
kGpuMetricV14 = (0x1 << 4),
|
kGpuMetricV14 = (0x1 << 4),
|
||||||
|
kGpuMetricV15 = (0x1 << 5),
|
||||||
};
|
};
|
||||||
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint16_t, AMDGpuMetricVersionFlags_t>;
|
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint16_t, AMDGpuMetricVersionFlags_t>;
|
||||||
using GpuMetricTypePtr_t = std::shared_ptr<void>;
|
using GpuMetricTypePtr_t = std::shared_ptr<void>;
|
||||||
@@ -780,6 +872,40 @@ class GpuMetricsBase_v14_t final : public GpuMetricsBase_t
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class GpuMetricsBase_v15_t final : public GpuMetricsBase_t
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
~GpuMetricsBase_v15_t() = default;
|
||||||
|
|
||||||
|
size_t sizeof_metric_table() override {
|
||||||
|
return sizeof(AMDGpuMetrics_v15_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuMetricTypePtr_t get_metrics_table() override
|
||||||
|
{
|
||||||
|
if (!m_gpu_metric_ptr) {
|
||||||
|
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v15_t*){});
|
||||||
|
}
|
||||||
|
assert(m_gpu_metric_ptr != nullptr);
|
||||||
|
return m_gpu_metric_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void dump_internal_metrics_table() override;
|
||||||
|
|
||||||
|
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
|
||||||
|
{
|
||||||
|
return AMDGpuMetricVersionFlags_t::kGpuMetricV15;
|
||||||
|
}
|
||||||
|
|
||||||
|
rsmi_status_t populate_metrics_dynamic_tbl() override;
|
||||||
|
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
AMDGpuMetrics_v15_t m_gpu_metrics_tbl;
|
||||||
|
std::shared_ptr<AMDGpuMetrics_v15_t> m_gpu_metric_ptr;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind, AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
|
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind, AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
|
||||||
|
|||||||
@@ -29,10 +29,12 @@ from rsmiBindings import *
|
|||||||
# Major version - Increment when backwards-compatibility breaks
|
# Major version - Increment when backwards-compatibility breaks
|
||||||
# Minor version - Increment when adding a new feature, set to 0 when major is incremented
|
# Minor version - Increment when adding a new feature, set to 0 when major is incremented
|
||||||
# Patch version - Increment when adding a fix, set to 0 when minor is incremented
|
# Patch version - Increment when adding a fix, set to 0 when minor is incremented
|
||||||
SMI_MAJ = 1
|
# Hash version - Shortened commit hash. Print here and not with lib for consistency with amd-smi
|
||||||
SMI_MIN = 5
|
SMI_MAJ = 2
|
||||||
|
SMI_MIN = 0
|
||||||
SMI_PAT = 0
|
SMI_PAT = 0
|
||||||
__version__ = '%s.%s.%s' % (SMI_MAJ, SMI_MIN, SMI_PAT)
|
# SMI_HASH is provided by rsmiBindings
|
||||||
|
__version__ = '%s.%s.%s+%s' % (SMI_MAJ, SMI_MIN, SMI_PAT, SMI_HASH)
|
||||||
|
|
||||||
# Set to 1 if an error occurs
|
# Set to 1 if an error occurs
|
||||||
RETCODE = 0
|
RETCODE = 0
|
||||||
@@ -828,23 +830,20 @@ def printTableRow(space, displayString, v_delim=" "):
|
|||||||
|
|
||||||
def checkIfSecondaryDie(device):
|
def checkIfSecondaryDie(device):
|
||||||
""" Checks if GCD(die) is the secondary die in a MCM.
|
""" Checks if GCD(die) is the secondary die in a MCM.
|
||||||
|
MI200 device specific feature check.
|
||||||
|
Secondary dies lack power management features.
|
||||||
|
|
||||||
Secondary dies lack power management features.
|
|
||||||
TODO: switch to more robust way to check for primary/secondary die, when implemented in Kernel and rocm_smi_lib.
|
|
||||||
@param device: The device to check
|
@param device: The device to check
|
||||||
"""
|
"""
|
||||||
power_cap = c_uint64()
|
energy_count = c_uint64()
|
||||||
# secondary die can currently be determined by checking if all power1_* (power cap) values are equal to zero.
|
counter_resoution = c_float()
|
||||||
ret = rocmsmi.rsmi_dev_power_cap_get(device, 0, byref(power_cap))
|
timestamp = c_uint64()
|
||||||
if not (rsmi_ret_ok(ret, None, 'get_power_cap', False) and power_cap.value == 0):
|
|
||||||
return False
|
# secondary die can be determined by checking if energy counter == 0
|
||||||
ret = rocmsmi.rsmi_dev_power_cap_default_get(device, byref(power_cap))
|
ret = rocmsmi.rsmi_dev_energy_count_get(device, byref(energy_count), byref(counter_resoution), byref(timestamp))
|
||||||
if not (rsmi_ret_ok(ret, None, 'get_power_cap_default', False) and power_cap.value == 0):
|
if (rsmi_ret_ok(ret, None, 'energy_count_secondary_die_check', silent=False)) and (energy_count.value == 0):
|
||||||
return False
|
return True
|
||||||
ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power_cap))
|
return False
|
||||||
if not (rsmi_ret_ok(ret, None, 'get_power_avg', False) and power_cap.value == 0):
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def resetClocks(deviceList):
|
def resetClocks(deviceList):
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ dv_id = c_uint64()
|
|||||||
# GPU ID
|
# GPU ID
|
||||||
gpu_id = c_uint32(0)
|
gpu_id = c_uint32(0)
|
||||||
|
|
||||||
|
SMI_HASH = '@PKG_VERSION_HASH@'
|
||||||
|
|
||||||
|
|
||||||
# Policy enums
|
# Policy enums
|
||||||
RSMI_MAX_NUM_FREQUENCIES = 33
|
RSMI_MAX_NUM_FREQUENCIES = 33
|
||||||
|
|||||||
@@ -594,7 +594,7 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
|
|||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", returning get_dev_value_line() response = "
|
<< ", returning get_dev_value_line() response = "
|
||||||
<< getRSMIStatusString(ret);
|
<< amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -613,7 +613,7 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
|
|||||||
|
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", returning strtoul() response = "
|
<< ", returning strtoul() response = "
|
||||||
<< getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(errno));
|
<< amd::smi::getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(errno));
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
|
|
||||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||||
@@ -667,7 +667,7 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
|||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", returning rsmi_dev_ecc_enabled_get() response = "
|
<< ", returning rsmi_dev_ecc_enabled_get() response = "
|
||||||
<< getRSMIStatusString(ret);
|
<< amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -728,7 +728,7 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
|||||||
default:
|
default:
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", default case -> reporting "
|
<< ", default case -> reporting "
|
||||||
<< getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED);
|
<< amd::smi::getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED);
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return RSMI_STATUS_NOT_SUPPORTED;
|
return RSMI_STATUS_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
@@ -748,7 +748,7 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
|||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", GetDevValueVec() ret was not RSMI_STATUS_SUCCESS"
|
<< ", GetDevValueVec() ret was not RSMI_STATUS_SUCCESS"
|
||||||
<< " -> reporting " << getRSMIStatusString(ret);
|
<< " -> reporting " << amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -767,7 +767,7 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
|||||||
fs2 >> ec->correctable_err;
|
fs2 >> ec->correctable_err;
|
||||||
|
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", reporting " << getRSMIStatusString(ret);;
|
<< ", reporting " << amd::smi::getRSMIStatusString(ret);;
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
return ret;
|
return ret;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -935,7 +935,7 @@ rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) {
|
|||||||
|
|
||||||
ret = get_id(dv_ind, amd::smi::kDevDevID, id);
|
ret = get_id(dv_ind, amd::smi::kDevDevID, id);
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", reporting " << getRSMIStatusString(ret);
|
<< ", reporting " << amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -950,7 +950,7 @@ rsmi_dev_oam_id_get(uint32_t dv_ind, uint16_t *id) {
|
|||||||
|
|
||||||
ret = get_id(dv_ind, amd::smi::kDevXGMIPhysicalID, id);
|
ret = get_id(dv_ind, amd::smi::kDevXGMIPhysicalID, id);
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", reporting " << getRSMIStatusString(ret);
|
<< ", reporting " << amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -965,7 +965,7 @@ rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision) {
|
|||||||
|
|
||||||
ret = get_id(dv_ind, amd::smi::kDevDevRevID, revision);
|
ret = get_id(dv_ind, amd::smi::kDevDevRevID, revision);
|
||||||
outss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
outss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", reporting " << getRSMIStatusString(ret);
|
<< ", reporting " << amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_TRACE(outss);
|
LOG_TRACE(outss);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -980,7 +980,7 @@ rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *id) {
|
|||||||
CHK_SUPPORT_NAME_ONLY(id)
|
CHK_SUPPORT_NAME_ONLY(id)
|
||||||
ret = get_id(dv_ind, amd::smi::kDevDevProdNum, id);
|
ret = get_id(dv_ind, amd::smi::kDevDevProdNum, id);
|
||||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||||
<< ", reporting " << getRSMIStatusString(ret);
|
<< ", reporting " << amd::smi::getRSMIStatusString(ret);
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
return ret;
|
return ret;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -4045,6 +4045,7 @@ rsmi_status_t rsmi_dev_serial_number_get(uint32_t dv_ind,
|
|||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t ln = static_cast<uint32_t>(val_str.copy(serial_num, len));
|
uint32_t ln = static_cast<uint32_t>(val_str.copy(serial_num, len));
|
||||||
|
|
||||||
serial_num[std::min(len - 1, ln)] = '\0';
|
serial_num[std::min(len - 1, ln)] = '\0';
|
||||||
@@ -5125,15 +5126,11 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
|||||||
<< devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
|
<< devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
|
||||||
<< " | Cause: device board name does not support this action"
|
<< " | Cause: device board name does not support this action"
|
||||||
<< " | Returning = "
|
<< " | Returning = "
|
||||||
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
|
<< getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED) << " |";
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return RSMI_STATUS_NOT_SUPPORTED;
|
return RSMI_STATUS_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string newMemoryPartition
|
|
||||||
= mapRSMIToStringMemoryPartitionTypes.at(memory_partition);
|
|
||||||
std::string currentMemoryPartition;
|
|
||||||
|
|
||||||
switch (memory_partition) {
|
switch (memory_partition) {
|
||||||
case RSMI_MEMORY_PARTITION_NPS1:
|
case RSMI_MEMORY_PARTITION_NPS1:
|
||||||
case RSMI_MEMORY_PARTITION_NPS2:
|
case RSMI_MEMORY_PARTITION_NPS2:
|
||||||
@@ -5154,6 +5151,9 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
|||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
return RSMI_STATUS_INVALID_ARGS;
|
return RSMI_STATUS_INVALID_ARGS;
|
||||||
}
|
}
|
||||||
|
std::string newMemoryPartition
|
||||||
|
= mapRSMIToStringMemoryPartitionTypes.at(memory_partition);
|
||||||
|
std::string currentMemoryPartition;
|
||||||
|
|
||||||
// do nothing if memory_partition is the current mode
|
// do nothing if memory_partition is the current mode
|
||||||
rsmi_status_t ret_get = get_memory_partition(dv_ind, currentMemoryPartition);
|
rsmi_status_t ret_get = get_memory_partition(dv_ind, currentMemoryPartition);
|
||||||
@@ -5196,13 +5196,16 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
|||||||
|
|
||||||
if (amd::smi::ErrnoToRsmiStatus(ret) != RSMI_STATUS_SUCCESS) {
|
if (amd::smi::ErrnoToRsmiStatus(ret) != RSMI_STATUS_SUCCESS) {
|
||||||
rsmi_status_t err = amd::smi::ErrnoToRsmiStatus(ret);
|
rsmi_status_t err = amd::smi::ErrnoToRsmiStatus(ret);
|
||||||
|
if (ret == EACCES) {
|
||||||
|
err = RSMI_STATUS_NOT_SUPPORTED; // already verified permissions
|
||||||
|
}
|
||||||
ss << __PRETTY_FUNCTION__
|
ss << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< " | ======= end ======= "
|
||||||
<< " | Fail "
|
<< " | Fail "
|
||||||
<< " | Device #: " << dv_ind
|
<< " | Device #: " << dv_ind
|
||||||
<< " | Type: "
|
<< " | Type: "
|
||||||
<< devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
|
<< devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
|
||||||
<< " | Cause: issue writing requested setting of " + newMemoryPartition
|
<< " | Cause: issue writing reqested setting of " + newMemoryPartition
|
||||||
<< " | Returning = "
|
<< " | Returning = "
|
||||||
<< getRSMIStatusString(err) << " |";
|
<< getRSMIStatusString(err) << " |";
|
||||||
LOG_ERROR(ss);
|
LOG_ERROR(ss);
|
||||||
@@ -6027,7 +6030,6 @@ rsmi_dev_metrics_curr_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_v
|
|||||||
CATCH
|
CATCH
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
rsmi_status_t
|
rsmi_status_t
|
||||||
rsmi_dev_metrics_avg_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value)
|
rsmi_dev_metrics_avg_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value)
|
||||||
{
|
{
|
||||||
@@ -6541,7 +6543,6 @@ rsmi_dev_metrics_pcie_bandwidth_inst_get(uint32_t dv_ind, uint64_t* pcie_bandwid
|
|||||||
CATCH
|
CATCH
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
rsmi_status_t
|
rsmi_status_t
|
||||||
rsmi_dev_metrics_pcie_l0_recov_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value)
|
rsmi_dev_metrics_pcie_l0_recov_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value)
|
||||||
{
|
{
|
||||||
@@ -6666,19 +6667,24 @@ rsmi_dev_metrics_temp_hbm_get(uint32_t dv_ind, GPUMetricTempHbm_t* temp_hbm_valu
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempHbm);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempHbm);
|
||||||
amd::smi::GPUMetricTempHbmTbl_t tmp_hbl_tbl{};
|
amd::smi::GPUMetricTempHbmTbl_t tmp_hbl_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_hbl_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_hbl_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*temp_hbm_value) - std::begin(*temp_hbm_value));
|
||||||
static_cast<uint16_t>(std::end(*temp_hbm_value) - std::begin(*temp_hbm_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_hbl_tbl), max_num_elems, *temp_hbm_value);
|
static_cast<uint16_t>((max_num_elems < tmp_hbl_tbl.size()) ? max_num_elems : tmp_hbl_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_hbl_tbl.size()
|
<< "\n | Metric Size: " << tmp_hbl_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(temp_hbm_value, 0, sizeof(*temp_hbm_value));
|
||||||
|
std::copy_n(std::begin(tmp_hbl_tbl), copy_size, *temp_hbm_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6700,19 +6706,24 @@ rsmi_dev_metrics_vcn_activity_get(uint32_t dv_ind, GPUMetricVcnActivity_t* vcn_a
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
|
||||||
amd::smi::GPUMetricVcnActivityTbl_t tmp_vcn_tbl{};
|
amd::smi::GPUMetricVcnActivityTbl_t tmp_vcn_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_vcn_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_vcn_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*vcn_activity_value) - std::begin(*vcn_activity_value));
|
||||||
static_cast<uint16_t>(std::end(*vcn_activity_value) - std::begin(*vcn_activity_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_vcn_tbl), max_num_elems, *vcn_activity_value);
|
static_cast<uint16_t>((max_num_elems < tmp_vcn_tbl.size()) ? max_num_elems : tmp_vcn_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_vcn_tbl.size()
|
<< "\n | Metric Size: " << tmp_vcn_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(vcn_activity_value, 0, sizeof(*vcn_activity_value));
|
||||||
|
std::copy_n(std::begin(tmp_vcn_tbl), copy_size, *vcn_activity_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6734,19 +6745,24 @@ rsmi_dev_metrics_xgmi_read_data_get(uint32_t dv_ind, GPUMetricXgmiReadDataAcc_t*
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator);
|
||||||
amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{};
|
amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*xgmi_read_data_acc_value) - std::begin(*xgmi_read_data_acc_value));
|
||||||
static_cast<uint16_t>(std::end(*xgmi_read_data_acc_value) - std::begin(*xgmi_read_data_acc_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_xgmi_acc_tbl), max_num_elems, *xgmi_read_data_acc_value);
|
static_cast<uint16_t>((max_num_elems < tmp_xgmi_acc_tbl.size()) ? max_num_elems : tmp_xgmi_acc_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_xgmi_acc_tbl.size()
|
<< "\n | Metric Size: " << tmp_xgmi_acc_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(xgmi_read_data_acc_value, 0, sizeof(*xgmi_read_data_acc_value));
|
||||||
|
std::copy_n(std::begin(tmp_xgmi_acc_tbl), copy_size, *xgmi_read_data_acc_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6768,19 +6784,24 @@ rsmi_dev_metrics_xgmi_write_data_get(uint32_t dv_ind, GPUMetricXgmiWriteDataAcc_
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator);
|
||||||
amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{};
|
amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*xgmi_write_data_acc_value) - std::begin(*xgmi_write_data_acc_value));
|
||||||
static_cast<uint16_t>(std::end(*xgmi_write_data_acc_value) - std::begin(*xgmi_write_data_acc_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_xgmi_acc_tbl), max_num_elems, *xgmi_write_data_acc_value);
|
static_cast<uint16_t>((max_num_elems < tmp_xgmi_acc_tbl.size()) ? max_num_elems : tmp_xgmi_acc_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_xgmi_acc_tbl.size()
|
<< "\n | Metric Size: " << tmp_xgmi_acc_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(xgmi_write_data_acc_value, 0, sizeof(*xgmi_write_data_acc_value));
|
||||||
|
std::copy_n(std::begin(tmp_xgmi_acc_tbl), copy_size, *xgmi_write_data_acc_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6800,26 +6821,28 @@ rsmi_dev_metrics_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClk_t* current
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock);
|
||||||
rsmi_gpu_metrics_t gpu = {};
|
amd::smi::GPUMetricCurrGfxClkTbl_t tmp_curr_gfxclk_tbl{};
|
||||||
auto status = rsmi_dev_gpu_metrics_info_get(dv_ind, &gpu);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_gfxclk_tbl);
|
||||||
if (status == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
std::copy_n(std::begin(gpu.current_gfxclks),
|
static_cast<uint16_t>(std::end(*current_gfxclk_value) - std::begin(*current_gfxclk_value));
|
||||||
static_cast<uint16_t>(
|
const auto copy_size =
|
||||||
sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0])),
|
static_cast<uint16_t>((max_num_elems < tmp_curr_gfxclk_tbl.size()) ? max_num_elems : tmp_curr_gfxclk_tbl.size());
|
||||||
*current_gfxclk_value);
|
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << static_cast<uint16_t>(
|
<< "\n | Metric Size: " << tmp_curr_gfxclk_tbl.size()
|
||||||
sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0]))
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
<< " | Returning = " << status << " "
|
<< "\n | Copy size: " << copy_size
|
||||||
<< getRSMIStatusString(status) << " |";
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(current_gfxclk_value, 0, sizeof(*current_gfxclk_value));
|
||||||
|
std::copy_n(std::begin(tmp_curr_gfxclk_tbl), copy_size, *current_gfxclk_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6839,19 +6862,23 @@ rsmi_dev_metrics_curr_socclk_get(uint32_t dv_ind, GPUMetricCurrSocClk_t* current
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrSocClock);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrSocClock);
|
||||||
amd::smi::GPUMetricCurrSocClkTbl_t tmp_curr_socclk_tbl{};
|
amd::smi::GPUMetricCurrSocClkTbl_t tmp_curr_socclk_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_socclk_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_socclk_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*current_socclk_value) - std::begin(*current_socclk_value));
|
||||||
static_cast<uint16_t>(std::end(*current_socclk_value) - std::begin(*current_socclk_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_curr_socclk_tbl), max_num_elems, *current_socclk_value);
|
static_cast<uint16_t>((max_num_elems < tmp_curr_socclk_tbl.size()) ? max_num_elems : tmp_curr_socclk_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_curr_socclk_tbl.size()
|
<< "\n | Metric Size: " << tmp_curr_socclk_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(current_socclk_value, 0, sizeof(*current_socclk_value));
|
||||||
|
std::copy_n(std::begin(tmp_curr_socclk_tbl), copy_size, *current_socclk_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6873,19 +6900,24 @@ rsmi_dev_metrics_curr_vclk0_get(uint32_t dv_ind, GPUMetricCurrVClk0_t* current_v
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrVClock0);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrVClock0);
|
||||||
amd::smi::GPUMetricCurrVClkTbl_t tmp_curr_vclk0_tbl{};
|
amd::smi::GPUMetricCurrVClkTbl_t tmp_curr_vclk0_tbl{};
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_vclk0_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_vclk0_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*current_vclk_value) - std::begin(*current_vclk_value));
|
||||||
static_cast<uint16_t>(std::end(*current_vclk_value) - std::begin(*current_vclk_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_curr_vclk0_tbl), max_num_elems, *current_vclk_value);
|
static_cast<uint16_t>((max_num_elems < tmp_curr_vclk0_tbl.size()) ? max_num_elems : tmp_curr_vclk0_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_curr_vclk0_tbl.size()
|
<< "\n | Metric Size: " << tmp_curr_vclk0_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(current_vclk_value, 0, sizeof(*current_vclk_value));
|
||||||
|
std::copy_n(std::begin(tmp_curr_vclk0_tbl), copy_size, *current_vclk_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -6934,19 +6966,24 @@ rsmi_dev_metrics_curr_dclk0_get(uint32_t dv_ind, GPUMetricCurrDClk0_t* current_d
|
|||||||
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrDClock0);
|
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrDClock0);
|
||||||
amd::smi::GPUMetricCurrDClkTbl_t tmp_curr_dclk0_tbl;
|
amd::smi::GPUMetricCurrDClkTbl_t tmp_curr_dclk0_tbl;
|
||||||
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_dclk0_tbl);
|
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_dclk0_tbl);
|
||||||
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
const auto max_num_elems =
|
||||||
const auto max_num_elems =
|
static_cast<uint16_t>(std::end(*current_dclk_value) - std::begin(*current_dclk_value));
|
||||||
static_cast<uint16_t>(std::end(*current_dclk_value) - std::begin(*current_dclk_value));
|
const auto copy_size =
|
||||||
std::copy_n(std::begin(tmp_curr_dclk0_tbl), max_num_elems, *current_dclk_value);
|
static_cast<uint16_t>((max_num_elems < tmp_curr_dclk0_tbl.size()) ? max_num_elems : tmp_curr_dclk0_tbl.size());
|
||||||
}
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< "\n | ======= end ======= "
|
||||||
<< " | End Result "
|
<< "\n | End Result "
|
||||||
<< " | Device #: " << dv_ind
|
<< "\n | Device #: " << dv_ind
|
||||||
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
<< "\n | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
|
||||||
<< " | Metric Size: " << tmp_curr_dclk0_tbl.size()
|
<< "\n | Metric Size: " << tmp_curr_dclk0_tbl.size()
|
||||||
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
<< "\n | Max num of elements: " << max_num_elems
|
||||||
|
<< "\n | Copy size: " << copy_size
|
||||||
|
<< "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
|
||||||
LOG_INFO(ostrstream);
|
LOG_INFO(ostrstream);
|
||||||
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
||||||
|
std::memset(current_dclk_value, 0, sizeof(*current_dclk_value));
|
||||||
|
std::copy_n(std::begin(tmp_curr_dclk0_tbl), copy_size, *current_dclk_value);
|
||||||
|
}
|
||||||
|
|
||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
@@ -7277,6 +7314,7 @@ rsmi_dev_metrics_header_info_get(uint32_t dv_ind, metrics_table_header_t* header
|
|||||||
CATCH
|
CATCH
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
rsmi_status_t
|
rsmi_status_t
|
||||||
rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t* xcd_counter_value)
|
rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t* xcd_counter_value)
|
||||||
{
|
{
|
||||||
@@ -7335,6 +7373,7 @@ rsmi_dev_metrics_log_get(uint32_t dv_ind)
|
|||||||
return status_code;
|
return status_code;
|
||||||
CATCH
|
CATCH
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// End of: new GPU Metrics related work.
|
// End of: new GPU Metrics related work.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -52,5 +52,6 @@
|
|||||||
#define rocm_smi_VERSION_MINOR @rocm_smi_VERSION_MINOR@
|
#define rocm_smi_VERSION_MINOR @rocm_smi_VERSION_MINOR@
|
||||||
#define rocm_smi_VERSION_PATCH @rocm_smi_VERSION_PATCH@
|
#define rocm_smi_VERSION_PATCH @rocm_smi_VERSION_PATCH@
|
||||||
#define rocm_smi_VERSION_BUILD "@rocm_smi_VERSION_BUILD@"
|
#define rocm_smi_VERSION_BUILD "@rocm_smi_VERSION_BUILD@"
|
||||||
|
#define rocm_smi_VERSION_HASH "@rocm_smi_VERSION_HASH@"
|
||||||
|
|
||||||
#endif // INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
|
#endif // INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
|
||||||
|
|||||||
@@ -738,7 +738,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) {
|
|||||||
<< " | " << (fs.fail() ? "[ERROR] Failed read - format error" :
|
<< " | " << (fs.fail() ? "[ERROR] Failed read - format error" :
|
||||||
"[GOOD] No fail - Successful read operation")
|
"[GOOD] No fail - Successful read operation")
|
||||||
<< " | " << (fs.eof() ? "[ERROR] Failed read - EOF error" :
|
<< " | " << (fs.eof() ? "[ERROR] Failed read - EOF error" :
|
||||||
"[GOOD] No eof error - Successful read operation")
|
"[GOOD] No eof - Successful read operation")
|
||||||
<< " | " << (fs.good() ? "[GOOD] read good - Successful read operation" :
|
<< " | " << (fs.good() ? "[GOOD] read good - Successful read operation" :
|
||||||
"[ERROR] Failed read - good error");
|
"[ERROR] Failed read - good error");
|
||||||
LOG_INFO(ss);
|
LOG_INFO(ss);
|
||||||
@@ -800,7 +800,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr,
|
|||||||
<< " | " << (fs.fail() ? "[ERROR] Failed write - format error" :
|
<< " | " << (fs.fail() ? "[ERROR] Failed write - format error" :
|
||||||
"[GOOD] No fail - Successful write operation")
|
"[GOOD] No fail - Successful write operation")
|
||||||
<< " | " << (fs.eof() ? "[ERROR] Failed write - EOF error" :
|
<< " | " << (fs.eof() ? "[ERROR] Failed write - EOF error" :
|
||||||
"[GOOD] No eof error - Successful write operation")
|
"[GOOD] No eof - Successful write operation")
|
||||||
<< " | " << (fs.good() ?
|
<< " | " << (fs.good() ?
|
||||||
"[GOOD] Write good - Successful write operation" :
|
"[GOOD] Write good - Successful write operation" :
|
||||||
"[ERROR] Failed write - good error");
|
"[ERROR] Failed write - good error");
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ const AMDGpuMetricVersionTranslationTbl_t amdgpu_metric_version_translation_tabl
|
|||||||
{join_metrics_version(1, 2), AMDGpuMetricVersionFlags_t::kGpuMetricV12},
|
{join_metrics_version(1, 2), AMDGpuMetricVersionFlags_t::kGpuMetricV12},
|
||||||
{join_metrics_version(1, 3), AMDGpuMetricVersionFlags_t::kGpuMetricV13},
|
{join_metrics_version(1, 3), AMDGpuMetricVersionFlags_t::kGpuMetricV13},
|
||||||
{join_metrics_version(1, 4), AMDGpuMetricVersionFlags_t::kGpuMetricV14},
|
{join_metrics_version(1, 4), AMDGpuMetricVersionFlags_t::kGpuMetricV14},
|
||||||
|
{join_metrics_version(1, 5), AMDGpuMetricVersionFlags_t::kGpuMetricV15},
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -201,7 +202,8 @@ const AMDGpuMetricsUnitTypeTranslationTbl_t amdgpu_metrics_unit_type_translation
|
|||||||
{AMDGpuMetricsUnitType_t::kMetricAvgMmActivity, "AvgMmActivity"},
|
{AMDGpuMetricsUnitType_t::kMetricAvgMmActivity, "AvgMmActivity"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricGfxActivityAccumulator, "GfxActivityAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricGfxActivityAccumulator, "GfxActivityAcc"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricMemActivityAccumulator, "MemActivityAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricMemActivityAccumulator, "MemActivityAcc"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricVcnActivity, "VcnActivity"},
|
{AMDGpuMetricsUnitType_t::kMetricVcnActivity, "VcnActivity"}, /* v1.4 */
|
||||||
|
{AMDGpuMetricsUnitType_t::kMetricJpegActivity, "JpegActivity"}, /* v1.5 */
|
||||||
|
|
||||||
// kGpuMetricAverageClock counters
|
// kGpuMetricAverageClock counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricAvgGfxClockFrequency, "AvgGfxClockFrequency"},
|
{AMDGpuMetricsUnitType_t::kMetricAvgGfxClockFrequency, "AvgGfxClockFrequency"},
|
||||||
@@ -213,11 +215,11 @@ const AMDGpuMetricsUnitTypeTranslationTbl_t amdgpu_metrics_unit_type_translation
|
|||||||
{AMDGpuMetricsUnitType_t::kMetricAvgDClock1Frequency, "AvgDClock1Frequency"},
|
{AMDGpuMetricsUnitType_t::kMetricAvgDClock1Frequency, "AvgDClock1Frequency"},
|
||||||
|
|
||||||
// kGpuMetricCurrentClock counters
|
// kGpuMetricCurrentClock counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrGfxClock, "CurrGfxClock"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrGfxClock, "CurrGfxClock"}, /* v1.4: Changed to array */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrSocClock, "CurrSocClock"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrSocClock, "CurrSocClock"}, /* v1.4: Changed to array */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrUClock, "CurrUClock"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrUClock, "CurrUClock"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrVClock0, "CurrVClock0"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrVClock0, "CurrVClock0"}, /* v1.4: Changed to array */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrDClock0, "CurrDClock0"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrDClock0, "CurrDClock0"}, /* v1.4: Changed to array */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrVClock1, "CurrVClock1"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrVClock1, "CurrVClock1"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrDClock1, "CurrDClock1"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrDClock1, "CurrDClock1"},
|
||||||
|
|
||||||
@@ -226,7 +228,7 @@ const AMDGpuMetricsUnitTypeTranslationTbl_t amdgpu_metrics_unit_type_translation
|
|||||||
{AMDGpuMetricsUnitType_t::kMetricIndepThrottleStatus, "IndepThrottleStatus"},
|
{AMDGpuMetricsUnitType_t::kMetricIndepThrottleStatus, "IndepThrottleStatus"},
|
||||||
|
|
||||||
// kGpuMetricGfxClkLockStatus counters
|
// kGpuMetricGfxClkLockStatus counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricGfxClkLockStatus, "GfxClkLockStatus"},
|
{AMDGpuMetricsUnitType_t::kMetricGfxClkLockStatus, "GfxClkLockStatus"}, /* v1.4 */
|
||||||
|
|
||||||
// kGpuMetricCurrentFanSpeed counters
|
// kGpuMetricCurrentFanSpeed counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrFanSpeed, "CurrFanSpeed"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrFanSpeed, "CurrFanSpeed"},
|
||||||
@@ -234,19 +236,21 @@ const AMDGpuMetricsUnitTypeTranslationTbl_t amdgpu_metrics_unit_type_translation
|
|||||||
// kGpuMetricLinkWidthSpeed counters
|
// kGpuMetricLinkWidthSpeed counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieLinkWidth, "PcieLinkWidth"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieLinkWidth, "PcieLinkWidth"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieLinkSpeed, "PcieLinkSpeed"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieLinkSpeed, "PcieLinkSpeed"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieBandwidthAccumulator, "PcieBandwidthAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieBandwidthAccumulator, "PcieBandwidthAcc"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieBandwidthInst, "PcieBandwidthInst"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieBandwidthInst, "PcieBandwidthInst"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricXgmiLinkWidth, "XgmiLinkWidth"},
|
{AMDGpuMetricsUnitType_t::kMetricXgmiLinkWidth, "XgmiLinkWidth"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricXgmiLinkSpeed, "XgmiLinkSpeed"},
|
{AMDGpuMetricsUnitType_t::kMetricXgmiLinkSpeed, "XgmiLinkSpeed"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator, "XgmiReadDataAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator, "XgmiReadDataAcc"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator, "XgmiWriteDataAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator, "XgmiWriteDataAcc"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieL0RecovCountAccumulator, "PcieL0RecovCountAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieL0RecovCountAccumulator, "PcieL0RecovCountAcc"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieReplayCountAccumulator, "PcieReplayCountAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieReplayCountAccumulator, "PcieReplayCountAcc"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricPcieReplayRollOverCountAccumulator, "PcieReplayRollOverCountAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricPcieReplayRollOverCountAccumulator, "PcieReplayRollOverCountAcc"}, /* v1.4 */
|
||||||
|
{AMDGpuMetricsUnitType_t::kMetricPcieNakSentCountAccumulator, "PcieNakSentCountAcc"}, /* v1.5 */
|
||||||
|
{AMDGpuMetricsUnitType_t::kMetricPcieNakReceivedCountAccumulator, "PcieNakRcvdCountAcc"}, /* v1.5 */
|
||||||
|
|
||||||
// kGpuMetricPowerEnergy counters
|
// kGpuMetricPowerEnergy counters
|
||||||
{AMDGpuMetricsUnitType_t::kMetricAvgSocketPower, "AvgSocketPower"},
|
{AMDGpuMetricsUnitType_t::kMetricAvgSocketPower, "AvgSocketPower"},
|
||||||
{AMDGpuMetricsUnitType_t::kMetricCurrSocketPower, "CurrSocketPower"},
|
{AMDGpuMetricsUnitType_t::kMetricCurrSocketPower, "CurrSocketPower"}, /* v1.4 */
|
||||||
{AMDGpuMetricsUnitType_t::kMetricEnergyAccumulator, "EnergyAcc"},
|
{AMDGpuMetricsUnitType_t::kMetricEnergyAccumulator, "EnergyAcc"},
|
||||||
|
|
||||||
// kGpuMetricVoltage counters
|
// kGpuMetricVoltage counters
|
||||||
@@ -343,6 +347,7 @@ AMDGpuMetricFactories_t amd_gpu_metrics_factory_table
|
|||||||
{AMDGpuMetricVersionFlags_t::kGpuMetricV12, std::make_shared<GpuMetricsBase_v12_t>(GpuMetricsBase_v12_t{})},
|
{AMDGpuMetricVersionFlags_t::kGpuMetricV12, std::make_shared<GpuMetricsBase_v12_t>(GpuMetricsBase_v12_t{})},
|
||||||
{AMDGpuMetricVersionFlags_t::kGpuMetricV13, std::make_shared<GpuMetricsBase_v13_t>(GpuMetricsBase_v13_t{})},
|
{AMDGpuMetricVersionFlags_t::kGpuMetricV13, std::make_shared<GpuMetricsBase_v13_t>(GpuMetricsBase_v13_t{})},
|
||||||
{AMDGpuMetricVersionFlags_t::kGpuMetricV14, std::make_shared<GpuMetricsBase_v14_t>(GpuMetricsBase_v14_t{})},
|
{AMDGpuMetricVersionFlags_t::kGpuMetricV14, std::make_shared<GpuMetricsBase_v14_t>(GpuMetricsBase_v14_t{})},
|
||||||
|
{AMDGpuMetricVersionFlags_t::kGpuMetricV15, std::make_shared<GpuMetricsBase_v15_t>(GpuMetricsBase_v15_t{})},
|
||||||
};
|
};
|
||||||
|
|
||||||
GpuMetricsBasePtr amdgpu_metrics_factory(AMDGpuMetricVersionFlags_t gpu_metric_version)
|
GpuMetricsBasePtr amdgpu_metrics_factory(AMDGpuMetricVersionFlags_t gpu_metric_version)
|
||||||
@@ -462,6 +467,341 @@ AMDGpuDynamicMetricTblValues_t format_metric_row(const T& metric, const std::str
|
|||||||
return multi_values;
|
return multi_values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GpuMetricsBase_v15_t::dump_internal_metrics_table()
|
||||||
|
{
|
||||||
|
std::ostringstream ostrstream;
|
||||||
|
std::cout << __PRETTY_FUNCTION__ << " | ======= start ======= \n";
|
||||||
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
|
<< " | ======= DEBUG ======= "
|
||||||
|
<< " | Metric Version: " << stringfy_metric_header_version(m_gpu_metrics_tbl.m_common_header)
|
||||||
|
<< " | Size: " << print_unsigned_int(m_gpu_metrics_tbl.m_common_header.m_structure_size)
|
||||||
|
<< " |"
|
||||||
|
<< "\n";
|
||||||
|
ostrstream << " temperature_hotspot: " << m_gpu_metrics_tbl.m_temperature_hotspot << "\n"
|
||||||
|
<< " temperature_mem: " << m_gpu_metrics_tbl.m_temperature_mem << "\n"
|
||||||
|
<< " temperature_vrsoc: " << m_gpu_metrics_tbl.m_temperature_vrsoc << "\n"
|
||||||
|
|
||||||
|
<< " current_socket_power: " << m_gpu_metrics_tbl.m_current_socket_power << "\n"
|
||||||
|
|
||||||
|
<< " average_gfx_activity: " << m_gpu_metrics_tbl.m_average_gfx_activity << "\n"
|
||||||
|
<< " average_umc_activity: " << m_gpu_metrics_tbl.m_average_umc_activity << "\n";
|
||||||
|
|
||||||
|
ostrstream << " vcn_activity: " << "\n";
|
||||||
|
auto idx = uint64_t(0);
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_vcn_activity) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " jpeg_activity: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_jpeg_activity) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " energy_accumulator: " << m_gpu_metrics_tbl.m_energy_accumulator << "\n"
|
||||||
|
<< " system_clock_counter: " << m_gpu_metrics_tbl.m_system_clock_counter << "\n"
|
||||||
|
|
||||||
|
<< " throttle_status: " << m_gpu_metrics_tbl.m_throttle_status << "\n"
|
||||||
|
|
||||||
|
<< " average_gfx_activity: " << m_gpu_metrics_tbl.m_average_gfx_activity << "\n"
|
||||||
|
<< " average_umc_activity: " << m_gpu_metrics_tbl.m_average_umc_activity << "\n"
|
||||||
|
|
||||||
|
<< " gfxclk_lock_status: " << m_gpu_metrics_tbl.m_gfxclk_lock_status << "\n"
|
||||||
|
|
||||||
|
<< " pcie_link_width: " << m_gpu_metrics_tbl.m_pcie_link_width << "\n"
|
||||||
|
<< " pcie_link_speed: " << m_gpu_metrics_tbl.m_pcie_link_speed << "\n"
|
||||||
|
|
||||||
|
<< " xgmi_link_width: " << m_gpu_metrics_tbl.m_xgmi_link_width << "\n"
|
||||||
|
<< " xgmi_link_speed: " << m_gpu_metrics_tbl.m_xgmi_link_speed << "\n"
|
||||||
|
|
||||||
|
<< " gfx_activity_acc: " << m_gpu_metrics_tbl.m_gfx_activity_acc << "\n"
|
||||||
|
<< " mem_activity_acc: " << m_gpu_metrics_tbl.m_mem_activity_acc << "\n"
|
||||||
|
|
||||||
|
<< " pcie_bandwidth_acc: " << m_gpu_metrics_tbl.m_pcie_bandwidth_acc << "\n"
|
||||||
|
<< " pcie_bandwidth_inst: " << m_gpu_metrics_tbl.m_pcie_bandwidth_inst << "\n"
|
||||||
|
<< " pcie_l0_to_recov_count_acc: " << m_gpu_metrics_tbl.m_pcie_l0_to_recov_count_acc << "\n"
|
||||||
|
<< " pcie_replay_count_acc: " << m_gpu_metrics_tbl.m_pcie_replay_count_acc << "\n"
|
||||||
|
<< " pcie_replay_rover_count_acc: " << m_gpu_metrics_tbl.m_pcie_replay_rover_count_acc << "\n"
|
||||||
|
<< " pcie_nak_sent_count_acc: " << m_gpu_metrics_tbl.m_pcie_nak_sent_count_acc << "\n"
|
||||||
|
<< " pcie_nak_rcvd_count_acc: " << m_gpu_metrics_tbl.m_pcie_nak_rcvd_count_acc << "\n";
|
||||||
|
|
||||||
|
ostrstream << " xgmi_read_data_acc: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_xgmi_read_data_acc) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " xgmi_write_data_acc: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_xgmi_write_data_acc) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " firmware_timestamp: " << m_gpu_metrics_tbl.m_firmware_timestamp << "\n";
|
||||||
|
|
||||||
|
ostrstream << " current_gfxclk: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_current_gfxclk) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " current_socclk: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_current_socclk) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " current_vclk0: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_current_vclk0) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " current_dclk0: " << "\n";
|
||||||
|
idx = 0;
|
||||||
|
for (const auto& temp : m_gpu_metrics_tbl.m_current_dclk0) {
|
||||||
|
ostrstream << "\t [" << idx << "]: " << temp << "\n";
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostrstream << " padding: " << m_gpu_metrics_tbl.m_padding << "\n";
|
||||||
|
LOG_DEBUG(ostrstream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 *  @brief Fills the dynamic metrics table from the internal v1.5 metrics table.
 *
 *  First applies the v1.5-specific adjustment (firmware_timestamp is scaled
 *  by 10, in place, on m_gpu_metrics_tbl), then inserts one formatted row per
 *  metric into m_metrics_dynamic_tbl, grouped by metric class. Rows whose
 *  title is wrapped in square brackets come from array-valued fields.
 *
 *  @return The status code initialised at the top of the function; nothing
 *          in this function changes it from RSMI_STATUS_SUCCESS.
 */
rsmi_status_t GpuMetricsBase_v15_t::populate_metrics_dynamic_tbl()
{
  std::ostringstream ostrstream;
  auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
  ostrstream << __PRETTY_FUNCTION__ << " | ======= start =======";
  LOG_TRACE(ostrstream);

  //
  //  Note: Any metric treatment/changes (if any) should happen before they
  //        get written to internal/external tables.
  //
  auto run_metric_adjustments_v15 = [&]() {
    ostrstream << __PRETTY_FUNCTION__ << " | ======= start =======";
    const auto version_in_use = translate_flag_to_metric_version(get_gpu_metrics_version_used());
    ostrstream << __PRETTY_FUNCTION__
               << " | ======= info ======= "
               << " | Applying adjustments "
               << " | Metric Version: " << stringfy_metric_header_version(
                                            disjoin_metrics_version(version_in_use))
               << " |";
    LOG_TRACE(ostrstream);

    // firmware_timestamp is at 10ns resolution
    const auto scaled_timestamp = (m_gpu_metrics_tbl.m_firmware_timestamp * 10);
    ostrstream << __PRETTY_FUNCTION__
               << " | ======= Changes ======= "
               << " | {m_firmware_timestamp} from: " << m_gpu_metrics_tbl.m_firmware_timestamp
               << " to: " << scaled_timestamp;
    m_gpu_metrics_tbl.m_firmware_timestamp = scaled_timestamp;
    LOG_DEBUG(ostrstream);
  };

  // Adjustments/Changes specific to this version
  run_metric_adjustments_v15();

  // Each group below binds its per-class sub-table once and inserts every
  // row of that class through the reference. A group finishes with its
  // reference before the next group touches m_metrics_dynamic_tbl again.

  // Temperature Info
  auto& temperature_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricTemperature];
  temperature_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricTempHotspot,
                     format_metric_row(m_gpu_metrics_tbl.m_temperature_hotspot,
                                       "temperature_hotspot")));
  temperature_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricTempMem,
                     format_metric_row(m_gpu_metrics_tbl.m_temperature_mem,
                                       "temperature_mem")));
  temperature_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricTempVrSoc,
                     format_metric_row(m_gpu_metrics_tbl.m_temperature_vrsoc,
                                       "temperature_vrsoc")));

  // Power/Energy Info
  auto& power_energy_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricPowerEnergy];
  power_energy_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrSocketPower,
                     format_metric_row(m_gpu_metrics_tbl.m_current_socket_power,
                                       "curr_socket_power")));
  power_energy_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricEnergyAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_energy_accumulator,
                                       "energy_acc")));

  // Utilization Info
  auto& utilization_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricUtilization];
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricAvgGfxActivity,
                     format_metric_row(m_gpu_metrics_tbl.m_average_gfx_activity,
                                       "average_gfx_activity")));
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricAvgUmcActivity,
                     format_metric_row(m_gpu_metrics_tbl.m_average_umc_activity,
                                       "average_umc_activity")));
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricVcnActivity,
                     format_metric_row(m_gpu_metrics_tbl.m_vcn_activity,
                                       "[average_vcn_activity]")));
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricJpegActivity,
                     format_metric_row(m_gpu_metrics_tbl.m_jpeg_activity,
                                       "[average_jpeg_activity]")));
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricGfxActivityAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_gfx_activity_acc,
                                       "gfx_activity_acc")));
  utilization_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricMemActivityAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_mem_activity_acc,
                                       "mem_activity_acc")));

  // Timestamp Info
  auto& timestamp_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricTimestamp];
  timestamp_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricTSFirmware,
                     format_metric_row(m_gpu_metrics_tbl.m_firmware_timestamp,
                                       "firmware_timestamp")));
  timestamp_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricTSClockCounter,
                     format_metric_row(m_gpu_metrics_tbl.m_system_clock_counter,
                                       "system_clock_counter")));

  // Throttle Info
  auto& throttle_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricThrottleStatus];
  throttle_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricThrottleStatus,
                     format_metric_row(m_gpu_metrics_tbl.m_throttle_status,
                                       "throttle_status")));

  // GfxLock Info
  auto& gfxclk_lock_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricGfxClkLockStatus];
  gfxclk_lock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricGfxClkLockStatus,
                     format_metric_row(m_gpu_metrics_tbl.m_gfxclk_lock_status,
                                       "gfxclk_lock_status")));

  // Link/Width/Speed Info
  auto& link_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricLinkWidthSpeed];
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieLinkWidth,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_link_width,
                                       "pcie_link_width")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieLinkSpeed,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_link_speed,
                                       "pcie_link_speed")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricXgmiLinkWidth,
                     format_metric_row(m_gpu_metrics_tbl.m_xgmi_link_width,
                                       "xgmi_link_width")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricXgmiLinkSpeed,
                     format_metric_row(m_gpu_metrics_tbl.m_xgmi_link_speed,
                                       "xgmi_link_speed")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_bandwidth_acc,
                                       "pcie_bandwidth_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthInst,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_bandwidth_inst,
                                       "pcie_bandwidth_inst")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieL0RecovCountAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_l0_to_recov_count_acc,
                                       "pcie_l0_recov_count_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieReplayCountAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_replay_count_acc,
                                       "pcie_replay_count_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieReplayRollOverCountAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_replay_rover_count_acc,
                                       "pcie_replay_rollover_count_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieNakSentCountAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_nak_sent_count_acc,
                                       "pcie_nak_sent_count_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricPcieNakReceivedCountAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_pcie_nak_rcvd_count_acc,
                                       "pcie_nak_rcvd_count_acc")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_xgmi_read_data_acc,
                                       "[xgmi_read_data_acc]")));
  link_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator,
                     format_metric_row(m_gpu_metrics_tbl.m_xgmi_write_data_acc,
                                       "[xgmi_write_data_acc]")));

  // CurrentClock Info
  auto& current_clock_tbl = m_metrics_dynamic_tbl[AMDGpuMetricsClassId_t::kGpuMetricCurrentClock];
  current_clock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock,
                     format_metric_row(m_gpu_metrics_tbl.m_current_gfxclk,
                                       "[current_gfxclk]")));
  current_clock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrSocClock,
                     format_metric_row(m_gpu_metrics_tbl.m_current_socclk,
                                       "[current_socclk]")));
  current_clock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrVClock0,
                     format_metric_row(m_gpu_metrics_tbl.m_current_vclk0,
                                       "[current_vclk0]")));
  current_clock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrDClock0,
                     format_metric_row(m_gpu_metrics_tbl.m_current_dclk0,
                                       "[current_dclk0]")));
  current_clock_tbl.insert(
      std::make_pair(AMDGpuMetricsUnitType_t::kMetricCurrUClock,
                     format_metric_row(m_gpu_metrics_tbl.m_current_uclk,
                                       "current_uclk")));

  ostrstream << __PRETTY_FUNCTION__
             << " | ======= end ======= "
             << " | Success "
             << " | Returning = " << getRSMIStatusString(status_code)
             << " |";
  LOG_TRACE(ostrstream);

  return status_code;
}
|
||||||
|
|
||||||
|
|
||||||
void GpuMetricsBase_v14_t::dump_internal_metrics_table()
|
void GpuMetricsBase_v14_t::dump_internal_metrics_table()
|
||||||
{
|
{
|
||||||
std::ostringstream ostrstream;
|
std::ostringstream ostrstream;
|
||||||
@@ -827,6 +1167,10 @@ rsmi_status_t init_max_public_gpu_matrics(AMGpuMetricsPublicLatest_t& rsmi_gpu_m
|
|||||||
std::end(rsmi_gpu_metrics.vcn_activity),
|
std::end(rsmi_gpu_metrics.vcn_activity),
|
||||||
init_max_uint_types<std::uint16_t>());
|
init_max_uint_types<std::uint16_t>());
|
||||||
|
|
||||||
|
std::fill(std::begin(rsmi_gpu_metrics.jpeg_activity),
|
||||||
|
std::end(rsmi_gpu_metrics.jpeg_activity),
|
||||||
|
init_max_uint_types<std::uint16_t>());
|
||||||
|
|
||||||
rsmi_gpu_metrics.gfxclk_lock_status = init_max_uint_types<decltype(rsmi_gpu_metrics.gfxclk_lock_status)>();
|
rsmi_gpu_metrics.gfxclk_lock_status = init_max_uint_types<decltype(rsmi_gpu_metrics.gfxclk_lock_status)>();
|
||||||
rsmi_gpu_metrics.xgmi_link_width = init_max_uint_types<decltype(rsmi_gpu_metrics.xgmi_link_width)>();
|
rsmi_gpu_metrics.xgmi_link_width = init_max_uint_types<decltype(rsmi_gpu_metrics.xgmi_link_width)>();
|
||||||
rsmi_gpu_metrics.xgmi_link_speed = init_max_uint_types<decltype(rsmi_gpu_metrics.xgmi_link_speed)>();
|
rsmi_gpu_metrics.xgmi_link_speed = init_max_uint_types<decltype(rsmi_gpu_metrics.xgmi_link_speed)>();
|
||||||
@@ -836,35 +1180,33 @@ rsmi_status_t init_max_public_gpu_matrics(AMGpuMetricsPublicLatest_t& rsmi_gpu_m
|
|||||||
rsmi_gpu_metrics.pcie_replay_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_count_acc)>();
|
rsmi_gpu_metrics.pcie_replay_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_count_acc)>();
|
||||||
rsmi_gpu_metrics.pcie_replay_rover_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_rover_count_acc)>();
|
rsmi_gpu_metrics.pcie_replay_rover_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_rover_count_acc)>();
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.xgmi_read_data_acc[0],
|
std::fill(std::begin(rsmi_gpu_metrics.xgmi_read_data_acc),
|
||||||
(sizeof(rsmi_gpu_metrics.xgmi_read_data_acc) /
|
std::end(rsmi_gpu_metrics.xgmi_read_data_acc),
|
||||||
sizeof(rsmi_gpu_metrics.xgmi_read_data_acc[0])),
|
init_max_uint_types<std::uint64_t>());
|
||||||
std::numeric_limits<uint64_t>::max());
|
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.xgmi_write_data_acc[0],
|
std::fill(std::begin(rsmi_gpu_metrics.xgmi_write_data_acc),
|
||||||
(sizeof(rsmi_gpu_metrics.xgmi_write_data_acc) /
|
std::end(rsmi_gpu_metrics.xgmi_write_data_acc),
|
||||||
sizeof(rsmi_gpu_metrics.xgmi_write_data_acc[0])),
|
init_max_uint_types<std::uint64_t>());
|
||||||
std::numeric_limits<uint64_t>::max());
|
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.current_gfxclks[0],
|
std::fill(std::begin(rsmi_gpu_metrics.current_gfxclks),
|
||||||
(sizeof(rsmi_gpu_metrics.current_gfxclks) /
|
std::end(rsmi_gpu_metrics.current_gfxclks),
|
||||||
sizeof(rsmi_gpu_metrics.current_gfxclks[0])),
|
init_max_uint_types<std::uint16_t>());
|
||||||
std::numeric_limits<uint16_t>::max());
|
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.current_socclks[0],
|
std::fill(std::begin(rsmi_gpu_metrics.current_socclks),
|
||||||
(sizeof(rsmi_gpu_metrics.current_socclks) /
|
std::end(rsmi_gpu_metrics.current_socclks),
|
||||||
sizeof(rsmi_gpu_metrics.current_socclks[0])),
|
init_max_uint_types<std::uint16_t>());
|
||||||
std::numeric_limits<uint16_t>::max());
|
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.current_vclk0s[0],
|
std::fill(std::begin(rsmi_gpu_metrics.current_vclk0s),
|
||||||
(sizeof(rsmi_gpu_metrics.current_vclk0s) /
|
std::end(rsmi_gpu_metrics.current_vclk0s),
|
||||||
sizeof(rsmi_gpu_metrics.current_vclk0s[0])),
|
init_max_uint_types<std::uint16_t>());
|
||||||
std::numeric_limits<uint16_t>::max());
|
|
||||||
|
std::fill(std::begin(rsmi_gpu_metrics.current_dclk0s),
|
||||||
|
std::end(rsmi_gpu_metrics.current_dclk0s),
|
||||||
|
init_max_uint_types<std::uint16_t>());
|
||||||
|
|
||||||
|
rsmi_gpu_metrics.pcie_nak_sent_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_nak_sent_count_acc)>();
|
||||||
|
rsmi_gpu_metrics.pcie_nak_rcvd_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_nak_rcvd_count_acc)>();
|
||||||
|
|
||||||
std::fill_n(&rsmi_gpu_metrics.current_dclk0s[0],
|
|
||||||
(sizeof(rsmi_gpu_metrics.current_dclk0s) /
|
|
||||||
sizeof(rsmi_gpu_metrics.current_dclk0s[0])),
|
|
||||||
std::numeric_limits<uint16_t>::max());
|
|
||||||
|
|
||||||
ostrstream << __PRETTY_FUNCTION__
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
<< " | ======= end ======= "
|
<< " | ======= end ======= "
|
||||||
@@ -876,6 +1218,195 @@ rsmi_status_t init_max_public_gpu_matrics(AMGpuMetricsPublicLatest_t& rsmi_gpu_m
|
|||||||
return status_code;
|
return status_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v15_t::copy_internal_to_external_metrics()
|
||||||
|
{
|
||||||
|
std::ostringstream ostrstream;
|
||||||
|
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
|
||||||
|
ostrstream << __PRETTY_FUNCTION__ << " | ======= start =======";
|
||||||
|
LOG_TRACE(ostrstream);
|
||||||
|
|
||||||
|
auto copy_data_from_internal_metrics_tbl = [&]() {
|
||||||
|
AMGpuMetricsPublicLatest_t metrics_public_init{};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Note: Initializing data members with their max. If field is max,
|
||||||
|
// no data was assigned to it.
|
||||||
|
init_max_public_gpu_matrics(metrics_public_init);
|
||||||
|
|
||||||
|
// Header
|
||||||
|
metrics_public_init.common_header.structure_size = m_gpu_metrics_tbl.m_common_header.m_structure_size;
|
||||||
|
metrics_public_init.common_header.format_revision = m_gpu_metrics_tbl.m_common_header.m_format_revision;
|
||||||
|
metrics_public_init.common_header.content_revision = m_gpu_metrics_tbl.m_common_header.m_content_revision;
|
||||||
|
|
||||||
|
|
||||||
|
// Temperature
|
||||||
|
metrics_public_init.temperature_hotspot = m_gpu_metrics_tbl.m_temperature_hotspot;
|
||||||
|
metrics_public_init.temperature_mem = m_gpu_metrics_tbl.m_temperature_mem;
|
||||||
|
metrics_public_init.temperature_vrsoc = m_gpu_metrics_tbl.m_temperature_vrsoc;
|
||||||
|
|
||||||
|
// Power
|
||||||
|
metrics_public_init.current_socket_power = m_gpu_metrics_tbl.m_current_socket_power;
|
||||||
|
|
||||||
|
// Utilization
|
||||||
|
metrics_public_init.average_gfx_activity = m_gpu_metrics_tbl.m_average_gfx_activity;
|
||||||
|
metrics_public_init.average_umc_activity = m_gpu_metrics_tbl.m_average_umc_activity;
|
||||||
|
|
||||||
|
// vcn_activity
|
||||||
|
const auto vcn_activity_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_vcn_activity) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_vcn_activity));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_vcn_activity),
|
||||||
|
vcn_activity_num_elems,
|
||||||
|
metrics_public_init.vcn_activity);
|
||||||
|
|
||||||
|
// jpeg_activity
|
||||||
|
const auto jpeg_activity_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_jpeg_activity) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_jpeg_activity));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_jpeg_activity),
|
||||||
|
jpeg_activity_num_elems,
|
||||||
|
metrics_public_init.jpeg_activity);
|
||||||
|
|
||||||
|
// Power/Energy
|
||||||
|
metrics_public_init.energy_accumulator = m_gpu_metrics_tbl.m_energy_accumulator;
|
||||||
|
|
||||||
|
// Driver attached timestamp (in ns)
|
||||||
|
metrics_public_init.system_clock_counter = m_gpu_metrics_tbl.m_system_clock_counter;
|
||||||
|
|
||||||
|
// Throttle status
|
||||||
|
metrics_public_init.throttle_status = m_gpu_metrics_tbl.m_throttle_status;
|
||||||
|
|
||||||
|
// Clock Lock Status. Each bit corresponds to clock instance
|
||||||
|
metrics_public_init.gfxclk_lock_status = m_gpu_metrics_tbl.m_gfxclk_lock_status;
|
||||||
|
|
||||||
|
// Link width (number of lanes) and speed
|
||||||
|
metrics_public_init.pcie_link_width = m_gpu_metrics_tbl.m_pcie_link_width;
|
||||||
|
metrics_public_init.pcie_link_speed = m_gpu_metrics_tbl.m_pcie_link_speed;
|
||||||
|
|
||||||
|
// XGMI bus width and bitrate
|
||||||
|
metrics_public_init.xgmi_link_width = m_gpu_metrics_tbl.m_xgmi_link_width;
|
||||||
|
metrics_public_init.xgmi_link_speed = m_gpu_metrics_tbl.m_xgmi_link_speed;
|
||||||
|
|
||||||
|
// Utilization Accumulated
|
||||||
|
metrics_public_init.gfx_activity_acc = m_gpu_metrics_tbl.m_gfx_activity_acc;
|
||||||
|
metrics_public_init.mem_activity_acc = m_gpu_metrics_tbl.m_mem_activity_acc;
|
||||||
|
|
||||||
|
// PCIE accumulated bandwidth
|
||||||
|
metrics_public_init.pcie_bandwidth_acc = m_gpu_metrics_tbl.m_pcie_bandwidth_acc;
|
||||||
|
|
||||||
|
// PCIE instantaneous bandwidth
|
||||||
|
metrics_public_init.pcie_bandwidth_inst = m_gpu_metrics_tbl.m_pcie_bandwidth_inst;
|
||||||
|
|
||||||
|
// PCIE L0 to recovery state transition accumulated count
|
||||||
|
metrics_public_init.pcie_l0_to_recov_count_acc = m_gpu_metrics_tbl.m_pcie_l0_to_recov_count_acc;
|
||||||
|
|
||||||
|
// PCIE replay accumulated count
|
||||||
|
metrics_public_init.pcie_replay_count_acc = m_gpu_metrics_tbl.m_pcie_replay_count_acc;
|
||||||
|
|
||||||
|
// PCIE replay rollover accumulated count
|
||||||
|
metrics_public_init.pcie_replay_rover_count_acc = m_gpu_metrics_tbl.m_pcie_replay_rover_count_acc;
|
||||||
|
|
||||||
|
// PCIE NAK sent accumulated count
|
||||||
|
metrics_public_init.pcie_nak_sent_count_acc = m_gpu_metrics_tbl.m_pcie_nak_sent_count_acc;
|
||||||
|
|
||||||
|
// PCIE NAK received accumulated count
|
||||||
|
metrics_public_init.pcie_nak_rcvd_count_acc = m_gpu_metrics_tbl.m_pcie_nak_rcvd_count_acc;
|
||||||
|
|
||||||
|
// XGMI accumulated data transfer size
|
||||||
|
// xgmi_read_data
|
||||||
|
const auto xgmi_read_data_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_xgmi_read_data_acc) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_xgmi_read_data_acc));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_xgmi_read_data_acc),
|
||||||
|
xgmi_read_data_num_elems,
|
||||||
|
metrics_public_init.xgmi_read_data_acc);
|
||||||
|
// xgmi_write_data
|
||||||
|
const auto xgmi_write_data_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_xgmi_write_data_acc) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_xgmi_write_data_acc));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_xgmi_write_data_acc),
|
||||||
|
xgmi_write_data_num_elems,
|
||||||
|
metrics_public_init.xgmi_write_data_acc);
|
||||||
|
|
||||||
|
// PMFW attached timestamp (10ns resolution)
|
||||||
|
metrics_public_init.firmware_timestamp = m_gpu_metrics_tbl.m_firmware_timestamp;
|
||||||
|
|
||||||
|
// Current clocks
|
||||||
|
// current_gfxclk
|
||||||
|
const auto curr_gfxclk_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_current_gfxclk) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_current_gfxclk));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_current_gfxclk),
|
||||||
|
curr_gfxclk_num_elems,
|
||||||
|
metrics_public_init.current_gfxclks);
|
||||||
|
|
||||||
|
// current_socclk
|
||||||
|
const auto curr_socclk_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_current_socclk) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_current_socclk));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_current_socclk),
|
||||||
|
curr_socclk_num_elems,
|
||||||
|
metrics_public_init.current_socclks);
|
||||||
|
|
||||||
|
// current_vclk0
|
||||||
|
const auto curr_vclk0_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_current_vclk0) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_current_vclk0));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_current_vclk0),
|
||||||
|
curr_vclk0_num_elems,
|
||||||
|
metrics_public_init.current_vclk0s);
|
||||||
|
|
||||||
|
// current_dclk0
|
||||||
|
const auto curr_dclk0_num_elems =
|
||||||
|
static_cast<uint16_t>(
|
||||||
|
std::end(m_gpu_metrics_tbl.m_current_dclk0) -
|
||||||
|
std::begin(m_gpu_metrics_tbl.m_current_dclk0));
|
||||||
|
std::copy_n(std::begin(m_gpu_metrics_tbl.m_current_dclk0),
|
||||||
|
curr_dclk0_num_elems,
|
||||||
|
metrics_public_init.current_dclk0s);
|
||||||
|
|
||||||
|
metrics_public_init.current_uclk = m_gpu_metrics_tbl.m_current_uclk;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Note: Backwards compatibility -> Handling extra/exception cases
|
||||||
|
// related to earlier versions (1.3)
|
||||||
|
metrics_public_init.current_gfxclk = metrics_public_init.current_gfxclks[0];
|
||||||
|
// metrics_public_init.average_gfxclk_frequency = metrics_public_init.current_gfxclks[0];
|
||||||
|
|
||||||
|
metrics_public_init.current_socclk = metrics_public_init.current_socclks[0];
|
||||||
|
// metrics_public_init.average_socclk_frequency = metrics_public_init.current_socclks[0];
|
||||||
|
|
||||||
|
metrics_public_init.current_vclk0 = metrics_public_init.current_vclk0s[0];
|
||||||
|
// metrics_public_init.average_vclk0_frequency = metrics_public_init.current_vclk0s[0];
|
||||||
|
|
||||||
|
metrics_public_init.current_vclk1 = metrics_public_init.current_vclk0s[1];
|
||||||
|
// metrics_public_init.average_vclk1_frequency = metrics_public_init.current_vclk0s[1];
|
||||||
|
|
||||||
|
metrics_public_init.current_dclk0 = metrics_public_init.current_dclk0s[0];
|
||||||
|
// metrics_public_init.average_dclk0_frequency = metrics_public_init.current_dclk0s[0];
|
||||||
|
|
||||||
|
metrics_public_init.current_dclk1 = metrics_public_init.current_dclk0s[1];
|
||||||
|
// metrics_public_init.average_dclk1_frequency = metrics_public_init.current_dclk0s[1];
|
||||||
|
|
||||||
|
return metrics_public_init;
|
||||||
|
}();
|
||||||
|
|
||||||
|
ostrstream << __PRETTY_FUNCTION__
|
||||||
|
<< " | ======= end ======= "
|
||||||
|
<< " | Success "
|
||||||
|
<< " | Returning = " << getRSMIStatusString(status_code)
|
||||||
|
<< " |";
|
||||||
|
LOG_TRACE(ostrstream);
|
||||||
|
|
||||||
|
return std::make_tuple(status_code, copy_data_from_internal_metrics_tbl);
|
||||||
|
}
|
||||||
|
|
||||||
AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v14_t::copy_internal_to_external_metrics()
|
AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v14_t::copy_internal_to_external_metrics()
|
||||||
{
|
{
|
||||||
@@ -2154,11 +2685,9 @@ rsmi_status_t Device::dev_read_gpu_metrics_header_data()
|
|||||||
|
|
||||||
// Check if/when metrics table needs to be refreshed.
|
// Check if/when metrics table needs to be refreshed.
|
||||||
auto now_ts = actual_timestamp_in_secs();
|
auto now_ts = actual_timestamp_in_secs();
|
||||||
if (((!m_gpu_metrics_header.m_structure_size) ||
|
if ((!m_gpu_metrics_header.m_structure_size) ||
|
||||||
(!m_gpu_metrics_header.m_format_revision) ||
|
(!m_gpu_metrics_header.m_format_revision) ||
|
||||||
(!m_gpu_metrics_header.m_content_revision)) ||
|
(!m_gpu_metrics_header.m_content_revision)) {
|
||||||
((now_ts - m_gpu_metrics_updated_timestamp) >=
|
|
||||||
kRSMI_GPU_METRICS_EXPIRATION_SECS)) {
|
|
||||||
auto op_result = readDevInfo(DevInfoTypes::kDevGpuMetrics,
|
auto op_result = readDevInfo(DevInfoTypes::kDevGpuMetrics,
|
||||||
sizeof(AMDGpuMetricsHeader_v1_t),
|
sizeof(AMDGpuMetricsHeader_v1_t),
|
||||||
&m_gpu_metrics_header);
|
&m_gpu_metrics_header);
|
||||||
@@ -2617,7 +3146,7 @@ rsmi_status_t Device::run_internal_gpu_metrics_query(AMDGpuMetricsUnitType_t met
|
|||||||
<< " | Returning = "
|
<< " | Returning = "
|
||||||
<< getRSMIStatusString(status_code)
|
<< getRSMIStatusString(status_code)
|
||||||
<< " |";
|
<< " |";
|
||||||
LOG_ERROR(ostrstream);
|
LOG_TRACE(ostrstream);
|
||||||
return status_code;
|
return status_code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2727,7 +3256,7 @@ rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind, AMDGpuMetricsUnit
|
|||||||
<< " | Returning = "
|
<< " | Returning = "
|
||||||
<< getRSMIStatusString(status_code)
|
<< getRSMIStatusString(status_code)
|
||||||
<< " |";
|
<< " |";
|
||||||
LOG_ERROR(ostrstream);
|
LOG_TRACE(ostrstream);
|
||||||
return status_code;
|
return status_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -52,8 +52,8 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <algorithm>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@@ -391,10 +391,6 @@ RocmSMI::Initialize(uint64_t flags) {
|
|||||||
<< "\n | final update: device->bdfid() holds correct device bdf";
|
<< "\n | final update: device->bdfid() holds correct device bdf";
|
||||||
LOG_TRACE(ss);
|
LOG_TRACE(ss);
|
||||||
}
|
}
|
||||||
if (ret != 0) {
|
|
||||||
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
|
|
||||||
"Failed to initialize rocm_smi library (amdgpu node discovery).");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<amd::smi::Device> dev;
|
std::shared_ptr<amd::smi::Device> dev;
|
||||||
// Sort index based on the BDF, collect BDF id firstly.
|
// Sort index based on the BDF, collect BDF id firstly.
|
||||||
@@ -437,6 +433,7 @@ RocmSMI::Initialize(uint64_t flags) {
|
|||||||
for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++)
|
for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++)
|
||||||
io_link_map_[it->first] = it->second;
|
io_link_map_[it->first] = it->second;
|
||||||
|
|
||||||
|
|
||||||
// Remove any drm nodes that don't have a corresponding readable kfd node.
|
// Remove any drm nodes that don't have a corresponding readable kfd node.
|
||||||
// kfd nodes will not be added if their properties file is not readable.
|
// kfd nodes will not be added if their properties file is not readable.
|
||||||
auto dev_iter = devices_.begin();
|
auto dev_iter = devices_.begin();
|
||||||
@@ -480,6 +477,7 @@ RocmSMI::Initialize(uint64_t flags) {
|
|||||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||||
logSystemDetails();
|
logSystemDetails();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Leaving below to help debug temp file issues
|
// Leaving below to help debug temp file issues
|
||||||
// displayAppTmpFilesContent();
|
// displayAppTmpFilesContent();
|
||||||
std::string amdGPUDeviceList = displayAllDevicePaths(devices_);
|
std::string amdGPUDeviceList = displayAllDevicePaths(devices_);
|
||||||
|
|||||||
@@ -42,11 +42,14 @@
|
|||||||
*/
|
*/
|
||||||
#define _GNU_SOURCE 1 // REQUIRED: to utilize some GNU features/functions, see
|
#define _GNU_SOURCE 1 // REQUIRED: to utilize some GNU features/functions, see
|
||||||
// _GNU_SOURCE functions which check
|
// _GNU_SOURCE functions which check
|
||||||
|
#include <assert.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
#include <dlfcn.h>
|
|
||||||
#include <glob.h>
|
#include <glob.h>
|
||||||
#include <sys/utsname.h>
|
#include <sys/utsname.h>
|
||||||
#include <unistd.h>
|
#include <dlfcn.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|||||||
Посилання в новій задачі
Заблокувати користувача