Added new ecc blocks and adjusted metric --ecc-block filtering
Signed-off-by: Maisam Arif <maisarif@amd.com>
Change-Id: Ib2f69c7d59ee5108024794434fb202b5e4f58738
[ROCm/amdsmi commit: 1bd18c1a65]
Bu işleme şunda yer alıyor:
işlemeyi yapan:
Maisam Arif
ebeveyn
9b4f0f1d2b
işleme
cebb07e795
@@ -12,10 +12,47 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/](
|
||||
|
||||
### Changed
|
||||
|
||||
- **Removed `amdsmi_get_gpu_process_info` from python library**
|
||||
- **Updated `amd-smi metric --ecc-blocks` output**
|
||||
The ecc blocks argument was outputting blocks without counters available; the filtering was updated to show only the blocks that counters are available for:
|
||||
|
||||
``` shell
|
||||
$ amd-smi metric --ecc-block
|
||||
GPU: 0
|
||||
ECC_BLOCKS:
|
||||
UMC:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
SDMA:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
GFX:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
MMHUB:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
PCIE_BIF:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
HDP:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
XGMI_WAFL:
|
||||
CORRECTABLE_COUNT: 0
|
||||
UNCORRECTABLE_COUNT: 0
|
||||
DEFERRED_COUNT: 0
|
||||
```
|
||||
|
||||
- **Removed `amdsmi_get_gpu_process_info` from python library**
|
||||
`amdsmi_get_gpu_process_info` was removed from the C library in an earlier build, but the API was still present in the Python interface.
|
||||
|
||||
- **Updated metrics --clocks**
|
||||
- **Updated metrics --clocks**
|
||||
Output for `amd-smi metric --clock` is updated to reflect each engine and includes bug fixes for the clock lock status and deep sleep status.
|
||||
|
||||
``` shell
|
||||
|
||||
@@ -1692,14 +1692,15 @@ class AMDSMICommands():
|
||||
if "ecc_blocks" in current_platform_args:
|
||||
if args.ecc_blocks:
|
||||
ecc_dict = {}
|
||||
uncountable_blocks = ["ATHUB", "DF", "SMN", "SEM", "FUSE"]
|
||||
sysfs_blocks = ["UMC", "SDMA", "GFX", "MMHUB", "PCIE_BIF", "HDP", "XGMI_WAFL"]
|
||||
try:
|
||||
ras_states = amdsmi_interface.amdsmi_get_gpu_ras_block_features_enabled(args.gpu)
|
||||
for state in ras_states:
|
||||
# Only add enabled blocks that are also in sysfs
|
||||
if state['status'] == amdsmi_interface.AmdSmiRasErrState.ENABLED.name:
|
||||
gpu_block = amdsmi_interface.AmdSmiGpuBlock[state['block']]
|
||||
# if the blocks are uncountable do not add them at all.
|
||||
if gpu_block.name not in uncountable_blocks:
|
||||
if gpu_block.name in sysfs_blocks:
|
||||
try:
|
||||
ecc_count = amdsmi_interface.amdsmi_get_gpu_ecc_count(args.gpu, gpu_block)
|
||||
ecc_dict[state['block']] = {'correctable_count' : ecc_count['correctable_count'],
|
||||
|
||||
@@ -964,10 +964,10 @@ typedef enum {
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_GPU_BLOCK_INVALID = 0x0000000000000000, //!< Used to indicate an
|
||||
//!< invalid block
|
||||
//!< invalid block
|
||||
AMDSMI_GPU_BLOCK_FIRST = 0x0000000000000001,
|
||||
|
||||
AMDSMI_GPU_BLOCK_UMC = AMDSMI_GPU_BLOCK_FIRST, //!< UMC block
|
||||
AMDSMI_GPU_BLOCK_UMC = AMDSMI_GPU_BLOCK_FIRST, //!< UMC block
|
||||
AMDSMI_GPU_BLOCK_SDMA = 0x0000000000000002, //!< SDMA block
|
||||
AMDSMI_GPU_BLOCK_GFX = 0x0000000000000004, //!< GFX block
|
||||
AMDSMI_GPU_BLOCK_MMHUB = 0x0000000000000008, //!< MMHUB block
|
||||
@@ -981,9 +981,14 @@ typedef enum {
|
||||
AMDSMI_GPU_BLOCK_MP0 = 0x0000000000000800, //!< MP0 block
|
||||
AMDSMI_GPU_BLOCK_MP1 = 0x0000000000001000, //!< MP1 block
|
||||
AMDSMI_GPU_BLOCK_FUSE = 0x0000000000002000, //!< Fuse block
|
||||
AMDSMI_GPU_BLOCK_MCA = 0x0000000000004000, //!< MCA block
|
||||
AMDSMI_GPU_BLOCK_VCN = 0x0000000000008000, //!< VCN block
|
||||
AMDSMI_GPU_BLOCK_JPEG = 0x0000000000010000, //!< JPEG block
|
||||
AMDSMI_GPU_BLOCK_IH = 0x0000000000020000, //!< IH block
|
||||
AMDSMI_GPU_BLOCK_MPIO = 0x0000000000040000, //!< MPIO block
|
||||
|
||||
AMDSMI_GPU_BLOCK_LAST = AMDSMI_GPU_BLOCK_FUSE, //!< The highest bit position
|
||||
//!< for supported blocks
|
||||
AMDSMI_GPU_BLOCK_LAST = AMDSMI_GPU_BLOCK_MPIO, //!< The highest bit position
|
||||
//!< for supported blocks
|
||||
AMDSMI_GPU_BLOCK_RESERVED = 0x8000000000000000
|
||||
} amdsmi_gpu_block_t;
|
||||
|
||||
|
||||
@@ -300,6 +300,11 @@ class AmdSmiGpuBlock(IntEnum):
|
||||
MP0 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP0
|
||||
MP1 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP1
|
||||
FUSE = amdsmi_wrapper.AMDSMI_GPU_BLOCK_FUSE
|
||||
MCA = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MCA
|
||||
VCN = amdsmi_wrapper.AMDSMI_GPU_BLOCK_VCN
|
||||
JPEG = amdsmi_wrapper.AMDSMI_GPU_BLOCK_JPEG
|
||||
IH = amdsmi_wrapper.AMDSMI_GPU_BLOCK_IH
|
||||
MPIO = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MPIO
|
||||
RESERVED = amdsmi_wrapper.AMDSMI_GPU_BLOCK_RESERVED
|
||||
|
||||
|
||||
@@ -1906,7 +1911,7 @@ def amdsmi_get_gpu_ras_block_features_enabled(
|
||||
if gpu_block.name == "RESERVED" or gpu_block.name == "INVALID":
|
||||
continue
|
||||
if gpu_block.name == "LAST":
|
||||
gpu_block.name = "FUSE"
|
||||
gpu_block.name = "MPIO"
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_get_gpu_ras_block_features_enabled(
|
||||
processor_handle,
|
||||
@@ -1959,6 +1964,7 @@ def amdsmi_get_gpu_process_list(
|
||||
"vram_mem": process_list[index].memory_usage.vram_mem,
|
||||
},
|
||||
})
|
||||
print(result)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -748,6 +748,19 @@ amdsmi_card_form_factor_t = ctypes.c_uint32 # enum
|
||||
class struct_amdsmi_pcie_info_t(Structure):
|
||||
pass
|
||||
|
||||
class struct_pcie_static_(Structure):
|
||||
pass
|
||||
|
||||
struct_pcie_static_._pack_ = 1 # source:False
|
||||
struct_pcie_static_._fields_ = [
|
||||
('max_pcie_width', ctypes.c_uint16),
|
||||
('PADDING_0', ctypes.c_ubyte * 2),
|
||||
('max_pcie_speed', ctypes.c_uint32),
|
||||
('pcie_interface_version', ctypes.c_uint32),
|
||||
('slot_type', amdsmi_card_form_factor_t),
|
||||
('reserved', ctypes.c_uint64 * 10),
|
||||
]
|
||||
|
||||
class struct_pcie_metric_(Structure):
|
||||
pass
|
||||
|
||||
@@ -766,19 +779,6 @@ struct_pcie_metric_._fields_ = [
|
||||
('reserved', ctypes.c_uint64 * 13),
|
||||
]
|
||||
|
||||
class struct_pcie_static_(Structure):
|
||||
pass
|
||||
|
||||
struct_pcie_static_._pack_ = 1 # source:False
|
||||
struct_pcie_static_._fields_ = [
|
||||
('max_pcie_width', ctypes.c_uint16),
|
||||
('PADDING_0', ctypes.c_ubyte * 2),
|
||||
('max_pcie_speed', ctypes.c_uint32),
|
||||
('pcie_interface_version', ctypes.c_uint32),
|
||||
('slot_type', amdsmi_card_form_factor_t),
|
||||
('reserved', ctypes.c_uint64 * 10),
|
||||
]
|
||||
|
||||
struct_amdsmi_pcie_info_t._pack_ = 1 # source:False
|
||||
struct_amdsmi_pcie_info_t._fields_ = [
|
||||
('pcie_static', struct_pcie_static_),
|
||||
@@ -1300,7 +1300,12 @@ amdsmi_gpu_block_t__enumvalues = {
|
||||
2048: 'AMDSMI_GPU_BLOCK_MP0',
|
||||
4096: 'AMDSMI_GPU_BLOCK_MP1',
|
||||
8192: 'AMDSMI_GPU_BLOCK_FUSE',
|
||||
8192: 'AMDSMI_GPU_BLOCK_LAST',
|
||||
16384: 'AMDSMI_GPU_BLOCK_MCA',
|
||||
32768: 'AMDSMI_GPU_BLOCK_VCN',
|
||||
65536: 'AMDSMI_GPU_BLOCK_JPEG',
|
||||
131072: 'AMDSMI_GPU_BLOCK_IH',
|
||||
262144: 'AMDSMI_GPU_BLOCK_MPIO',
|
||||
262144: 'AMDSMI_GPU_BLOCK_LAST',
|
||||
9223372036854775808: 'AMDSMI_GPU_BLOCK_RESERVED',
|
||||
}
|
||||
AMDSMI_GPU_BLOCK_INVALID = 0
|
||||
@@ -1319,7 +1324,12 @@ AMDSMI_GPU_BLOCK_SEM = 1024
|
||||
AMDSMI_GPU_BLOCK_MP0 = 2048
|
||||
AMDSMI_GPU_BLOCK_MP1 = 4096
|
||||
AMDSMI_GPU_BLOCK_FUSE = 8192
|
||||
AMDSMI_GPU_BLOCK_LAST = 8192
|
||||
AMDSMI_GPU_BLOCK_MCA = 16384
|
||||
AMDSMI_GPU_BLOCK_VCN = 32768
|
||||
AMDSMI_GPU_BLOCK_JPEG = 65536
|
||||
AMDSMI_GPU_BLOCK_IH = 131072
|
||||
AMDSMI_GPU_BLOCK_MPIO = 262144
|
||||
AMDSMI_GPU_BLOCK_LAST = 262144
|
||||
AMDSMI_GPU_BLOCK_RESERVED = 9223372036854775808
|
||||
amdsmi_gpu_block_t = ctypes.c_uint64 # enum
|
||||
|
||||
@@ -2380,17 +2390,19 @@ __all__ = \
|
||||
'AMDSMI_GPU_BLOCK_ATHUB', 'AMDSMI_GPU_BLOCK_DF',
|
||||
'AMDSMI_GPU_BLOCK_FIRST', 'AMDSMI_GPU_BLOCK_FUSE',
|
||||
'AMDSMI_GPU_BLOCK_GFX', 'AMDSMI_GPU_BLOCK_HDP',
|
||||
'AMDSMI_GPU_BLOCK_INVALID', 'AMDSMI_GPU_BLOCK_LAST',
|
||||
'AMDSMI_GPU_BLOCK_MMHUB', 'AMDSMI_GPU_BLOCK_MP0',
|
||||
'AMDSMI_GPU_BLOCK_MP1', 'AMDSMI_GPU_BLOCK_PCIE_BIF',
|
||||
'AMDSMI_GPU_BLOCK_IH', 'AMDSMI_GPU_BLOCK_INVALID',
|
||||
'AMDSMI_GPU_BLOCK_JPEG', 'AMDSMI_GPU_BLOCK_LAST',
|
||||
'AMDSMI_GPU_BLOCK_MCA', 'AMDSMI_GPU_BLOCK_MMHUB',
|
||||
'AMDSMI_GPU_BLOCK_MP0', 'AMDSMI_GPU_BLOCK_MP1',
|
||||
'AMDSMI_GPU_BLOCK_MPIO', 'AMDSMI_GPU_BLOCK_PCIE_BIF',
|
||||
'AMDSMI_GPU_BLOCK_RESERVED', 'AMDSMI_GPU_BLOCK_SDMA',
|
||||
'AMDSMI_GPU_BLOCK_SEM', 'AMDSMI_GPU_BLOCK_SMN',
|
||||
'AMDSMI_GPU_BLOCK_UMC', 'AMDSMI_GPU_BLOCK_XGMI_WAFL',
|
||||
'AMDSMI_HSMP_TIMEOUT', 'AMDSMI_INIT_ALL_PROCESSORS',
|
||||
'AMDSMI_INIT_AMD_APUS', 'AMDSMI_INIT_AMD_CPUS',
|
||||
'AMDSMI_INIT_AMD_GPUS', 'AMDSMI_INIT_NON_AMD_CPUS',
|
||||
'AMDSMI_INIT_NON_AMD_GPUS', 'AMDSMI_INVALID_POWER',
|
||||
'AMDSMI_IOLINK_TYPE_NUMIOLINKTYPES',
|
||||
'AMDSMI_GPU_BLOCK_UMC', 'AMDSMI_GPU_BLOCK_VCN',
|
||||
'AMDSMI_GPU_BLOCK_XGMI_WAFL', 'AMDSMI_HSMP_TIMEOUT',
|
||||
'AMDSMI_INIT_ALL_PROCESSORS', 'AMDSMI_INIT_AMD_APUS',
|
||||
'AMDSMI_INIT_AMD_CPUS', 'AMDSMI_INIT_AMD_GPUS',
|
||||
'AMDSMI_INIT_NON_AMD_CPUS', 'AMDSMI_INIT_NON_AMD_GPUS',
|
||||
'AMDSMI_INVALID_POWER', 'AMDSMI_IOLINK_TYPE_NUMIOLINKTYPES',
|
||||
'AMDSMI_IOLINK_TYPE_PCIEXPRESS', 'AMDSMI_IOLINK_TYPE_SIZE',
|
||||
'AMDSMI_IOLINK_TYPE_UNDEFINED', 'AMDSMI_IOLINK_TYPE_XGMI',
|
||||
'AMDSMI_LINK_TYPE_NOT_APPLICABLE', 'AMDSMI_LINK_TYPE_PCIE',
|
||||
|
||||
@@ -608,8 +608,13 @@ typedef enum {
|
||||
RSMI_GPU_BLOCK_MP0 = 0x0000000000000800, //!< MP0 block
|
||||
RSMI_GPU_BLOCK_MP1 = 0x0000000000001000, //!< MP1 block
|
||||
RSMI_GPU_BLOCK_FUSE = 0x0000000000002000, //!< Fuse block
|
||||
RSMI_GPU_BLOCK_MCA = 0x0000000000004000, //!< MCA block
|
||||
RSMI_GPU_BLOCK_VCN = 0x0000000000008000, //!< VCN block
|
||||
RSMI_GPU_BLOCK_JPEG = 0x0000000000010000, //!< JPEG block
|
||||
RSMI_GPU_BLOCK_IH = 0x0000000000020000, //!< IH block
|
||||
RSMI_GPU_BLOCK_MPIO = 0x0000000000040000, //!< MPIO block
|
||||
|
||||
RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_FUSE, //!< The highest bit position
|
||||
RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_MPIO, //!< The highest bit position
|
||||
//!< for supported blocks
|
||||
RSMI_GPU_BLOCK_RESERVED = 0x8000000000000000
|
||||
} rsmi_gpu_block_t;
|
||||
|
||||
@@ -331,7 +331,13 @@ class rsmi_gpu_block_t(c_int):
|
||||
RSMI_GPU_BLOCK_MP0 = 0x0000000000000800
|
||||
RSMI_GPU_BLOCK_MP1 = 0x0000000000001000
|
||||
RSMI_GPU_BLOCK_FUSE = 0x0000000000002000
|
||||
RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_FUSE
|
||||
RSMI_GPU_BLOCK_MCA = 0x0000000000004000
|
||||
RSMI_GPU_BLOCK_VCN = 0x0000000000008000
|
||||
RSMI_GPU_BLOCK_JPEG = 0x0000000000010000
|
||||
RSMI_GPU_BLOCK_IH = 0x0000000000020000
|
||||
RSMI_GPU_BLOCK_MPIO = 0x0000000000040000
|
||||
|
||||
RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_MPIO
|
||||
RSMI_GPU_BLOCK_RESERVED = 0x8000000000000000
|
||||
|
||||
|
||||
@@ -340,20 +346,25 @@ rsmi_gpu_block = rsmi_gpu_block_t
|
||||
|
||||
# The following dictionary correlates with rsmi_gpu_block_t enum
|
||||
rsmi_gpu_block_d = {
|
||||
'UMC' : 0x0000000000000001,
|
||||
'SDMA' : 0x0000000000000002,
|
||||
'GFX' : 0x0000000000000004,
|
||||
'MMHUB': 0x0000000000000008,
|
||||
'ATHUB': 0x0000000000000010,
|
||||
'PCIE_BIF': 0x0000000000000020,
|
||||
'HDP': 0x0000000000000040,
|
||||
'XGMI_WAFL': 0x0000000000000080,
|
||||
'DF': 0x0000000000000100,
|
||||
'SMN': 0x0000000000000200,
|
||||
'SEM': 0x0000000000000400,
|
||||
'MP0': 0x0000000000000800,
|
||||
'MP1': 0x0000000000001000,
|
||||
'FUSE': 0x0000000000002000
|
||||
'UMC' : 0x0000000000000001,
|
||||
'SDMA' : 0x0000000000000002,
|
||||
'GFX' : 0x0000000000000004,
|
||||
'MMHUB' : 0x0000000000000008,
|
||||
'ATHUB' : 0x0000000000000010,
|
||||
'PCIE_BIF' : 0x0000000000000020,
|
||||
'HDP' : 0x0000000000000040,
|
||||
'XGMI_WAFL' : 0x0000000000000080,
|
||||
'DF' : 0x0000000000000100,
|
||||
'SMN' : 0x0000000000000200,
|
||||
'SEM' : 0x0000000000000400,
|
||||
'MP0' : 0x0000000000000800,
|
||||
'MP1' : 0x0000000000001000,
|
||||
'FUSE' : 0x0000000000002000,
|
||||
'MCA' : 0x0000000000004000,
|
||||
'VCN' : 0x0000000000008000,
|
||||
'JPEG' : 0x0000000000010000,
|
||||
'IH' : 0x0000000000020000,
|
||||
'MPIO' : 0x0000000000040000,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -91,8 +91,13 @@ static const std::map<amdsmi_gpu_block_t, const char *> kBlockNameMap = {
|
||||
{AMDSMI_GPU_BLOCK_MP0, "MP0"},
|
||||
{AMDSMI_GPU_BLOCK_MP1, "MP1"},
|
||||
{AMDSMI_GPU_BLOCK_FUSE, "FUSE"},
|
||||
{AMDSMI_GPU_BLOCK_MCA, "MCA"},
|
||||
{AMDSMI_GPU_BLOCK_VCN, "VCN"},
|
||||
{AMDSMI_GPU_BLOCK_JPEG, "JPEG"},
|
||||
{AMDSMI_GPU_BLOCK_IH, "IH"},
|
||||
{AMDSMI_GPU_BLOCK_MPIO, "MPIO"},
|
||||
};
|
||||
static_assert(AMDSMI_GPU_BLOCK_LAST == AMDSMI_GPU_BLOCK_FUSE,
|
||||
static_assert(AMDSMI_GPU_BLOCK_LAST == AMDSMI_GPU_BLOCK_MPIO,
|
||||
"kBlockNameMap needs to be updated");
|
||||
|
||||
static const char * kRasErrStateStrings[] = {
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle