Updated monitor --pcie to use gpu_metrics pcie bandwidth
Signed-off-by: Maisam Arif <maisarif@amd.com> Change-Id: Id37aebc0297317edcd0f459a4817f56a6030d902
This commit is contained in:
@@ -12,6 +12,15 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/](
|
||||
|
||||
### Changed
|
||||
|
||||
- **Updated `amd-smi monitor --pcie` output**
|
||||
The previous source for the PCIe bandwidth monitor output was a legacy file that is no longer supported and that caused delays in the monitor command. The output no longer reports TX/RX; it now reports the instantaneous bandwidth from gpu_metrics instead. Updated output:
|
||||
|
||||
```shell
|
||||
$ amd-smi monitor --pcie
|
||||
GPU PCIE_BW
|
||||
0 26 Mb/s
|
||||
```
|
||||
|
||||
- **Updated `amd-smi metric --ecc-blocks` output**
|
||||
The ECC blocks argument was outputting blocks that have no counters available; the filtering was updated to show only the blocks for which counters are available:
|
||||
|
||||
|
||||
@@ -4285,38 +4285,14 @@ class AMDSMICommands():
|
||||
self.logger.table_header += 'VRAM_TOTAL'.rjust(12)
|
||||
if args.pcie:
|
||||
try:
|
||||
pcie_bw = amdsmi_interface.amdsmi_get_gpu_pci_throughput(args.gpu)
|
||||
sent = pcie_bw['sent'] * pcie_bw['max_pkt_sz']
|
||||
received = pcie_bw['received'] * pcie_bw['max_pkt_sz']
|
||||
|
||||
bw_unit = "Mb/s"
|
||||
packet_size_unit = "B"
|
||||
if sent > 0:
|
||||
sent = sent // 1024 // 1024
|
||||
if received > 0:
|
||||
received = received // 1024 // 1024
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
sent = f"{sent} {bw_unit}"
|
||||
received = f"{received} {bw_unit}"
|
||||
pcie_bw['max_pkt_sz'] = f"{pcie_bw['max_pkt_sz']} {packet_size_unit}"
|
||||
if self.logger.is_json_format():
|
||||
sent = {"value" : sent,
|
||||
"unit" : bw_unit}
|
||||
received = {"value" : received,
|
||||
"unit" : bw_unit}
|
||||
pcie_bw['max_pkt_sz'] = {"value" : pcie_bw['max_pkt_sz'],
|
||||
"unit" : packet_size_unit}
|
||||
|
||||
monitor_values['pcie_tx'] = sent
|
||||
monitor_values['pcie_rx'] = received
|
||||
pcie_info = amdsmi_interface.amdsmi_get_pcie_info(args.gpu)['pcie_metric']
|
||||
pcie_bw_unit = 'Mb/s'
|
||||
monitor_values['pcie_bw'] = self.helpers.unit_format(self.logger, pcie_info['pcie_bandwidth'], pcie_bw_unit)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
monitor_values['pcie_tx'] = "N/A"
|
||||
monitor_values['pcie_rx'] = "N/A"
|
||||
logging.debug("Failed to get pci throughput on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
monitor_values['pcie_bw'] = "N/A"
|
||||
logging.debug("Failed to get pci bandwidth on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'PCIE_TX'.rjust(10)
|
||||
self.logger.table_header += 'PCIE_RX'.rjust(10)
|
||||
self.logger.table_header += 'PCIE_BW'.rjust(10)
|
||||
|
||||
self.logger.store_output(args.gpu, 'values', monitor_values)
|
||||
|
||||
|
||||
@@ -1110,7 +1110,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
throttle_help = "Monitor thermal throttle status"
|
||||
ecc_help = "Monitor ECC single bit, ECC double bit, and PCIe replay error counts"
|
||||
mem_usage_help = "Monitor memory usage in MB"
|
||||
pcie_throughput_help = "Monitor PCIe Tx/Rx in MB/s"
|
||||
pcie_bandwidth_help = "Monitor PCIe bandwidth in Mb/s"
|
||||
|
||||
# Create monitor subparser
|
||||
monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help)
|
||||
@@ -1133,7 +1133,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
monitor_parser.add_argument('-s', '--throttle-status', action='store_true', required=False, help=throttle_help)
|
||||
monitor_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help)
|
||||
monitor_parser.add_argument('-v', '--vram-usage', action='store_true', required=False, help=mem_usage_help)
|
||||
monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_throughput_help)
|
||||
monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_bandwidth_help)
|
||||
|
||||
|
||||
def _add_rocm_smi_parser(self, subparsers, func):
|
||||
|
||||
Reference in New Issue
Block a user