Removed Throttle Status from CLI Tool
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: I8eb8f30f821589003201d6d8bb96592ec5f8a483
[ROCm/amdsmi commit: 37c044696d]
This commit is contained in:
@@ -88,6 +88,9 @@ ASIC products. This requires users to update any ABIs using this structure.
|
||||
|
||||
### Fixes
|
||||
|
||||
- **Removed `throttle-status` from `amd-smi monitor` as it is no longer reliably supported**.
|
||||
Throttle status may work for older ASICs, but will be replaced with PVIOL and TVIOL metrics for future ASIC support. It remains a field in the gpu_metrics API and in `amd-smi metric --power`.
|
||||
|
||||
- **`amdsmi_get_gpu_board_info()` no longer returns junk char strings**.
|
||||
Previously, if there was a partial failure to retrieve character strings, we would return
|
||||
garbage output to users using the API. This fix intends to populate as many values as possible.
|
||||
|
||||
@@ -594,7 +594,7 @@ Command Modifiers:
|
||||
usage: amd-smi monitor [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL]
|
||||
[-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...]]
|
||||
[-w INTERVAL] [-W TIME] [-i ITERATIONS] [-p] [-t] [-u] [-m] [-n]
|
||||
[-d] [-s] [-e] [-v] [-r]
|
||||
[-d] [-e] [-v] [-r]
|
||||
|
||||
Monitor a target device for the specified arguments.
|
||||
If no arguments are provided, all arguments will be enabled.
|
||||
@@ -626,7 +626,6 @@ Monitor Arguments:
|
||||
-m, --mem Monitor memory utilization (%) and clock (MHz)
|
||||
-n, --encoder Monitor encoder utilization (%) and clock (MHz)
|
||||
-d, --decoder Monitor decoder utilization (%) and clock (MHz)
|
||||
-s, --throttle-status Monitor thermal throttle status
|
||||
-e, --ecc Monitor ECC single bit, ECC double bit, and PCIe replay error counts
|
||||
-v, --vram-usage Monitor memory usage in MB
|
||||
-r, --pcie Monitor PCIe bandwidth in Mb/s
|
||||
|
||||
@@ -1328,8 +1328,8 @@ class AMDSMICommands():
|
||||
'gfx_voltage': "N/A",
|
||||
'soc_voltage': "N/A",
|
||||
'mem_voltage': "N/A",
|
||||
'power_management': "N/A",
|
||||
'throttle_status': "N/A"}
|
||||
'throttle_status': "N/A",
|
||||
'power_management': "N/A"}
|
||||
|
||||
try:
|
||||
voltage_unit = "mV"
|
||||
@@ -3968,7 +3968,7 @@ class AMDSMICommands():
|
||||
def monitor(self, args, multiple_devices=False, watching_output=False, gpu=None,
|
||||
watch=None, watch_time=None, iterations=None, power_usage=None,
|
||||
temperature=None, gfx_util=None, mem_util=None, encoder=None, decoder=None,
|
||||
throttle_status=None, ecc=None, vram_usage=None, pcie=None):
|
||||
ecc=None, vram_usage=None, pcie=None):
|
||||
""" Populate a table with each GPU as an index to rows of targeted data
|
||||
|
||||
Args:
|
||||
@@ -3984,7 +3984,6 @@ class AMDSMICommands():
|
||||
mem (bool, optional): Value override for args.mem. Defaults to None.
|
||||
encoder (bool, optional): Value override for args.encoder. Defaults to None.
|
||||
decoder (bool, optional): Value override for args.decoder. Defaults to None.
|
||||
throttle_status (bool, optional): Value override for args.throttle_status. Defaults to None.
|
||||
ecc (bool, optional): Value override for args.ecc. Defaults to None.
|
||||
vram_usage (bool, optional): Value override for args.vram_usage. Defaults to None.
|
||||
pcie (bool, optional): Value override for args.pcie. Defaults to None.
|
||||
@@ -4019,8 +4018,6 @@ class AMDSMICommands():
|
||||
args.encoder = encoder
|
||||
if decoder:
|
||||
args.decoder = decoder
|
||||
if throttle_status:
|
||||
args.throttle_status = throttle_status
|
||||
if ecc:
|
||||
args.ecc = ecc
|
||||
if vram_usage:
|
||||
@@ -4034,10 +4031,10 @@ class AMDSMICommands():
|
||||
|
||||
# If all arguments are False, then print all values
|
||||
if not any([args.power_usage, args.temperature, args.gfx, args.mem,
|
||||
args.encoder, args.decoder, args.throttle_status, args.ecc,
|
||||
args.encoder, args.decoder, args.ecc,
|
||||
args.vram_usage, args.pcie]):
|
||||
args.power_usage = args.temperature = args.gfx = args.mem = \
|
||||
args.encoder = args.decoder = args.throttle_status = args.ecc = \
|
||||
args.encoder = args.decoder = args.ecc = \
|
||||
args.vram_usage = args.pcie = True
|
||||
|
||||
# Handle watch logic, will only enter this block once
|
||||
@@ -4282,20 +4279,6 @@ class AMDSMICommands():
|
||||
logging.debug("Failed to get decoder clock on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'DEC_CLOCK'.rjust(11)
|
||||
if args.throttle_status:
|
||||
try:
|
||||
throttle_status = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['throttle_status']
|
||||
if throttle_status != "N/A":
|
||||
if throttle_status:
|
||||
throttle_status = "THROTTLED"
|
||||
else:
|
||||
throttle_status = "UNTHROTTLED"
|
||||
monitor_values['throttle_status'] = throttle_status
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
monitor_values['throttle_status'] = "N/A"
|
||||
logging.debug("Failed to get throttle status on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'THROTTLE'.rjust(13)
|
||||
if args.ecc:
|
||||
try:
|
||||
ecc = amdsmi_interface.amdsmi_get_gpu_total_ecc_count(args.gpu)
|
||||
|
||||
@@ -104,8 +104,8 @@ class AMDSMIHelpers():
|
||||
|
||||
if string_format:
|
||||
return f"{operating_system} {operating_system_type}"
|
||||
else:
|
||||
return (operating_system, operating_system_type)
|
||||
|
||||
return (operating_system, operating_system_type)
|
||||
|
||||
|
||||
def is_virtual_os(self):
|
||||
|
||||
@@ -116,7 +116,7 @@ class AMDSMILogger():
|
||||
table_values += value.rjust(11)
|
||||
elif key == 'vram_total' or 'ecc' in key:
|
||||
table_values += value.rjust(12)
|
||||
elif key in ('throttle_status', 'pcie_replay'):
|
||||
elif key in ['pcie_replay']:
|
||||
table_values += value.rjust(13)
|
||||
# Only for handling topology tables
|
||||
elif 'gpu_' in key:
|
||||
|
||||
@@ -1113,7 +1113,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
mem_util_help = "Monitor memory utilization (%%) and clock (MHz)"
|
||||
encoder_util_help = "Monitor encoder utilization (%%) and clock (MHz)"
|
||||
decoder_util_help = "Monitor decoder utilization (%%) and clock (MHz)"
|
||||
throttle_help = "Monitor thermal throttle status"
|
||||
ecc_help = "Monitor ECC single bit, ECC double bit, and PCIe replay error counts"
|
||||
mem_usage_help = "Monitor memory usage in MB"
|
||||
pcie_bandwidth_help = "Monitor PCIe bandwidth in Mb/s"
|
||||
@@ -1136,7 +1135,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
monitor_parser.add_argument('-m', '--mem', action='store_true', required=False, help=mem_util_help)
|
||||
monitor_parser.add_argument('-n', '--encoder', action='store_true', required=False, help=encoder_util_help)
|
||||
monitor_parser.add_argument('-d', '--decoder', action='store_true', required=False, help=decoder_util_help)
|
||||
monitor_parser.add_argument('-s', '--throttle-status', action='store_true', required=False, help=throttle_help)
|
||||
monitor_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help)
|
||||
monitor_parser.add_argument('-v', '--vram-usage', action='store_true', required=False, help=mem_usage_help)
|
||||
monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_bandwidth_help)
|
||||
|
||||
@@ -2257,7 +2257,7 @@ Output: Dictionary with fields
|
||||
`current_dclk0` | Current dclk0 | MHz
|
||||
`current_vclk1` | Current vclk1 | MHz
|
||||
`current_dclk1` | Current dclk1 | MHz
|
||||
`throttle_status` | Current throttle status | MHz
|
||||
`throttle_status` | Current throttle status | bool
|
||||
`current_fan_speed` | Current fan speed | RPM
|
||||
`pcie_link_width` | PCIe link width (number of lanes) | lanes
|
||||
`pcie_link_speed` | PCIe link speed in 0.1 GT/s (Giga Transfers per second) | GT/s
|
||||
|
||||
Reference in New Issue
Block a user