diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 26ff1602c7..487531b6cb 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1122,14 +1122,11 @@ class AMDSMICommands(): if args.usage: try: engine_usage = amdsmi_interface.amdsmi_get_gpu_activity(args.gpu) - engine_usage['gfx_usage'] = engine_usage.pop('gfx_activity') - engine_usage['mem_usage'] = engine_usage.pop('umc_activity') - engine_usage['mm_ip_usage'] = engine_usage.pop('mm_activity') - engine_usage['vcn_activities'] = gpu_metric_output.pop('vcn_activity') - engine_usage['jpeg_activities[AID0]'] = gpu_metric_output.pop('jpeg_activities[AID0]') - engine_usage['jpeg_activities[AID1]'] = gpu_metric_output.pop('jpeg_activities[AID1]') - engine_usage['jpeg_activities[AID2]'] = gpu_metric_output.pop('jpeg_activities[AID2]') - engine_usage['jpeg_activities[AID3]'] = gpu_metric_output.pop('jpeg_activities[AID3]') + engine_usage['gfx_activity'] = engine_usage.pop('gfx_activity') + engine_usage['umc_activity'] = engine_usage.pop('umc_activity') + engine_usage['mm_activity'] = engine_usage.pop('mm_activity') + engine_usage['vcn_activity'] = gpu_metric_output.pop('vcn_activity') + engine_usage['jpeg_activity'] = gpu_metric_output.pop('jpeg_activity') for key, value in engine_usage.items(): if not isinstance(value, list) and value > 100: engine_usage[key] = "N/A" diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index f10852cb9c..de5fbbb7cf 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -2126,8 +2126,8 @@ Output: Dictionary with fields `temperature_vrsoc` | vrsoc temperature value | Celsius (C) `temperature_vrmem` | vrmem temperature value | Celsius (C) `average_gfx_activity` | Average gfx activity | % -`average_umc_activity` | Average umc activity | % -`average_mm_activity` | Average mm activity | % +`average_umc_activity` | Average umc (Universal Memory Controller) activity | % +`average_mm_activity` | Average mm (multimedia) engine activity | % `average_socket_power` | Average socket power | W `energy_accumulator` | Energy accumulated with a 15.3 uJ resolution over 1ns | uJ `system_clock_counter` | System clock counter | ns @@ -2178,7 +2178,7 @@ Output: Dictionary with fields `mem_max_bandwidth` | Maximum memory bandwidth usage accumulated | GB/s `pcie_nak_sent_count_acc` | PCIe NAC sent count accumulated | `pcie_nak_rcvd_count_acc` | PCIe NAC received count accumulated | -`jpeg_activitys[AID]` | List of JPEG engine activity for each AID (X=0-3) | % +`jpeg_activity` | List of JPEG engine activity | % Exceptions that can be thrown by `amdsmi_get_gpu_metrics_info` function: diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index 1709ab795a..01743641ed 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -3242,10 +3242,7 @@ def amdsmi_get_gpu_metrics_info( "mem_max_bandwidth": gpu_metrics.mem_max_bandwidth, "pcie_nak_sent_count_acc": gpu_metrics.pcie_nak_sent_count_acc, "pcie_nak_rcvd_count_acc": gpu_metrics.pcie_nak_rcvd_count_acc, - "jpeg_activities[AID0]": list(gpu_metrics.jpeg_activities)[:8], - "jpeg_activities[AID1]": list(gpu_metrics.jpeg_activities)[8:16], - "jpeg_activities[AID2]": list(gpu_metrics.jpeg_activities)[16:24], - "jpeg_activities[AID3]": list(gpu_metrics.jpeg_activities)[24:32], + "jpeg_activity": list(gpu_metrics.jpeg_activity), } diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index c0047ff1be..d7836d1316 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -906,7 +906,6 @@ amdsmi_clk_info_t = struct_amdsmi_clk_info_t class struct_amdsmi_engine_usage_t(Structure): pass - struct_amdsmi_engine_usage_t._pack_ = 1 # source:False struct_amdsmi_engine_usage_t._fields_ = [ ('gfx_activity', ctypes.c_uint32), @@ -920,6 +919,16 @@ amdsmi_process_handle_t = ctypes.c_uint32 class struct_amdsmi_proc_info_t(Structure): pass +class struct_engine_usage_(Structure): + pass + +struct_engine_usage_._pack_ = 1 # source:False +struct_engine_usage_._fields_ = [ + ('gfx', ctypes.c_uint64), + ('enc', ctypes.c_uint64), + ('reserved', ctypes.c_uint32 * 12), +] + class struct_memory_usage_(Structure): pass @@ -931,16 +940,6 @@ struct_memory_usage_._fields_ = [ ('reserved', ctypes.c_uint32 * 10), ] -class struct_engine_usage_(Structure): - pass - -struct_engine_usage_._pack_ = 1 # source:False -struct_engine_usage_._fields_ = [ - ('gfx', ctypes.c_uint64), - ('enc', ctypes.c_uint64), - ('reserved', ctypes.c_uint32 * 12), -] - struct_amdsmi_proc_info_t._pack_ = 1 # source:False struct_amdsmi_proc_info_t._fields_ = [ ('name', ctypes.c_char * 32), @@ -1527,11 +1526,12 @@ struct_amdsmi_gpu_metrics_t._fields_ = [ ('current_socclks', ctypes.c_uint16 * 4), ('current_vclk0s', ctypes.c_uint16 * 4), ('current_dclk0s', ctypes.c_uint16 * 4), + ('jpeg_activity', ctypes.c_uint16 * 32), ('mem_bandwidth_acc', ctypes.c_uint64), ('mem_max_bandwidth', ctypes.c_uint32), ('pcie_nak_sent_count_acc', ctypes.c_uint32), ('pcie_nak_rcvd_count_acc', ctypes.c_uint32), - ('jpeg_activities', ctypes.c_uint16 * 32), + ('PADDING_4', ctypes.c_ubyte * 4), ] amdsmi_gpu_metrics_t = struct_amdsmi_gpu_metrics_t diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc index 2887d3021f..b92b8d542b 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc @@ -1413,7 +1413,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m metrics_public_init.average_mm_activity = m_gpu_metrics_tbl.m_average_mm_activity; // Power/Energy - // metrics_public_init.average_socket_power = m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value + metrics_public_init.average_socket_power = m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value metrics_public_init.energy_accumulator = m_gpu_metrics_tbl.m_energy_accumulator; // Driver attached timestamp (in ns) diff --git a/projects/amdsmi/update_wrapper.sh b/projects/amdsmi/update_wrapper.sh index d6557d92c0..918158dec4 100755 --- a/projects/amdsmi/update_wrapper.sh +++ b/projects/amdsmi/update_wrapper.sh @@ -27,7 +27,7 @@ if ! does_image_exist; then # docker pull dmitriigalantsev/amdsmi_wrapper_updater echo "No docker image found! Generating one" # set to 0 because it's compatible with more systems - DOCKER_BUILDKIT="${DOCKER_BUILDKIT:0}" docker build "$DIR/py-interface" -t "$DOCKER_NAME":latest + DOCKER_BUILDKIT="${DOCKER_BUILDKIT:=0}" docker build "$DIR/py-interface" -t "$DOCKER_NAME":latest fi ENABLE_ESMI_LIB=""