From 55b88706e1de6f21445170e2971aac534953e8b2 Mon Sep 17 00:00:00 2001 From: "Oliveira, Daniel" Date: Mon, 5 Aug 2024 20:49:53 -0500 Subject: [PATCH] SWDEV-463401: amdsmi_get_gpu_asic_info() adds num_of_compute_units number of compute units `amdgpu_gpu_info.num_of_compute_units` is exposed through amdsmi_get_gpu_asic_info(). Code changes related to the following: * API * CLI * Unit tests * Examples Change-Id: Ibeb612d079ed87437a0e56124b8504098fc2dcfd Signed-off-by: Oliveira, Daniel [ROCm/amdsmi commit: b05849dad0f2de1761ff78049aac81b15df7c2c9] --- .../amdsmi/example/amd_smi_drm_example.cc | 3 ++- .../amdsmi/example/amd_smi_nodrm_example.cc | 3 ++- projects/amdsmi/include/amd_smi/amdsmi.h | 3 ++- .../amdsmi/py-interface/amdsmi_interface.py | 7 ++++- .../amdsmi/py-interface/amdsmi_wrapper.py | 3 ++- .../rocm_smi/include/rocm_smi/rocm_smi_kfd.h | 4 +++ .../include/rocm_smi/rocm_smi_utils.h | 4 +-- projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc | 27 +++++++++++++++++++ .../amdsmi/rocm_smi/src/rocm_smi_utils.cc | 20 ++++++++++++++ projects/amdsmi/src/amd_smi/amd_smi.cc | 9 +++++++ projects/amdsmi/src/amd_smi/amd_smi_utils.cc | 1 + 11 files changed, 77 insertions(+), 7 deletions(-) diff --git a/projects/amdsmi/example/amd_smi_drm_example.cc b/projects/amdsmi/example/amd_smi_drm_example.cc index 9f3de13957..e8ef3d80d9 100644 --- a/projects/amdsmi/example/amd_smi_drm_example.cc +++ b/projects/amdsmi/example/amd_smi_drm_example.cc @@ -301,7 +301,8 @@ int main() { printf("\tDeviceID: 0x%lx\n", asic_info.device_id); printf("\tVendorID: 0x%x\n", asic_info.vendor_id); printf("\tRevisionID: 0x%x\n", asic_info.rev_id); - printf("\tAsic serial: 0x%s\n\n", asic_info.asic_serial); + printf("\tAsic serial: 0x%s\n", asic_info.asic_serial); + printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units); // Get VRAM info amdsmi_vram_info_t vram_info = {}; diff --git a/projects/amdsmi/example/amd_smi_nodrm_example.cc b/projects/amdsmi/example/amd_smi_nodrm_example.cc index 37f5b9645b..19e8cf5947 100644 --- a/projects/amdsmi/example/amd_smi_nodrm_example.cc +++ b/projects/amdsmi/example/amd_smi_nodrm_example.cc @@ -151,7 +151,8 @@ int main() { printf("\tVendorID: 0x%x\n", asic_info.vendor_id); printf("\tRevisionID: 0x%x\n", asic_info.rev_id); printf("\tAsic serial: 0x%s\n", asic_info.asic_serial); - printf("\tOAM id: 0x%x\n\n", asic_info.oam_id); + printf("\tOAM id: 0x%x\n", asic_info.oam_id); + printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units); // Get VBIOS info amdsmi_vbios_info_t vbios_info = {}; diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index f49636e675..8be2c18b36 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -588,7 +588,8 @@ typedef struct { uint32_t rev_id; char asic_serial[AMDSMI_NORMAL_STRING_LENGTH]; uint32_t oam_id; //< 0xFFFF if not supported - uint32_t reserved[18]; + uint32_t num_of_compute_units; //< 0xFFFFFFFF if not supported + uint32_t reserved[17]; } amdsmi_asic_info_t; typedef enum { diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index 7ddb94d2ee..664b7d2b3e 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -1643,7 +1643,8 @@ def amdsmi_get_gpu_asic_info( "device_id": asic_info_struct.device_id, "rev_id": _padHexValue(hex(asic_info_struct.rev_id), 2), "asic_serial": asic_info_struct.asic_serial.decode("utf-8"), - "oam_id": asic_info_struct.oam_id + "oam_id": asic_info_struct.oam_id, + "num_compute_units": asic_info_struct.num_of_compute_units } string_values = ["market_name", "vendor_name"] @@ -1670,6 +1671,10 @@ def amdsmi_get_gpu_asic_info( if asic_info["oam_id"] == 0xFFFF: # uint 16 max asic_info["oam_id"] = "N/A" + # Check for max value as a sign for not applicable + if asic_info["num_compute_units"] == 0xFFFFFFFF: # uint 32 max + asic_info["num_compute_units"] = "N/A" + # Remove commas from vendor name for clean output asic_info["vendor_name"] = asic_info["vendor_name"].replace(',', '') diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index 1c7a4377d4..57fc962a0a 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -901,7 +901,8 @@ struct_amdsmi_asic_info_t._fields_ = [ ('rev_id', ctypes.c_uint32), ('asic_serial', ctypes.c_char * 32), ('oam_id', ctypes.c_uint32), - ('reserved', ctypes.c_uint32 * 18), + ('num_of_compute_units', ctypes.c_uint32), + ('reserved', ctypes.c_uint32 * 17), ] amdsmi_asic_info_t = struct_amdsmi_asic_info_t diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_kfd.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_kfd.h index 1173f48109..81a76400ce 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_kfd.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_kfd.h @@ -90,6 +90,10 @@ class KFDNode { // Get gfx target version from kfd int get_gfx_target_version(uint64_t* gfx_target_version); + // Get simd_per_cu from kfd + int32_t get_simd_per_cu(uint64_t* simd_per_cu) const; + int32_t get_simd_count(uint64_t* simd_count) const; + private: uint32_t node_indx_; uint32_t amdgpu_dev_index_; diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h index eb53dfbba3..36261d89e6 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h @@ -126,8 +126,8 @@ std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv); std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions, rsmi_freq_volt_region_t *regions); bool is_sudo_user(); -rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, - std::string *gfx_version); +rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, std::string *gfx_version); +rsmi_status_t rsmi_dev_number_of_computes_get(uint32_t dv_ind, uint32_t* num_computes); std::string leftTrim(const std::string &s); std::string rightTrim(const std::string &s); diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc index 13d2c27be0..b319a1fcd4 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc @@ -1077,5 +1077,32 @@ int KFDNode::get_gfx_target_version(uint64_t *gfx_target_version) { return ret; } +int32_t KFDNode::get_simd_per_cu(uint64_t* simd_per_cu) const +{ + const std::string properties_path("/sys/class/kfd/kfd/topology/nodes/" + + std::to_string(this->node_indx_) + + "/properties"); + + auto tmp_simd_per_cu = uint64_t(0); + auto ret = read_node_properties(this->node_indx_, "simd_per_cu", + &tmp_simd_per_cu); + *simd_per_cu = tmp_simd_per_cu; + return ret; +} + +int32_t KFDNode::get_simd_count(uint64_t* simd_count) const +{ + const std::string properties_path("/sys/class/kfd/kfd/topology/nodes/" + + std::to_string(this->node_indx_) + + "/properties"); + + auto tmp_simd_count = uint64_t(0); + auto ret = read_node_properties(this->node_indx_, "simd_count", + &tmp_simd_count); + *simd_count = tmp_simd_count; + return ret; +} + + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc index 122584f9bd..32f0209654 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc @@ -65,6 +65,7 @@ #include #include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_kfd.h" #include "rocm_smi/rocm_smi_utils.h" #include "rocm_smi/rocm_smi_exception.h" #include "rocm_smi/rocm_smi_main.h" @@ -1198,6 +1199,25 @@ rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, } } +rsmi_status_t rsmi_dev_number_of_computes_get(uint32_t dv_ind, uint32_t* num_computes) +{ + GET_DEV_AND_KFDNODE_FROM_INDX + + auto tmp_simd_per_cu = uint64_t(0); + auto tmp_simd_count = uint64_t(0); + auto ret_simd_per_cu = kfd_node->get_simd_per_cu(&tmp_simd_per_cu); + auto ret_simd_count = kfd_node->get_simd_count(&tmp_simd_count); + + if (((ret_simd_per_cu != 0) || (ret_simd_count != 0)) || + ((tmp_simd_per_cu == 0) || (tmp_simd_count == 0))) { + return rsmi_status_t::RSMI_STATUS_NOT_SUPPORTED; + } + + *num_computes = (tmp_simd_count / tmp_simd_per_cu); + return rsmi_status_t::RSMI_STATUS_SUCCESS; +} + + std::queue getAllDeviceGfxVers() { uint32_t num_monitor_devs = 0; rsmi_status_t ret; diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 9f4d3aa74a..be0adfd876 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -757,6 +757,15 @@ amdsmi_get_gpu_asic_info(amdsmi_processor_handle processor_handle, amdsmi_asic_i &(tmp_oam_id)); info->oam_id = tmp_oam_id; + // default to 0xffffffff as not supported + info->num_of_compute_units = std::numeric_limits::max(); + auto tmp_num_of_compute_units = uint32_t(0); + status = rsmi_wrapper(amd::smi::rsmi_dev_number_of_computes_get, processor_handle, + &tmp_num_of_compute_units); + if (status == amdsmi_status_t::AMDSMI_STATUS_SUCCESS) { + info->num_of_compute_units = tmp_num_of_compute_units; + } + return AMDSMI_STATUS_SUCCESS; } diff --git a/projects/amdsmi/src/amd_smi/amd_smi_utils.cc b/projects/amdsmi/src/amd_smi/amd_smi_utils.cc index fa8a6e1e4e..7c64fb4ba4 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi_utils.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi_utils.cc @@ -615,3 +615,4 @@ amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDe *enabled = false; return AMDSMI_STATUS_SUCCESS; } +