From 6ca95c1a2dd0cb197909bc6189daadf66e239ce0 Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Fri, 6 Oct 2023 12:39:40 -0500 Subject: [PATCH] Add support to XGMI physical id Get XGMI physical id from sysfs. Change-Id: Ifd9e431bc2fbfd759d888a71b99046a5eb07b6ed --- example/amd_smi_nodrm_example.cc | 4 +++- include/amd_smi/amdsmi.h | 3 ++- py-interface/amdsmi_wrapper.py | 25 +++++++++++---------- rocm_smi/include/rocm_smi/rocm_smi.h | 17 ++++++++++++++ rocm_smi/include/rocm_smi/rocm_smi_device.h | 1 + rocm_smi/src/rocm_smi.cc | 15 +++++++++++++ rocm_smi/src/rocm_smi_device.cc | 5 +++++ rocm_smi/src/rocm_smi_main.cc | 1 + src/amd_smi/amd_smi.cc | 5 +++++ 9 files changed, 62 insertions(+), 14 deletions(-) diff --git a/example/amd_smi_nodrm_example.cc b/example/amd_smi_nodrm_example.cc index eaf1b688dc..ec02ec53d8 100644 --- a/example/amd_smi_nodrm_example.cc +++ b/example/amd_smi_nodrm_example.cc @@ -140,7 +140,9 @@ int main() { printf("\tDeviceID: 0x%lx\n", asic_info.device_id); printf("\tVendorID: 0x%x\n", asic_info.vendor_id); printf("\tRevisionID: 0x%x\n", asic_info.rev_id); - printf("\tAsic serial: 0x%s\n\n", asic_info.asic_serial); + printf("\tAsic serial: 0x%s\n", asic_info.asic_serial); + printf("\tXGMI physical id: 0x%x\n\n", + asic_info.xgmi_physical_id); // Get VBIOS info amdsmi_vbios_info_t vbios_info = {}; diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index edb27bebc5..5b85077f4c 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -444,7 +444,8 @@ typedef struct { uint64_t device_id; //< The device id of a GPU uint32_t rev_id; char asic_serial[AMDSMI_NORMAL_STRING_LENGTH]; - uint32_t reserved[19]; + uint16_t xgmi_physical_id; //< 0xFFFF if not supported + uint16_t reserved[37]; } amdsmi_asic_info_t; typedef struct{ diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 74240ad597..2d2e8bde27 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -23,7 
+23,7 @@ import os # -*- coding: utf-8 -*- # -# TARGET arch is: ['-I/usr/lib64/clang/16/include'] +# TARGET arch is: ['-I/usr/lib64/clang/17/include'] # WORD_SIZE is: 8 # POINTER_SIZE is: 8 # LONGDOUBLE_SIZE is: 16 @@ -754,7 +754,8 @@ struct_amdsmi_asic_info_t._fields_ = [ ('device_id', ctypes.c_uint64), ('rev_id', ctypes.c_uint32), ('asic_serial', ctypes.c_char * 32), - ('reserved', ctypes.c_uint32 * 19), + ('xgmi_physical_id', ctypes.c_uint16), + ('reserved', ctypes.c_uint16 * 37), ] amdsmi_asic_info_t = struct_amdsmi_asic_info_t @@ -839,6 +840,16 @@ amdsmi_process_handle_t = ctypes.c_uint32 class struct_amdsmi_proc_info_t(Structure): pass +class struct_engine_usage_(Structure): + pass + +struct_engine_usage_._pack_ = 1 # source:False +struct_engine_usage_._fields_ = [ + ('gfx', ctypes.c_uint64), + ('enc', ctypes.c_uint64), + ('reserved', ctypes.c_uint32 * 12), +] + class struct_memory_usage_(Structure): pass @@ -850,16 +861,6 @@ struct_memory_usage_._fields_ = [ ('reserved', ctypes.c_uint32 * 10), ] -class struct_engine_usage_(Structure): - pass - -struct_engine_usage_._pack_ = 1 # source:False -struct_engine_usage_._fields_ = [ - ('gfx', ctypes.c_uint64), - ('enc', ctypes.c_uint64), - ('reserved', ctypes.c_uint32 * 12), -] - struct_amdsmi_proc_info_t._pack_ = 1 # source:False struct_amdsmi_proc_info_t._fields_ = [ ('name', ctypes.c_char * 32), diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index c041eb24a5..44179e07f0 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -1599,6 +1599,23 @@ rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id); */ rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id); + +/** + * @brief Get the XGMI physical id associated with the device + * + * @details Given a device index @p dv_ind and a pointer to a uint16_t @p id, + * this function writes the XGMI physical id to the uint16_t it points to + * + * @param[in] dv_ind a device index + * + * 
@param[inout] id a pointer to uint16_t to which the XGMI physical id + * will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id); + /** @} */ // end of IDQuer /*****************************************************************************/ diff --git a/rocm_smi/include/rocm_smi/rocm_smi_device.h b/rocm_smi/include/rocm_smi/rocm_smi_device.h index 8640bae9c1..d0743029f9 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi_device.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_device.h @@ -102,6 +102,7 @@ enum DevInfoTypes { kDevOverDriveLevel, kDevMemOverDriveLevel, kDevDevID, + kDevXGMIPhysicalID, kDevDevRevID, kDevDevProdName, kDevDevProdNum, diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 1d9da410d1..cbaf9af45d 100755 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -861,6 +861,21 @@ rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) { return ret; } +rsmi_status_t +rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id) { + std::ostringstream ss; + rsmi_status_t ret; + ss << __PRETTY_FUNCTION__ << "| ======= start ======="; + LOG_TRACE(ss); + CHK_SUPPORT_NAME_ONLY(id) + + ret = get_id(dv_ind, amd::smi::kDevXGMIPhysicalID, id); + ss << __PRETTY_FUNCTION__ << " | ======= end =======" + << ", reporting " << amd::smi::getRSMIStatusString(ret); + LOG_TRACE(ss); + return ret; +} + rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision) { std::ostringstream outss; diff --git a/rocm_smi/src/rocm_smi_device.cc b/rocm_smi/src/rocm_smi_device.cc index a1a45795e2..572ade8f35 100755 --- a/rocm_smi/src/rocm_smi_device.cc +++ b/rocm_smi/src/rocm_smi_device.cc @@ -85,6 +85,7 @@ static const char *kDevPerfLevelFName = "power_dpm_force_performance_level"; static const char *kDevDevProdNameFName = "product_name"; static const char *kDevDevProdNumFName = "product_number"; static const char *kDevDevIDFName = "device"; 
+static const char* kDevXGMIPhysicalIDFName = "xgmi_physical_id"; static const char *kDevDevRevIDFName = "revision"; static const char *kDevVendorIDFName = "vendor"; static const char *kDevBoardInfoFName = "board_info"; @@ -243,6 +244,7 @@ static const std::map kDevAttribNameMap = { {kDevDevProdName, kDevDevProdNameFName}, {kDevDevProdNum, kDevDevProdNumFName}, {kDevDevID, kDevDevIDFName}, + {kDevXGMIPhysicalID, kDevXGMIPhysicalIDFName}, {kDevDevRevID, kDevDevRevIDFName}, {kDevVendorID, kDevVendorIDFName}, {kDevPCieVendorID, kDevPCieVendorIDFName}, @@ -385,6 +387,7 @@ static const std::map kDevFuncDependsMap = { // Functions with only mandatory dependencies {"rsmi_dev_vram_vendor_get", {{kDevVramVendorFName}, {}}}, {"rsmi_dev_id_get", {{kDevDevIDFName}, {}}}, + {"rsmi_dev_xgmi_physical_id_get", {{kDevXGMIPhysicalIDFName}, {}}}, {"rsmi_dev_revision_get", {{kDevDevRevIDFName}, {}}}, {"rsmi_dev_vendor_id_get", {{kDevVendorIDFName}, {}}}, {"rsmi_dev_name_get", {{kDevVendorIDFName, @@ -929,6 +932,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { case kDevVendorID: case kDevPCieVendorID: case kDevErrCntFeatures: + case kDevXGMIPhysicalID: ret = readDevInfoStr(type, &tempStr); RET_IF_NONZERO(ret); @@ -1104,6 +1108,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) { case kDevComputePartition: case kDevMemoryPartition: case kDevNumaNode: + case kDevXGMIPhysicalID: return readDevInfoStr(type, val); break; diff --git a/rocm_smi/src/rocm_smi_main.cc b/rocm_smi/src/rocm_smi_main.cc index 2e5b322f28..bed19c3f36 100755 --- a/rocm_smi/src/rocm_smi_main.cc +++ b/rocm_smi/src/rocm_smi_main.cc @@ -84,6 +84,7 @@ amd::smi::RocmSMI::devInfoTypesStrings = { {amd::smi::kDevOverDriveLevel, amdSMI + "kDevOverDriveLevel"}, {amd::smi::kDevMemOverDriveLevel, amdSMI + "kDevMemOverDriveLevel"}, {amd::smi::kDevDevID, amdSMI + "kDevDevID"}, + {amd::smi::kDevXGMIPhysicalID, amdSMI + "kDevXGMIPhysicalID"}, {amd::smi::kDevDevRevID, amdSMI + "kDevDevRevID"}, 
{amd::smi::kDevDevProdName, amdSMI + "kDevDevProdName"}, {amd::smi::kDevBoardInfo, amdSMI + "kDevBoardInfo"}, diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 1db02250c6..201881b462 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -721,6 +721,11 @@ amdsmi_get_gpu_asic_info(amdsmi_processor_handle processor_handle, amdsmi_asic_i status = rsmi_wrapper(rsmi_dev_pcie_vendor_name_get, processor_handle, info->vendor_name, AMDSMI_MAX_STRING_LENGTH); + // default to 0xffff as not supported + info->xgmi_physical_id = std::numeric_limits<uint16_t>::max(); + status = rsmi_wrapper(rsmi_dev_xgmi_physical_id_get, processor_handle, + &(info->xgmi_physical_id)); + return AMDSMI_STATUS_SUCCESS; }