From 6715c5aa92db019e778e39e54c98dd54355475ff Mon Sep 17 00:00:00 2001 From: "Kanangot Balakrishnan, Bindhiya" Date: Wed, 17 Sep 2025 16:30:04 -0500 Subject: [PATCH] [SWDEV-534605] Increase max devices supported and drm test link type (#625) Increased the AMDSMI_MAX_DEVICES to 64 to accommodate all devices in CPX mode. The link type has been modified in amd-smi to match the rocm-smi types; the drm tests are updated accordingly. --------- Signed-off-by: Bindhiya Kanangot Balakrishnan --- example/amd_smi_drm_example.cc | 4 ++-- example/amd_smi_nodrm_example.cc | 2 +- include/amd_smi/amdsmi.h | 2 +- py-interface/amdsmi_wrapper.py | 4 ++-- src/amd_smi/amd_smi.cc | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/example/amd_smi_drm_example.cc b/example/amd_smi_drm_example.cc index 962e9f486c..210852747d 100644 --- a/example/amd_smi_drm_example.cc +++ b/example/amd_smi_drm_example.cc @@ -291,8 +291,8 @@ static const std::map static const std::map link_type_map = { {AMDSMI_LINK_TYPE_INTERNAL, "INTERNAL"}, - {AMDSMI_LINK_TYPE_XGMI, "XGMI"}, {AMDSMI_LINK_TYPE_PCIE, "PCIE"}, + {AMDSMI_LINK_TYPE_XGMI, "XGMI"}, {AMDSMI_LINK_TYPE_NOT_APPLICABLE, "NOT_APPLICABLE"}, {AMDSMI_LINK_TYPE_UNKNOWN, "UNKNOWN"} }; @@ -1959,8 +1959,8 @@ int main() { // Get nearest GPUs const char *topology_link_type_str[] = { "AMDSMI_LINK_TYPE_INTERNAL", - "AMDSMI_LINK_TYPE_XGMI", "AMDSMI_LINK_TYPE_PCIE", + "AMDSMI_LINK_TYPE_XGMI", "AMDSMI_LINK_TYPE_NOT_APPLICABLE", "AMDSMI_LINK_TYPE_UNKNOWN", }; diff --git a/example/amd_smi_nodrm_example.cc b/example/amd_smi_nodrm_example.cc index 118fa2f475..3b0024e73d 100644 --- a/example/amd_smi_nodrm_example.cc +++ b/example/amd_smi_nodrm_example.cc @@ -334,8 +334,8 @@ int main() { // Get nearest GPUs const char *topology_link_type_str[] = { "AMDSMI_LINK_TYPE_INTERNAL", - "AMDSMI_LINK_TYPE_XGMI", "AMDSMI_LINK_TYPE_PCIE", + "AMDSMI_LINK_TYPE_XGMI", "AMDSMI_LINK_TYPE_NOT_APPLICABLE", "AMDSMI_LINK_TYPE_UNKNOWN", }; diff --git 
a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 84fbd0e355..fb22a02eb1 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -2092,7 +2092,7 @@ typedef struct { */ typedef struct { uint32_t count; - amdsmi_processor_handle processor_list[AMDSMI_MAX_DEVICES+1]; + amdsmi_processor_handle processor_list[AMDSMI_MAX_DEVICES * AMDSMI_MAX_NUM_XCP]; uint64_t reserved[14]; } amdsmi_topology_nearest_t; diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 257a4c27e2..285a0e5bda 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -2204,8 +2204,8 @@ struct_amdsmi_topology_nearest_t._pack_ = 1 # source:False struct_amdsmi_topology_nearest_t._fields_ = [ ('count', ctypes.c_uint32), ('PADDING_0', ctypes.c_ubyte * 4), - ('processor_list', ctypes.POINTER(None) * 32), - ('reserved', ctypes.c_uint64 * 15), + ('processor_list', ctypes.POINTER(None) * 256), + ('reserved', ctypes.c_uint64 * 14), ] amdsmi_topology_nearest_t = struct_amdsmi_topology_nearest_t diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index e186f4d080..b5cdd0de6d 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -4807,8 +4807,8 @@ amdsmi_get_link_topology_nearest(amdsmi_processor_handle processor_handle, } - uint32_t device_counter(AMDSMI_MAX_DEVICES); - amdsmi_processor_handle device_list[AMDSMI_MAX_DEVICES]; + uint32_t device_counter(AMDSMI_MAX_DEVICES * AMDSMI_MAX_NUM_XCP); + amdsmi_processor_handle device_list[AMDSMI_MAX_DEVICES * AMDSMI_MAX_NUM_XCP]; for (auto socket_idx = uint32_t(0); socket_idx < socket_counter; ++socket_idx) { if (auto api_status = amdsmi_get_processor_handles(socket_list[socket_idx], &device_counter, device_list); (api_status != amdsmi_status_t::AMDSMI_STATUS_SUCCESS)) { @@ -4856,14 +4856,14 @@ amdsmi_get_link_topology_nearest(amdsmi_processor_handle processor_handle, /* * Note: The link topology table is sorted by the number of hops and link weight. 
*/ - topology_nearest_info->processor_list[AMDSMI_MAX_DEVICES] = {nullptr}; + for (auto& nearest_handle : topology_nearest_info->processor_list) { nearest_handle = nullptr; } /* clear every slot in bounds: the array now holds exactly AMDSMI_MAX_DEVICES * AMDSMI_MAX_NUM_XCP entries, so indexing at that value would write one past the end */ topology_nearest_info->count = static_cast(link_topology_order.size()); auto topology_nearest_counter = uint32_t(0); while (!link_topology_order.empty()) { auto link_info = link_topology_order.top(); link_topology_order.pop(); - if (topology_nearest_counter < AMDSMI_MAX_DEVICES) { + if (topology_nearest_counter < (AMDSMI_MAX_DEVICES * AMDSMI_MAX_NUM_XCP)) { topology_nearest_info->processor_list[topology_nearest_counter++] = link_info.target_processor_handle; } }