From de5bc164dec7e0939997df28021e7d0811aabe5b Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Wed, 25 Oct 2023 08:17:25 -0500 Subject: [PATCH] Query the CPU and GPU link type The rsmi_topo_get_link_type() is extended to support querying the link type between a GPU and the CPU by passing dv_ind_dst as 0xFFFFFFFF. Change-Id: I1f212a01e8120adb70a08ab772fa9faaaecefa29 --- include/rocm_smi/rocm_smi.h | 14 +++++++++++++- src/rocm_smi.cc | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index 9b169f9827..ea945c91a0 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -686,6 +686,10 @@ typedef enum _RSMI_IO_LINK_TYPE { RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types } RSMI_IO_LINK_TYPE; +//! The CPU node index which will be used in rsmi_topo_get_link_type +//! to query the link type between GPU and CPU +#define CPU_NODE_INDEX 0xFFFFFFFF + /** * @brief The utilization counter type */ @@ -3671,7 +3675,7 @@ rsmi_minmax_bandwidth_get(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t *min_bandwidth, uint64_t *max_bandwidth); /** - * @brief Retrieve the hops and the connection type between 2 GPUs + * @brief Retrieve the hops and the connection type between a GPU and another GPU or the CPU * * @details Given a source device index @p dv_ind_src and * a destination device index @p dv_ind_dst, and a pointer to an @@ -3680,6 +3684,14 @@ rsmi_minmax_bandwidth_get(uint32_t dv_ind_src, uint32_t dv_ind_dst, * between the device @p dv_ind_src and @p dv_ind_dst to the memory * pointed to by @p hops and @p type.
+ * To query the link type between a GPU and the CPU, given a source GPU index + * @p dv_ind_src and a destination device index @p dv_ind_dst set to + * CPU_NODE_INDEX (0xFFFFFFFF), a pointer to an + * uint64_t @p hops and a pointer to an RSMI_IO_LINK_TYPE @p type, + * this function will write the number of hops and the connection type + * between the device @p dv_ind_src and CPU to the memory + * pointed to by @p hops and @p type. + * * @param[in] dv_ind_src the source device index * * @param[in] dv_ind_dst the destination device index diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 9a7bd5d3ac..8749b13213 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -4387,6 +4387,30 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, rsmi_status_t status; uint32_t node_ind_dst; + + // handle the link type for CPU + if (dv_ind_dst == CPU_NODE_INDEX) { + // No CPU connected + if (kfd_node->numa_node_weight() == 0) { + return RSMI_STATUS_NOT_SUPPORTED; + } + amd::smi::IO_LINK_TYPE io_link_type = + kfd_node->numa_node_type(); + switch (io_link_type) { + case amd::smi::IOLINK_TYPE_XGMI: + *type = RSMI_IOLINK_TYPE_XGMI; + *hops = 1; + return RSMI_STATUS_SUCCESS; + case amd::smi::IOLINK_TYPE_PCIEXPRESS: + *type = RSMI_IOLINK_TYPE_PCIEXPRESS; + // always be the same CPU node + *hops = 2; + return RSMI_STATUS_SUCCESS; + default: + return RSMI_STATUS_NOT_SUPPORTED; + } + } + int ret = smi.get_node_index(dv_ind_dst, &node_ind_dst); if (ret == 0) {