From e3c1bb153ba808fc6c2c490507faf760fb7e13a2 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Sat, 13 Jun 2020 08:13:59 +0800 Subject: [PATCH] Deduce hops from numa distance ROCr does not return real hop counts, so we have to deduce hops from the NUMA distance as a workaround. This is subject to change, as the driver team will provide a long-term solution in ROCm 3.7. Change-Id: Ifb939ed848db190c3d544bb7f30a5821161921e6 [ROCm/clr commit: b14d9e0a366bd641c835f391761bff927aaf45fb] --- projects/clr/rocclr/device/rocm/rocdevice.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 5e49e33921..cfdbcf06cd 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -2383,8 +2383,6 @@ bool Device::getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count, HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info); if (res == HSA_STATUS_SUCCESS) { - *hop_count = hops; - // Now RocR always set hops=1 between two different devices. // If RocR changes the behavior, we need revisit here. *link_type = link_info[0].link_type; @@ -2394,6 +2392,13 @@ bool Device::getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count, distance += link_info[i].numa_distance; } *numa_distance = distance; + + // The following logics will be subject to change in rocm3.7 + uint32_t oneHopDistance = 20; // Default to PCIE + if (*link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) { + oneHopDistance = 15; + } + *hop_count = distance/oneHopDistance; } delete [] link_info;