diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 7f15c841a3..2e0cf01cf3 100755 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1211,6 +1211,16 @@ class Device : public RuntimeObject { uint64_t all_sum; }; + //Attributes that could be retrieved from hsa_amd_memory_pool_link_info_t. + typedef enum LinkAttribute { + kLinkLinkType = 0, + kLinkHopCount, + kLinkDistance, + kLinkAtomicSupport + } LinkAttribute; + + typedef std::pair LinkAttrType; + static constexpr size_t kP2PStagingSize = 4 * Mi; static constexpr size_t kMGSyncDataSize = sizeof(MGSyncData); static constexpr size_t kMGInfoSizePerDevice = kMGSyncDataSize + sizeof(MGSyncInfo); @@ -1477,8 +1487,9 @@ class Device : public RuntimeObject { //! Returns index of current device uint32_t index() const { return index_; } - virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type, - uint32_t* hop_count) { + //! Returns the value of each LinkAttribute listed in the vector + virtual bool findLinkInfo(const amd::Device& other_device, + std::vector* link_attr) { ShouldNotReachHere(); return false; } diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp old mode 100644 new mode 100755 index 10a9e9cb79..fa39b250a5 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -570,9 +570,9 @@ class Device : public NullDevice { std::map& QueuePool() { return queue_pool_; } const std::map& QueuePool() const { return queue_pool_; } - virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type, - uint32_t* hop_count) { - /* Not Supported in PAL yet */ + virtual bool findLinkInfo(const amd::Device& other_device, + std::vector* link_attr) { + // Not implemented in PAL yet ShouldNotReachHere(); return false; } diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp 
b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 81a89f49b9..29455531b0 100755 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -153,16 +153,15 @@ Device::Device(hsa_agent_t bkendDevice) } void Device::setupCpuAgent() { - uint32_t numaDistance = std::numeric_limits::max(); + int32_t numaDistance = std::numeric_limits::max(); uint32_t index = 0; // 0 as default auto size = cpu_agents_.size(); for (uint32_t i = 0; i < size; i++) { - uint32_t hops = 0; - uint32_t link_type = 0; - uint32_t distance = 0; - if (getNumaInfo(cpu_agents_[i].fine_grain_pool, &hops, &link_type, &distance)) { - if (distance < numaDistance) { - numaDistance = distance; + std::vector link_attrs; + link_attrs.push_back(std::make_pair(LinkAttribute::kLinkDistance, 0)); + if (findLinkInfo(cpu_agents_[i].fine_grain_pool, &link_attrs)) { + if (link_attrs[0].second < numaDistance) { + numaDistance = link_attrs[0].second; index = i; } } @@ -2391,63 +2390,122 @@ void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue) { return buffer; } -bool Device::findLinkTypeAndHopCount(amd::Device* other_device, - uint32_t* link_type, uint32_t* hop_count) { - uint32_t distance = 0; - return getNumaInfo((static_cast(other_device))->gpuvm_segment_, - hop_count, link_type, &distance); +bool Device::findLinkInfo(const amd::Device& other_device, + std::vector* link_attrs) { + return findLinkInfo((static_cast(&other_device))->gpuvm_segment_, + link_attrs); } -bool Device::getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count, - uint32_t* link_type, uint32_t* numa_distance) const { - uint32_t hops = 0; +bool Device::findLinkInfo(const hsa_amd_memory_pool_t& pool, + std::vector* link_attrs) { - if (!pool.handle) { + if ((!pool.handle) || (link_attrs == nullptr)) { return false; } - hsa_status_t res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool, - HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops); - if (res != 
HSA_STATUS_SUCCESS) { + // Retrieve the hops between 2 devices. + int32_t hops = 0; + hsa_status_t hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops); + + if (hsa_status != HSA_STATUS_SUCCESS) { + DevLogPrintfError("Cannot get hops info, hsa failed with status: %d", hsa_status); return false; } if (hops < 0) { return false; - } else if (hops == 0) { - //This pool is on its agent - *hop_count = 0; // No hop - *link_type = -1; // No link, so type is meaningless, caller should ignore it. - *numa_distance = 0; + } + + // The pool is on its agent + if (hops == 0) { + for (auto& link_attr : (*link_attrs)) { + switch (link_attr.first) { + case kLinkLinkType: { + // No link, so type is meaningless, + // caller should ignore it + link_attr.second = -1; + break; + } + case kLinkHopCount: { + // no hop + link_attr.second = 0; + break; + } + case kLinkDistance: { + // distance is zero, if no hops + link_attr.second = 0; + break; + } + case kLinkAtomicSupport: { + // atomic support if its on the same agent + link_attr.second = 1; + break; + } + default: { + DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first); + return false; + } + } + } return true; } - hsa_amd_memory_pool_link_info_t *link_info = new hsa_amd_memory_pool_link_info_t[hops]; + // Retrieve link info on the pool. + hsa_amd_memory_pool_link_info_t* link_info = new hsa_amd_memory_pool_link_info_t[hops]; + hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info); - res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool, - HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info); - - if (res == HSA_STATUS_SUCCESS) { - // Now RocR always set hops=1 between two different devices. - // If RocR changes the behavior, we need revisit here. 
- *link_type = link_info[0].link_type; - - uint32_t distance = 0; - for (uint32_t i = 0; i < hops; i++) { - distance += link_info[i].numa_distance; - } - *numa_distance = distance; - - // The following logics will be subject to change in rocm3.7 - uint32_t oneHopDistance = 20; // Default to PCIE - if (*link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) { - oneHopDistance = 15; - } - *hop_count = distance/oneHopDistance; + if (hsa_status != HSA_STATUS_SUCCESS) { + DevLogPrintfError("Cannot retrieve link info, hsa failed with status: %d", hsa_status); + delete[] link_info; + return false; } - delete [] link_info; - return res == HSA_STATUS_SUCCESS; + for (auto& link_attr : (*link_attrs)) { + switch (link_attr.first) { + case kLinkLinkType: { + link_attr.second = static_cast(link_info[0].link_type); + break; + } + case kLinkHopCount: { + uint32_t distance = 0; + // Because of a RocR limitation, hops is always set to 1 between two different devices + // If RocR changes this behaviour, revisit this logic + for (size_t hop_idx = 0; hop_idx < static_cast(hops); ++hop_idx) { + distance += link_info[hop_idx].numa_distance; + } + uint32_t oneHopDistance + = (link_info[0].link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) ? 
15 : 20; + link_attr.second = static_cast(distance/oneHopDistance); + break; + } + case kLinkDistance: { + uint32_t distance = 0; + // Sum of distances between hops + for (size_t hop_idx = 0; hop_idx < static_cast(hops); ++hop_idx) { + distance += link_info[hop_idx].numa_distance; + } + link_attr.second = static_cast(distance); + break; + } + case kLinkAtomicSupport: { + // if either of the atomic is supported + link_attr.second = static_cast(link_info[0].atomic_support_64bit + || link_info[0].atomic_support_32bit); + break; + } + default: { + DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first); + delete[] link_info; + return false; + } + } + } + + delete[] link_info; + + return true; } } // namespace roc diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 9980ba499e..a4cde6cbce 100755 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -298,9 +298,6 @@ class Device : public NullDevice { static bool loadHsaModules(); - bool getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count, - uint32_t* link_type, uint32_t* numa_distance) const; - bool create(); //! Construct a new physical HSA device @@ -470,8 +467,9 @@ class Device : public NullDevice { //! Return multi GPU grid launch sync buffer address MGSync() const { return mg_sync_; } - virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type, - uint32_t* hop_count); + //! Returns value for corresponding Link Attributes in a vector, given other device + virtual bool findLinkInfo(const amd::Device& other_device, + std::vector* link_attr); //! Returns a GPU memory object from AMD memory object roc::Memory* getGpuMemory(amd::Memory* mem //!< Pointer to AMD memory object @@ -532,6 +530,10 @@ class Device : public NullDevice { hsa_queue_t* getQueueFromPool(const uint qIndex); void* coopHostcallBuffer_; + //! 
Returns value for corresponding Link Attributes in a vector, given a memory pool. + virtual bool findLinkInfo(const hsa_amd_memory_pool_t& pool, + std::vector* link_attr); + public: amd::Atomic numOfVgpus_; //!< Virtual gpu unique index