SWDEV-240800 - P2P device attributes support
Change-Id: I2b060a66715f676dfb7136940e69e21288f509d8
[ROCm/clr commit: 0163d8acc0]
This commit is contained in:
zatwierdzone przez
Karthik Jayaprakash
rodzic
618619e903
commit
bfaa8f8ab5
@@ -1211,6 +1211,16 @@ class Device : public RuntimeObject {
|
||||
uint64_t all_sum;
|
||||
};
|
||||
|
||||
//Attributes that could be retrived from hsa_amd_memory_pool_link_info_t.
|
||||
typedef enum LinkAttribute {
|
||||
kLinkLinkType = 0,
|
||||
kLinkHopCount,
|
||||
kLinkDistance,
|
||||
kLinkAtomicSupport
|
||||
} LinkAttribute;
|
||||
|
||||
typedef std::pair<LinkAttribute, int32_t /* value */> LinkAttrType;
|
||||
|
||||
static constexpr size_t kP2PStagingSize = 4 * Mi;
|
||||
static constexpr size_t kMGSyncDataSize = sizeof(MGSyncData);
|
||||
static constexpr size_t kMGInfoSizePerDevice = kMGSyncDataSize + sizeof(MGSyncInfo);
|
||||
@@ -1477,8 +1487,9 @@ class Device : public RuntimeObject {
|
||||
//! Returns index of current device
|
||||
uint32_t index() const { return index_; }
|
||||
|
||||
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
|
||||
uint32_t* hop_count) {
|
||||
//! Returns value for LinkAttribute for lost of vectors
|
||||
virtual bool findLinkInfo(const amd::Device& other_device,
|
||||
std::vector<LinkAttrType>* link_attr) {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
Regular → Executable
+3
-3
@@ -570,9 +570,9 @@ class Device : public NullDevice {
|
||||
std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() { return queue_pool_; }
|
||||
const std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() const { return queue_pool_; }
|
||||
|
||||
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
|
||||
uint32_t* hop_count) {
|
||||
/* Not Supported in PAL yet */
|
||||
virtual bool findLinkInfo(const amd::Device& other_device,
|
||||
std::vector<LinkAttrType>* link_attr) {
|
||||
// Not implemented in PAL yet
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -153,16 +153,15 @@ Device::Device(hsa_agent_t bkendDevice)
|
||||
}
|
||||
|
||||
void Device::setupCpuAgent() {
|
||||
uint32_t numaDistance = std::numeric_limits<uint32_t>::max();
|
||||
int32_t numaDistance = std::numeric_limits<int32_t>::max();
|
||||
uint32_t index = 0; // 0 as default
|
||||
auto size = cpu_agents_.size();
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
uint32_t hops = 0;
|
||||
uint32_t link_type = 0;
|
||||
uint32_t distance = 0;
|
||||
if (getNumaInfo(cpu_agents_[i].fine_grain_pool, &hops, &link_type, &distance)) {
|
||||
if (distance < numaDistance) {
|
||||
numaDistance = distance;
|
||||
std::vector<amd::Device::LinkAttrType> link_attrs;
|
||||
link_attrs.push_back(std::make_pair(LinkAttribute::kLinkDistance, 0));
|
||||
if (findLinkInfo(cpu_agents_[i].fine_grain_pool, &link_attrs)) {
|
||||
if (link_attrs[0].second < numaDistance) {
|
||||
numaDistance = link_attrs[0].second;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
@@ -2391,63 +2390,122 @@ void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue) {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
bool Device::findLinkTypeAndHopCount(amd::Device* other_device,
|
||||
uint32_t* link_type, uint32_t* hop_count) {
|
||||
uint32_t distance = 0;
|
||||
return getNumaInfo((static_cast<roc::Device*>(other_device))->gpuvm_segment_,
|
||||
hop_count, link_type, &distance);
|
||||
bool Device::findLinkInfo(const amd::Device& other_device,
|
||||
std::vector<LinkAttrType>* link_attrs) {
|
||||
return findLinkInfo((static_cast<const roc::Device*>(&other_device))->gpuvm_segment_,
|
||||
link_attrs);
|
||||
}
|
||||
|
||||
bool Device::getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count,
|
||||
uint32_t* link_type, uint32_t* numa_distance) const {
|
||||
uint32_t hops = 0;
|
||||
bool Device::findLinkInfo(const hsa_amd_memory_pool_t& pool,
|
||||
std::vector<LinkAttrType>* link_attrs) {
|
||||
|
||||
if (!pool.handle) {
|
||||
if ((!pool.handle) || (link_attrs == nullptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_status_t res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
|
||||
if (res != HSA_STATUS_SUCCESS) {
|
||||
// Retrieve the hops between 2 devices.
|
||||
int32_t hops = 0;
|
||||
hsa_status_t hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
|
||||
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
DevLogPrintfError("Cannot get hops info, hsa failed with status: %d", hsa_status);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (hops < 0) {
|
||||
return false;
|
||||
} else if (hops == 0) {
|
||||
//This pool is on its agent
|
||||
*hop_count = 0; // No hop
|
||||
*link_type = -1; // No link, so type is meaningless, caller should ignore it.
|
||||
*numa_distance = 0;
|
||||
}
|
||||
|
||||
// The pool is on its agent
|
||||
if (hops == 0) {
|
||||
for (auto& link_attr : (*link_attrs)) {
|
||||
switch (link_attr.first) {
|
||||
case kLinkLinkType: {
|
||||
// No link, so type is meaningless,
|
||||
// caller should ignore it
|
||||
link_attr.second = -1;
|
||||
break;
|
||||
}
|
||||
case kLinkHopCount: {
|
||||
// no hop
|
||||
link_attr.second = 0;
|
||||
break;
|
||||
}
|
||||
case kLinkDistance: {
|
||||
// distance is zero, if no hops
|
||||
link_attr.second = 0;
|
||||
break;
|
||||
}
|
||||
case kLinkAtomicSupport: {
|
||||
// atomic support if its on the same agent
|
||||
link_attr.second = 1;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_link_info_t *link_info = new hsa_amd_memory_pool_link_info_t[hops];
|
||||
// Retrieve link info on the pool.
|
||||
hsa_amd_memory_pool_link_info_t* link_info = new hsa_amd_memory_pool_link_info_t[hops];
|
||||
hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
|
||||
|
||||
res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
|
||||
|
||||
if (res == HSA_STATUS_SUCCESS) {
|
||||
// Now RocR always set hops=1 between two different devices.
|
||||
// If RocR changes the behavior, we need revisit here.
|
||||
*link_type = link_info[0].link_type;
|
||||
|
||||
uint32_t distance = 0;
|
||||
for (uint32_t i = 0; i < hops; i++) {
|
||||
distance += link_info[i].numa_distance;
|
||||
}
|
||||
*numa_distance = distance;
|
||||
|
||||
// The following logics will be subject to change in rocm3.7
|
||||
uint32_t oneHopDistance = 20; // Default to PCIE
|
||||
if (*link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
|
||||
oneHopDistance = 15;
|
||||
}
|
||||
*hop_count = distance/oneHopDistance;
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
DevLogPrintfError("Cannot retrieve link info, hsa failed with status: %d", hsa_status);
|
||||
delete[] link_info;
|
||||
return false;
|
||||
}
|
||||
|
||||
delete [] link_info;
|
||||
return res == HSA_STATUS_SUCCESS;
|
||||
for (auto& link_attr : (*link_attrs)) {
|
||||
switch (link_attr.first) {
|
||||
case kLinkLinkType: {
|
||||
link_attr.second = static_cast<int32_t>(link_info[0].link_type);
|
||||
break;
|
||||
}
|
||||
case kLinkHopCount: {
|
||||
uint32_t distance = 0;
|
||||
// Because of Rocrs limitation hops is set to 1 always between two different devices
|
||||
// If Rocr Changes the behaviour revisit this logic
|
||||
for (size_t hop_idx = 0; hop_idx < static_cast<size_t>(hops); ++hop_idx) {
|
||||
distance += link_info[hop_idx].numa_distance;
|
||||
}
|
||||
uint32_t oneHopDistance
|
||||
= (link_info[0].link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) ? 15 : 20;
|
||||
link_attr.second = static_cast<int32_t>(distance/oneHopDistance);
|
||||
break;
|
||||
}
|
||||
case kLinkDistance: {
|
||||
uint32_t distance = 0;
|
||||
// Sum of distances between hops
|
||||
for (size_t hop_idx = 0; hop_idx < static_cast<size_t>(hops); ++hop_idx) {
|
||||
distance += link_info[hop_idx].numa_distance;
|
||||
}
|
||||
link_attr.second = static_cast<int32_t>(distance);
|
||||
break;
|
||||
}
|
||||
case kLinkAtomicSupport: {
|
||||
// if either of the atomic is supported
|
||||
link_attr.second = static_cast<int32_t>(link_info[0].atomic_support_64bit
|
||||
|| link_info[0].atomic_support_32bit);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first);
|
||||
delete[] link_info;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] link_info;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace roc
|
||||
|
||||
@@ -298,9 +298,6 @@ class Device : public NullDevice {
|
||||
|
||||
static bool loadHsaModules();
|
||||
|
||||
bool getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count,
|
||||
uint32_t* link_type, uint32_t* numa_distance) const;
|
||||
|
||||
bool create();
|
||||
|
||||
//! Construct a new physical HSA device
|
||||
@@ -470,8 +467,9 @@ class Device : public NullDevice {
|
||||
//! Return multi GPU grid launch sync buffer
|
||||
address MGSync() const { return mg_sync_; }
|
||||
|
||||
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
|
||||
uint32_t* hop_count);
|
||||
//! Returns value for corresponding Link Attributes in a vector, given other device
|
||||
virtual bool findLinkInfo(const amd::Device& other_device,
|
||||
std::vector<LinkAttrType>* link_attr);
|
||||
|
||||
//! Returns a GPU memory object from AMD memory object
|
||||
roc::Memory* getGpuMemory(amd::Memory* mem //!< Pointer to AMD memory object
|
||||
@@ -532,6 +530,10 @@ class Device : public NullDevice {
|
||||
hsa_queue_t* getQueueFromPool(const uint qIndex);
|
||||
|
||||
void* coopHostcallBuffer_;
|
||||
//! returns value for corresponding LinkAttrbutes in a vector given Memory pool.
|
||||
virtual bool findLinkInfo(const hsa_amd_memory_pool_t& pool,
|
||||
std::vector<LinkAttrType>* link_attr);
|
||||
|
||||
public:
|
||||
amd::Atomic<uint> numOfVgpus_; //!< Virtual gpu unique index
|
||||
|
||||
|
||||
Reference in New Issue
Block a user