SWDEV-240800 - P2P device attributes support

Change-Id: I2b060a66715f676dfb7136940e69e21288f509d8


[ROCm/clr commit: 0163d8acc0]
This commit is contained in:
kjayapra-amd
2020-08-12 22:00:19 -04:00
zatwierdzone przez Karthik Jayaprakash
rodzic 618619e903
commit bfaa8f8ab5
4 zmienionych plików z 128 dodań i 57 usunięć
+13 -2
Wyświetl plik
@@ -1211,6 +1211,16 @@ class Device : public RuntimeObject {
uint64_t all_sum;
};
//! Link attributes that can be retrieved from hsa_amd_memory_pool_link_info_t.
enum LinkAttribute {
  kLinkLinkType = 0,       //!< Type of the link
  kLinkHopCount = 1,       //!< Number of hops on the link
  kLinkDistance = 2,       //!< Distance of the link
  kLinkAtomicSupport = 3   //!< Whether atomics are supported over the link
};
//! Pairs a requested LinkAttribute (first) with its value (second).
using LinkAttrType = std::pair<LinkAttribute, int32_t>;
static constexpr size_t kP2PStagingSize = 4 * Mi;
static constexpr size_t kMGSyncDataSize = sizeof(MGSyncData);
static constexpr size_t kMGInfoSizePerDevice = kMGSyncDataSize + sizeof(MGSyncInfo);
@@ -1477,8 +1487,9 @@ class Device : public RuntimeObject {
//! Returns index of current device
uint32_t index() const { return index_; }
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
uint32_t* hop_count) {
//! Retrieves the value of every LinkAttribute requested in link_attr for the
//! link to other_device. Each entry of link_attr names the attribute in
//! .first; the value is meant to be returned in .second.
//! This base implementation is not expected to be called: it invokes
//! ShouldNotReachHere() and returns false.
virtual bool findLinkInfo(const amd::Device& other_device,
std::vector<LinkAttrType>* link_attr) {
ShouldNotReachHere();
return false;
}
+3 -3
Wyświetl plik
@@ -570,9 +570,9 @@ class Device : public NullDevice {
std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() { return queue_pool_; }
const std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() const { return queue_pool_; }
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
uint32_t* hop_count) {
/* Not Supported in PAL yet */
//! Link attribute query towards other_device. The PAL device does not
//! implement it: the call hits ShouldNotReachHere() and returns false.
virtual bool findLinkInfo(const amd::Device& other_device,
std::vector<LinkAttrType>* link_attr) {
// Not implemented in PAL yet
ShouldNotReachHere();
return false;
}
@@ -153,16 +153,15 @@ Device::Device(hsa_agent_t bkendDevice)
}
void Device::setupCpuAgent() {
uint32_t numaDistance = std::numeric_limits<uint32_t>::max();
int32_t numaDistance = std::numeric_limits<int32_t>::max();
uint32_t index = 0; // 0 as default
auto size = cpu_agents_.size();
for (uint32_t i = 0; i < size; i++) {
uint32_t hops = 0;
uint32_t link_type = 0;
uint32_t distance = 0;
if (getNumaInfo(cpu_agents_[i].fine_grain_pool, &hops, &link_type, &distance)) {
if (distance < numaDistance) {
numaDistance = distance;
std::vector<amd::Device::LinkAttrType> link_attrs;
link_attrs.push_back(std::make_pair(LinkAttribute::kLinkDistance, 0));
if (findLinkInfo(cpu_agents_[i].fine_grain_pool, &link_attrs)) {
if (link_attrs[0].second < numaDistance) {
numaDistance = link_attrs[0].second;
index = i;
}
}
@@ -2391,63 +2390,122 @@ void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue) {
return buffer;
}
bool Device::findLinkTypeAndHopCount(amd::Device* other_device,
uint32_t* link_type, uint32_t* hop_count) {
uint32_t distance = 0;
return getNumaInfo((static_cast<roc::Device*>(other_device))->gpuvm_segment_,
hop_count, link_type, &distance);
//! Fills link_attrs with the link attributes towards other_device by querying
//! the memory pool held in the peer's gpuvm_segment_.
//! other_device is downcast to roc::Device without a runtime type check.
bool Device::findLinkInfo(const amd::Device& other_device,
                          std::vector<LinkAttrType>* link_attrs) {
  const roc::Device& peer = static_cast<const roc::Device&>(other_device);
  return findLinkInfo(peer.gpuvm_segment_, link_attrs);
}
bool Device::getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count,
uint32_t* link_type, uint32_t* numa_distance) const {
uint32_t hops = 0;
// Fills in the value (.second) of every attribute requested (.first) in
// link_attrs for the link between this device's agent (_bkendDevice) and the
// given memory pool.
// Returns false when the pool handle is null, link_attrs is null, an HSA query
// fails, the reported hop count is negative, or an unknown attribute is
// requested; returns true otherwise.
bool Device::findLinkInfo(const hsa_amd_memory_pool_t& pool,
std::vector<LinkAttrType>* link_attrs) {
if (!pool.handle) {
if ((!pool.handle) || (link_attrs == nullptr)) {
return false;
}
hsa_status_t res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
if (res != HSA_STATUS_SUCCESS) {
// Retrieve the number of hops between the agent and the pool.
int32_t hops = 0;
hsa_status_t hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
if (hsa_status != HSA_STATUS_SUCCESS) {
DevLogPrintfError("Cannot get hops info, hsa failed with status: %d", hsa_status);
return false;
}
if (hops < 0) {
return false;
} else if (hops == 0) {
//This pool is on its agent
*hop_count = 0; // No hop
*link_type = -1; // No link, so type is meaningless, caller should ignore it.
*numa_distance = 0;
}
// The pool is on its agent: answer every request with fixed values,
// without querying the link info.
if (hops == 0) {
for (auto& link_attr : (*link_attrs)) {
switch (link_attr.first) {
case kLinkLinkType: {
// No link, so type is meaningless,
// caller should ignore it
link_attr.second = -1;
break;
}
case kLinkHopCount: {
// no hop
link_attr.second = 0;
break;
}
case kLinkDistance: {
// distance is zero, if no hops
link_attr.second = 0;
break;
}
case kLinkAtomicSupport: {
// atomics are reported as supported when the pool is on the same agent
link_attr.second = 1;
break;
}
default: {
DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first);
return false;
}
}
}
return true;
}
hsa_amd_memory_pool_link_info_t *link_info = new hsa_amd_memory_pool_link_info_t[hops];
// Retrieve link info on the pool.
// The array holds one entry per hop and is released with delete[] on every
// exit path below.
hsa_amd_memory_pool_link_info_t* link_info = new hsa_amd_memory_pool_link_info_t[hops];
hsa_status = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
res = hsa_amd_agent_memory_pool_get_info(_bkendDevice, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
if (res == HSA_STATUS_SUCCESS) {
// Now RocR always set hops=1 between two different devices.
// If RocR changes the behavior, we need revisit here.
*link_type = link_info[0].link_type;
uint32_t distance = 0;
for (uint32_t i = 0; i < hops; i++) {
distance += link_info[i].numa_distance;
}
*numa_distance = distance;
// The following logics will be subject to change in rocm3.7
uint32_t oneHopDistance = 20; // Default to PCIE
if (*link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
oneHopDistance = 15;
}
*hop_count = distance/oneHopDistance;
if (hsa_status != HSA_STATUS_SUCCESS) {
DevLogPrintfError("Cannot retrieve link info, hsa failed with status: %d", hsa_status);
delete[] link_info;
return false;
}
delete [] link_info;
return res == HSA_STATUS_SUCCESS;
// Answer every requested attribute from the retrieved link info.
for (auto& link_attr : (*link_attrs)) {
switch (link_attr.first) {
case kLinkLinkType: {
// Only the link type of the first hop is reported.
link_attr.second = static_cast<int32_t>(link_info[0].link_type);
break;
}
case kLinkHopCount: {
uint32_t distance = 0;
// Due to a ROCr limitation, hops is always set to 1 between two different devices.
// If ROCr changes this behaviour, revisit this logic.
for (size_t hop_idx = 0; hop_idx < static_cast<size_t>(hops); ++hop_idx) {
distance += link_info[hop_idx].numa_distance;
}
// Hop count is the summed distance divided by a per-hop distance:
// 15 when the first hop is XGMI, 20 otherwise.
uint32_t oneHopDistance
= (link_info[0].link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) ? 15 : 20;
link_attr.second = static_cast<int32_t>(distance/oneHopDistance);
break;
}
case kLinkDistance: {
uint32_t distance = 0;
// Sum of the numa_distance values of all hops
for (size_t hop_idx = 0; hop_idx < static_cast<size_t>(hops); ++hop_idx) {
distance += link_info[hop_idx].numa_distance;
}
link_attr.second = static_cast<int32_t>(distance);
break;
}
case kLinkAtomicSupport: {
// 1 if the first hop supports either 64-bit or 32-bit atomics, else 0
link_attr.second = static_cast<int32_t>(link_info[0].atomic_support_64bit
|| link_info[0].atomic_support_32bit);
break;
}
default: {
DevLogPrintfError("Invalid LinkAttribute: %d ", link_attr.first);
delete[] link_info;
return false;
}
}
}
delete[] link_info;
return true;
}
} // namespace roc
@@ -298,9 +298,6 @@ class Device : public NullDevice {
static bool loadHsaModules();
bool getNumaInfo(const hsa_amd_memory_pool_t& pool, uint32_t* hop_count,
uint32_t* link_type, uint32_t* numa_distance) const;
bool create();
//! Construct a new physical HSA device
@@ -470,8 +467,9 @@ class Device : public NullDevice {
//! Return multi GPU grid launch sync buffer
address MGSync() const { return mg_sync_; }
virtual bool findLinkTypeAndHopCount(amd::Device* other_device, uint32_t* link_type,
uint32_t* hop_count);
//! Returns value for corresponding Link Attributes in a vector, given other device
virtual bool findLinkInfo(const amd::Device& other_device,
std::vector<LinkAttrType>* link_attr);
//! Returns a GPU memory object from AMD memory object
roc::Memory* getGpuMemory(amd::Memory* mem //!< Pointer to AMD memory object
@@ -532,6 +530,10 @@ class Device : public NullDevice {
hsa_queue_t* getQueueFromPool(const uint qIndex);
void* coopHostcallBuffer_;
//! Returns value for corresponding LinkAttributes in a vector, given a memory pool.
virtual bool findLinkInfo(const hsa_amd_memory_pool_t& pool,
std::vector<LinkAttrType>* link_attr);
public:
amd::Atomic<uint> numOfVgpus_; //!< Virtual gpu unique index