Files
rocm-systems/tests/kfdtest/src/KFDTestUtil.cpp
T

644 строки
19 KiB
C++
Исходник Обычный вид История

2018-07-23 14:45:44 -04:00
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDTestUtil.hpp"
#include <stdlib.h>
#include <sys/time.h>
2018-07-23 14:45:44 -04:00
#include <algorithm>
#include <vector>
#include "BaseQueue.hpp"
#include "Dispatch.hpp"
#include "SDMAPacket.hpp"
/* Block until the user presses Enter on stdin.
 * Also returns when stdin hits end-of-file or a read error, so a closed or
 * redirected stdin cannot make this loop spin forever.
 */
void WaitUntilInput() {
    char dummy = 0;

    printf("Press enter to continue: ");
    do {
        // scanf returns the number of items converted; anything other than 1
        // means EOF or a read error, in which case 'dummy' was not written.
        if (scanf("%c", &dummy) != 1)
            break;
    } while (dummy != 10);  // enter key's ascii value is 10
}
2018-08-23 13:27:34 +08:00
/* Round @val up to the nearest power of two.
 * A value that already is a power of two is returned unchanged.
 * An input of 0, or any input above 2^63, wraps around and yields 0.
 */
uint64_t RoundToPowerOf2(uint64_t val) {
    val--;
    // Smear the highest set bit into every lower bit position. Shifts of
    // 1, 2, 4, 8, 16 and 32 cover all 64 bits; shifting a 64-bit value by 64
    // or more is undefined behaviour, so the loop stops before that.
    for (unsigned int shift = 1; shift < 64; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}
bool WaitOnValue(const volatile unsigned int *buf, unsigned int value, unsigned int timeOut) {
2018-07-23 14:45:44 -04:00
while (timeOut > 0 && *buf != value) {
Delay(1);
if (timeOut != HSA_EVENTTIMEOUT_INFINITE)
timeOut--;
}
return *buf == value;
}
2018-08-13 09:03:31 -04:00
void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart) {
2018-07-23 14:45:44 -04:00
rLoPart = static_cast<unsigned int>(value);
rHiPart = static_cast<unsigned int>(value >> 32);
}
/* Report whether the hardware scheduler (HWS) is enabled.
 * Reads the CONFIG_HWS driver setting through ReadDriverConfigValue.
 * @Return - true if the setting is absent or greater than zero
 */
bool GetHwCapabilityHWS() {
    unsigned int hwsSetting = 0;

    /* HWS is enabled by default */
    if (!ReadDriverConfigValue(CONFIG_HWS, hwsSetting))
        return true;

    return hwsSetting > 0;
}
/* Create the kind of event the tests use to wait for queue work to complete.
 * The event type is HSA_EVENTTYPE_QUEUE_EVENT on _WIN32 builds and
 * HSA_EVENTTYPE_SIGNAL otherwise.
 * @ManualReset, @IsSignaled, @Event - forwarded unchanged to hsaKmtCreateEvent
 * @NodeId - stored in the event descriptor
 * @Return - status reported by hsaKmtCreateEvent
 */
HSAKMT_STATUS CreateQueueTypeEvent(
    bool                ManualReset,            // IN
    bool                IsSignaled,             // IN
    unsigned int        NodeId,                 // IN
    HsaEvent**          Event                   // OUT
    ) {
    // Value-initialize the descriptor so that any field not assigned below
    // reaches the thunk as zero instead of indeterminate stack contents.
    HsaEventDescriptor Descriptor = {};

// TODO: Create per-OS header with this sort of definitions
#ifdef _WIN32
    Descriptor.EventType = HSA_EVENTTYPE_QUEUE_EVENT;
#else
    Descriptor.EventType = HSA_EVENTTYPE_SIGNAL;
#endif
    // Arbitrary marker value stored as the event's user data
    Descriptor.SyncVar.SyncVar.UserData = reinterpret_cast<void*>(0xABCDABCD);
    Descriptor.NodeId = NodeId;

    return hsaKmtCreateEvent(&Descriptor, ManualReset, IsSignaled, Event);
}
/* Device class flag; it is rewritten on every FamilyIdFromNode() call. */
static bool is_dgpu_dev = false;

/* Return the flag recorded by the latest FamilyIdFromNode() call
 * (false if that function has not been called yet).
 */
bool is_dgpu() { return is_dgpu_dev; }
unsigned int FamilyIdFromNode(const HsaNodeProperties *props) {
unsigned int familyId = FAMILY_UNKNOWN;
switch (props->EngineId.ui32.Major) {
case 7:
if (props->EngineId.ui32.Minor == 0) {
if (props->EngineId.ui32.Stepping == 0)
familyId = FAMILY_KV;
else
familyId = FAMILY_CI;
}
break;
case 8:
familyId = FAMILY_VI;
if (props->EngineId.ui32.Stepping == 1)
familyId = FAMILY_CZ;
break;
case 9:
familyId = FAMILY_AI;
if (props->EngineId.ui32.Stepping == 2)
familyId = FAMILY_RV;
2019-09-05 12:47:16 -05:00
if (props->EngineId.ui32.Stepping == 8)
familyId = FAMILY_AR;
2018-07-23 14:45:44 -04:00
break;
2019-07-04 16:23:37 -04:00
case 10:
familyId = FAMILY_NV;
break;
2018-07-23 14:45:44 -04:00
}
if (props->NumCPUCores && props->NumFComputeCores)
2018-07-23 14:45:44 -04:00
is_dgpu_dev = false;
else
is_dgpu_dev = true;
return familyId;
}
/* Report SDMA engine and queue counts for a node.
 * Each output pointer is optional; a NULL pointer is skipped.
 * @props                        - node properties to read
 * @p_num_sdma_engines           - receives props->NumSdmaEngines
 * @p_num_sdma_xgmi_engines      - receives props->NumSdmaXgmiEngines
 * @p_num_sdma_queues_per_engine - receives 8 for engine major 9 stepping 6
 *                                 and for engine major 10, otherwise 2
 */
void GetSdmaInfo(const HsaNodeProperties *props,
                 unsigned int *p_num_sdma_engines,
                 unsigned int *p_num_sdma_xgmi_engines,
                 unsigned int *p_num_sdma_queues_per_engine) {
    const bool isVega20 = props->EngineId.ui32.Major == 9 &&
                          props->EngineId.ui32.Stepping == 6;
    const bool isNavi = props->EngineId.ui32.Major == 10;
    const int queuesPerEngine = (isVega20 || isNavi) ? 8 : 2;

    if (p_num_sdma_queues_per_engine)
        *p_num_sdma_queues_per_engine = queuesPerEngine;
    if (p_num_sdma_xgmi_engines)
        *p_num_sdma_xgmi_engines = props->NumSdmaXgmiEngines;
    if (p_num_sdma_engines)
        *p_num_sdma_engines = props->NumSdmaEngines;
}
2018-07-23 14:45:44 -04:00
/* Tonga has some workarounds in the thunk that cause certain failures.
 * @Return - true when the engine id is major 8, stepping 2
 */
bool isTonga(const HsaNodeProperties *props) {
    return props->EngineId.ui32.Major == 8 &&
           props->EngineId.ui32.Stepping == 2;
}
/* Return the current gettimeofday() time expressed in microseconds. */
HSAuint64 GetSystemTickCountInMicroSec() {
    struct timeval now;

    gettimeofday(&now, NULL);

    // Seconds are scaled in unsigned 64-bit arithmetic before adding the
    // sub-second microsecond part.
    unsigned long long micros = now.tv_sec * 1000000ULL;
    micros += now.tv_usec;

    return micros;
}
2018-07-23 14:45:44 -04:00
/* Definition of the static Null buffer; it is built with the default constructor. */
const HsaMemoryBuffer HsaMemoryBuffer::Null;
2018-08-13 09:03:31 -04:00
/* Allocate a buffer through hsaKmtAllocMemory and, on dGPU, map it to the GPU.
 * @size       - size handed to the allocator
 * @node       - node to allocate on; a non-zero node (and not scratch) also
 *               restricts the dGPU mapping to that node
 * @zero       - fill the buffer with 0 after allocation (skipped for local
 *               memory); host access is expected in that case
 * @isLocal    - request non-paged, coarse-grained memory without host access
 * @isExec     - request execute access (ignored for scratch)
 * @isScratch  - request scratch memory with host access
 * @isReadOnly - request read-only memory
 * Allocation and mapping failures are reported as non-fatal test failures.
 */
HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec,
                                 bool isScratch, bool isReadOnly)
    :m_Size(size),
    m_pUser(NULL),
    m_pBuf(NULL),
    m_Local(isLocal),
    m_Node(node) {
    m_Flags.Value = 0;
    // The mapped-node mask is only assigned below when is_dgpu() is true.
    // Clear it first so MapMemToNodes/UnmapMemToNodes never update an
    // indeterminate value.
    m_MappedNodes = 0;

    HsaMemMapFlags mapFlags = {0};
    bool map_specific_gpu = (node && !isScratch);

    if (isScratch) {
        m_Flags.ui32.Scratch = 1;
        m_Flags.ui32.HostAccess = 1;
    } else {
        m_Flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;

        if (isLocal) {
            m_Flags.ui32.HostAccess = 0;
            m_Flags.ui32.NonPaged = 1;
            m_Flags.ui32.CoarseGrain = 1;
        } else {
            m_Flags.ui32.HostAccess = 1;
            m_Flags.ui32.NonPaged = 0;
            m_Flags.ui32.CoarseGrain = 0;
            m_Flags.ui32.NoNUMABind = 1;
        }

        if (isExec)
            m_Flags.ui32.ExecuteAccess = 1;
    }

    if (isReadOnly)
        m_Flags.ui32.ReadOnly = 1;

    // Zero-filling is done by the CPU, so it needs host access
    if (zero)
        EXPECT_EQ(m_Flags.ui32.HostAccess, 1);

    EXPECT_SUCCESS(hsaKmtAllocMemory(m_Node, m_Size, m_Flags, &m_pBuf));

    if (is_dgpu()) {
        if (map_specific_gpu)
            EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(m_pBuf, m_Size, NULL, mapFlags, 1, &m_Node));
        else
            EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL));
        m_MappedNodes = 1 << m_Node;
    }

    if (zero && !isLocal)
        Fill(0);
}
/* Wrap caller-owned memory: register it with the thunk and map it to the GPU.
 * @addr - start of the caller's memory; never freed by this object
 * @size - size passed to the register and map calls
 * m_pBuf becomes the GPU address returned by the map call, or @addr when the
 * returned address is 0.
 */
HsaMemoryBuffer::HsaMemoryBuffer(void *addr, HSAuint64 size):
    m_Size(size),
    m_pUser(addr),
    m_pBuf(NULL),
    m_Local(false),
    m_Node(0) {
    HSAuint64 gpuva = 0;

    // No allocation happens here, so no flags are chosen and no nodes are
    // recorded. Clear both so Flags() and MapMemToNodes() do not read
    // indeterminate members.
    m_Flags.Value = 0;
    m_MappedNodes = 0;

    EXPECT_SUCCESS(hsaKmtRegisterMemory(m_pUser, m_Size));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pUser, m_Size, &gpuva));
    m_pBuf = gpuva ? (void *)gpuva : m_pUser;
}
/* Build an empty buffer that owns nothing.
 * Every member is given a defined value because the destructor branches on
 * m_pUser and m_pBuf.
 */
HsaMemoryBuffer::HsaMemoryBuffer()
    :m_Size(0),
    m_pUser(NULL),
    m_pBuf(NULL),
    m_Local(false),
    m_Node(0) {
    m_Flags.Value = 0;
    m_MappedNodes = 0;
}
void HsaMemoryBuffer::Fill(unsigned char value, HSAuint64 offset, HSAuint64 size) {
HSAuint32 uiValue;
EXPECT_EQ(m_Local, 0) << "Local Memory. Call Fill(HSAuint32 value, BaseQueue& baseQueue)";
size = size ? size : m_Size;
ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl;
if (m_pUser != NULL)
2018-08-13 09:03:31 -04:00
memset(reinterpret_cast<char *>(m_pUser) + offset, value, size);
2018-07-23 14:45:44 -04:00
else if (m_pBuf != NULL)
2018-08-13 09:03:31 -04:00
memset(reinterpret_cast<char *>(m_pBuf) + offset, value, size);
2018-07-23 14:45:44 -04:00
else
ASSERT_TRUE(0) << "Invalid HsaMemoryBuffer";
}
/* Fill CPU accessible buffer with the value.
 * @value  - 32-bit word to write repeatedly
 * @offset - byte offset of the first word to write
 * @size   - number of bytes to write; 0 selects m_Size. Expected to be a
 *           multiple of sizeof(HSAuint32).
 * Writes through m_pUser when it is set, otherwise through m_pBuf.
 * A range that does not fit in the buffer is a fatal test failure.
 */
void HsaMemoryBuffer::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) {
    HSAuint64 i;
    HSAuint32 *ptr = NULL;

    EXPECT_EQ(m_Local, 0) << "Local Memory. Call Fill(HSAuint32 value, BaseQueue& baseQueue)";

    size = size ? size : m_Size;
    EXPECT_EQ((size & (sizeof(HSAuint32) - 1)), 0) << "Not word aligned. Call Fill(unsigned char)";
    // Written as a subtraction so that a huge offset or size cannot wrap the
    // sum around and slip past the bounds check.
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    if (m_pUser != NULL)
        ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pUser) + offset);
    else if (m_pBuf != NULL)
        ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pBuf) + offset);

    ASSERT_NOTNULL(ptr);

    for (i = 0; i < size / sizeof(HSAuint32); i++)
        ptr[i] = value;
}
/* Fill GPU only accessible Local memory with @value using SDMA Constant Fill Command
 * @value     - 32-bit value handed to the fill packet
 * @baseQueue - queue to submit on; must be an SDMA queue
 * @offset    - byte offset of the first byte to fill
 * @size      - number of bytes to fill; 0 selects m_Size
 * Submits fill, fence and trap packets, then waits up to g_TestTimeOut for
 * the trap event.
 */
void HsaMemoryBuffer::Fill(HSAuint32 value, BaseQueue& baseQueue, HSAuint64 offset, HSAuint64 size) {
    HsaEvent* event = NULL;

    EXPECT_NE(m_Local, 0) << "Not Local Memory. Call Fill(HSAuint32 value)";

    // Run every fatal check before the event is created: a failing ASSERT_*
    // returns immediately and would otherwise leave the event undestroyed.
    ASSERT_EQ(baseQueue.GetQueueType(), HSA_QUEUE_SDMA) << "Only SDMA queues supported";

    size = size ? size : m_Size;
    // Written as a subtraction so that a huge offset or size cannot wrap the
    // sum around and slip past the bounds check.
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_Node, &event));

    baseQueue.PlacePacket(SDMAFillDataPacket(baseQueue.GetFamilyId(),
                    (reinterpret_cast<void *>(this->As<char*>() + offset)), value, size));

    baseQueue.PlacePacket(SDMAFencePacket(baseQueue.GetFamilyId(),
                    reinterpret_cast<void*>(event->EventData.HWData2), event->EventId));

    baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent(event, g_TestTimeOut));

    hsaKmtDestroyEvent(event);
}
/* Compare the 32-bit word that contains byte offset @location with @pattern.
 * The buffer has to be CPU accessible. The word is read through m_pUser when
 * it is set, otherwise through m_pBuf.
 * @Return - true on a match; false on a mismatch, when @location is out of
 *           bounds, or when the buffer has no backing pointer
 */
bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern) {
    EXPECT_EQ(m_Local, 0) << "Local Memory. Call IsPattern(..baseQueue& baseQueue)";

    /* Out of bounds */
    if (location >= m_Size)
        return false;

    const void *base = (m_pUser != NULL) ? m_pUser : m_pBuf;
    if (base == NULL)
        return false;

    const HSAuint32 *words = reinterpret_cast<const HSAuint32 *>(base);
    const HSAuint64 wordIndex = location / sizeof(HSAuint32);

    return words[wordIndex] == pattern;
}
/* Compare the 32-bit word at byte offset @location with @pattern for a buffer
 * that is supposed to be GPU accessible only.
 * The word is copied into *tmp with an SDMA copy on @baseQueue, followed by a
 * fence and a trap; *tmp is then polled for @pattern.
 * @tmp    - CPU readable scratch word; preset to ~pattern before the copy
 * @Return - true on a match; false when @location is out of bounds, the event
 *           cannot be created, the wait fails, or the value never matches
 */
bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern, BaseQueue& baseQueue, volatile HSAuint32 *tmp) {
    HsaEvent* event = NULL;

    EXPECT_NE(m_Local, 0) << "Not Local Memory. Call IsPattern(HSAuint64 location, HSAuint32 pattern)";
    EXPECT_EQ(baseQueue.GetQueueType(), HSA_QUEUE_SDMA) << "Only SDMA queues supported";

    /* Out of bounds */
    if (location >= m_Size)
        return false;

    if (CreateQueueTypeEvent(false, false, m_Node, &event))
        return false;

    // Seed the scratch word with a value that cannot equal the pattern
    *tmp = ~pattern;

    void *copyDst = static_cast<void *>(const_cast<HSAuint32 *>(tmp));
    void *copySrc = reinterpret_cast<void *>(this->As<HSAuint64>() + location);
    baseQueue.PlacePacket(SDMACopyDataPacket(baseQueue.GetFamilyId(), copyDst, copySrc,
                                             sizeof(HSAuint32)));

    void *fenceAddr = reinterpret_cast<void*>(event->EventData.HWData2);
    baseQueue.PlacePacket(SDMAFencePacket(baseQueue.GetFamilyId(), fenceAddr, event->EventId));

    baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));

    const int waitStatus = hsaKmtWaitOnEvent(event, g_TestTimeOut);
    hsaKmtDestroyEvent(event);

    if (waitStatus)
        return false;

    return WaitOnValue(tmp, pattern);
}
/* Return the buffer size converted to unsigned int. */
unsigned int HsaMemoryBuffer::Size() {
    // The narrowing that the return type implies is spelled out here
    return static_cast<unsigned int>(m_Size);
}
/* Return a copy of the memory flags stored in m_Flags. */
HsaMemFlags HsaMemoryBuffer::Flags() {
return m_Flags;
}
/* Return the node id stored in m_Node. */
unsigned int HsaMemoryBuffer::Node() const {
return m_Node;
}
/* Register the buffer to the given nodes and map it to the GPU.
 * @nodes     - array of node ids
 * @nodes_num - number of entries in @nodes
 * On success the bit for every listed node is set in m_MappedNodes. If the
 * map step fails, the registration is undone.
 * @Return - 0 on success, otherwise the status of the call that failed
 */
int HsaMemoryBuffer::MapMemToNodes(unsigned int *nodes, unsigned int nodes_num) {
    int status = hsaKmtRegisterMemoryToNodes(m_pBuf, m_Size, nodes_num, nodes);
    if (status != 0)
        return status;

    status = hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL);
    if (status != 0) {
        hsaKmtDeregisterMemory(m_pBuf);
        return status;
    }

    unsigned int idx = 0;
    while (idx < nodes_num) {
        const int nodeBit = 1 << nodes[idx];
        m_MappedNodes |= nodeBit;
        ++idx;
    }

    return 0;
}
/* Unmap the buffer from the GPU and deregister it.
 * @nodes     - array of node ids whose bits are cleared in m_MappedNodes
 * @nodes_num - number of entries in @nodes
 * @Return - 0 on success, otherwise the status of the failed unmap call
 *           (m_MappedNodes is left untouched in that case)
 */
int HsaMemoryBuffer::UnmapMemToNodes(unsigned int *nodes, unsigned int nodes_num) {
    const int status = hsaKmtUnmapMemoryToGPU(m_pBuf);
    if (status)
        return status;

    hsaKmtDeregisterMemory(m_pBuf);

    unsigned int idx = 0;
    while (idx < nodes_num) {
        const int nodeBit = 1 << nodes[idx];
        m_MappedNodes &= ~nodeBit;
        ++idx;
    }

    return 0;
}
/* Unmap the buffer from the GPU, deregister it and clear m_MappedNodes.
 * The unmap and deregister calls below take no node list, so the set of
 * mapped nodes does not have to be enumerated. Doing that through a heap
 * allocation only added a failure path that skipped the unmap.
 */
void HsaMemoryBuffer::UnmapAllNodes() {
    /*
     * TODO: When thunk is updated, use hsaKmtRegisterToNodes. Then nodes will be used
     */
    hsaKmtUnmapMemoryToGPU(m_pBuf);
    hsaKmtDeregisterMemory(m_pBuf);
    m_MappedNodes = 0;
}
/* Release whatever the buffer holds.
 * User memory is unmapped and deregistered but not freed. Allocated memory is
 * freed, after being unmapped and deregistered when running on a dGPU with a
 * non-empty mapped-node mask.
 */
HsaMemoryBuffer::~HsaMemoryBuffer() {
    const bool wrapsUserMemory = (m_pUser != NULL);
    const bool ownsAllocation = !wrapsUserMemory && (m_pBuf != NULL);

    if (wrapsUserMemory) {
        hsaKmtUnmapMemoryToGPU(m_pUser);
        hsaKmtDeregisterMemory(m_pUser);
    }

    if (ownsAllocation) {
        if (is_dgpu() && m_MappedNodes) {
            hsaKmtUnmapMemoryToGPU(m_pBuf);
            hsaKmtDeregisterMemory(m_pBuf);
        }
        hsaKmtFreeMemory(m_pBuf, m_Size);
    }

    m_pBuf = NULL;
}
2018-08-13 09:03:31 -04:00
/* Map a graphics buffer into the address space through hsaKmtMapGraphicHandle.
 * @device_handle, @buffer_handle - forwarded to the map call
 * @size - size forwarded to the map call
 * @node - node the handle is mapped on
 * A mapping failure is reported as a non-fatal test failure.
 * NOTE(review): m_Size stays 0 instead of recording @size, and the destructor
 * passes m_Size to the unmap call - confirm this is intended.
 */
HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle,
                                               HSAuint64 size, unsigned int node)
    :m_Size(0),
    m_pBuf(NULL),
    m_graphic_handle(0),
    m_Node(node) {
    // Start from 0 so that m_pBuf is never built from an indeterminate value
    // if the map call fails without writing the address.
    HSAuint64 flat_address = 0;

    EXPECT_SUCCESS(hsaKmtMapGraphicHandle(m_Node, device_handle, buffer_handle, 0, size, &flat_address));
    m_pBuf = reinterpret_cast<void*>(flat_address);
}
/* Undo the graphics handle mapping made by the constructor. */
HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
    const HSAuint64 flatAddress = reinterpret_cast<HSAuint64>(m_pBuf);

    hsaKmtUnmapGraphicHandle(m_Node, flatAddress, m_Size);
}
/* Nothing is set up here; node information is gathered by Init(). */
HsaNodeInfo::HsaNodeInfo() {
}
2018-08-14 09:52:31 -04:00
/* Init - Get and store information about all the HSA nodes from the Thunk Library.
* @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
* @Return - false: if no node information is available
*/
2018-07-23 14:45:44 -04:00
bool HsaNodeInfo::Init(int NumOfNodes) {
HsaNodeProperties *nodeProperties;
_HSAKMT_STATUS status;
bool ret = false;
for (int i = 0; i < NumOfNodes; i++) {
nodeProperties = new HsaNodeProperties();
status = hsaKmtGetNodeProperties(i, nodeProperties);
2018-08-14 09:52:31 -04:00
/* This is not a fatal test (not using assert), since even when it fails for one node
* we want to get information regarding others.
*/
2018-07-23 14:45:44 -04:00
EXPECT_SUCCESS(status) << "Node index: " << i << "hsaKmtGetNodeProperties returned status " << status;
if (status == HSAKMT_STATUS_SUCCESS) {
m_HsaNodeProps.push_back(nodeProperties);
ret = true; // Return true if atleast one information is available
if (nodeProperties->NumFComputeCores)
m_NodesWithGPU.push_back(i);
else
m_NodesWithoutGPU.push_back(i);
} else {
delete nodeProperties;
}
}
return ret;
}
/* Free every node property record allocated by Init(). */
HsaNodeInfo::~HsaNodeInfo() {
    for (unsigned int i = 0; i < m_HsaNodeProps.size(); i++)
        delete m_HsaNodeProps.at(i);

    m_HsaNodeProps.clear();
}
/* Return the ids of the nodes that Init() recorded as having compute cores.
 * The reference points at the member vector, not at a copy.
 */
const std::vector<int>& HsaNodeInfo::GetNodesWithGPU() const {
return m_NodesWithGPU;
}
/* Return the stored properties at position @NodeNum of m_HsaNodeProps.
 * The lookup uses vector::at, which throws std::out_of_range for a position
 * outside the stored range.
 * NOTE(review): Init() stores only nodes whose query succeeded, so a position
 * equals the node id only if no earlier node failed - confirm callers rely on
 * that.
 */
const HsaNodeProperties* HsaNodeInfo::GetNodeProperties(int NodeNum) const {
return m_HsaNodeProps.at(NodeNum);
}
/* Return the properties of the default GPU node.
 * @Return - NULL when HsaDefaultGPUNode() reports that there is no GPU node
 */
const HsaNodeProperties* HsaNodeInfo::HsaDefaultGPUNodeProperties() const {
    const int defaultNode = HsaDefaultGPUNode();

    if (defaultNode >= 0)
        return GetNodeProperties(defaultNode);

    return NULL;
}
/* Pick the GPU node the tests should use.
 * g_TestNodeId wins when it is non-negative and names a known GPU node;
 * otherwise the first GPU node is used.
 * @Return - node id, or -1 when there is no GPU node
 */
const int HsaNodeInfo::HsaDefaultGPUNode() const {
    if (m_NodesWithGPU.empty())
        return -1;

    if (g_TestNodeId >= 0) {
        // Check if this is a valid Id, if so use this else use first available
        const bool requestedIsGpu =
            std::find(m_NodesWithGPU.begin(), m_NodesWithGPU.end(), g_TestNodeId) != m_NodesWithGPU.end();
        if (requestedIsGpu)
            return g_TestNodeId;
    }

    return m_NodesWithGPU.at(0);
}
void HsaNodeInfo::PrintNodeInfo() const {
const HsaNodeProperties *nodeProperties;
for (unsigned int i = 0; i < m_HsaNodeProps.size(); i++) {
nodeProperties = m_HsaNodeProps.at(i);
LOG() << "***********************************" << std::endl;
LOG() << "Node " << i << std::endl;
LOG() << "NumCPUCores=\t" << nodeProperties->NumCPUCores << std::endl;
LOG() << "NumFComputeCores=\t" << nodeProperties->NumFComputeCores << std::endl;
LOG() << "NumMemoryBanks=\t" << nodeProperties->NumMemoryBanks << std::endl;
LOG() << "VendorId=\t" << nodeProperties->VendorId << std::endl;
LOG() << "DeviceId=\t" << nodeProperties->DeviceId << std::endl;
LOG() << "***********************************" << std::endl;
}
LOG() << "Default GPU NODE " << HsaDefaultGPUNode() << std::endl;
}
const bool HsaNodeInfo::IsGPUNodeLargeBar(int node) const {
const HsaNodeProperties *pNodeProperties;
pNodeProperties = GetNodeProperties(node);
if (pNodeProperties) {
HsaMemoryProperties *memoryProperties =
new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(node,
pNodeProperties->NumMemoryBanks, memoryProperties));
for (unsigned bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++)
if (memoryProperties[bank].HeapType ==
HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC) {
delete [] memoryProperties;
return true;
}
delete [] memoryProperties;
}
return false;
}
/* Find the first GPU node for which IsGPUNodeLargeBar() is true.
 * @Return - node id, or -1 if no GPU node qualifies
 */
const int HsaNodeInfo::FindLargeBarGPUNode() const {
    // Bind to the member vector instead of copying it
    const std::vector<int>& gpuNodes = GetNodesWithGPU();

    for (unsigned i = 0; i < gpuNodes.size(); i++)
        if (IsGPUNodeLargeBar(gpuNodes.at(i)))
            return gpuNodes.at(i);

    return -1;
}
2019-10-04 16:49:38 -04:00
const bool HsaNodeInfo::AreGPUNodesXGMI(int node0, int node1) const {
const HsaNodeProperties *pNodeProperties0 = GetNodeProperties(node0);
const HsaNodeProperties *pNodeProperties1 = GetNodeProperties(node1);
if ((pNodeProperties0->HiveID != 0) && (pNodeProperties1->HiveID != 0) &&
(pNodeProperties0->HiveID == pNodeProperties1->HiveID))
return true;
return false;
}
/* Collect the GPU nodes considered peers of @dstNode.
 * @dstNode itself is always appended first. Another GPU node is appended when
 * it shares an XGMI hive with @dstNode, or, if @dstNode is large-BAR, when
 * @bidirectional is false or that node is large-BAR as well.
 * @peers  - vector the node ids are appended to
 * @Return - size of @peers after appending
 */
int HsaNodeInfo::FindAccessiblePeers(std::vector<HSAuint32> *peers, HSAuint32 dstNode,
                                     bool bidirectional) const {
    peers->push_back(dstNode);

    const bool dstIsLargeBar = IsGPUNodeLargeBar(dstNode);

    for (unsigned idx = 0; idx < m_NodesWithGPU.size(); idx++) {
        if (m_NodesWithGPU.at(idx) == dstNode)
            continue;

        bool accessible;
        if (dstIsLargeBar)
            accessible = !bidirectional || IsGPUNodeLargeBar(m_NodesWithGPU.at(idx)) ||
                         AreGPUNodesXGMI(dstNode, m_NodesWithGPU.at(idx));
        else
            accessible = AreGPUNodesXGMI(dstNode, m_NodesWithGPU.at(idx));

        if (accessible)
            peers->push_back(m_NodesWithGPU.at(idx));
    }

    return peers->size();
}