kfdtest: Clean up comments
Consolidate style (use /* */ for multi-line), fix typos,
use dword instad of DWORD/DWord
Change-Id: I620e45c1687550db41127e45641b7d79d28223a1
[ROCm/ROCR-Runtime commit: 414042abf7]
This commit is contained in:
@@ -45,7 +45,7 @@ unsigned int AqlQueue::RptrWhenConsumed() {
|
||||
}
|
||||
|
||||
void AqlQueue::SubmitPacket() {
|
||||
// m_pending Wptr is in DWORDs
|
||||
// m_pending Wptr is in dwords
|
||||
*m_Resources.Queue_write_ptr = m_pendingWptr;
|
||||
*(m_Resources.Queue_DoorBell) = Wptr();
|
||||
}
|
||||
|
||||
@@ -31,14 +31,14 @@ class AqlQueue : public BaseQueue {
|
||||
AqlQueue();
|
||||
virtual ~AqlQueue();
|
||||
|
||||
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
|
||||
// @brief Updates queue write pointer and sets the queue doorbell to the queue write pointer
|
||||
virtual void SubmitPacket();
|
||||
|
||||
// @ return read pointer are in DWORDs
|
||||
// @return Read pointer in dwords
|
||||
virtual unsigned int Rptr();
|
||||
// @ return write pointer are in DWORDs
|
||||
// @return Write pointer in dwords
|
||||
virtual unsigned int Wptr();
|
||||
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
|
||||
// @return Expected m_Resources.Queue_read_ptr when all packets are consumed
|
||||
virtual unsigned int RptrWhenConsumed();
|
||||
|
||||
protected:
|
||||
@@ -47,4 +47,4 @@ class AqlQueue : public BaseQueue {
|
||||
virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE_AQL; }
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif // __KFD_AQL_QUEUE__H__
|
||||
|
||||
@@ -25,8 +25,8 @@
|
||||
#define __KFD_BASE_PACKET__H__
|
||||
|
||||
/**
|
||||
* all packets profiles must be defined here
|
||||
* every type defined here has sub-types
|
||||
* All packets profiles must be defined here
|
||||
* Every type defined here has sub-types
|
||||
*/
|
||||
enum PACKETTYPE {
|
||||
PACKETTYPE_PM4,
|
||||
@@ -40,13 +40,13 @@ class BasePacket {
|
||||
BasePacket(void) {}
|
||||
virtual ~BasePacket(void) {}
|
||||
|
||||
// @returns the packet type
|
||||
// @returns Packet type
|
||||
virtual PACKETTYPE PacketType() const = 0;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const = 0;
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const = 0;
|
||||
// @returns the packet size in DWORDS
|
||||
// @returns Packet size in dwordS
|
||||
unsigned int SizeInDWords() const { return SizeInBytes()/sizeof(unsigned int); }
|
||||
|
||||
void Dump() const;
|
||||
@@ -54,4 +54,4 @@ class BasePacket {
|
||||
protected:
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif // __KFD_BASE_PACKET__H__
|
||||
|
||||
@@ -41,7 +41,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
|
||||
HSAKMT_STATUS status;
|
||||
|
||||
if (m_QueueBuf != NULL) {
|
||||
// queue already exist, one queue per object
|
||||
// Queue already exists, one queue per object
|
||||
Destroy();
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
|
||||
status = HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// needs to match the queue write ptr
|
||||
// Needs to match the queue write ptr
|
||||
m_pendingWptr = 0;
|
||||
m_pendingWptr64 = 0;
|
||||
m_Node = NodeId;
|
||||
@@ -137,7 +137,7 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
|
||||
unsigned int queueSizeInDWord = m_QueueBuf->Size() / sizeof(uint32_t);
|
||||
|
||||
if (writePtr + packetSizeInDwords > queueSizeInDWord) {
|
||||
// wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
|
||||
// Wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
|
||||
dwordsRequired += queueSizeInDWord - writePtr;
|
||||
}
|
||||
|
||||
@@ -147,14 +147,14 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
|
||||
ASSERT_GE(queueSizeInDWord, packetSizeInDwords) << "Cannot add a packet, packet size too large";
|
||||
|
||||
if (writePtr + packetSizeInDwords >= queueSizeInDWord) {
|
||||
// wraparound
|
||||
// Wraparound
|
||||
while (writePtr + packetSizeInDwords > queueSizeInDWord) {
|
||||
m_QueueBuf->As<unsigned int *>()[writePtr] = CMD_NOP;
|
||||
writePtr = (writePtr + 1) % queueSizeInDWord;
|
||||
writePtr64++;
|
||||
}
|
||||
|
||||
// not updating Wptr since we might want to do place packet without submission
|
||||
// Not updating Wptr since we might want to place the packet without submission
|
||||
m_pendingWptr = (writePtr % queueSizeInDWord);
|
||||
m_pendingWptr64 = writePtr64;
|
||||
}
|
||||
|
||||
@@ -48,10 +48,10 @@ class BaseQueue {
|
||||
HSAuint64 *pointers = NULL);
|
||||
/** Update the queue.
|
||||
* @see hsaKmtUpdateQueue
|
||||
* @param percent the new queue percentage
|
||||
* @param priority the new queue priority
|
||||
* @param percent New queue percentage
|
||||
* @param priority New queue priority
|
||||
* @param nullifyBuffer
|
||||
* if 'true', set the new buffer address to NULL and the size to 0. Otherwise
|
||||
* If 'true', set the new buffer address to NULL and the size to 0. Otherwise
|
||||
* don't change the queue buffer address/size.
|
||||
*/
|
||||
virtual HSAKMT_STATUS Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer);
|
||||
@@ -64,17 +64,17 @@ class BaseQueue {
|
||||
* Note that all packets being consumed is not the same as all packets being processed.
|
||||
*/
|
||||
virtual void Wait4PacketConsumption();
|
||||
/** @brief place packet and submit it in one go
|
||||
/** @brief Place packet and submit it in one function
|
||||
*/
|
||||
virtual void PlaceAndSubmitPacket(const BasePacket &packet);
|
||||
/** @brief copy packet to queue and update write pointer
|
||||
/** @brief Copy packet to queue and update write pointer
|
||||
*/
|
||||
virtual void PlacePacket(const BasePacket &packet);
|
||||
/** @brief update queue write pointer and sets the queue doorbell to the queue write pointer
|
||||
/** @brief Update queue write pointer and set the queue doorbell to the queue write pointer
|
||||
*/
|
||||
virtual void SubmitPacket() = 0;
|
||||
/** @brief checkes if all packets in queue already processed
|
||||
* compares queue read and write pointers
|
||||
/** @brief Check if all packets in queue are already processed
|
||||
* Compare queue read and write pointers
|
||||
*/
|
||||
bool AllPacketsSubmitted();
|
||||
|
||||
@@ -100,11 +100,11 @@ class BaseQueue {
|
||||
HsaMemoryBuffer *m_QueueBuf;
|
||||
unsigned int m_Node;
|
||||
|
||||
// @ return write pointer modulo queue size in DWORDs
|
||||
// @return Write pointer modulo queue size in dwords
|
||||
virtual unsigned int Wptr() = 0;
|
||||
// @ return read pointer modulo queue size in DWORDs
|
||||
// @return Read pointer modulo queue size in dwords
|
||||
virtual unsigned int Rptr() = 0;
|
||||
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
|
||||
// @return Expected m_Resources.Queue_read_ptr when all packets consumed
|
||||
virtual unsigned int RptrWhenConsumed() = 0;
|
||||
virtual PACKETTYPE PacketTypeSupported() = 0;
|
||||
|
||||
@@ -131,4 +131,4 @@ class QueueArray {
|
||||
void Destroy();
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif // __KFD_BASE_QUEUE__H__
|
||||
|
||||
@@ -90,7 +90,7 @@ void Dispatch::Sync(unsigned int timeout) {
|
||||
ASSERT_SUCCESS(hsaKmtWaitOnEvent(m_pEop, timeout));
|
||||
}
|
||||
|
||||
// returning with status in order to allow to take actions before proc termination
|
||||
// Returning with status in order to allow actions to be performed before process termination
|
||||
int Dispatch::SyncWithStatus(unsigned int timeout) {
|
||||
int stat;
|
||||
|
||||
@@ -103,7 +103,7 @@ void Dispatch::BuildIb() {
|
||||
SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
|
||||
SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);
|
||||
|
||||
// starts at COMPUTE_START_X
|
||||
// Starts at COMPUTE_START_X
|
||||
const unsigned int COMPUTE_DISPATCH_DIMS_VALUES[] = {
|
||||
0, // START_X
|
||||
0, // START_Y
|
||||
@@ -138,14 +138,14 @@ void Dispatch::BuildIb() {
|
||||
pgmRsrc2
|
||||
};
|
||||
|
||||
// starts at COMPUTE_PGM_LO
|
||||
// Starts at COMPUTE_PGM_LO
|
||||
const unsigned int COMPUTE_PGM_VALUES_GFX8[] = {
|
||||
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
|
||||
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
|
||||
| (is_dgpu() ? 0 : (1<<8)) // including PGM_ATC=?
|
||||
};
|
||||
|
||||
// starts at COMPUTE_PGM_LO
|
||||
// Starts at COMPUTE_PGM_LO
|
||||
const unsigned int COMPUTE_PGM_VALUES_GFX9[] = {
|
||||
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
|
||||
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
|
||||
@@ -156,17 +156,17 @@ void Dispatch::BuildIb() {
|
||||
static_cast<uint32_t>(m_scratch_base >> 40)
|
||||
};
|
||||
|
||||
// starts at COMPUTE_RESOURCE_LIMITS
|
||||
// Starts at COMPUTE_RESOURCE_LIMITS
|
||||
const unsigned int COMPUTE_RESOURCE_LIMITS[] = {
|
||||
0, // COMPUTE_RESOURCE_LIMITS
|
||||
};
|
||||
|
||||
// starts at COMPUTE_TMPRING_SIZE
|
||||
// Starts at COMPUTE_TMPRING_SIZE
|
||||
const unsigned int COMPUTE_TMPRING_SIZE[] = {
|
||||
m_ComputeTmpringSize, // COMPUTE_TMPRING_SIZE
|
||||
};
|
||||
|
||||
// starts at COMPUTE_RESTART_X
|
||||
// Starts at COMPUTE_RESTART_X
|
||||
const unsigned int COMPUTE_RESTART_VALUES[] = {
|
||||
0, // COMPUTE_RESTART_X
|
||||
0, // COMPUTE_RESTART_Y
|
||||
@@ -174,13 +174,13 @@ void Dispatch::BuildIb() {
|
||||
0 // COMPUTE_THREAD_TRACE_ENABLE
|
||||
};
|
||||
|
||||
// starts at COMPUTE_USER_DATA_0
|
||||
// Starts at COMPUTE_USER_DATA_0
|
||||
const unsigned int COMPUTE_USER_DATA_VALUES[] = {
|
||||
// Reg name - use in KFDtest - use in ABI
|
||||
arg0, // COMPUTE_USER_DATA_0 - arg0 - resource descriptor for the scratch buffer - 1st DWORD
|
||||
arg1, // COMPUTE_USER_DATA_1 - arg1 - resource descriptor for the scratch buffer - 2nd DWORD
|
||||
arg2, // COMPUTE_USER_DATA_2 - arg2 - resource descriptor for the scratch buffer - 3rd DWORD
|
||||
arg3, // COMPUTE_USER_DATA_3 - arg3 - resource descriptor for the scratch buffer - 4th DWORD
|
||||
arg0, // COMPUTE_USER_DATA_0 - arg0 - resource descriptor for the scratch buffer - 1st dword
|
||||
arg1, // COMPUTE_USER_DATA_1 - arg1 - resource descriptor for the scratch buffer - 2nd dword
|
||||
arg2, // COMPUTE_USER_DATA_2 - arg2 - resource descriptor for the scratch buffer - 3rd dword
|
||||
arg3, // COMPUTE_USER_DATA_3 - arg3 - resource descriptor for the scratch buffer - 4th dword
|
||||
static_cast<uint32_t>(m_scratch_base), // COMPUTE_USER_DATA_4 - flat_scratch_lo
|
||||
static_cast<uint32_t>(m_scratch_base >> 32), // COMPUTE_USER_DATA_4 - flat_scratch_hi
|
||||
0, // COMPUTE_USER_DATA_6 - - AQL queue address, low part
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
bool Ok2Run(unsigned int testProfile) {
|
||||
bool testMatchProfile = true;
|
||||
if ((testProfile & g_TestRunProfile) == 0) {
|
||||
// display msg to notify a test that is not running
|
||||
WARN() << "Test is skipped beacuse profile does not match current run mode" << std::endl;
|
||||
testMatchProfile = false;
|
||||
}
|
||||
@@ -35,11 +34,10 @@ bool Ok2Run(unsigned int testProfile) {
|
||||
return testMatchProfile;
|
||||
}
|
||||
|
||||
// This predication is used when specific HW capabilites must exist for the test to succeed.
|
||||
// This predication is used when specific HW capabilities must exist for the test to succeed.
|
||||
bool TestReqEnvCaps(unsigned int envCaps) {
|
||||
bool testMatchEnv = true;
|
||||
if ((envCaps & g_TestENVCaps) != envCaps) {
|
||||
// display msg to notify a test that is not running
|
||||
WARN() << "Test is skipped due to HW capability issues" << std::endl;
|
||||
testMatchEnv = false;
|
||||
}
|
||||
@@ -47,12 +45,11 @@ bool TestReqEnvCaps(unsigned int envCaps) {
|
||||
return testMatchEnv;
|
||||
}
|
||||
|
||||
// This predication is used when specific HW capabilites must abscent for the test to succeed.
|
||||
// e.g testing capabilites not supported by HW scheduling
|
||||
// This predication is used when specific HW capabilities must be absent for the test to succeed.
|
||||
// e.g Testing capabilities not supported by HW scheduling
|
||||
bool TestReqNoEnvCaps(unsigned int envCaps) {
|
||||
bool testMatchEnv = true;
|
||||
if ((envCaps & g_TestENVCaps) != 0) {
|
||||
// display msg to notify a test that is not running
|
||||
WARN() << "Test is skipped due to HW capability issues" << std::endl;
|
||||
testMatchEnv = false;
|
||||
}
|
||||
|
||||
@@ -29,24 +29,24 @@
|
||||
#include "KFDTestFlags.hpp"
|
||||
|
||||
enum LOGTYPE {
|
||||
LOGTYPE_INFO, // msg header in green
|
||||
LOGTYPE_INFO, // msg header in green
|
||||
LOGTYPE_WARNING // msg header in yellow
|
||||
};
|
||||
|
||||
class KFDLog{};
|
||||
std::ostream& operator << (KFDLog log, LOGTYPE level);
|
||||
|
||||
// @brief log additional details, to be displayed in the same format as other google test outputs
|
||||
// currently not supported by google test
|
||||
// should be used like cout: LOG() << "message" << value << std::endl;
|
||||
// @brief Log additional details, to be displayed in the same format as other google test outputs
|
||||
// Currently not supported by gtest
|
||||
// Should be used like cout: LOG() << "message" << value << std::endl;
|
||||
#define LOG() KFDLog() << LOGTYPE_INFO
|
||||
#define WARN() KFDLog() << LOGTYPE_WARNING
|
||||
|
||||
// all test MUST be in a try catch since google test flag to throw exception on any fatal fail is on
|
||||
// All tests MUST be in a try catch since the gtest flag to throw an exception on any fatal failure is enabled
|
||||
#define TEST_START(testProfile) if (Ok2Run(testProfile)) try {
|
||||
#define TEST_END } catch (...) {}
|
||||
|
||||
// used to wrape setup and teardown functions, anything that is build-in gtest and is not a test
|
||||
// Used to wrap setup and teardown functions, anything that is built-in gtest and is not a test
|
||||
#define ROUTINE_START try {
|
||||
#define ROUTINE_END }catch(...) {}
|
||||
|
||||
@@ -59,13 +59,13 @@ std::ostream& operator << (KFDLog log, LOGTYPE level);
|
||||
#define ASSERT_NOTNULL(_val) ASSERT_NE((void *)NULL, _val)
|
||||
#define EXPECT_NOTNULL(_val) EXPECT_NE((void *)NULL, _val)
|
||||
|
||||
// @brief determines if its ok to run a test given input flags
|
||||
// @brief Determines if it is ok to run a test given input flags
|
||||
bool Ok2Run(unsigned int testProfile);
|
||||
|
||||
// @brief checks if all HW capabilities needed for a test to run exist
|
||||
// @brief Checks if all HW capabilities needed for a test to run exist
|
||||
bool TestReqEnvCaps(unsigned int hwCaps);
|
||||
|
||||
// @brief checks if all HW capabilities that prevents a test from running are non existing
|
||||
// @brief Checks if all HW capabilities that prevents a test from running are absent
|
||||
bool TestReqNoEnvCaps(unsigned int hwCaps);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -28,32 +28,32 @@
|
||||
#include "KFDTestUtil.hpp"
|
||||
|
||||
/** @class IndirectBuffer
|
||||
* when working with indirect buffer, create IndirectBuffer, fill it with all the packets you want
|
||||
* When working with an indirect buffer, create IndirectBuffer, fill it with all the packets you want,
|
||||
* create an indirect packet to point to it, and submit the packet to queue
|
||||
*/
|
||||
class IndirectBuffer {
|
||||
public:
|
||||
// @param[size] queue max size in DWords
|
||||
// @param[type] packets type allowed in queue
|
||||
// @param[size] Queue max size in DWords
|
||||
// @param[type] Packet type allowed in queue
|
||||
IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId);
|
||||
~IndirectBuffer(void);
|
||||
|
||||
// @brief add packet to queue, all validations are done with gtest ASSERT and EXPECT
|
||||
// @brief Add packet to queue, all validations are done with gtest ASSERT and EXPECT
|
||||
void AddPacket(const BasePacket &packet);
|
||||
// @returns the actual size of the indirect queue in DWord, equivalent to write pointer
|
||||
// @returns Actual size of the indirect queue in DWords, equivalent to write pointer
|
||||
unsigned int SizeInDWord() { return m_ActualSize; }
|
||||
// @returns indirect queue address
|
||||
// @returns Indirect queue address
|
||||
unsigned int *Addr() { return m_IndirectBuf->As<unsigned int*>(); }
|
||||
|
||||
protected:
|
||||
// how many packets in queue
|
||||
// Number of packets in the queue
|
||||
unsigned int m_NumOfPackets;
|
||||
// max size of queue in DWords
|
||||
// Max size of queue in DWords
|
||||
unsigned int m_MaxSize;
|
||||
// current size of queue in DWords
|
||||
// Current size of queue in DWords
|
||||
unsigned int m_ActualSize;
|
||||
HsaMemoryBuffer *m_IndirectBuf;
|
||||
// defines what packets are supported in this queue
|
||||
// What packets are supported in this queue
|
||||
PACKETTYPE m_PacketTypeAllowed;
|
||||
};
|
||||
|
||||
|
||||
@@ -38,9 +38,9 @@ void KFDBaseComponentTest::SetUp() {
|
||||
memset( &m_SystemProperties, 0, sizeof(m_SystemProperties) );
|
||||
memset(m_RenderNodes, 0, sizeof(m_RenderNodes));
|
||||
|
||||
/** in order to be correctly testing the KFD interfaces and ensure
|
||||
/** In order to be correctly testing the KFD interfaces and ensure
|
||||
* that the KFD acknowledges relevant node parameters
|
||||
* for the rest of the tests and used for more specific topology tests
|
||||
* for the rest of the tests and used for more specific topology tests,
|
||||
* call to GetSystemProperties for a system snapshot of the topology here
|
||||
*/
|
||||
ASSERT_SUCCESS(hsaKmtAcquireSystemProperties(&m_SystemProperties));
|
||||
@@ -53,8 +53,8 @@ void KFDBaseComponentTest::SetUp() {
|
||||
m_MemoryFlags.ui32.CachePolicy = HSA_CACHING_NONCACHED; // Non cached
|
||||
m_MemoryFlags.ui32.ReadOnly = 0; // Read/Write
|
||||
m_MemoryFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB; // 4KB page
|
||||
m_MemoryFlags.ui32.HostAccess = 1; // host accessible
|
||||
m_MemoryFlags.ui32.NoSubstitute = 0; // fall back to node 0 if needed
|
||||
m_MemoryFlags.ui32.HostAccess = 1; // Host accessible
|
||||
m_MemoryFlags.ui32.NoSubstitute = 0; // Fall back to node 0 if needed
|
||||
m_MemoryFlags.ui32.GDSMemory = 0;
|
||||
m_MemoryFlags.ui32.Scratch = 0;
|
||||
|
||||
@@ -94,7 +94,7 @@ HSAuint64 KFDBaseComponentTest::GetSysMemSize() {
|
||||
for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
|
||||
nodeProps = m_NodeInfo.GetNodeProperties(node);
|
||||
if (nodeProps != NULL && nodeProps->NumCPUCores > 0 && nodeProps->NumMemoryBanks > 0) {
|
||||
/* For NUMA nodes, memory is distributed among differnt nodes.
|
||||
/* For NUMA nodes, memory is distributed among different nodes.
|
||||
* Compute total system memory size. KFD driver also computes
|
||||
* the system memory (si_meminfo) similarly
|
||||
*/
|
||||
|
||||
@@ -63,17 +63,13 @@ class KFDBaseComponentTest : public testing::Test {
|
||||
HsaMemFlags m_MemoryFlags;
|
||||
HsaNodeInfo m_NodeInfo;
|
||||
|
||||
// @brief SetUpTestCase function run before the first test that uses
|
||||
// KFDOpenCloseKFDTest class fixture, and opens KFD.
|
||||
// @brief Executed before the first test that uses KFDOpenCloseKFDTest class and opens KFD.
|
||||
static void SetUpTestCase();
|
||||
// @brief TearDownTestCase function run after the last test from
|
||||
// KFDOpenCloseKFDTest class fixture and calls close KFD.
|
||||
// @brief Executed after the last test from KFDOpenCloseKFDTest class and closes KFD.
|
||||
static void TearDownTestCase();
|
||||
// @brief SetUp function run before every test that uses
|
||||
// KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
|
||||
// @brief Executed before every test that uses KFDOpenCloseKFDTest class and sets all common settings for the tests.
|
||||
virtual void SetUp();
|
||||
// @brief TearDown function run after every test that uses
|
||||
// KFDOpenCloseKFDTest class fixture.
|
||||
// @brief Executed after every test that uses KFDOpenCloseKFDTest class.
|
||||
virtual void TearDown();
|
||||
};
|
||||
|
||||
|
||||
@@ -89,9 +89,10 @@ void KFDCWSRTest::SetUp() {
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
// TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not
|
||||
// set the destination based on workitem_id.
|
||||
// Set the wave_num to 1 for now as a workarpound. Will set it to 8 or even 256 in the future.
|
||||
/* TODO: In the ISA, the workitem_id is not obtained as expected, so the destination cannot
|
||||
* be set based on workitem_id. Set the wave_num to 1 for now as a workarpound.
|
||||
* Will set it to 8 or even 256 in the future.
|
||||
*/
|
||||
wave_number = 1;
|
||||
|
||||
ROUTINE_END
|
||||
@@ -149,15 +150,15 @@ TEST_F(KFDCWSRTest, BasicTest) {
|
||||
iter[0] = 40000000;
|
||||
iter[1] = 20000000;
|
||||
|
||||
// submit the shader, queue1
|
||||
// Submit the shader, queue1
|
||||
dispatch1->Submit(queue1);
|
||||
// Create queue2 during queue1 still running will trigger the CWSR
|
||||
EXPECT_SUCCESS(queue2.Create(defaultGPUNode));
|
||||
// submit the shader
|
||||
// Submit the shader
|
||||
dispatch2->Submit(queue2);
|
||||
dispatch1->Sync();
|
||||
dispatch2->Sync();
|
||||
// ensure all the waves complete as expected
|
||||
// Ensure all the waves complete as expected
|
||||
int i;
|
||||
for (i = 0 ; i < wave_number; ++i) {
|
||||
if (result[i] != iter[0]) {
|
||||
|
||||
@@ -39,7 +39,7 @@ class KFDCWSRTest : public KFDBaseComponentTest {
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
unsigned wave_number;
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
@@ -79,7 +79,7 @@ void KFDDBGTest::TearDown() {
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
/* reset the user trap handler */
|
||||
/* Reset the user trap handler */
|
||||
hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
@@ -118,7 +118,7 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
|
||||
ASSERT_SUCCESS(queue_flush.Create(defaultGPUNode));
|
||||
|
||||
// Set Address Watch Params
|
||||
// TODO: Set atchMode[1] to Atomic in case we want to test this mode.
|
||||
// TODO: Set WatchMode[1] to Atomic in case we want to test this mode.
|
||||
|
||||
HSA_DBG_WATCH_MODE WatchMode[2];
|
||||
HSAuint64 WatchAddress[2];
|
||||
@@ -153,9 +153,9 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
|
||||
dispatch.SetArgs(dstBuf.As<void*>(), reinterpret_cast<void *>(secDstBuf));
|
||||
dispatch.SetDim(1, 1, 1);
|
||||
|
||||
// TODO: use Memory ordering rules w/ atomics
|
||||
// for host-GPU memory syncs.
|
||||
// set to: std::memory_order_seq_cst
|
||||
/* TODO: Use Memory ordering rules w/ atomics for host-GPU memory syncs.
|
||||
* Set to std::memory_order_seq_cst
|
||||
*/
|
||||
|
||||
dispatch.Submit(queue);
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ class KFDDBGTest : public KFDBaseComponentTest {
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ void KFDEventTest::SetUp() {
|
||||
void KFDEventTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
// not all tests create event, destroy only if there is one
|
||||
// Not all tests create an event, destroy only if there is one
|
||||
if (m_pHsaEvent != NULL) {
|
||||
// hsaKmtDestroyEvent moved to TearDown to make sure it is being called
|
||||
EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pHsaEvent));
|
||||
@@ -58,7 +58,7 @@ TEST_F(KFDEventTest, CreateDestroyEvent) {
|
||||
ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_NodeInfo.HsaDefaultGPUNode(), &m_pHsaEvent));
|
||||
EXPECT_NE(0, m_pHsaEvent->EventData.HWData2);
|
||||
|
||||
// destroy event is being called in test TearDown
|
||||
// Destroy event is being called in test TearDown
|
||||
TEST_END;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,9 +31,9 @@ class KFDEventTest : public KFDBaseComponentTest {
|
||||
KFDEventTest(void) {}
|
||||
~KFDEventTest(void) {}
|
||||
|
||||
// @brief SetUp function runs before every test in KFDEventTest.
|
||||
// @brief Executed before every test in KFDEventTest.
|
||||
virtual void SetUp();
|
||||
// @brief TearDown function runs after every test in KFDEventTest.
|
||||
// @brief Executed after every test in KFDEventTest.
|
||||
virtual void TearDown();
|
||||
|
||||
protected:
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include "SDMAQueue.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
#define N_PROCESSES (8) /* number of processes running in parallel, at least 2 */
|
||||
#define N_PROCESSES (8) /* Number of processes running in parallel, must be at least 2 */
|
||||
#define ALLOCATE_BUF_SIZE_MB (64)
|
||||
#define ALLOCATE_RETRY_TIMES (3)
|
||||
|
||||
@@ -70,7 +70,7 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui
|
||||
break;
|
||||
}
|
||||
|
||||
/* wait for 1 second to try allocate again */
|
||||
/* Wait for 1 second to try allocate again */
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
@@ -268,7 +268,7 @@ void KFDEvictTest::ForkChildProcesses(int nprocesses) {
|
||||
|
||||
void KFDEvictTest::WaitChildProcesses() {
|
||||
if (m_IsParent) {
|
||||
/* only run by parent process */
|
||||
/* Only run by parent process */
|
||||
int childStatus;
|
||||
int childExitOkNum = 0;
|
||||
int size = m_ChildPids.size();
|
||||
@@ -286,7 +286,7 @@ void KFDEvictTest::WaitChildProcesses() {
|
||||
ASSERT_EQ(childExitOkNum, size);
|
||||
}
|
||||
|
||||
/* child process or parent process finished successfullly */
|
||||
/* Child process or parent process finished successfully */
|
||||
m_ChildStatus = HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -296,19 +296,19 @@ void KFDEvictTest::WaitChildProcesses() {
|
||||
*
|
||||
* ALLOCATE_BUF_SIZE_MB buf allocation size
|
||||
*
|
||||
* number of buf is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
|
||||
* buf is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
|
||||
* Total vram all processes allocated: 8GB for 4GB Fiji, and 20GB for 16GB Vega10
|
||||
*
|
||||
* many times of eviction and restore will happen:
|
||||
* ttm will evict buffers of another process if not enough free vram
|
||||
* Eviction and restore will happen many times:
|
||||
* ttm will evict buffers of another process if there is not enough free vram
|
||||
* process restore will evict buffers of another process
|
||||
*
|
||||
* Sometimes the allocate may fail (maybe that is normal)
|
||||
* Sometimes the allocation may fail (maybe that is normal)
|
||||
* ALLOCATE_RETRY_TIMES max retry times to allocate
|
||||
*
|
||||
* This is basic test, no queue so vram are not used by GPU during test
|
||||
* This is basic test with no queue, so vram is not used by the GPU during test
|
||||
*
|
||||
* Todo:
|
||||
* TODO:
|
||||
* - Synchronization between the processes, so they know for sure when
|
||||
* they are done allocating memory
|
||||
*/
|
||||
@@ -345,7 +345,7 @@ TEST_F(KFDEvictTest, BasicTest) {
|
||||
std::vector<void *> pBuffers;
|
||||
AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);
|
||||
|
||||
/* allocate gfx vram size of at most one third system memory */
|
||||
/* Allocate gfx vram size of at most one third system memory */
|
||||
HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
|
||||
amdgpu_bo_handle handle;
|
||||
AllocAmdgpuBo(rn, size, handle);
|
||||
@@ -365,12 +365,12 @@ TEST_F(KFDEvictTest, BasicTest) {
|
||||
* until address buffer is filled with specific value 0x5678 by host program,
|
||||
* then each wavefront fills value 0x5678 at corresponding result buffer and quit
|
||||
*
|
||||
* initial state:
|
||||
* Initial state:
|
||||
* s[0:1] - address buffer base address
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* registers:
|
||||
* Registers:
|
||||
* v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
|
||||
* v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
@@ -514,7 +514,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
|
||||
const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
|
||||
|
||||
/* Skip test for chip it doesn't have CWSR, which the test depends on */
|
||||
/* Skip test for chip if it doesn't have CWSR, which the test depends on */
|
||||
if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties)) {
|
||||
LOG() << std::hex << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
|
||||
return;
|
||||
@@ -538,7 +538,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
LOG() << "Skipping test: Not enough system memory available." << std::endl;
|
||||
return;
|
||||
}
|
||||
/* assert all buffer address can be stored within one page
|
||||
/* Assert all buffer address can be stored within one page
|
||||
* because only one page host memory srcBuf is allocated
|
||||
*/
|
||||
ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));
|
||||
@@ -559,7 +559,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
std::vector<void *> pBuffers;
|
||||
AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);
|
||||
|
||||
/* allocate gfx vram size of at most one third system memory */
|
||||
/* Allocate gfx vram size of at most one third system memory */
|
||||
HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
|
||||
amdgpu_bo_handle handle;
|
||||
AllocAmdgpuBo(rn, size, handle);
|
||||
@@ -583,27 +583,32 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
dispatch0.SetArgs(localBufAddr, result);
|
||||
dispatch0.SetDim(wavefront_num, 1, 1);
|
||||
/* submit the packet and start shader */
|
||||
/* Submit the packet and start shader */
|
||||
dispatch0.Submit(pm4Queue);
|
||||
|
||||
/* doing evict/restore queue test for 5 seconds while queue is running */
|
||||
/* Doing evict/restore queue test for 5 seconds while queue is running */
|
||||
sleep(5);
|
||||
|
||||
/* LOG() << m_psName << "notify shader to quit" << std::endl; */
|
||||
/* fill address buffer so shader quits */
|
||||
/* Uncomment this line for debugging */
|
||||
// LOG() << m_psName << "notify shader to quit" << std::endl;
|
||||
|
||||
/* Fill address buffer so shader quits */
|
||||
addrBuffer.Fill(0x5678);
|
||||
|
||||
/* wait for shader to finish or timeout if shade has vm page fault */
|
||||
/* Wait for shader to finish or timeout if shader has vm page fault */
|
||||
dispatch0.SyncWithStatus(120000);
|
||||
|
||||
ASSERT_SUCCESS(pm4Queue.Destroy());
|
||||
|
||||
FreeAmdgpuBo(handle);
|
||||
/* LOG() << m_psName << "free buffer" << std::endl; */
|
||||
/* cleanup */
|
||||
|
||||
/* Uncomment this line for debugging */
|
||||
// LOG() << m_psName << "free buffer" << std::endl;
|
||||
|
||||
/* Cleanup */
|
||||
FreeBuffers(pBuffers, vramBufSize);
|
||||
|
||||
/* check if all wavefronts finish successfully */
|
||||
/* Check if all wavefronts finished successfully */
|
||||
for (i = 0; i < wavefront_num; i++)
|
||||
ASSERT_EQ(0x5678, *(result + i));
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ class KFDEvictTest : public KFDLocalMemoryTest {
|
||||
|
||||
~KFDEvictTest(void) {
|
||||
if (!m_IsParent) {
|
||||
/* child process has to exit
|
||||
/* Child process has to exit
|
||||
* otherwise gtest will continue other tests
|
||||
*/
|
||||
exit(m_ChildStatus);
|
||||
@@ -60,7 +60,7 @@ class KFDEvictTest : public KFDLocalMemoryTest {
|
||||
void ForkChildProcesses(int nprocesses);
|
||||
void WaitChildProcesses();
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
std::string m_psName;
|
||||
std::vector<pid_t> m_ChildPids;
|
||||
HsaMemFlags m_Flags;
|
||||
|
||||
@@ -28,8 +28,6 @@
|
||||
#include "SDMAQueue.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
// All tests are marked by their serial number in the QCM FDD
|
||||
|
||||
void KFDExceptionTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
@@ -198,7 +196,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
|
||||
TestMemoryException(defaultGPUNode, srcBuffer.As<HSAuint64>(),
|
||||
(HSAuint64)pDst);
|
||||
|
||||
/* Wait enough time here to ensure this process got killed by kernel
|
||||
/* Wait for enough time here to ensure this process got killed by kernel
|
||||
* due to PPR exception.
|
||||
*/
|
||||
sleep(5);
|
||||
@@ -213,8 +211,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
/* TODO: Same as previous test InvalidPPRWriteProtection
|
||||
*/
|
||||
/* TODO: Same as previous test InvalidPPRWriteProtection */
|
||||
TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
|
||||
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
||||
TEST_START(TESTPROFILE_RUNALL);
|
||||
@@ -249,7 +246,7 @@ TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
|
||||
TestMemoryException(defaultGPUNode, (HSAuint64)pSrc,
|
||||
dstBuffer.As<HSAuint64>());
|
||||
|
||||
/* Wait enough time here to ensure this process got killed by kernel
|
||||
/* Wait for enough time here to ensure this process got killed by kernel
|
||||
* due to PPR exception.
|
||||
*/
|
||||
sleep(5);
|
||||
|
||||
@@ -55,7 +55,7 @@ class KFDExceptionTest : public KFDBaseComponentTest {
|
||||
unsigned int dimX = 1, unsigned int dimY = 1,
|
||||
unsigned int dimZ = 1);
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
pid_t m_ChildPid;
|
||||
HSAKMT_STATUS m_ChildStatus;
|
||||
|
||||
|
||||
@@ -83,8 +83,9 @@ TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
|
||||
ASSERT_SUCCESS(hsaKmtRegisterGraphicsHandleToNodes(dmabufFd, &info,
|
||||
1, nodes));
|
||||
|
||||
// DMA buffer handle and GEM handle are no longer needed, KFD
|
||||
// should have taken a reference to the BO
|
||||
/* DMA buffer handle and GEM handle are no longer needed, KFD
|
||||
* should have taken a reference to the BO
|
||||
*/
|
||||
ASSERT_EQ(0, close(dmabufFd));
|
||||
ASSERT_EQ(0, amdgpu_bo_free(handle));
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ KFDIPCTest::~KFDIPCTest(void) {
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Imort shared Local Memory from parent process. Check for the pattern
|
||||
/* Import shared Local Memory from parent process. Check for the pattern
|
||||
* filled in by the parent process. Then fill a new pattern.
|
||||
*/
|
||||
void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
|
||||
@@ -101,9 +101,9 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
|
||||
ASSERT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
|
||||
}
|
||||
|
||||
/* Fill a pattern in to Local Memory and share with the child process.
|
||||
/* Fill a pattern into Local Memory and share with the child process.
|
||||
* Then wait until Child process to exit and check for the new pattern
|
||||
* fill in by the child process.
|
||||
* filled in by the child process.
|
||||
*/
|
||||
|
||||
void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd) {
|
||||
@@ -213,7 +213,7 @@ TEST_F(KFDIPCTest, BasicTest) {
|
||||
* dstBuf3[0x800-0x1000] is expected to be 0xAAAAAAAA
|
||||
* and dstBuf4[0x0-0x1000] is expected to be 0xAAAAAAAA
|
||||
*
|
||||
* For this CMA test after copy only the first and the last of dstBuf is checked
|
||||
* For this CMA test, after copying only the first and the last of dstBuf is checked
|
||||
*/
|
||||
|
||||
static testMemoryDescriptor srcRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = {
|
||||
@@ -490,7 +490,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachChildProcess(int defaultGPUNode, in
|
||||
break;
|
||||
}
|
||||
|
||||
/* Wait till the test is over */
|
||||
/* Wait until the test is over */
|
||||
memset(msg, 0, sizeof(msg));
|
||||
if (read_non_block(readPipe, msg, 4) < 0) {
|
||||
status = CMA_IPC_PIPE_ERROR;
|
||||
@@ -523,7 +523,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachParentProcess(int defaultGPUNode, p
|
||||
int testNo;
|
||||
CMA_TEST_STATUS status;
|
||||
|
||||
/* Recevie buffer array from child and then initialize and fill in Local Buffer Array.
|
||||
/* Receive buffer array from child and then initialize and fill in Local Buffer Array.
|
||||
* READ_TEST: Copy remote buffer array into Local Buffer Array and then check
|
||||
* for the new pattern.
|
||||
* WRITE_TEST: Write Local Buffer Array into remote buffer array. Notify child to
|
||||
@@ -615,7 +615,6 @@ TEST_F(KFDIPCTest, CrossMemoryAttachTest) {
|
||||
ASSERT_EQ(pipe2(pipePtoC, O_NONBLOCK), 0);
|
||||
|
||||
/* Create a child process and share the above Local Memory with it */
|
||||
|
||||
m_ChildPid = fork();
|
||||
if (m_ChildPid == 0 && hsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS) {
|
||||
/* Child Process */
|
||||
|
||||
@@ -53,8 +53,9 @@ enum CMA_TEST_STATUS {
|
||||
CMA_TEST_HSA_WRITE_FAIL
|
||||
};
|
||||
|
||||
// @struct testMemoryDescriptor
|
||||
// Describes test buffers for Cross Memory Attach Test.
|
||||
/* @struct testMemoryDescriptor
|
||||
* @brief Describes test buffers for Cross Memory Attach Test.
|
||||
*/
|
||||
struct testMemoryDescriptor {
|
||||
CMA_MEM_TYPE m_MemType;
|
||||
HSAuint64 m_MemSize;
|
||||
@@ -76,9 +77,10 @@ struct testMemoryDescriptor {
|
||||
~testMemoryDescriptor(){}
|
||||
};
|
||||
|
||||
// @class KFDCMAArray
|
||||
// Array of buffers that will be passed between the parent and child
|
||||
// process for Cross memory read and write tests
|
||||
/* @class KFDCMAArray
|
||||
* @brief Array of buffers that will be passed between the parent and child
|
||||
* process for Cross memory read and write tests
|
||||
*/
|
||||
class KFDCMAArray {
|
||||
/* Used to store the actual buffer array */
|
||||
HsaMemoryBuffer* m_MemArray[CMA_MEMORY_TEST_ARRAY_SIZE];
|
||||
@@ -130,5 +132,4 @@ class KFDIPCTest : public KFDBaseComponentTest {
|
||||
pid_t m_ChildPid;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __KFD_MEMORY_TEST__H__
|
||||
|
||||
@@ -28,8 +28,6 @@
|
||||
#include "SDMAQueue.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
// All tests are marked by their serial number in the QCM FDD
|
||||
|
||||
void KFDLocalMemoryTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
@@ -285,7 +283,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
|
||||
unsigned value = 0;
|
||||
memset(pages, 0, sizeof(pages));
|
||||
for (order = 0; order <= maxOrder; order++) {
|
||||
// At maxOrder, block sizes is 1/4 of available memory
|
||||
// At maxOrder, block size is 1/4 of available memory
|
||||
pages[order].nPages = 1UL << (maxOrder - order + 2);
|
||||
// At order != 0, 1/2 the memory is already allocated
|
||||
if (order > 0)
|
||||
@@ -467,7 +465,7 @@ TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
|
||||
}
|
||||
|
||||
if (dst_node != defaultGPUNode) {
|
||||
/* at least one node should be defaultGPUNode */
|
||||
/* At least one node should be defaultGPUNode */
|
||||
src_node = defaultGPUNode;
|
||||
} else {
|
||||
for (auto node : gpuNodes) {
|
||||
|
||||
@@ -38,7 +38,7 @@ class KFDLocalMemoryTest : public KFDBaseComponentTest {
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
|
||||
@@ -130,10 +130,10 @@ void KFDMemoryTest::TearDown() {
|
||||
#define GB(x) ((x) << 30)
|
||||
|
||||
/*
|
||||
* try to map as much as possible system memory to gpu.
|
||||
* lets see if kfd support 1TB memory correctly or not.
|
||||
* And after this test case, we can observe if there is any sideeffect.
|
||||
* NOTICE: there are memory usage limit checks in hsa/kfd according to the total
|
||||
* Try to map as much as possible system memory to gpu
|
||||
* to see if KFD supports 1TB memory correctly or not.
|
||||
* After this test case, we can observe if there are any side effects.
|
||||
* NOTICE: There are memory usage limit checks in hsa/kfd according to the total
|
||||
* physical system memory.
|
||||
*/
|
||||
TEST_F(KFDMemoryTest, MMapLarge) {
|
||||
@@ -187,19 +187,19 @@ TEST_F(KFDMemoryTest, MMapLarge) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
/* keep memory mapped to default node
|
||||
/* Keep memory mapped to default node
|
||||
* Keep mapping/unmapping memory to/from non-default node
|
||||
* A shader running on default node consistantly access
|
||||
* memory - make sure memory is always accessible on default,
|
||||
* i.e., there is no gpu vm fault.
|
||||
* A shader running on default node consistantly accesses
|
||||
* memory - make sure memory is always accessible by default,
|
||||
* i.e. there is no gpu vm fault.
|
||||
* Synchronization b/t host program and shader:
|
||||
* 1. host initialize src and dst buffer to 0
|
||||
* 2. shader keep reading src buffer and check value
|
||||
* 3. host write src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
|
||||
* 4. shader write dst buffer to 0x5678 after src changed to 0x5678, quit
|
||||
* 5. host program quit after dst becomes 0x5678
|
||||
* Need at least two gpu nodes to run the test. The defaut node has to be a gfx9 node.
|
||||
* Otherwise, test is skipped. Use kfdtest --node=$$ to specify the defaut node
|
||||
* 1. Host initializes src and dst buffer to 0
|
||||
* 2. Shader keeps reading src buffer and check value
|
||||
* 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
|
||||
* 4. Shader write dst buffer to 0x5678 after src changes to 0x5678, then quits
|
||||
* 5. Host program quits after dst becomes 0x5678
|
||||
* Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
|
||||
* otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
|
||||
* This test case is introduced as a side-result of investigation of SWDEV-134798, which
|
||||
* is a gpu vm fault while running rocr conformance test. Here we try to simulate the
|
||||
* same test behaviour.
|
||||
@@ -250,7 +250,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
|
||||
hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, (i>>5)&1+1, mapNodes);
|
||||
}
|
||||
|
||||
/* fill src buffer so shader quits */
|
||||
/* Fill src buffer so shader quits */
|
||||
srcBuffer.Fill(0x5678);
|
||||
WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
|
||||
ASSERT_EQ(*dstBuffer.As<uint32_t *>(), 0x5678);
|
||||
@@ -258,7 +258,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
|
||||
// Basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
|
||||
TEST_F(KFDMemoryTest , MapMemoryToGPU) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -280,7 +280,7 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// following tests are for hsaKmtAllocMemory with invalid params
|
||||
// Following tests are for hsaKmtAllocMemory with invalid params
|
||||
TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -299,7 +299,7 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// basic test for hsaKmtAllocMemory
|
||||
// Basic test for hsaKmtAllocMemory
|
||||
TEST_F(KFDMemoryTest, MemoryAlloc) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -381,7 +381,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
|
||||
HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));
|
||||
|
||||
/* Create PM4 and SDMA queues before fork+COW to test queue
|
||||
* eviction and restore */
|
||||
* eviction and restore
|
||||
*/
|
||||
PM4Queue pm4Queue;
|
||||
SDMAQueue sdmaQueue;
|
||||
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
||||
@@ -392,7 +393,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
|
||||
|
||||
/* First submit just so the queues are not empty, and to get the
|
||||
* TLB populated (in case we need to flush TLBs somewhere after
|
||||
* updating the page tables) */
|
||||
* updating the page tables)
|
||||
*/
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
|
||||
dispatch0.Submit(pm4Queue);
|
||||
@@ -410,7 +412,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
|
||||
* make any write access to the stack because we want the
|
||||
* parent to make the first write access and get a new copy. A
|
||||
* busy loop is the safest way to do that, since any function
|
||||
* call (e.g. sleep) would write to the stack. */
|
||||
* call (e.g. sleep) would write to the stack.
|
||||
*/
|
||||
while (1)
|
||||
{}
|
||||
WARN() << "Shouldn't get here!" << std::endl;
|
||||
@@ -419,13 +422,15 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
|
||||
|
||||
/* Parent process writes to COW page(s) and gets a new copy. MMU
|
||||
* notifier needs to update the GPU mapping(s) for the test to
|
||||
* pass. */
|
||||
* pass.
|
||||
*/
|
||||
globalData = 0xD00BED00;
|
||||
stackData[dstOffset] = 0xdeadbeef;
|
||||
stackData[sdmaOffset] = 0xdeadbeef;
|
||||
|
||||
/* Terminate the child process before a possible test failure that
|
||||
* would leave it spinning in the background indefinitely. */
|
||||
* would leave it spinning in the background indefinitely.
|
||||
*/
|
||||
int status;
|
||||
EXPECT_EQ(0, kill(pid, SIGTERM));
|
||||
EXPECT_EQ(pid, waitpid(pid, &status, 0));
|
||||
@@ -516,10 +521,11 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// FlatScratchAccess
|
||||
// Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
|
||||
// will not work for multiple GPU nodes. For now test only one default GPU node.
|
||||
// TODO: Generalize it to support multiple nodes
|
||||
/* FlatScratchAccess
|
||||
* Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
|
||||
* will not work for multiple GPU nodes. For now test only one default GPU node.
|
||||
* TODO: Generalize it to support multiple nodes
|
||||
*/
|
||||
|
||||
#define SCRATCH_SLICE_SIZE 0x10000
|
||||
#define SCRATCH_SLICE_NUM 3
|
||||
@@ -558,24 +564,23 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
|
||||
// Map everything for test below
|
||||
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL));
|
||||
|
||||
// source & destination memory buffers
|
||||
HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
|
||||
|
||||
// Initialize the srcBuffer to some fixed value
|
||||
srcMemBuffer.Fill(0x01010101);
|
||||
|
||||
// Initialize a buffer with a DWORD copy ISA
|
||||
// Initialize a buffer with a dword copy ISA
|
||||
m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
|
||||
"ScratchCopyDword", isaBuffer);
|
||||
|
||||
const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);
|
||||
|
||||
// TODO: Add support to all GPU Nodes.
|
||||
// The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
|
||||
// also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
|
||||
// HsaMemoryBuffer is now associated with a Node.
|
||||
/* TODO: Add support to all GPU Nodes.
|
||||
* The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
|
||||
* also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
|
||||
* HsaMemoryBuffer is now associated with a Node.
|
||||
*/
|
||||
if (pNodeProperties != NULL) {
|
||||
// Get the aperture of the scratch buffer
|
||||
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
|
||||
@@ -585,7 +590,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
|
||||
for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
|
||||
if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
|
||||
int numWaves = 4; // WAVES must be >= # SE
|
||||
int waveSize = 1; // amount of space used by each wave in units of 256 dwords...
|
||||
int waveSize = 1; // Amount of space used by each wave in units of 256 dwords
|
||||
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
@@ -595,25 +600,24 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
|
||||
// Create a dispatch packet to copy
|
||||
Dispatch dispatchSrcToScratch(isaBuffer);
|
||||
|
||||
// setup the dispatch packet
|
||||
// Setup the dispatch packet
|
||||
// Copying from the source Memory Buffer to the scratch buffer
|
||||
dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
|
||||
dispatchSrcToScratch.SetDim(1, 1, 1);
|
||||
dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
|
||||
// submit the packet
|
||||
// Submit the packet
|
||||
dispatchSrcToScratch.Submit(queue);
|
||||
dispatchSrcToScratch.Sync();
|
||||
|
||||
// Create another dispatch packet to copy scratch buffer contents to destination buffer.
|
||||
Dispatch dispatchScratchToDst(isaBuffer);
|
||||
|
||||
// set the arguments to copy from the scratch buffer
|
||||
// to the destination buffer
|
||||
// Set the arguments to copy from the scratch buffer to the destination buffer
|
||||
dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
|
||||
dispatchScratchToDst.SetDim(1, 1, 1);
|
||||
dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
|
||||
|
||||
// submit the packet
|
||||
// Submit the packet
|
||||
dispatchScratchToDst.Submit(queue);
|
||||
dispatchScratchToDst.Sync();
|
||||
|
||||
@@ -708,7 +712,7 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular
|
||||
lastTestedSize = sizeMB;
|
||||
}
|
||||
|
||||
/* Save the biggest allocated system buffer forsignal handling test */
|
||||
/* Save the biggest allocated system buffer for signal handling test */
|
||||
LOG() << "The biggest allocated system buffer is " << std::dec
|
||||
<< lastTestedSize << "MB" << std::endl;
|
||||
if (lastSize)
|
||||
@@ -781,7 +785,8 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
|
||||
* is small. For example, on a typical Carrizo platform, the biggest allocated
|
||||
* system buffer could be more than 14G even though it only has 4G memory.
|
||||
* In that situation, it will take too much time to finish the test, because of
|
||||
* the onerous memory swap operation. So we limit the buffer size that way.*/
|
||||
* the onerous memory swap operation. So we limit the buffer size that way.
|
||||
*/
|
||||
TEST_F(KFDMemoryTest, BigBufferStressTest) {
|
||||
if (!is_dgpu()) {
|
||||
LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
|
||||
@@ -804,7 +809,8 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
|
||||
BigBufferVRAM(defaultGPUNode, granularityMB, NULL);
|
||||
|
||||
/* Repeatedly allocate and map big buffers in system memory until it fails,
|
||||
* then unmap and free them. */
|
||||
* then unmap and free them.
|
||||
*/
|
||||
#define ARRAY_ENTRIES 2048
|
||||
|
||||
int i = 0;
|
||||
@@ -875,7 +881,8 @@ TEST_F(KFDMemoryTest, MMBench) {
|
||||
/* Two SDMA queues to interleave user mode SDMA with memory
|
||||
* management on either SDMA engine. Make the queues long enough
|
||||
* to buffer at least nBufs x WriteData packets (7 dwords per
|
||||
* packet). */
|
||||
* packet).
|
||||
*/
|
||||
SDMAQueue sdmaQueue[2];
|
||||
ASSERT_SUCCESS(sdmaQueue[0].Create(defaultGPUNode, PAGE_SIZE*8));
|
||||
ASSERT_SUCCESS(sdmaQueue[1].Create(defaultGPUNode, PAGE_SIZE*8));
|
||||
@@ -1094,7 +1101,8 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
|
||||
* to access its memory like a debugger would. Child copies data in
|
||||
* the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
|
||||
* the child terminates, the parent checks that the copy was
|
||||
* successful. */
|
||||
* successful.
|
||||
*/
|
||||
TEST_F(KFDMemoryTest, PtraceAccess) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -1108,13 +1116,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
|
||||
void *mem[2];
|
||||
unsigned i;
|
||||
|
||||
// Offset in the VRAM buffer to test crossing non-contiguous
|
||||
// buffer boundaries. The second access starting from offset
|
||||
// sizeof(HSAint64)+1 will cross a node boundary in a single access,
|
||||
// for node sizes of 4MB or smaller.
|
||||
/* Offset in the VRAM buffer to test crossing non-contiguous
|
||||
* buffer boundaries. The second access starting from offset
|
||||
* sizeof(HSAint64)+1 will cross a node boundary in a single access,
|
||||
* for node sizes of 4MB or smaller.
|
||||
*/
|
||||
const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);
|
||||
|
||||
// alloc system memory from node 0 and initialize it
|
||||
// Alloc system memory from node 0 and initialize it
|
||||
memFlags.ui32.NonPaged = 0;
|
||||
ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
|
||||
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
|
||||
@@ -1122,7 +1131,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
|
||||
(reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0; // destination
|
||||
}
|
||||
|
||||
// try to alloc local memory from GPU node
|
||||
// Try to alloc local memory from GPU node
|
||||
memFlags.ui32.NonPaged = 1;
|
||||
if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
|
||||
EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
|
||||
@@ -1137,13 +1146,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
|
||||
mem[1] = NULL;
|
||||
}
|
||||
|
||||
// Allow any process to trace this one. If kernel is built without
|
||||
// Yama, this is not needed, and this call will fail.
|
||||
/* Allow any process to trace this one. If kernel is built without
|
||||
* Yama, this is not needed, and this call will fail.
|
||||
*/
|
||||
#ifdef PR_SET_PTRACER
|
||||
prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
|
||||
#endif
|
||||
|
||||
// Find out my pid so the child can trace it
|
||||
// Find current pid so the child can trace it
|
||||
pid_t tracePid = getpid();
|
||||
|
||||
// Fork the child
|
||||
@@ -1168,8 +1178,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
|
||||
} while (!WIFSTOPPED(traceStatus));
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
// Test 4 different (mis-)alignments, leaving 1-byte
|
||||
// gaps between longs
|
||||
// Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
|
||||
HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
|
||||
errno = 0;
|
||||
long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
|
||||
@@ -1264,7 +1273,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
|
||||
|
||||
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, memFlags, &mem));
|
||||
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(mem, size, NULL));
|
||||
/* set the word before 4M boundary to 0xdeadbeefdeadbeef
|
||||
/* Set the word before 4M boundary to 0xdeadbeefdeadbeef
|
||||
* and the word after 4M boundary to 0xcafebabecafebabe
|
||||
*/
|
||||
mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
|
||||
@@ -1309,7 +1318,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
|
||||
waitpid(tracePid, &traceStatus, 0);
|
||||
} while (!WIFSTOPPED(traceStatus));
|
||||
|
||||
/* peek the memory */
|
||||
/* Peek the memory */
|
||||
errno = 0;
|
||||
HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
|
||||
ASSERT_EQ(0, errno);
|
||||
@@ -1318,7 +1327,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
|
||||
ASSERT_EQ(0, errno);
|
||||
ASSERT_EQ(data[1], data1);
|
||||
|
||||
/* swap mem0 and mem1 by poking */
|
||||
/* Swap mem0 and mem1 by poking */
|
||||
ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
|
||||
ASSERT_EQ(0, errno);
|
||||
ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
|
||||
@@ -1404,7 +1413,7 @@ TEST_F(KFDMemoryTest, SignalHandling) {
|
||||
size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);
|
||||
|
||||
ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
|
||||
// verify that pDb is not null before it's being used
|
||||
// Verify that pDb is not null before it's being used
|
||||
ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
|
||||
|
||||
pid_t childPid = fork();
|
||||
@@ -1473,7 +1482,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* check the first 64 bit */
|
||||
/* Check the first 64 bits */
|
||||
EXPECT_EQ(0, pDb[0]);
|
||||
pDb[0] = 1;
|
||||
|
||||
@@ -1495,7 +1504,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
|
||||
}
|
||||
|
||||
static inline void access(volatile void *sd, int size, int rw) {
|
||||
/* Most like sit in cache*/
|
||||
/* Most likely sitting in cache*/
|
||||
static struct DUMMY {
|
||||
char dummy[1024];
|
||||
} dummy;
|
||||
@@ -1509,8 +1518,8 @@ static inline void access(volatile void *sd, int size, int rw) {
|
||||
}
|
||||
|
||||
/*
|
||||
* on large-ber system, test the visible vram access speed.
|
||||
* kfd is not allowd to alloc visible vram on non-largebar system.
|
||||
* On large-bar system, test the visible vram access speed.
|
||||
* KFD is not allowed to alloc visible vram on non-largebar system.
|
||||
*/
|
||||
TEST_F(KFDMemoryTest, MMBandWidth) {
|
||||
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
|
||||
@@ -1571,7 +1580,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
|
||||
memFlags.ui32.HostAccess = 1;
|
||||
memFlags.ui32.NonPaged = 0;
|
||||
} else {
|
||||
/* alloc visible vram*/
|
||||
/* Alloc visible vram*/
|
||||
allocNode = defaultGPUNode;
|
||||
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
memFlags.ui32.HostAccess = 1;
|
||||
|
||||
@@ -27,9 +27,10 @@
|
||||
#ifndef __KFD_MEMORY_TEST__H__
|
||||
#define __KFD_MEMORY_TEST__H__
|
||||
|
||||
// @class KFDTopologyTest
|
||||
// this class has no additional features to KFDBaseComponentTest
|
||||
// the separation was made so we are able to goup all memeory tests together
|
||||
/* @class KFDTopologyTest
|
||||
* This class has no additional features to KFDBaseComponentTest
|
||||
* The separation was made so we are able to group all memory tests together
|
||||
*/
|
||||
class KFDMemoryTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDMemoryTest(void) :m_pIsaGen(NULL) {}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#include "KFDOpenCloseKFDTest.hpp"
|
||||
#include "KFDTestUtil.hpp"
|
||||
|
||||
// before every test from this class fixture - open KFD
|
||||
// Before every test from this class fixture, open KFD
|
||||
void KFDOpenCloseKFDTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
@@ -33,7 +33,7 @@ void KFDOpenCloseKFDTest::SetUp() {
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
// after every test from this class fixture - close KFD
|
||||
// After every test from this class fixture, close KFD
|
||||
void KFDOpenCloseKFDTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
@@ -42,9 +42,10 @@ void KFDOpenCloseKFDTest::TearDown() {
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
// this test does not use class KFDOpenCloseKFDTest but is placed here
|
||||
// since it's testing same topic as other test
|
||||
// verify that calling hsaKmtCloseKFD on a closed KFD will return right status
|
||||
/* This test does not use class KFDOpenCloseKFDTest but is placed here
|
||||
* since it's testing same topic as other test
|
||||
* Verify that calling hsaKmtCloseKFD on a closed KFD will return right status
|
||||
*/
|
||||
TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -53,8 +54,7 @@ TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// verify that calling hsaKmtCloseKFD on an already opened KFD will return
|
||||
// right status
|
||||
// Verify that calling hsaKmtCloseKFD on an already opened KFD will return right status
|
||||
TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -65,8 +65,7 @@ TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// testing the normal scenario: open followed by close (done in the setup and
|
||||
// teardown functions)
|
||||
// Testing the normal scenario: open followed by close (done in the setup and teardown functions)
|
||||
TEST_F(KFDOpenCloseKFDTest, OpenCloseKFD ) {
|
||||
}
|
||||
|
||||
|
||||
@@ -34,10 +34,9 @@ class KFDOpenCloseKFDTest : public testing::Test {
|
||||
~KFDOpenCloseKFDTest(void) {}
|
||||
|
||||
protected:
|
||||
// @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture,
|
||||
// sets all common settings for the tests.
|
||||
// @brief Executed before every test that uses KFDOpenCloseKFDTest class, sets all common settings for the tests.
|
||||
virtual void SetUp();
|
||||
// @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture.
|
||||
// @brief Executed after every test that uses KFDOpenCloseKFDTest class
|
||||
virtual void TearDown();
|
||||
};
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ void KFDPMTest::SetUpTestCase() {
|
||||
m_SetupSuccess = false;
|
||||
AcquirePrivilege(OS_SUSPEND);
|
||||
|
||||
// if AcquirePrivilege fails, it will throw and we will not reach here.
|
||||
// If AcquirePrivilege fails, it will throw and we will not reach here.
|
||||
m_SetupSuccess = true;
|
||||
|
||||
ROUTINE_END
|
||||
@@ -119,4 +119,4 @@ TEST_F(KFDPMTest, SuspendWithIdleQueueAfterWork) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// TODO suspend while workload is being executed by a queue
|
||||
// TODO: Suspend while workload is being executed by a queue
|
||||
|
||||
@@ -34,7 +34,7 @@ void KFDPNPTest::SetUpTestCase() {
|
||||
|
||||
AcquirePrivilege(OS_DRIVER_OPERATIONS);
|
||||
|
||||
// if AcquirePrivilege fails, it will throw and we will not reach here.
|
||||
// If AcquirePrivilege fails, it will throw and we will not reach here.
|
||||
m_SetupSuccess = true;
|
||||
|
||||
ROUTINE_END
|
||||
|
||||
@@ -32,8 +32,6 @@
|
||||
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
// All tests are marked by their serial number in the QCM FDD
|
||||
|
||||
void KFDQMTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
@@ -155,7 +153,8 @@ TEST_F(KFDQMTest, CreateMultipleSdmaQueues) {
|
||||
* Fiji and other VI/Polaris GPUs. This test typically hangs in a few
|
||||
* seconds. According to analysis done by HW engineers, the culprit
|
||||
* seems to be PCIe speed switching. The problem can be worked around
|
||||
* by disabling the lowest DPM level on Fiji. */
|
||||
* by disabling the lowest DPM level on Fiji.
|
||||
*/
|
||||
TEST_F(KFDQMTest, SdmaConcurrentCopies) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -189,14 +188,16 @@ TEST_F(KFDQMTest, SdmaConcurrentCopies) {
|
||||
srcBuf.As<char *>()+COPY_SIZE*j, COPY_SIZE));
|
||||
queue.SubmitPacket();
|
||||
|
||||
// Waste a variable amount of time. Submission timing
|
||||
// while SDMA runs concurrently seems to be critical for
|
||||
// reproducing the hang
|
||||
/* Waste a variable amount of time. Submission timing
|
||||
* while SDMA runs concurrently seems to be critical for
|
||||
* reproducing the hang
|
||||
*/
|
||||
for (int k = 0; k < (i & 0xfff); k++)
|
||||
memcpy(srcBuf.As<char *>()+PAGE_SIZE, srcBuf.As<char *>(), 1024);
|
||||
|
||||
// Wait for idle every 8 packets to allow the SDMA engine to
|
||||
// run concurrently for a bit without getting too far ahead
|
||||
/* Wait for idle every 8 packets to allow the SDMA engine to
|
||||
* run concurrently for a bit without getting too far ahead
|
||||
*/
|
||||
if ((i & 0x7) == 0)
|
||||
queue.Wait4PacketConsumption();
|
||||
}
|
||||
@@ -268,7 +269,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) {
|
||||
|
||||
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 1, 1));
|
||||
|
||||
// don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
// Don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
Delay(2000);
|
||||
|
||||
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
|
||||
@@ -309,7 +310,7 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) {
|
||||
|
||||
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(destBuf.As<void*>(), 0));
|
||||
|
||||
// don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
// Don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
Delay(2000);
|
||||
|
||||
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
|
||||
@@ -356,7 +357,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {
|
||||
|
||||
queue.PlaceAndSubmitPacket(packet2);
|
||||
|
||||
// don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
// Don't sync since we don't expect rptr to change when the queue is disabled.
|
||||
Delay(2000);
|
||||
|
||||
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
|
||||
@@ -388,7 +389,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
|
||||
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
||||
|
||||
do {
|
||||
// the following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
|
||||
// The following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
|
||||
unsigned int firstToCreate = (numIter % 2 != 0) ? 1 : 0;
|
||||
unsigned int firstToDestroy = (numIter % 4 > 1) ? 1 : 0;
|
||||
|
||||
@@ -440,7 +441,7 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) {
|
||||
unsigned int pktSizeDw = 0;
|
||||
for (unsigned int i = 0; i < MAX_PACKETS; i++) {
|
||||
PM4WriteDataPacket packet;
|
||||
packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i); // two DWORDs per packet
|
||||
packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i); // two dwords per packet
|
||||
queues[qidx].PlacePacket(packet);
|
||||
}
|
||||
}
|
||||
@@ -612,9 +613,9 @@ HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, u
|
||||
return timeTotal / iterations;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Apply CU masking in a linear fashion, adding 1 CU per iteration
|
||||
* until all Shader Engines are full ...
|
||||
* until all Shader Engines are full
|
||||
*/
|
||||
TEST_F(KFDQMTest, BasicCuMaskingLinear) {
|
||||
TEST_START(TESTPROFILE_RUNALL);
|
||||
@@ -1026,8 +1027,9 @@ TEST_F(KFDQMTest, CpuWriteCoherence) {
|
||||
|
||||
EXPECT_EQ(0, queue.Rptr());
|
||||
|
||||
// now that the GPU has cached the PQ contents, we modify them in CPU cache and
|
||||
// ensure that the GPU sees the updated value:
|
||||
/* Now that the GPU has cached the PQ contents, we modify them in CPU cache and
|
||||
* ensure that the GPU sees the updated value:
|
||||
*/
|
||||
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x42, 0x42));
|
||||
|
||||
queue.Wait4PacketConsumption();
|
||||
@@ -1130,10 +1132,10 @@ TEST_F(KFDQMTest, QueueLatency) {
|
||||
if (i >= skip)
|
||||
queue_latency_avg += queue_latency;
|
||||
} while (++i < slots);
|
||||
/*Calculate avg from packet[skip, slots-1]*/
|
||||
/* Calculate avg from packet[skip, slots-1] */
|
||||
queue_latency_avg /= (slots - skip);
|
||||
|
||||
/*workload of queue packet itself*/
|
||||
/* Workload of queue packet itself */
|
||||
i = 0;
|
||||
do {
|
||||
queue.PlacePacket(PM4ReleaseMemoryPacket(true,
|
||||
@@ -1159,7 +1161,7 @@ TEST_F(KFDQMTest, QueueLatency) {
|
||||
do {
|
||||
/* The queue_latency is not that correct as the workload and overhead are average*/
|
||||
queue_latency_arr[i] -= workload + overhead;
|
||||
/* The First submit takes a HSAint64 time*/
|
||||
/* The First submit takes an HSAint64 time*/
|
||||
if (i < skip)
|
||||
LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl;
|
||||
} while (++i < slots);
|
||||
@@ -1454,7 +1456,7 @@ TEST_F(KFDQMTest, P2PTest) {
|
||||
mapFlags, nodes.size(), &nodes[0]));
|
||||
#define MAGIC_NUM 0xdeadbeaf
|
||||
|
||||
/* First GPU fills mem with MAGIC_NUM*/
|
||||
/* First GPU fills mem with MAGIC_NUM */
|
||||
void *src, *dst;
|
||||
HSAuint32 cur = nodes[0], next;
|
||||
ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast<void**>(&src)));
|
||||
@@ -1478,11 +1480,11 @@ TEST_F(KFDQMTest, P2PTest) {
|
||||
}
|
||||
|
||||
LOG() << "Test " << cur << " -> " << next << std::endl;
|
||||
/* copy to sysBuf and next GPU*/
|
||||
/* Copy to sysBuf and next GPU*/
|
||||
void *dst_array[] = {sysBuf, dst};
|
||||
sdma_copy(cur, src, dst_array, n, size);
|
||||
|
||||
/* verify the data*/
|
||||
/* Verify the data*/
|
||||
ASSERT_EQ(sysBuf[0], MAGIC_NUM);
|
||||
ASSERT_EQ(sysBuf[end], MAGIC_NUM);
|
||||
|
||||
@@ -1558,7 +1560,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
|
||||
#ifdef DOORBELL_WRITE_USE_SDMA
|
||||
/* Write the wptr and doorbell update using the GPU's SDMA
|
||||
* engine. This should submit the PM4 packet on the first
|
||||
* queue. */
|
||||
* queue.
|
||||
*/
|
||||
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
|
||||
pendingWptr));
|
||||
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
|
||||
@@ -1566,7 +1569,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
|
||||
#else
|
||||
/* Write the wptr and doorbell update using WRITE_DATA packets
|
||||
* on a second PM4 queue. This should submit the PM4 packet on
|
||||
* the first queue. */
|
||||
* the first queue.
|
||||
*/
|
||||
otherQueue.PlacePacket(
|
||||
PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
|
||||
pendingWptr, false));
|
||||
@@ -1582,7 +1586,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
|
||||
#ifdef DOORBELL_WRITE_USE_SDMA
|
||||
/* Write the wptr and doorbell update using the GPU's SDMA
|
||||
* engine. This should submit the PM4 packet on the first
|
||||
* queue. */
|
||||
* queue.
|
||||
*/
|
||||
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
|
||||
2, &pendingWptr64));
|
||||
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
|
||||
@@ -1591,7 +1596,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
|
||||
/* Write the 64-bit wptr and doorbell update using RELEASE_MEM
|
||||
* packets without IRQs on a second PM4 queue. RELEASE_MEM
|
||||
* should perform one atomic 64-bit access. This should submit
|
||||
* the PM4 packet on the first queue. */
|
||||
* the PM4 packet on the first queue.
|
||||
*/
|
||||
otherQueue.PlacePacket(
|
||||
PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
|
||||
pendingWptr64, true));
|
||||
|
||||
@@ -42,10 +42,9 @@ class KFDQMTest : public KFDBaseComponentTest {
|
||||
virtual void TearDown();
|
||||
|
||||
void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1);
|
||||
// void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf);
|
||||
HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
|
||||
HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
|
||||
protected: // members
|
||||
protected: // Members
|
||||
/* Acceptable performance for CU Masking should be within 5% of linearly-predicted performance */
|
||||
const double CuVariance = 0.15;
|
||||
const double CuNegVariance = 1.0 - CuVariance;
|
||||
|
||||
@@ -32,13 +32,12 @@ extern int g_TestDstNodeId;
|
||||
extern bool g_IsChildProcess;
|
||||
extern unsigned int g_TestGPUFamilyId;
|
||||
|
||||
// each test should call TEST_START with the test custome profile and HW scheduling
|
||||
|
||||
// Each test should call TEST_START with the test custom profile and HW scheduling
|
||||
enum TESTPROFILE{
|
||||
TESTPROFILE_DEV = 0x1,
|
||||
TESTPROFILE_PROMO = 0x2,
|
||||
// 0x4 - 0x8000 - unsed flags
|
||||
// can add any flag that will mark only part of the tests to run
|
||||
// 0x4 - 0x8000 - unused flags
|
||||
// Can add any flag that will mark only part of the tests to run
|
||||
TESTPROFILE_RUNALL = 0xFFFF
|
||||
};
|
||||
|
||||
@@ -48,8 +47,8 @@ enum ENVCAPS{
|
||||
ENVCAPS_16BITPASID = 0x2,
|
||||
ENVCAPS_32BITLINUX = 0x4,
|
||||
ENVCAPS_64BITLINUX = 0x8
|
||||
// 0x8 - 0x8000 - unsed flags
|
||||
// can add any flag that will mark specific hw limitation \ capability
|
||||
// 0x8 - 0x8000 - unused flags
|
||||
// Can add any flag that will mark specific hw limitation or capability
|
||||
};
|
||||
|
||||
enum KfdFamilyId {
|
||||
|
||||
@@ -56,11 +56,13 @@ bool g_IsChildProcess;
|
||||
unsigned int g_TestGPUFamilyId;
|
||||
|
||||
GTEST_API_ int main(int argc, char **argv) {
|
||||
// default values for run parameters
|
||||
// Default values for run parameters
|
||||
g_TestRunProfile = TESTPROFILE_RUNALL;
|
||||
g_TestENVCaps = ENVCAPS_NOADDEDCAPS | ENVCAPS_64BITLINUX;
|
||||
g_TestTimeOut = KFD_TEST_DEFAULT_TIMEOUT;
|
||||
|
||||
// Every fatal fail ( = assert that failed ) will throw an exception
|
||||
testing::GTEST_FLAG(throw_on_failure) = true;
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
CommandLineArguments args;
|
||||
|
||||
@@ -52,7 +52,7 @@ bool GetHwCapabilityHWS() {
|
||||
unsigned int value = 0;
|
||||
bool valExists = ReadDriverConfigValue(CONFIG_HWS, value);
|
||||
|
||||
/* HWS is enabled by default, so... */
|
||||
/* HWS is enabled by default */
|
||||
return ( (!valExists) || ( value > 0));
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ HSAKMT_STATUS CreateQueueTypeEvent(
|
||||
) {
|
||||
HsaEventDescriptor Descriptor;
|
||||
|
||||
// TODO Create per-OS header with this sort of definitions
|
||||
// TODO: Create per-OS header with this sort of definitions
|
||||
#ifdef _WIN32
|
||||
Descriptor.EventType = HSA_EVENTTYPE_QUEUE_EVENT;
|
||||
#else
|
||||
@@ -369,7 +369,7 @@ void HsaMemoryBuffer::UnmapAllNodes() {
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: when thunk will be updated use hsaKmtRegisterToNodes. and then nodes will be used
|
||||
* TODO: When thunk is updated, use hsaKmtRegisterToNodes. Then nodes will be used
|
||||
*/
|
||||
hsaKmtUnmapMemoryToGPU(m_pBuf);
|
||||
hsaKmtDeregisterMemory(m_pBuf);
|
||||
@@ -414,10 +414,10 @@ HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
|
||||
HsaNodeInfo::HsaNodeInfo() {
|
||||
}
|
||||
|
||||
// Init - Get and store information about all the HSA nodes from the Thunk Library.
|
||||
// @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
|
||||
// @Return - false: if no node information is available
|
||||
//
|
||||
/* Init - Get and store information about all the HSA nodes from the Thunk Library.
|
||||
* @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
|
||||
* @Return - false: if no node information is available
|
||||
*/
|
||||
bool HsaNodeInfo::Init(int NumOfNodes) {
|
||||
HsaNodeProperties *nodeProperties;
|
||||
_HSAKMT_STATUS status;
|
||||
@@ -427,8 +427,9 @@ bool HsaNodeInfo::Init(int NumOfNodes) {
|
||||
nodeProperties = new HsaNodeProperties();
|
||||
|
||||
status = hsaKmtGetNodeProperties(i, nodeProperties);
|
||||
/* this is not a fatal test (not using assert), since even when it fails for one node
|
||||
* we want to get information regarding others. */
|
||||
/* This is not a fatal test (not using assert), since even when it fails for one node
|
||||
* we want to get information regarding others.
|
||||
*/
|
||||
EXPECT_SUCCESS(status) << "Node index: " << i << "hsaKmtGetNodeProperties returned status " << status;
|
||||
|
||||
if (status == HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -476,8 +477,7 @@ const int HsaNodeInfo::HsaDefaultGPUNode() const {
|
||||
return -1;
|
||||
|
||||
if (g_TestNodeId >= 0) {
|
||||
// Check if this is a valid Id, if so use this else use first
|
||||
// available
|
||||
// Check if this is a valid Id, if so use this else use first available
|
||||
for (unsigned int i = 0; i < m_NodesWithGPU.size(); i++) {
|
||||
if (g_TestNodeId == m_NodesWithGPU.at(i))
|
||||
return g_TestNodeId;
|
||||
|
||||
@@ -92,7 +92,7 @@ class HsaMemoryBuffer {
|
||||
~HsaMemoryBuffer();
|
||||
|
||||
private:
|
||||
// disable copy
|
||||
// Disable copy
|
||||
HsaMemoryBuffer(const HsaMemoryBuffer&);
|
||||
const HsaMemoryBuffer& operator=(const HsaMemoryBuffer&);
|
||||
|
||||
@@ -130,7 +130,7 @@ class HsaInteropMemoryBuffer {
|
||||
~HsaInteropMemoryBuffer();
|
||||
|
||||
private:
|
||||
// disable copy
|
||||
// Disable copy
|
||||
HsaInteropMemoryBuffer(const HsaInteropMemoryBuffer&);
|
||||
const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&);
|
||||
|
||||
@@ -141,8 +141,7 @@ class HsaInteropMemoryBuffer {
|
||||
unsigned int m_Node;
|
||||
};
|
||||
|
||||
// Class HsaNodeInfo - Gather and store all HSA node information from Thunk.
|
||||
//
|
||||
// @class HsaNodeInfo - Gather and store all HSA node information from Thunk.
|
||||
class HsaNodeInfo {
|
||||
// List containing HsaNodeProperties of all Nodes available
|
||||
std::vector<HsaNodeProperties*> m_HsaNodeProps;
|
||||
@@ -159,16 +158,17 @@ class HsaNodeInfo {
|
||||
|
||||
bool Init(int NumOfNodes);
|
||||
|
||||
// This function should be soon depricated. This for transistion purpose only
|
||||
// Currently, KfdTest is designed to test only ONE node. This function acts
|
||||
// as transistion.
|
||||
/* This function should be deprecated soon. This for transistion purpose only
|
||||
* Currently, KfdTest is designed to test only ONE node. This function acts
|
||||
* as transition.
|
||||
*/
|
||||
const HsaNodeProperties* HsaDefaultGPUNodeProperties() const;
|
||||
const int HsaDefaultGPUNode() const;
|
||||
|
||||
// Future use the following two functions to support multi-GPU.
|
||||
// const std::vector<int>& GpuNodes = GetNodesWithGPU()
|
||||
// for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
|
||||
//
|
||||
/* TODO: Use the following two functions to support multi-GPU.
|
||||
* const std::vector<int>& GpuNodes = GetNodesWithGPU()
|
||||
* for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
|
||||
*/
|
||||
const std::vector<int>& GetNodesWithGPU() const;
|
||||
|
||||
// @param node index of the node we are looking at
|
||||
|
||||
@@ -25,8 +25,6 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
// @todo complete topology test according to whats in: hsathk\source\windows\kmt_topology.cpp
|
||||
|
||||
const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
|
||||
const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);
|
||||
|
||||
@@ -35,17 +33,17 @@ TEST_F(KFDTopologyTest , BasicTest) {
|
||||
|
||||
const HsaNodeProperties *pNodeProperties;
|
||||
|
||||
// goes over all nodes in the sytem properties and check the basic info received
|
||||
// Goes over all nodes in the sytem properties and check the basic info received
|
||||
for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
|
||||
pNodeProperties = m_NodeInfo.GetNodeProperties(node);
|
||||
if (pNodeProperties != NULL) {
|
||||
// checking for cpu core only if it's a cpu only node or if its KAVERY apu.
|
||||
// Checking for cpu core only if it's a cpu only node or if its KAVERI apu.
|
||||
if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) {
|
||||
EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node
|
||||
<< " No CPUs core are connected for node index";
|
||||
}
|
||||
|
||||
// if it's not a cpu only node, look for a gpu core
|
||||
// If it's not a cpu only node, look for a gpu core
|
||||
if (pNodeProperties->DeviceId != 0) {
|
||||
EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node
|
||||
<< "No GPUs core are connected.";
|
||||
@@ -64,7 +62,7 @@ TEST_F(KFDTopologyTest , BasicTest) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// this test verify failure status on hsaKmtGetNodeProperties with invalid params
|
||||
// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
|
||||
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -73,7 +71,7 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// this test verify failure status on hsaKmtGetNodeProperties with invalid params
|
||||
// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
|
||||
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -84,8 +82,8 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// test that we can get memory property successfully per node
|
||||
// @todo check validity of values returned
|
||||
// Test that we can get memory properties successfully per node
|
||||
// TODO: Check validity of values returned
|
||||
TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
const HsaNodeProperties *pNodeProperties;
|
||||
@@ -104,7 +102,7 @@ TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
|
||||
}
|
||||
|
||||
|
||||
// test the GPU local memory aperture is valid.
|
||||
// Test that the GPU local memory aperture is valid.
|
||||
TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
|
||||
TEST_REQUIRE_NO_ENV_CAPABILITIES(ENVCAPS_32BITLINUX);
|
||||
|
||||
@@ -137,8 +135,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// test that we can get cache property successfully per node
|
||||
// @todo check validity of values returned
|
||||
// Test that we can get cache property successfully per node
|
||||
// TODO: Check validity of values returned
|
||||
TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
@@ -179,8 +177,8 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
// test that we can get NodeIoLink property successfully per node
|
||||
// @todo check validity of values returned
|
||||
// Test that we can get NodeIoLink property successfully per node
|
||||
// TODO: Check validity of values returned
|
||||
// GetNodeIoLinkProperties is disabled for now, test fails due to bug in BIOS
|
||||
TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
@@ -197,7 +195,7 @@ TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
|
||||
HsaIoLinkProperties *IolinkProperties = new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
|
||||
EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties));
|
||||
if (pNodeProperties->NumIOLinks == 0) {
|
||||
// No io_links. Just Print the node
|
||||
// No io_links. Just print the node
|
||||
LOG() << "[" << node << "]" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -26,9 +26,10 @@
|
||||
#ifndef __KFD_TOPOLOGY_TEST__H__
|
||||
#define __KFD_TOPOLOGY_TEST__H__
|
||||
|
||||
// @class KFDTopologyTest
|
||||
// this class has no additional features to KFDBaseComponentTest
|
||||
// the separation was made so we are able to goup all topology tests together
|
||||
/* @class KFDTopologyTest
|
||||
* This class has no additional features to KFDBaseComponentTest
|
||||
* The separation was made so we are able to group all topology tests together
|
||||
*/
|
||||
class KFDTopologyTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDTopologyTest(void) {}
|
||||
|
||||
@@ -46,11 +46,11 @@ static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ |
|
||||
PROT_EXEC | PROT_WRITE | PROT_READ};
|
||||
|
||||
void SetConsoleTextColor(TEXTCOLOR color) {
|
||||
// TODO complete
|
||||
// TODO: Complete
|
||||
}
|
||||
|
||||
void Delay(int delayCount) {
|
||||
// usleeps accept time in microseconds
|
||||
// usleep accepts time in microseconds
|
||||
usleep(delayCount * 1000);
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ HSAuint64 GetLastErrorNo() {
|
||||
}
|
||||
|
||||
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) {
|
||||
// TODO IMPLEMENT
|
||||
// TODO: Implement
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -87,20 +87,20 @@ HSAuint64 GetSystemTickCountInMicroSec() {
|
||||
}
|
||||
|
||||
bool SuspendAndWakeUp() {
|
||||
// TODO IMPLEMENT
|
||||
// TODO: Implement
|
||||
return false;
|
||||
}
|
||||
|
||||
void AcquirePrivilege(OS_PRIVILEGE priv) {
|
||||
// TODO IMPLEMENT
|
||||
// TODO: Implement
|
||||
}
|
||||
|
||||
void DisableKfd() {
|
||||
// TODO IMPLEMENT
|
||||
// TODO: Implement
|
||||
}
|
||||
|
||||
void EnableKfd() {
|
||||
// TODO IMPLEMENT
|
||||
// TODO: Implement
|
||||
}
|
||||
|
||||
bool ReadDriverConfigValue(CONFIG_VALUE config, unsigned int& rValue) {
|
||||
@@ -145,7 +145,7 @@ bool GetCommandLineArguments(int argc, char **argv, CommandLineArguments& rArgs)
|
||||
if (c != 0)
|
||||
break;
|
||||
|
||||
/* If this option set a flag, do nothing else now. */
|
||||
/* If this option sets a flag, do nothing else. */
|
||||
if (long_options[option_index].flag != 0)
|
||||
continue;
|
||||
|
||||
|
||||
@@ -72,35 +72,34 @@ struct CommandLineArguments {
|
||||
#define MEM_WRITE 0x02
|
||||
#define MEM_EXECUTE 0x4
|
||||
|
||||
|
||||
|
||||
// @brief change console text color
|
||||
// @brief Change console text color
|
||||
void SetConsoleTextColor(TEXTCOLOR color);
|
||||
// @params delayCount : delay time in milliseconds
|
||||
void Delay(int delayCount);
|
||||
// @brief replacement for windows VirtualAlloc func
|
||||
// @brief Replacement for windows VirtualAlloc func
|
||||
void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = MEM_READ | MEM_WRITE);
|
||||
// @brief replacement for windows FreeVirtual func
|
||||
// @brief Replacement for windows FreeVirtual func
|
||||
bool VirtualFreeMemory(void *address, unsigned int size);
|
||||
// @brief retrieve the last error number
|
||||
// @brief Retrieve the last error number
|
||||
HSAuint64 GetLastErrorNo();
|
||||
|
||||
HSAint64 AtomicInc(volatile HSAint64* pValue);
|
||||
|
||||
void MemoryBarrier();
|
||||
|
||||
// @brief: runs the selected test case number of times required, each in a separate process
|
||||
// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want
|
||||
// to run all tests in a test case: TestCase.* and so on
|
||||
// @params numOfProcesses : how many processes to run in parallel
|
||||
// @params runsPerProcess : how many iteration a test should do per process, must be a positive number
|
||||
/* @brief: Runs the selected test case number of times required, each in a separate process
|
||||
* @params testToRun : Can be a specific test testcase like TestCase.TestName or if you want
|
||||
* to run all tests in a test case: TestCase.* and so on
|
||||
* @params numOfProcesses : How many processes to run in parallel
|
||||
* @params runsPerProcess : How many iteration a test should do per process, must be a positive number
|
||||
*/
|
||||
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1);
|
||||
|
||||
HSAuint64 GetSystemTickCountInMicroSec();
|
||||
|
||||
/**Put the system to S3/S4 power state and bring it back to S0.
|
||||
@return 'true' on success, 'false' on failure.
|
||||
*/
|
||||
/* Put the system to S3/S4 power state and bring it back to S0.
|
||||
* @return 'true' on success, 'false' on failure.
|
||||
*/
|
||||
bool SuspendAndWakeUp();
|
||||
|
||||
void AcquirePrivilege(OS_PRIVILEGE priv);
|
||||
|
||||
@@ -53,7 +53,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const {
|
||||
|
||||
void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
|
||||
m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(calloc(1, SizeInBytes()));
|
||||
// verify that the memory is allocated successfully, cannot use assert here
|
||||
// Verify that the memory is allocated successfully, cannot use assert here
|
||||
EXPECT_NOTNULL(m_pPacketData);
|
||||
|
||||
InitPM4Header(m_pPacketData->header, IT_WRITE_DATA);
|
||||
@@ -249,8 +249,8 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i
|
||||
// 1st register is a part of the packet struct.
|
||||
m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);
|
||||
|
||||
/* allocating the size of the packet, since the packet is assembled from a struct
|
||||
* followed by an additional DWORD data
|
||||
/* Allocating the size of the packet, since the packet is assembled from a struct
|
||||
* followed by an additional dword data
|
||||
*/
|
||||
m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(malloc(m_packetSize));
|
||||
|
||||
@@ -296,7 +296,7 @@ PM4PartialFlushPacket::PM4PartialFlushPacket(void) {
|
||||
}
|
||||
|
||||
unsigned int PM4PartialFlushPacket::SizeInBytes() const {
|
||||
// for PARTIAL_FLUSH_CS packets, the last 2 DWORDS don't exist.
|
||||
// For PARTIAL_FLUSH_CS packets, the last 2 dwordS don't exist.
|
||||
return sizeof(PM4EVENT_WRITE) - sizeof(uint32_t)*2;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#include "pm4_pkt_struct_ai.h"
|
||||
#include "IndirectBuffer.hpp"
|
||||
|
||||
// @class PM4Packet: marks a group of all PM4 packets
|
||||
// @class PM4Packet: Marks a group of all PM4 packets
|
||||
class PM4Packet : public BasePacket {
|
||||
public:
|
||||
PM4Packet(void) {}
|
||||
@@ -47,9 +47,9 @@ class PM4Packet : public BasePacket {
|
||||
// @class PM4WriteDataPacket
|
||||
class PM4WriteDataPacket : public PM4Packet {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
PM4WriteDataPacket(void): m_ndw(0), m_pPacketData(NULL) {}
|
||||
// this contructor will also init the packet, no need for additional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1):
|
||||
m_ndw(1), m_pPacketData(NULL) {InitPacket(destBuf, &data1);}
|
||||
PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1, unsigned int data2):
|
||||
@@ -59,11 +59,11 @@ class PM4WriteDataPacket : public PM4Packet {
|
||||
}
|
||||
|
||||
virtual ~PM4WriteDataPacket(void);
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return m_pPacketData; }
|
||||
// @brief initialise the packet
|
||||
// @brief Initialise the packet
|
||||
void InitPacket(unsigned int *destBuf, unsigned int data1) {
|
||||
m_ndw = 1;
|
||||
InitPacket(destBuf, &data1);
|
||||
@@ -77,27 +77,27 @@ class PM4WriteDataPacket : public PM4Packet {
|
||||
|
||||
protected:
|
||||
unsigned int m_ndw;
|
||||
// PM4WRITE_DATA_CI struct contains all the packets data
|
||||
// PM4WRITE_DATA_CI struct contains all the packet's data
|
||||
PM4WRITE_DATA_CI *m_pPacketData;
|
||||
};
|
||||
|
||||
// @class PM4ReleaseMemoryPacket
|
||||
class PM4ReleaseMemoryPacket : public PM4Packet {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
PM4ReleaseMemoryPacket(void): m_pPacketData(NULL) {}
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
PM4ReleaseMemoryPacket(bool isPolling, uint64_t address, uint64_t data,
|
||||
bool is64bit = false, bool isTimeStamp = false): m_pPacketData(NULL) {
|
||||
InitPacket(isPolling, address, data, is64bit, isTimeStamp);
|
||||
}
|
||||
|
||||
virtual ~PM4ReleaseMemoryPacket(void);
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return m_packetSize; }
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return m_pPacketData; }
|
||||
// @brief initialise the packet
|
||||
// @brief Initialise the packet
|
||||
void InitPacket(bool isPolling, uint64_t address, uint64_t data,
|
||||
bool is64bit = false, bool isTimeStamp = false);
|
||||
|
||||
@@ -109,21 +109,21 @@ class PM4ReleaseMemoryPacket : public PM4Packet {
|
||||
// @class PM4IndirectBufPacket
|
||||
class PM4IndirectBufPacket : public PM4Packet {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
PM4IndirectBufPacket(void) {}
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
explicit PM4IndirectBufPacket(IndirectBuffer *pIb);
|
||||
|
||||
virtual ~PM4IndirectBufPacket(void) {}
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &m_packetData; }
|
||||
// @breif initialise the packet
|
||||
// @breif Initialise the packet
|
||||
void InitPacket(IndirectBuffer *pIb);
|
||||
|
||||
private:
|
||||
// PM4MEC_INDIRECT_BUFFER struct contains all the packets data
|
||||
// PM4MEC_INDIRECT_BUFFER struct contains all the packet's data
|
||||
PM4MEC_INDIRECT_BUFFER m_packetData;
|
||||
};
|
||||
|
||||
@@ -139,11 +139,11 @@ class PM4AcquireMemoryPacket : public PM4Packet {
|
||||
virtual const void *GetPacket() const { return &m_packetData; }
|
||||
|
||||
private:
|
||||
// PM4ACQUIRE_MEM struct contains all the packets data
|
||||
// PM4ACQUIRE_MEM struct contains all the packet's data
|
||||
PM4ACQUIRE_MEM m_packetData;
|
||||
};
|
||||
|
||||
// @class PM4SetShaderRegPacket packet that writes to consecutive registers starting at baseOffset.
|
||||
// @class PM4SetShaderRegPacket Packet that writes to consecutive registers starting at baseOffset.
|
||||
class PM4SetShaderRegPacket : public PM4Packet {
|
||||
public:
|
||||
PM4SetShaderRegPacket(void);
|
||||
@@ -152,9 +152,9 @@ class PM4SetShaderRegPacket : public PM4Packet {
|
||||
|
||||
virtual ~PM4SetShaderRegPacket(void);
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return m_packetSize; }
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return m_pPacketData; }
|
||||
|
||||
void InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs);
|
||||
@@ -162,7 +162,7 @@ class PM4SetShaderRegPacket : public PM4Packet {
|
||||
private:
|
||||
unsigned int m_packetSize;
|
||||
bool m_packetDataAllocated;
|
||||
// PM4SET_SH_REG struct contains all the packets data
|
||||
// PM4SET_SH_REG struct contains all the packet's data
|
||||
PM4SET_SH_REG *m_pPacketData;
|
||||
};
|
||||
|
||||
@@ -175,15 +175,15 @@ class PM4DispatchDirectPacket : public PM4Packet {
|
||||
|
||||
virtual ~PM4DispatchDirectPacket(void) {}
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &m_packetData; }
|
||||
|
||||
void InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit);
|
||||
|
||||
private:
|
||||
// PM4DISPATCH_DIRECT struct contains all the packets data
|
||||
// PM4DISPATCH_DIRECT struct contains all the packet's data
|
||||
PM4DISPATCH_DIRECT m_packetData;
|
||||
};
|
||||
|
||||
@@ -193,13 +193,13 @@ class PM4PartialFlushPacket : public PM4Packet {
|
||||
PM4PartialFlushPacket(void);
|
||||
virtual ~PM4PartialFlushPacket(void) {}
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &m_packetData; }
|
||||
|
||||
private:
|
||||
// PM4EVENT_WRITE struct contains all the packets data
|
||||
// PM4EVENT_WRITE struct contains all the packet's data
|
||||
PM4EVENT_WRITE m_packetData;
|
||||
};
|
||||
|
||||
@@ -209,9 +209,9 @@ class PM4NopPacket : public PM4Packet {
|
||||
PM4NopPacket(void);
|
||||
virtual ~PM4NopPacket(void) {}
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const;
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &m_packetData; }
|
||||
|
||||
private:
|
||||
|
||||
@@ -36,7 +36,8 @@ PM4Queue::~PM4Queue(void) {
|
||||
|
||||
unsigned int PM4Queue::Wptr() {
|
||||
/* Write pointer in dwords. Simulate 32-bit wptr that wraps at
|
||||
* queue size even on Vega10 and later chips with 64-bit wptr. */
|
||||
* queue size even on Vega10 and later chips with 64-bit wptr.
|
||||
*/
|
||||
return *m_Resources.Queue_write_ptr % (m_QueueBuf->Size() / 4);
|
||||
}
|
||||
|
||||
@@ -48,12 +49,13 @@ unsigned int PM4Queue::Rptr() {
|
||||
unsigned int PM4Queue::RptrWhenConsumed() {
|
||||
/* On PM4 queues Rptr is always 32-bit in dword units and wraps at
|
||||
* queue size. The expected value when all packets are consumed is
|
||||
* exactly the value returned by Wptr(). */
|
||||
* exactly the value returned by Wptr().
|
||||
*/
|
||||
return Wptr();
|
||||
}
|
||||
|
||||
void PM4Queue::SubmitPacket() {
|
||||
// m_pending Wptr is in DWORDs
|
||||
// m_pending Wptr is in dwords
|
||||
if (g_TestGPUFamilyId < FAMILY_AI) {
|
||||
// Pre-Vega10 uses 32-bit wptr and doorbell
|
||||
MemoryBarrier();
|
||||
|
||||
@@ -35,9 +35,9 @@ class PM4Queue : public BaseQueue {
|
||||
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
|
||||
virtual void SubmitPacket();
|
||||
|
||||
// @ return read pointer modulo queue size in DWORDs
|
||||
// @ return read pointer modulo queue size in dwords
|
||||
virtual unsigned int Rptr();
|
||||
// @ return write pointer modulo queue size in DWORDs
|
||||
// @ return write pointer modulo queue size in dwords
|
||||
virtual unsigned int Wptr();
|
||||
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
|
||||
virtual unsigned int RptrWhenConsumed();
|
||||
|
||||
@@ -38,7 +38,7 @@ class RDMATest : public KFDBaseComponentTest {
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // members
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
|
||||
@@ -27,8 +27,9 @@
|
||||
#include "SDMAPacket.hpp"
|
||||
#include "KFDTestUtil.hpp"
|
||||
|
||||
/* Byte/dword cound in many SDMA packets is 1-based in AI, meaning a
|
||||
* count of 1 is encoded as 0. */
|
||||
/* Byte/dword count in many SDMA packets is 1-based in AI, meaning a
|
||||
* count of 1 is encoded as 0.
|
||||
*/
|
||||
#define SDMA_COUNT(c) (g_TestGPUFamilyId < FAMILY_AI ? (c) : (c)-1)
|
||||
|
||||
SDMAWriteDataPacket::SDMAWriteDataPacket(void):
|
||||
@@ -95,7 +96,7 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns
|
||||
packetData = pSDMA;
|
||||
|
||||
while (surfsize > 0) {
|
||||
/* sdma support maximum 0x3fffe0 byte in one copy, take 2M here */
|
||||
/* SDMA support maximum 0x3fffe0 byte in one copy, take 2M here */
|
||||
if (surfsize > TWO_MEG)
|
||||
size = TWO_MEG;
|
||||
else
|
||||
@@ -150,7 +151,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in
|
||||
pSDMA->HEADER_UNION.op = SDMA_OP_CONST_FILL;
|
||||
pSDMA->HEADER_UNION.sub_op = 0;
|
||||
|
||||
/* If Both size and address are DW aligned, then use DW fill */
|
||||
/* If both size and address are DW aligned, then use DW fill */
|
||||
if (!(copy_size & 0x3) && !((HSAuint64)dst & 0x3))
|
||||
pSDMA->HEADER_UNION.fillsize = 2; /* DW Fill */
|
||||
else
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "BasePacket.hpp"
|
||||
#include "sdma_pkt_struct.h"
|
||||
|
||||
// @class SDMSPacket: marks a group of all SDMA packets
|
||||
// @class SDMAPacket: Marks a group of all SDMA packets
|
||||
class SDMAPacket : public BasePacket {
|
||||
public:
|
||||
SDMAPacket(void) {}
|
||||
@@ -38,44 +38,44 @@ class SDMAPacket : public BasePacket {
|
||||
|
||||
class SDMAWriteDataPacket : public SDMAPacket {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
SDMAWriteDataPacket(void);
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
SDMAWriteDataPacket(void* destAddr, unsigned int data);
|
||||
SDMAWriteDataPacket(void* destAddr, unsigned int ndw, void *data);
|
||||
|
||||
virtual ~SDMAWriteDataPacket(void);
|
||||
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return packetData; }
|
||||
// @breif initialise the packet
|
||||
// @breif Initialise the packet
|
||||
void InitPacket(void* destAddr, unsigned int data);
|
||||
void InitPacket(void* destAddr, unsigned int ndw, void *data);
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return packetSize; }
|
||||
|
||||
protected:
|
||||
// SDMA_PKT_WRITE_UNTILED struct contains all the packets data
|
||||
// SDMA_PKT_WRITE_UNTILED struct contains all the packet's data
|
||||
SDMA_PKT_WRITE_UNTILED *packetData;
|
||||
unsigned int packetSize;
|
||||
};
|
||||
|
||||
class SDMACopyDataPacket : public SDMAPacket {
|
||||
public:
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
SDMACopyDataPacket(void *dest, void *src, unsigned int size);
|
||||
SDMACopyDataPacket(void *const dst[], void *src, int n, unsigned int surfsize);
|
||||
|
||||
virtual ~SDMACopyDataPacket(void);
|
||||
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return packetData; }
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return packetSize; }
|
||||
|
||||
protected:
|
||||
// SDMA_PKT_COPY_LINEAR struct contains all the packets data
|
||||
// SDMA_PKT_COPY_LINEAR struct contains all the packet's data
|
||||
SDMA_PKT_COPY_LINEAR *packetData;
|
||||
|
||||
unsigned int packetSize;
|
||||
@@ -83,19 +83,19 @@ class SDMACopyDataPacket : public SDMAPacket {
|
||||
|
||||
class SDMAFillDataPacket : public SDMAPacket {
|
||||
public:
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
SDMAFillDataPacket(void *dest, unsigned int data, unsigned int size);
|
||||
|
||||
virtual ~SDMAFillDataPacket(void);
|
||||
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return m_PacketData; }
|
||||
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return m_PacketSize; }
|
||||
|
||||
protected:
|
||||
// SDMA_PKT_CONSTANT_FILL struct contains all the packets data
|
||||
// SDMA_PKT_CONSTANT_FILL struct contains all the packet's data
|
||||
SDMA_PKT_CONSTANT_FILL *m_PacketData;
|
||||
|
||||
unsigned int m_PacketSize;
|
||||
@@ -103,41 +103,41 @@ class SDMAFillDataPacket : public SDMAPacket {
|
||||
|
||||
class SDMAFencePacket : public SDMAPacket {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
SDMAFencePacket(void);
|
||||
// this contructor will also init the packet, no need for adittional calls
|
||||
// This contructor will also init the packet, no need for additional calls
|
||||
SDMAFencePacket(void* destAddr, unsigned int data);
|
||||
|
||||
virtual ~SDMAFencePacket(void);
|
||||
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &packetData; }
|
||||
// @brief initialise the packet
|
||||
// @brief Initialise the packet
|
||||
void InitPacket(void* destAddr, unsigned int data);
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_FENCE ); }
|
||||
|
||||
protected:
|
||||
// SDMA_PKT_FENCE struct contains all the packets data
|
||||
// SDMA_PKT_FENCE struct contains all the packet's data
|
||||
SDMA_PKT_FENCE packetData;
|
||||
};
|
||||
|
||||
class SDMATrapPacket : public SDMAPacket {
|
||||
public:
|
||||
// empty constructor, befor using the packet call the init func
|
||||
// Empty constructor, before using the packet call the init func
|
||||
explicit SDMATrapPacket(unsigned int eventID = 0);
|
||||
|
||||
virtual ~SDMATrapPacket(void);
|
||||
|
||||
// @returns a pointer to the packet
|
||||
// @returns Pointer to the packet
|
||||
virtual const void *GetPacket() const { return &packetData; }
|
||||
// @brief initialise the packet
|
||||
// @brief Initialise the packet
|
||||
void InitPacket(unsigned int eventID);
|
||||
// @returns the packet size in bytes
|
||||
// @returns Packet size in bytes
|
||||
virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_TRAP); }
|
||||
|
||||
protected:
|
||||
// SDMA_PKT_TRAP struct contains all the packets data
|
||||
// SDMA_PKT_TRAP struct contains all the packet's data
|
||||
SDMA_PKT_TRAP packetData;
|
||||
};
|
||||
|
||||
|
||||
@@ -32,20 +32,22 @@ SDMAQueue::~SDMAQueue(void) {
|
||||
|
||||
unsigned int SDMAQueue::Wptr() {
|
||||
/* In SDMA queues write pointers are saved in bytes, convert the
|
||||
* wptr value to DWORD to fit the way BaseQueue works. On Vega10
|
||||
* wptr value to dword to fit the way BaseQueue works. On Vega10
|
||||
* the write ptr is 64-bit. We only read the low 32 bit (assuming
|
||||
* the queue buffer is smaller than 4GB) and modulo divide by the
|
||||
* queue size to simulate a 32-bit read pointer. */
|
||||
* queue size to simulate a 32-bit read pointer.
|
||||
*/
|
||||
return (*m_Resources.Queue_write_ptr % m_QueueBuf->Size()) /
|
||||
sizeof(unsigned int);
|
||||
}
|
||||
|
||||
unsigned int SDMAQueue::Rptr() {
|
||||
/* In SDMA queues read pointers are saved in bytes, convert the
|
||||
* read value to DWORD to fit the way BaseQueue works. On Vega10
|
||||
* read value to dword to fit the way BaseQueue works. On Vega10
|
||||
* the read ptr is 64-bit. We only read the low 32 bit (assuming
|
||||
* the queue buffer is smaller than 4GB) and modulo divide by the
|
||||
* queue size to simulate a 32-bit read pointer. */
|
||||
* queue size to simulate a 32-bit read pointer.
|
||||
*/
|
||||
return (*m_Resources.Queue_read_ptr % m_QueueBuf->Size()) /
|
||||
sizeof(unsigned int);
|
||||
}
|
||||
@@ -53,12 +55,13 @@ unsigned int SDMAQueue::Rptr() {
|
||||
unsigned int SDMAQueue::RptrWhenConsumed() {
|
||||
/* Rptr is same size and byte units as Wptr. Here we only care
|
||||
* about the low 32-bits. When all packets are consumed, read and
|
||||
* write pointers should have the same value. */
|
||||
* write pointers should have the same value.
|
||||
*/
|
||||
return *m_Resources.Queue_write_ptr;
|
||||
}
|
||||
|
||||
void SDMAQueue::SubmitPacket() {
|
||||
// m_pending Wptr is in DWORDs
|
||||
// m_pending Wptr is in dwords
|
||||
if (g_TestGPUFamilyId < FAMILY_AI) {
|
||||
// Pre-Vega10 uses 32-bit wptr and doorbell
|
||||
unsigned int wPtrInBytes = m_pendingWptr * sizeof(unsigned int);
|
||||
|
||||
@@ -31,15 +31,15 @@ class SDMAQueue : public BaseQueue {
|
||||
SDMAQueue(void);
|
||||
virtual ~SDMAQueue(void);
|
||||
|
||||
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
|
||||
// @brief Update queue write pointer and set the queue doorbell to the queue write pointer
|
||||
virtual void SubmitPacket();
|
||||
|
||||
protected:
|
||||
// @ return write pointer modulo queue size in DWORDs
|
||||
// @ return Write pointer modulo queue size in dwords
|
||||
virtual unsigned int Wptr();
|
||||
// @ return read pointer modulo queue size in DWORDs
|
||||
// @ return Read pointer modulo queue size in dwords
|
||||
virtual unsigned int Rptr();
|
||||
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
|
||||
// @ return Expected m_Resources.Queue_read_ptr when all packets are consumed
|
||||
virtual unsigned int RptrWhenConsumed();
|
||||
|
||||
virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_SDMA; }
|
||||
|
||||
Reference in New Issue
Block a user