kfdtest: Clean up comments

Consolidate style (use /* */ for multi-line), fix typos,
use dword instad of DWORD/DWord

Change-Id: I620e45c1687550db41127e45641b7d79d28223a1
This commit is contained in:
Kent Russell
2018-08-14 09:52:31 -04:00
vanhempi db92d5af23
commit 414042abf7
51 muutettua tiedostoa jossa 427 lisäystä ja 414 poistoa
+1 -1
Näytä tiedosto
@@ -45,7 +45,7 @@ unsigned int AqlQueue::RptrWhenConsumed() {
}
void AqlQueue::SubmitPacket() {
// m_pending Wptr is in DWORDs
// m_pending Wptr is in dwords
*m_Resources.Queue_write_ptr = m_pendingWptr;
*(m_Resources.Queue_DoorBell) = Wptr();
}
+5 -5
Näytä tiedosto
@@ -31,14 +31,14 @@ class AqlQueue : public BaseQueue {
AqlQueue();
virtual ~AqlQueue();
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
// @brief Updates queue write pointer and sets the queue doorbell to the queue write pointer
virtual void SubmitPacket();
// @ return read pointer are in DWORDs
// @return Read pointer in dwords
virtual unsigned int Rptr();
// @ return write pointer are in DWORDs
// @return Write pointer in dwords
virtual unsigned int Wptr();
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
// @return Expected m_Resources.Queue_read_ptr when all packets are consumed
virtual unsigned int RptrWhenConsumed();
protected:
@@ -47,4 +47,4 @@ class AqlQueue : public BaseQueue {
virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE_AQL; }
};
#endif
#endif // __KFD_AQL_QUEUE__H__
+7 -7
Näytä tiedosto
@@ -25,8 +25,8 @@
#define __KFD_BASE_PACKET__H__
/**
* all packets profiles must be defined here
* every type defined here has sub-types
* All packets profiles must be defined here
* Every type defined here has sub-types
*/
enum PACKETTYPE {
PACKETTYPE_PM4,
@@ -40,13 +40,13 @@ class BasePacket {
BasePacket(void) {}
virtual ~BasePacket(void) {}
// @returns the packet type
// @returns Packet type
virtual PACKETTYPE PacketType() const = 0;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const = 0;
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const = 0;
// @returns the packet size in DWORDS
// @returns Packet size in dwordS
unsigned int SizeInDWords() const { return SizeInBytes()/sizeof(unsigned int); }
void Dump() const;
@@ -54,4 +54,4 @@ class BasePacket {
protected:
};
#endif
#endif // __KFD_BASE_PACKET__H__
+5 -5
Näytä tiedosto
@@ -41,7 +41,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
HSAKMT_STATUS status;
if (m_QueueBuf != NULL) {
// queue already exist, one queue per object
// Queue already exists, one queue per object
Destroy();
}
@@ -78,7 +78,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
status = HSAKMT_STATUS_ERROR;
}
// needs to match the queue write ptr
// Needs to match the queue write ptr
m_pendingWptr = 0;
m_pendingWptr64 = 0;
m_Node = NodeId;
@@ -137,7 +137,7 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
unsigned int queueSizeInDWord = m_QueueBuf->Size() / sizeof(uint32_t);
if (writePtr + packetSizeInDwords > queueSizeInDWord) {
// wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
// Wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
dwordsRequired += queueSizeInDWord - writePtr;
}
@@ -147,14 +147,14 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
ASSERT_GE(queueSizeInDWord, packetSizeInDwords) << "Cannot add a packet, packet size too large";
if (writePtr + packetSizeInDwords >= queueSizeInDWord) {
// wraparound
// Wraparound
while (writePtr + packetSizeInDwords > queueSizeInDWord) {
m_QueueBuf->As<unsigned int *>()[writePtr] = CMD_NOP;
writePtr = (writePtr + 1) % queueSizeInDWord;
writePtr64++;
}
// not updating Wptr since we might want to do place packet without submission
// Not updating Wptr since we might want to place the packet without submission
m_pendingWptr = (writePtr % queueSizeInDWord);
m_pendingWptr64 = writePtr64;
}
+12 -12
Näytä tiedosto
@@ -48,10 +48,10 @@ class BaseQueue {
HSAuint64 *pointers = NULL);
/** Update the queue.
* @see hsaKmtUpdateQueue
* @param percent the new queue percentage
* @param priority the new queue priority
* @param percent New queue percentage
* @param priority New queue priority
* @param nullifyBuffer
* if 'true', set the new buffer address to NULL and the size to 0. Otherwise
* If 'true', set the new buffer address to NULL and the size to 0. Otherwise
* don't change the queue buffer address/size.
*/
virtual HSAKMT_STATUS Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer);
@@ -64,17 +64,17 @@ class BaseQueue {
* Note that all packets being consumed is not the same as all packets being processed.
*/
virtual void Wait4PacketConsumption();
/** @brief place packet and submit it in one go
/** @brief Place packet and submit it in one function
*/
virtual void PlaceAndSubmitPacket(const BasePacket &packet);
/** @brief copy packet to queue and update write pointer
/** @brief Copy packet to queue and update write pointer
*/
virtual void PlacePacket(const BasePacket &packet);
/** @brief update queue write pointer and sets the queue doorbell to the queue write pointer
/** @brief Update queue write pointer and set the queue doorbell to the queue write pointer
*/
virtual void SubmitPacket() = 0;
/** @brief checkes if all packets in queue already processed
* compares queue read and write pointers
/** @brief Check if all packets in queue are already processed
* Compare queue read and write pointers
*/
bool AllPacketsSubmitted();
@@ -100,11 +100,11 @@ class BaseQueue {
HsaMemoryBuffer *m_QueueBuf;
unsigned int m_Node;
// @ return write pointer modulo queue size in DWORDs
// @return Write pointer modulo queue size in dwords
virtual unsigned int Wptr() = 0;
// @ return read pointer modulo queue size in DWORDs
// @return Read pointer modulo queue size in dwords
virtual unsigned int Rptr() = 0;
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
// @return Expected m_Resources.Queue_read_ptr when all packets consumed
virtual unsigned int RptrWhenConsumed() = 0;
virtual PACKETTYPE PacketTypeSupported() = 0;
@@ -131,4 +131,4 @@ class QueueArray {
void Destroy();
};
#endif
#endif // __KFD_BASE_QUEUE__H__
+12 -12
Näytä tiedosto
@@ -90,7 +90,7 @@ void Dispatch::Sync(unsigned int timeout) {
ASSERT_SUCCESS(hsaKmtWaitOnEvent(m_pEop, timeout));
}
// returning with status in order to allow to take actions before proc termination
// Returning with status in order to allow actions to be performed before process termination
int Dispatch::SyncWithStatus(unsigned int timeout) {
int stat;
@@ -103,7 +103,7 @@ void Dispatch::BuildIb() {
SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);
// starts at COMPUTE_START_X
// Starts at COMPUTE_START_X
const unsigned int COMPUTE_DISPATCH_DIMS_VALUES[] = {
0, // START_X
0, // START_Y
@@ -138,14 +138,14 @@ void Dispatch::BuildIb() {
pgmRsrc2
};
// starts at COMPUTE_PGM_LO
// Starts at COMPUTE_PGM_LO
const unsigned int COMPUTE_PGM_VALUES_GFX8[] = {
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
| (is_dgpu() ? 0 : (1<<8)) // including PGM_ATC=?
};
// starts at COMPUTE_PGM_LO
// Starts at COMPUTE_PGM_LO
const unsigned int COMPUTE_PGM_VALUES_GFX9[] = {
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
@@ -156,17 +156,17 @@ void Dispatch::BuildIb() {
static_cast<uint32_t>(m_scratch_base >> 40)
};
// starts at COMPUTE_RESOURCE_LIMITS
// Starts at COMPUTE_RESOURCE_LIMITS
const unsigned int COMPUTE_RESOURCE_LIMITS[] = {
0, // COMPUTE_RESOURCE_LIMITS
};
// starts at COMPUTE_TMPRING_SIZE
// Starts at COMPUTE_TMPRING_SIZE
const unsigned int COMPUTE_TMPRING_SIZE[] = {
m_ComputeTmpringSize, // COMPUTE_TMPRING_SIZE
};
// starts at COMPUTE_RESTART_X
// Starts at COMPUTE_RESTART_X
const unsigned int COMPUTE_RESTART_VALUES[] = {
0, // COMPUTE_RESTART_X
0, // COMPUTE_RESTART_Y
@@ -174,13 +174,13 @@ void Dispatch::BuildIb() {
0 // COMPUTE_THREAD_TRACE_ENABLE
};
// starts at COMPUTE_USER_DATA_0
// Starts at COMPUTE_USER_DATA_0
const unsigned int COMPUTE_USER_DATA_VALUES[] = {
// Reg name - use in KFDtest - use in ABI
arg0, // COMPUTE_USER_DATA_0 - arg0 - resource descriptor for the scratch buffer - 1st DWORD
arg1, // COMPUTE_USER_DATA_1 - arg1 - resource descriptor for the scratch buffer - 2nd DWORD
arg2, // COMPUTE_USER_DATA_2 - arg2 - resource descriptor for the scratch buffer - 3rd DWORD
arg3, // COMPUTE_USER_DATA_3 - arg3 - resource descriptor for the scratch buffer - 4th DWORD
arg0, // COMPUTE_USER_DATA_0 - arg0 - resource descriptor for the scratch buffer - 1st dword
arg1, // COMPUTE_USER_DATA_1 - arg1 - resource descriptor for the scratch buffer - 2nd dword
arg2, // COMPUTE_USER_DATA_2 - arg2 - resource descriptor for the scratch buffer - 3rd dword
arg3, // COMPUTE_USER_DATA_3 - arg3 - resource descriptor for the scratch buffer - 4th dword
static_cast<uint32_t>(m_scratch_base), // COMPUTE_USER_DATA_4 - flat_scratch_lo
static_cast<uint32_t>(m_scratch_base >> 32), // COMPUTE_USER_DATA_4 - flat_scratch_hi
0, // COMPUTE_USER_DATA_6 - - AQL queue address, low part
@@ -27,7 +27,6 @@
bool Ok2Run(unsigned int testProfile) {
bool testMatchProfile = true;
if ((testProfile & g_TestRunProfile) == 0) {
// display msg to notify a test that is not running
WARN() << "Test is skipped beacuse profile does not match current run mode" << std::endl;
testMatchProfile = false;
}
@@ -35,11 +34,10 @@ bool Ok2Run(unsigned int testProfile) {
return testMatchProfile;
}
// This predication is used when specific HW capabilites must exist for the test to succeed.
// This predication is used when specific HW capabilities must exist for the test to succeed.
bool TestReqEnvCaps(unsigned int envCaps) {
bool testMatchEnv = true;
if ((envCaps & g_TestENVCaps) != envCaps) {
// display msg to notify a test that is not running
WARN() << "Test is skipped due to HW capability issues" << std::endl;
testMatchEnv = false;
}
@@ -47,12 +45,11 @@ bool TestReqEnvCaps(unsigned int envCaps) {
return testMatchEnv;
}
// This predication is used when specific HW capabilites must abscent for the test to succeed.
// e.g testing capabilites not supported by HW scheduling
// This predication is used when specific HW capabilities must be absent for the test to succeed.
// e.g Testing capabilities not supported by HW scheduling
bool TestReqNoEnvCaps(unsigned int envCaps) {
bool testMatchEnv = true;
if ((envCaps & g_TestENVCaps) != 0) {
// display msg to notify a test that is not running
WARN() << "Test is skipped due to HW capability issues" << std::endl;
testMatchEnv = false;
}
@@ -29,24 +29,24 @@
#include "KFDTestFlags.hpp"
enum LOGTYPE {
LOGTYPE_INFO, // msg header in green
LOGTYPE_INFO, // msg header in green
LOGTYPE_WARNING // msg header in yellow
};
class KFDLog{};
std::ostream& operator << (KFDLog log, LOGTYPE level);
// @brief log additional details, to be displayed in the same format as other google test outputs
// currently not supported by google test
// should be used like cout: LOG() << "message" << value << std::endl;
// @brief Log additional details, to be displayed in the same format as other google test outputs
// Currently not supported by gtest
// Should be used like cout: LOG() << "message" << value << std::endl;
#define LOG() KFDLog() << LOGTYPE_INFO
#define WARN() KFDLog() << LOGTYPE_WARNING
// all test MUST be in a try catch since google test flag to throw exception on any fatal fail is on
// All tests MUST be in a try catch since the gtest flag to throw an exception on any fatal failure is enabled
#define TEST_START(testProfile) if (Ok2Run(testProfile)) try {
#define TEST_END } catch (...) {}
// used to wrape setup and teardown functions, anything that is build-in gtest and is not a test
// Used to wrap setup and teardown functions, anything that is built-in gtest and is not a test
#define ROUTINE_START try {
#define ROUTINE_END }catch(...) {}
@@ -59,13 +59,13 @@ std::ostream& operator << (KFDLog log, LOGTYPE level);
#define ASSERT_NOTNULL(_val) ASSERT_NE((void *)NULL, _val)
#define EXPECT_NOTNULL(_val) EXPECT_NE((void *)NULL, _val)
// @brief determines if its ok to run a test given input flags
// @brief Determines if it is ok to run a test given input flags
bool Ok2Run(unsigned int testProfile);
// @brief checks if all HW capabilities needed for a test to run exist
// @brief Checks if all HW capabilities needed for a test to run exist
bool TestReqEnvCaps(unsigned int hwCaps);
// @brief checks if all HW capabilities that prevents a test from running are non existing
// @brief Checks if all HW capabilities that prevents a test from running are absent
bool TestReqNoEnvCaps(unsigned int hwCaps);
#endif
+10 -10
Näytä tiedosto
@@ -28,32 +28,32 @@
#include "KFDTestUtil.hpp"
/** @class IndirectBuffer
* when working with indirect buffer, create IndirectBuffer, fill it with all the packets you want
* When working with an indirect buffer, create IndirectBuffer, fill it with all the packets you want,
* create an indirect packet to point to it, and submit the packet to queue
*/
class IndirectBuffer {
public:
// @param[size] queue max size in DWords
// @param[type] packets type allowed in queue
// @param[size] Queue max size in DWords
// @param[type] Packet type allowed in queue
IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId);
~IndirectBuffer(void);
// @brief add packet to queue, all validations are done with gtest ASSERT and EXPECT
// @brief Add packet to queue, all validations are done with gtest ASSERT and EXPECT
void AddPacket(const BasePacket &packet);
// @returns the actual size of the indirect queue in DWord, equivalent to write pointer
// @returns Actual size of the indirect queue in DWords, equivalent to write pointer
unsigned int SizeInDWord() { return m_ActualSize; }
// @returns indirect queue address
// @returns Indirect queue address
unsigned int *Addr() { return m_IndirectBuf->As<unsigned int*>(); }
protected:
// how many packets in queue
// Number of packets in the queue
unsigned int m_NumOfPackets;
// max size of queue in DWords
// Max size of queue in DWords
unsigned int m_MaxSize;
// current size of queue in DWords
// Current size of queue in DWords
unsigned int m_ActualSize;
HsaMemoryBuffer *m_IndirectBuf;
// defines what packets are supported in this queue
// What packets are supported in this queue
PACKETTYPE m_PacketTypeAllowed;
};
@@ -38,9 +38,9 @@ void KFDBaseComponentTest::SetUp() {
memset( &m_SystemProperties, 0, sizeof(m_SystemProperties) );
memset(m_RenderNodes, 0, sizeof(m_RenderNodes));
/** in order to be correctly testing the KFD interfaces and ensure
/** In order to be correctly testing the KFD interfaces and ensure
* that the KFD acknowledges relevant node parameters
* for the rest of the tests and used for more specific topology tests
* for the rest of the tests and used for more specific topology tests,
* call to GetSystemProperties for a system snapshot of the topology here
*/
ASSERT_SUCCESS(hsaKmtAcquireSystemProperties(&m_SystemProperties));
@@ -53,8 +53,8 @@ void KFDBaseComponentTest::SetUp() {
m_MemoryFlags.ui32.CachePolicy = HSA_CACHING_NONCACHED; // Non cached
m_MemoryFlags.ui32.ReadOnly = 0; // Read/Write
m_MemoryFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB; // 4KB page
m_MemoryFlags.ui32.HostAccess = 1; // host accessible
m_MemoryFlags.ui32.NoSubstitute = 0; // fall back to node 0 if needed
m_MemoryFlags.ui32.HostAccess = 1; // Host accessible
m_MemoryFlags.ui32.NoSubstitute = 0; // Fall back to node 0 if needed
m_MemoryFlags.ui32.GDSMemory = 0;
m_MemoryFlags.ui32.Scratch = 0;
@@ -94,7 +94,7 @@ HSAuint64 KFDBaseComponentTest::GetSysMemSize() {
for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
nodeProps = m_NodeInfo.GetNodeProperties(node);
if (nodeProps != NULL && nodeProps->NumCPUCores > 0 && nodeProps->NumMemoryBanks > 0) {
/* For NUMA nodes, memory is distributed among differnt nodes.
/* For NUMA nodes, memory is distributed among different nodes.
* Compute total system memory size. KFD driver also computes
* the system memory (si_meminfo) similarly
*/
@@ -63,17 +63,13 @@ class KFDBaseComponentTest : public testing::Test {
HsaMemFlags m_MemoryFlags;
HsaNodeInfo m_NodeInfo;
// @brief SetUpTestCase function run before the first test that uses
// KFDOpenCloseKFDTest class fixture, and opens KFD.
// @brief Executed before the first test that uses KFDOpenCloseKFDTest class and opens KFD.
static void SetUpTestCase();
// @brief TearDownTestCase function run after the last test from
// KFDOpenCloseKFDTest class fixture and calls close KFD.
// @brief Executed after the last test from KFDOpenCloseKFDTest class and closes KFD.
static void TearDownTestCase();
// @brief SetUp function run before every test that uses
// KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
// @brief Executed before every test that uses KFDOpenCloseKFDTest class and sets all common settings for the tests.
virtual void SetUp();
// @brief TearDown function run after every test that uses
// KFDOpenCloseKFDTest class fixture.
// @brief Executed after every test that uses KFDOpenCloseKFDTest class.
virtual void TearDown();
};
+7 -6
Näytä tiedosto
@@ -89,9 +89,10 @@ void KFDCWSRTest::SetUp() {
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
// TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not
// set the destination based on workitem_id.
// Set the wave_num to 1 for now as a workarpound. Will set it to 8 or even 256 in the future.
/* TODO: In the ISA, the workitem_id is not obtained as expected, so the destination cannot
* be set based on workitem_id. Set the wave_num to 1 for now as a workarpound.
* Will set it to 8 or even 256 in the future.
*/
wave_number = 1;
ROUTINE_END
@@ -149,15 +150,15 @@ TEST_F(KFDCWSRTest, BasicTest) {
iter[0] = 40000000;
iter[1] = 20000000;
// submit the shader, queue1
// Submit the shader, queue1
dispatch1->Submit(queue1);
// Create queue2 during queue1 still running will trigger the CWSR
EXPECT_SUCCESS(queue2.Create(defaultGPUNode));
// submit the shader
// Submit the shader
dispatch2->Submit(queue2);
dispatch1->Sync();
dispatch2->Sync();
// ensure all the waves complete as expected
// Ensure all the waves complete as expected
int i;
for (i = 0 ; i < wave_number; ++i) {
if (result[i] != iter[0]) {
+1 -1
Näytä tiedosto
@@ -39,7 +39,7 @@ class KFDCWSRTest : public KFDBaseComponentTest {
virtual void SetUp();
virtual void TearDown();
protected: // members
protected: // Members
unsigned wave_number;
IsaGenerator* m_pIsaGen;
};
+5 -5
Näytä tiedosto
@@ -79,7 +79,7 @@ void KFDDBGTest::TearDown() {
delete m_pIsaGen;
m_pIsaGen = NULL;
/* reset the user trap handler */
/* Reset the user trap handler */
hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);
KFDBaseComponentTest::TearDown();
@@ -118,7 +118,7 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
ASSERT_SUCCESS(queue_flush.Create(defaultGPUNode));
// Set Address Watch Params
// TODO: Set atchMode[1] to Atomic in case we want to test this mode.
// TODO: Set WatchMode[1] to Atomic in case we want to test this mode.
HSA_DBG_WATCH_MODE WatchMode[2];
HSAuint64 WatchAddress[2];
@@ -153,9 +153,9 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
dispatch.SetArgs(dstBuf.As<void*>(), reinterpret_cast<void *>(secDstBuf));
dispatch.SetDim(1, 1, 1);
// TODO: use Memory ordering rules w/ atomics
// for host-GPU memory syncs.
// set to: std::memory_order_seq_cst
/* TODO: Use Memory ordering rules w/ atomics for host-GPU memory syncs.
* Set to std::memory_order_seq_cst
*/
dispatch.Submit(queue);
+1 -1
Näytä tiedosto
@@ -38,7 +38,7 @@ class KFDDBGTest : public KFDBaseComponentTest {
virtual void SetUp();
virtual void TearDown();
protected: // members
protected: // Members
IsaGenerator* m_pIsaGen;
};
+2 -2
Näytä tiedosto
@@ -41,7 +41,7 @@ void KFDEventTest::SetUp() {
void KFDEventTest::TearDown() {
ROUTINE_START
// not all tests create event, destroy only if there is one
// Not all tests create an event, destroy only if there is one
if (m_pHsaEvent != NULL) {
// hsaKmtDestroyEvent moved to TearDown to make sure it is being called
EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pHsaEvent));
@@ -58,7 +58,7 @@ TEST_F(KFDEventTest, CreateDestroyEvent) {
ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_NodeInfo.HsaDefaultGPUNode(), &m_pHsaEvent));
EXPECT_NE(0, m_pHsaEvent->EventData.HWData2);
// destroy event is being called in test TearDown
// Destroy event is being called in test TearDown
TEST_END;
}
+2 -2
Näytä tiedosto
@@ -31,9 +31,9 @@ class KFDEventTest : public KFDBaseComponentTest {
KFDEventTest(void) {}
~KFDEventTest(void) {}
// @brief SetUp function runs before every test in KFDEventTest.
// @brief Executed before every test in KFDEventTest.
virtual void SetUp();
// @brief TearDown function runs after every test in KFDEventTest.
// @brief Executed after every test in KFDEventTest.
virtual void TearDown();
protected:
+29 -24
Näytä tiedosto
@@ -30,7 +30,7 @@
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
#define N_PROCESSES (8) /* number of processes running in parallel, at least 2 */
#define N_PROCESSES (8) /* Number of processes running in parallel, must be at least 2 */
#define ALLOCATE_BUF_SIZE_MB (64)
#define ALLOCATE_RETRY_TIMES (3)
@@ -70,7 +70,7 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui
break;
}
/* wait for 1 second to try allocate again */
/* Wait for 1 second to try allocate again */
sleep(1);
}
}
@@ -268,7 +268,7 @@ void KFDEvictTest::ForkChildProcesses(int nprocesses) {
void KFDEvictTest::WaitChildProcesses() {
if (m_IsParent) {
/* only run by parent process */
/* Only run by parent process */
int childStatus;
int childExitOkNum = 0;
int size = m_ChildPids.size();
@@ -286,7 +286,7 @@ void KFDEvictTest::WaitChildProcesses() {
ASSERT_EQ(childExitOkNum, size);
}
/* child process or parent process finished successfullly */
/* Child process or parent process finished successfully */
m_ChildStatus = HSAKMT_STATUS_SUCCESS;
}
@@ -296,19 +296,19 @@ void KFDEvictTest::WaitChildProcesses() {
*
* ALLOCATE_BUF_SIZE_MB buf allocation size
*
* number of buf is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
* buf is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
* Total vram all processes allocated: 8GB for 4GB Fiji, and 20GB for 16GB Vega10
*
* many times of eviction and restore will happen:
* ttm will evict buffers of another process if not enough free vram
* Eviction and restore will happen many times:
* ttm will evict buffers of another process if there is not enough free vram
* process restore will evict buffers of another process
*
* Sometimes the allocate may fail (maybe that is normal)
* Sometimes the allocation may fail (maybe that is normal)
* ALLOCATE_RETRY_TIMES max retry times to allocate
*
* This is basic test, no queue so vram are not used by GPU during test
* This is basic test with no queue, so vram is not used by the GPU during test
*
* Todo:
* TODO:
* - Synchronization between the processes, so they know for sure when
* they are done allocating memory
*/
@@ -345,7 +345,7 @@ TEST_F(KFDEvictTest, BasicTest) {
std::vector<void *> pBuffers;
AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);
/* allocate gfx vram size of at most one third system memory */
/* Allocate gfx vram size of at most one third system memory */
HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
amdgpu_bo_handle handle;
AllocAmdgpuBo(rn, size, handle);
@@ -365,12 +365,12 @@ TEST_F(KFDEvictTest, BasicTest) {
* until address buffer is filled with specific value 0x5678 by host program,
* then each wavefront fills value 0x5678 at corresponding result buffer and quit
*
* initial state:
* Initial state:
* s[0:1] - address buffer base address
* s[2:3] - result buffer base address
* s4 - workgroup id
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
* registers:
* Registers:
* v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
* v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
@@ -514,7 +514,7 @@ TEST_F(KFDEvictTest, QueueTest) {
const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
/* Skip test for chip it doesn't have CWSR, which the test depends on */
/* Skip test for chip if it doesn't have CWSR, which the test depends on */
if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties)) {
LOG() << std::hex << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
return;
@@ -538,7 +538,7 @@ TEST_F(KFDEvictTest, QueueTest) {
LOG() << "Skipping test: Not enough system memory available." << std::endl;
return;
}
/* assert all buffer address can be stored within one page
/* Assert all buffer address can be stored within one page
* because only one page host memory srcBuf is allocated
*/
ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));
@@ -559,7 +559,7 @@ TEST_F(KFDEvictTest, QueueTest) {
std::vector<void *> pBuffers;
AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);
/* allocate gfx vram size of at most one third system memory */
/* Allocate gfx vram size of at most one third system memory */
HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
amdgpu_bo_handle handle;
AllocAmdgpuBo(rn, size, handle);
@@ -583,27 +583,32 @@ TEST_F(KFDEvictTest, QueueTest) {
Dispatch dispatch0(isaBuffer);
dispatch0.SetArgs(localBufAddr, result);
dispatch0.SetDim(wavefront_num, 1, 1);
/* submit the packet and start shader */
/* Submit the packet and start shader */
dispatch0.Submit(pm4Queue);
/* doing evict/restore queue test for 5 seconds while queue is running */
/* Doing evict/restore queue test for 5 seconds while queue is running */
sleep(5);
/* LOG() << m_psName << "notify shader to quit" << std::endl; */
/* fill address buffer so shader quits */
/* Uncomment this line for debugging */
// LOG() << m_psName << "notify shader to quit" << std::endl;
/* Fill address buffer so shader quits */
addrBuffer.Fill(0x5678);
/* wait for shader to finish or timeout if shade has vm page fault */
/* Wait for shader to finish or timeout if shader has vm page fault */
dispatch0.SyncWithStatus(120000);
ASSERT_SUCCESS(pm4Queue.Destroy());
FreeAmdgpuBo(handle);
/* LOG() << m_psName << "free buffer" << std::endl; */
/* cleanup */
/* Uncomment this line for debugging */
// LOG() << m_psName << "free buffer" << std::endl;
/* Cleanup */
FreeBuffers(pBuffers, vramBufSize);
/* check if all wavefronts finish successfully */
/* Check if all wavefronts finished successfully */
for (i = 0; i < wavefront_num; i++)
ASSERT_EQ(0x5678, *(result + i));
+2 -2
Näytä tiedosto
@@ -38,7 +38,7 @@ class KFDEvictTest : public KFDLocalMemoryTest {
~KFDEvictTest(void) {
if (!m_IsParent) {
/* child process has to exit
/* Child process has to exit
* otherwise gtest will continue other tests
*/
exit(m_ChildStatus);
@@ -60,7 +60,7 @@ class KFDEvictTest : public KFDLocalMemoryTest {
void ForkChildProcesses(int nprocesses);
void WaitChildProcesses();
protected: // members
protected: // Members
std::string m_psName;
std::vector<pid_t> m_ChildPids;
HsaMemFlags m_Flags;
@@ -28,8 +28,6 @@
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
// All tests are marked by their serial number in the QCM FDD
void KFDExceptionTest::SetUp() {
ROUTINE_START
@@ -198,7 +196,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
TestMemoryException(defaultGPUNode, srcBuffer.As<HSAuint64>(),
(HSAuint64)pDst);
/* Wait enough time here to ensure this process got killed by kernel
/* Wait for enough time here to ensure this process got killed by kernel
* due to PPR exception.
*/
sleep(5);
@@ -213,8 +211,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
TEST_END
}
/* TODO: Same as previous test InvalidPPRWriteProtection
*/
/* TODO: Same as previous test InvalidPPRWriteProtection */
TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
TEST_START(TESTPROFILE_RUNALL);
@@ -249,7 +246,7 @@ TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
TestMemoryException(defaultGPUNode, (HSAuint64)pSrc,
dstBuffer.As<HSAuint64>());
/* Wait enough time here to ensure this process got killed by kernel
/* Wait for enough time here to ensure this process got killed by kernel
* due to PPR exception.
*/
sleep(5);
@@ -55,7 +55,7 @@ class KFDExceptionTest : public KFDBaseComponentTest {
unsigned int dimX = 1, unsigned int dimY = 1,
unsigned int dimZ = 1);
protected: // members
protected: // Members
pid_t m_ChildPid;
HSAKMT_STATUS m_ChildStatus;
@@ -83,8 +83,9 @@ TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
ASSERT_SUCCESS(hsaKmtRegisterGraphicsHandleToNodes(dmabufFd, &info,
1, nodes));
// DMA buffer handle and GEM handle are no longer needed, KFD
// should have taken a reference to the BO
/* DMA buffer handle and GEM handle are no longer needed, KFD
* should have taken a reference to the BO
*/
ASSERT_EQ(0, close(dmabufFd));
ASSERT_EQ(0, amdgpu_bo_free(handle));
+6 -7
Näytä tiedosto
@@ -60,7 +60,7 @@ KFDIPCTest::~KFDIPCTest(void) {
exit(0);
}
/* Imort shared Local Memory from parent process. Check for the pattern
/* Import shared Local Memory from parent process. Check for the pattern
* filled in by the parent process. Then fill a new pattern.
*/
void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
@@ -101,9 +101,9 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
ASSERT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
}
/* Fill a pattern in to Local Memory and share with the child process.
/* Fill a pattern into Local Memory and share with the child process.
* Then wait until Child process to exit and check for the new pattern
* fill in by the child process.
* filled in by the child process.
*/
void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd) {
@@ -213,7 +213,7 @@ TEST_F(KFDIPCTest, BasicTest) {
* dstBuf3[0x800-0x1000] is expected to be 0xAAAAAAAA
* and dstBuf4[0x0-0x1000] is expected to be 0xAAAAAAAA
*
* For this CMA test after copy only the first and the last of dstBuf is checked
* For this CMA test, after copying only the first and the last of dstBuf is checked
*/
static testMemoryDescriptor srcRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = {
@@ -490,7 +490,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachChildProcess(int defaultGPUNode, in
break;
}
/* Wait till the test is over */
/* Wait until the test is over */
memset(msg, 0, sizeof(msg));
if (read_non_block(readPipe, msg, 4) < 0) {
status = CMA_IPC_PIPE_ERROR;
@@ -523,7 +523,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachParentProcess(int defaultGPUNode, p
int testNo;
CMA_TEST_STATUS status;
/* Recevie buffer array from child and then initialize and fill in Local Buffer Array.
/* Receive buffer array from child and then initialize and fill in Local Buffer Array.
* READ_TEST: Copy remote buffer array into Local Buffer Array and then check
* for the new pattern.
* WRITE_TEST: Write Local Buffer Array into remote buffer array. Notify child to
@@ -615,7 +615,6 @@ TEST_F(KFDIPCTest, CrossMemoryAttachTest) {
ASSERT_EQ(pipe2(pipePtoC, O_NONBLOCK), 0);
/* Create a child process and share the above Local Memory with it */
m_ChildPid = fork();
if (m_ChildPid == 0 && hsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS) {
/* Child Process */
+8 -7
Näytä tiedosto
@@ -53,8 +53,9 @@ enum CMA_TEST_STATUS {
CMA_TEST_HSA_WRITE_FAIL
};
// @struct testMemoryDescriptor
// Describes test buffers for Cross Memory Attach Test.
/* @struct testMemoryDescriptor
* @brief Describes test buffers for Cross Memory Attach Test.
*/
struct testMemoryDescriptor {
CMA_MEM_TYPE m_MemType;
HSAuint64 m_MemSize;
@@ -76,9 +77,10 @@ struct testMemoryDescriptor {
~testMemoryDescriptor(){}
};
// @class KFDCMAArray
// Array of buffers that will be passed between the parent and child
// process for Cross memory read and write tests
/* @class KFDCMAArray
* @brief Array of buffers that will be passed between the parent and child
* process for Cross memory read and write tests
*/
class KFDCMAArray {
/* Used to store the actual buffer array */
HsaMemoryBuffer* m_MemArray[CMA_MEMORY_TEST_ARRAY_SIZE];
@@ -130,5 +132,4 @@ class KFDIPCTest : public KFDBaseComponentTest {
pid_t m_ChildPid;
};
#endif
#endif // __KFD_MEMORY_TEST__H__
@@ -28,8 +28,6 @@
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
// All tests are marked by their serial number in the QCM FDD
void KFDLocalMemoryTest::SetUp() {
ROUTINE_START
@@ -285,7 +283,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
unsigned value = 0;
memset(pages, 0, sizeof(pages));
for (order = 0; order <= maxOrder; order++) {
// At maxOrder, block sizes is 1/4 of available memory
// At maxOrder, block size is 1/4 of available memory
pages[order].nPages = 1UL << (maxOrder - order + 2);
// At order != 0, 1/2 the memory is already allocated
if (order > 0)
@@ -467,7 +465,7 @@ TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
}
if (dst_node != defaultGPUNode) {
/* at least one node should be defaultGPUNode */
/* At least one node should be defaultGPUNode */
src_node = defaultGPUNode;
} else {
for (auto node : gpuNodes) {
@@ -38,7 +38,7 @@ class KFDLocalMemoryTest : public KFDBaseComponentTest {
virtual void SetUp();
virtual void TearDown();
protected: // members
protected: // Members
IsaGenerator* m_pIsaGen;
};
+75 -66
Näytä tiedosto
@@ -130,10 +130,10 @@ void KFDMemoryTest::TearDown() {
#define GB(x) ((x) << 30)
/*
* try to map as much as possible system memory to gpu.
* lets see if kfd support 1TB memory correctly or not.
* And after this test case, we can observe if there is any sideeffect.
* NOTICE: there are memory usage limit checks in hsa/kfd according to the total
* Try to map as much as possible system memory to gpu
* to see if KFD supports 1TB memory correctly or not.
* After this test case, we can observe if there are any side effects.
* NOTICE: There are memory usage limit checks in hsa/kfd according to the total
* physical system memory.
*/
TEST_F(KFDMemoryTest, MMapLarge) {
@@ -187,19 +187,19 @@ TEST_F(KFDMemoryTest, MMapLarge) {
TEST_END
}
/* keep memory mapped to default node
/* Keep memory mapped to default node
* Keep mapping/unmapping memory to/from non-default node
* A shader running on default node consistantly access
* memory - make sure memory is always accessible on default,
* i.e., there is no gpu vm fault.
* A shader running on default node consistantly accesses
* memory - make sure memory is always accessible by default,
* i.e. there is no gpu vm fault.
* Synchronization b/t host program and shader:
* 1. host initialize src and dst buffer to 0
* 2. shader keep reading src buffer and check value
* 3. host write src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
* 4. shader write dst buffer to 0x5678 after src changed to 0x5678, quit
* 5. host program quit after dst becomes 0x5678
* Need at least two gpu nodes to run the test. The defaut node has to be a gfx9 node.
* Otherwise, test is skipped. Use kfdtest --node=$$ to specify the defaut node
* 1. Host initializes src and dst buffer to 0
* 2. Shader keeps reading src buffer and check value
* 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
* 4. Shader write dst buffer to 0x5678 after src changes to 0x5678, then quits
* 5. Host program quits after dst becomes 0x5678
* Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
* otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
* This test case is introduced as a side-result of investigation of SWDEV-134798, which
* is a gpu vm fault while running rocr conformance test. Here we try to simulate the
* same test behaviour.
@@ -250,7 +250,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, (i>>5)&1+1, mapNodes);
}
/* fill src buffer so shader quits */
/* Fill src buffer so shader quits */
srcBuffer.Fill(0x5678);
WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
ASSERT_EQ(*dstBuffer.As<uint32_t *>(), 0x5678);
@@ -258,7 +258,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
TEST_END
}
// basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
// Basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
TEST_F(KFDMemoryTest , MapMemoryToGPU) {
TEST_START(TESTPROFILE_RUNALL)
@@ -280,7 +280,7 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) {
TEST_END
}
// following tests are for hsaKmtAllocMemory with invalid params
// Following tests are for hsaKmtAllocMemory with invalid params
TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
TEST_START(TESTPROFILE_RUNALL)
@@ -299,7 +299,7 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
TEST_END
}
// basic test for hsaKmtAllocMemory
// Basic test for hsaKmtAllocMemory
TEST_F(KFDMemoryTest, MemoryAlloc) {
TEST_START(TESTPROFILE_RUNALL)
@@ -381,7 +381,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));
/* Create PM4 and SDMA queues before fork+COW to test queue
* eviction and restore */
* eviction and restore
*/
PM4Queue pm4Queue;
SDMAQueue sdmaQueue;
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
@@ -392,7 +393,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
/* First submit just so the queues are not empty, and to get the
* TLB populated (in case we need to flush TLBs somewhere after
* updating the page tables) */
* updating the page tables)
*/
Dispatch dispatch0(isaBuffer);
dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
dispatch0.Submit(pm4Queue);
@@ -410,7 +412,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
* make any write access to the stack because we want the
* parent to make the first write access and get a new copy. A
* busy loop is the safest way to do that, since any function
* call (e.g. sleep) would write to the stack. */
* call (e.g. sleep) would write to the stack.
*/
while (1)
{}
WARN() << "Shouldn't get here!" << std::endl;
@@ -419,13 +422,15 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
/* Parent process writes to COW page(s) and gets a new copy. MMU
* notifier needs to update the GPU mapping(s) for the test to
* pass. */
* pass.
*/
globalData = 0xD00BED00;
stackData[dstOffset] = 0xdeadbeef;
stackData[sdmaOffset] = 0xdeadbeef;
/* Terminate the child process before a possible test failure that
* would leave it spinning in the background indefinitely. */
* would leave it spinning in the background indefinitely.
*/
int status;
EXPECT_EQ(0, kill(pid, SIGTERM));
EXPECT_EQ(pid, waitpid(pid, &status, 0));
@@ -516,10 +521,11 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
TEST_END
}
// FlatScratchAccess
// Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
// will not work for multiple GPU nodes. For now test only one default GPU node.
// TODO: Generalize it to support multiple nodes
/* FlatScratchAccess
* Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
* will not work for multiple GPU nodes. For now test only one default GPU node.
* TODO: Generalize it to support multiple nodes
*/
#define SCRATCH_SLICE_SIZE 0x10000
#define SCRATCH_SLICE_NUM 3
@@ -558,24 +564,23 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
// Map everything for test below
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL));
// source & destination memory buffers
HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, defaultGPUNode);
HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, defaultGPUNode);
// Initialize the srcBuffer to some fixed value
srcMemBuffer.Fill(0x01010101);
// Initialize a buffer with a DWORD copy ISA
// Initialize a buffer with a dword copy ISA
m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
"ScratchCopyDword", isaBuffer);
const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);
// TODO: Add support to all GPU Nodes.
// The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
// also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
// HsaMemoryBuffer is now associated with a Node.
/* TODO: Add support to all GPU Nodes.
* The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
* also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
* HsaMemoryBuffer is now associated with a Node.
*/
if (pNodeProperties != NULL) {
// Get the aperture of the scratch buffer
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
@@ -585,7 +590,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
int numWaves = 4; // WAVES must be >= # SE
int waveSize = 1; // amount of space used by each wave in units of 256 dwords...
int waveSize = 1; // Amount of space used by each wave in units of 256 dwords
PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
@@ -595,25 +600,24 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
// Create a dispatch packet to copy
Dispatch dispatchSrcToScratch(isaBuffer);
// setup the dispatch packet
// Setup the dispatch packet
// Copying from the source Memory Buffer to the scratch buffer
dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
dispatchSrcToScratch.SetDim(1, 1, 1);
dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
// submit the packet
// Submit the packet
dispatchSrcToScratch.Submit(queue);
dispatchSrcToScratch.Sync();
// Create another dispatch packet to copy scratch buffer contents to destination buffer.
Dispatch dispatchScratchToDst(isaBuffer);
// set the arguments to copy from the scratch buffer
// to the destination buffer
// Set the arguments to copy from the scratch buffer to the destination buffer
dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
dispatchScratchToDst.SetDim(1, 1, 1);
dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
// submit the packet
// Submit the packet
dispatchScratchToDst.Submit(queue);
dispatchScratchToDst.Sync();
@@ -708,7 +712,7 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular
lastTestedSize = sizeMB;
}
/* Save the biggest allocated system buffer forsignal handling test */
/* Save the biggest allocated system buffer for signal handling test */
LOG() << "The biggest allocated system buffer is " << std::dec
<< lastTestedSize << "MB" << std::endl;
if (lastSize)
@@ -781,7 +785,8 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
* is small. For example, on a typical Carrizo platform, the biggest allocated
* system buffer could be more than 14G even though it only has 4G memory.
* In that situation, it will take too much time to finish the test, because of
* the onerous memory swap operation. So we limit the buffer size that way.*/
* the onerous memory swap operation. So we limit the buffer size that way.
*/
TEST_F(KFDMemoryTest, BigBufferStressTest) {
if (!is_dgpu()) {
LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
@@ -804,7 +809,8 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
BigBufferVRAM(defaultGPUNode, granularityMB, NULL);
/* Repeatedly allocate and map big buffers in system memory until it fails,
* then unmap and free them. */
* then unmap and free them.
*/
#define ARRAY_ENTRIES 2048
int i = 0;
@@ -875,7 +881,8 @@ TEST_F(KFDMemoryTest, MMBench) {
/* Two SDMA queues to interleave user mode SDMA with memory
* management on either SDMA engine. Make the queues long enough
* to buffer at least nBufs x WriteData packets (7 dwords per
* packet). */
* packet).
*/
SDMAQueue sdmaQueue[2];
ASSERT_SUCCESS(sdmaQueue[0].Create(defaultGPUNode, PAGE_SIZE*8));
ASSERT_SUCCESS(sdmaQueue[1].Create(defaultGPUNode, PAGE_SIZE*8));
@@ -1094,7 +1101,8 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
* to access its memory like a debugger would. Child copies data in
* the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
* the child terminates, the parent checks that the copy was
* successful. */
* successful.
*/
TEST_F(KFDMemoryTest, PtraceAccess) {
TEST_START(TESTPROFILE_RUNALL)
@@ -1108,13 +1116,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
void *mem[2];
unsigned i;
// Offset in the VRAM buffer to test crossing non-contiguous
// buffer boundaries. The second access starting from offset
// sizeof(HSAint64)+1 will cross a node boundary in a single access,
// for node sizes of 4MB or smaller.
/* Offset in the VRAM buffer to test crossing non-contiguous
* buffer boundaries. The second access starting from offset
* sizeof(HSAint64)+1 will cross a node boundary in a single access,
* for node sizes of 4MB or smaller.
*/
const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);
// alloc system memory from node 0 and initialize it
// Alloc system memory from node 0 and initialize it
memFlags.ui32.NonPaged = 0;
ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
@@ -1122,7 +1131,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
(reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0; // destination
}
// try to alloc local memory from GPU node
// Try to alloc local memory from GPU node
memFlags.ui32.NonPaged = 1;
if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
@@ -1137,13 +1146,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
mem[1] = NULL;
}
// Allow any process to trace this one. If kernel is built without
// Yama, this is not needed, and this call will fail.
/* Allow any process to trace this one. If kernel is built without
* Yama, this is not needed, and this call will fail.
*/
#ifdef PR_SET_PTRACER
prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif
// Find out my pid so the child can trace it
// Find current pid so the child can trace it
pid_t tracePid = getpid();
// Fork the child
@@ -1168,8 +1178,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
} while (!WIFSTOPPED(traceStatus));
for (i = 0; i < 4; i++) {
// Test 4 different (mis-)alignments, leaving 1-byte
// gaps between longs
// Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
errno = 0;
long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
@@ -1264,7 +1273,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, memFlags, &mem));
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(mem, size, NULL));
/* set the word before 4M boundary to 0xdeadbeefdeadbeef
/* Set the word before 4M boundary to 0xdeadbeefdeadbeef
* and the word after 4M boundary to 0xcafebabecafebabe
*/
mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
@@ -1309,7 +1318,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
waitpid(tracePid, &traceStatus, 0);
} while (!WIFSTOPPED(traceStatus));
/* peek the memory */
/* Peek the memory */
errno = 0;
HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
ASSERT_EQ(0, errno);
@@ -1318,7 +1327,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
ASSERT_EQ(0, errno);
ASSERT_EQ(data[1], data1);
/* swap mem0 and mem1 by poking */
/* Swap mem0 and mem1 by poking */
ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
ASSERT_EQ(0, errno);
ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
@@ -1404,7 +1413,7 @@ TEST_F(KFDMemoryTest, SignalHandling) {
size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);
ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
// verify that pDb is not null before it's being used
// Verify that pDb is not null before it's being used
ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
pid_t childPid = fork();
@@ -1473,7 +1482,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
return;
}
/* check the first 64 bit */
/* Check the first 64 bits */
EXPECT_EQ(0, pDb[0]);
pDb[0] = 1;
@@ -1495,7 +1504,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
}
static inline void access(volatile void *sd, int size, int rw) {
/* Most like sit in cache*/
/* Most likely sitting in cache*/
static struct DUMMY {
char dummy[1024];
} dummy;
@@ -1509,8 +1518,8 @@ static inline void access(volatile void *sd, int size, int rw) {
}
/*
* on large-ber system, test the visible vram access speed.
* kfd is not allowd to alloc visible vram on non-largebar system.
* On large-bar system, test the visible vram access speed.
* KFD is not allowed to alloc visible vram on non-largebar system.
*/
TEST_F(KFDMemoryTest, MMBandWidth) {
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
@@ -1571,7 +1580,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
memFlags.ui32.HostAccess = 1;
memFlags.ui32.NonPaged = 0;
} else {
/* alloc visible vram*/
/* Alloc visible vram*/
allocNode = defaultGPUNode;
memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
memFlags.ui32.HostAccess = 1;
+4 -3
Näytä tiedosto
@@ -27,9 +27,10 @@
#ifndef __KFD_MEMORY_TEST__H__
#define __KFD_MEMORY_TEST__H__
// @class KFDTopologyTest
// this class has no additional features to KFDBaseComponentTest
// the separation was made so we are able to goup all memeory tests together
/* @class KFDTopologyTest
* This class has no additional features to KFDBaseComponentTest
* The separation was made so we are able to group all memory tests together
*/
class KFDMemoryTest : public KFDBaseComponentTest {
public:
KFDMemoryTest(void) :m_pIsaGen(NULL) {}
@@ -24,7 +24,7 @@
#include "KFDOpenCloseKFDTest.hpp"
#include "KFDTestUtil.hpp"
// before every test from this class fixture - open KFD
// Before every test from this class fixture, open KFD
void KFDOpenCloseKFDTest::SetUp() {
ROUTINE_START
@@ -33,7 +33,7 @@ void KFDOpenCloseKFDTest::SetUp() {
ROUTINE_END
}
// after every test from this class fixture - close KFD
// After every test from this class fixture, close KFD
void KFDOpenCloseKFDTest::TearDown() {
ROUTINE_START
@@ -42,9 +42,10 @@ void KFDOpenCloseKFDTest::TearDown() {
ROUTINE_END
}
// this test does not use class KFDOpenCloseKFDTest but is placed here
// since it's testing same topic as other test
// verify that calling hsaKmtCloseKFD on a closed KFD will return right status
/* This test does not use class KFDOpenCloseKFDTest but is placed here
* since it's testing same topic as other test
* Verify that calling hsaKmtCloseKFD on a closed KFD will return right status
*/
TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
TEST_START(TESTPROFILE_RUNALL)
@@ -53,8 +54,7 @@ TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
TEST_END
}
// verify that calling hsaKmtCloseKFD on an already opened KFD will return
// right status
// Verify that calling hsaKmtCloseKFD on an already opened KFD will return right status
TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
TEST_START(TESTPROFILE_RUNALL)
@@ -65,8 +65,7 @@ TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
TEST_END
}
// testing the normal scenario: open followed by close (done in the setup and
// teardown functions)
// Testing the normal scenario: open followed by close (done in the setup and teardown functions)
TEST_F(KFDOpenCloseKFDTest, OpenCloseKFD ) {
}
@@ -34,10 +34,9 @@ class KFDOpenCloseKFDTest : public testing::Test {
~KFDOpenCloseKFDTest(void) {}
protected:
// @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture,
// sets all common settings for the tests.
// @brief Executed before every test that uses KFDOpenCloseKFDTest class, sets all common settings for the tests.
virtual void SetUp();
// @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture.
// @brief Executed after every test that uses KFDOpenCloseKFDTest class
virtual void TearDown();
};
+2 -2
Näytä tiedosto
@@ -35,7 +35,7 @@ void KFDPMTest::SetUpTestCase() {
m_SetupSuccess = false;
AcquirePrivilege(OS_SUSPEND);
// if AcquirePrivilege fails, it will throw and we will not reach here.
// If AcquirePrivilege fails, it will throw and we will not reach here.
m_SetupSuccess = true;
ROUTINE_END
@@ -119,4 +119,4 @@ TEST_F(KFDPMTest, SuspendWithIdleQueueAfterWork) {
TEST_END
}
// TODO suspend while workload is being executed by a queue
// TODO: Suspend while workload is being executed by a queue
+1 -1
Näytä tiedosto
@@ -34,7 +34,7 @@ void KFDPNPTest::SetUpTestCase() {
AcquirePrivilege(OS_DRIVER_OPERATIONS);
// if AcquirePrivilege fails, it will throw and we will not reach here.
// If AcquirePrivilege fails, it will throw and we will not reach here.
m_SetupSuccess = true;
ROUTINE_END
+33 -27
Näytä tiedosto
@@ -32,8 +32,6 @@
#include "Dispatch.hpp"
// All tests are marked by their serial number in the QCM FDD
void KFDQMTest::SetUp() {
ROUTINE_START
@@ -155,7 +153,8 @@ TEST_F(KFDQMTest, CreateMultipleSdmaQueues) {
* Fiji and other VI/Polaris GPUs. This test typically hangs in a few
* seconds. According to analysis done by HW engineers, the culprit
* seems to be PCIe speed switching. The problem can be worked around
* by disabling the lowest DPM level on Fiji. */
* by disabling the lowest DPM level on Fiji.
*/
TEST_F(KFDQMTest, SdmaConcurrentCopies) {
TEST_START(TESTPROFILE_RUNALL)
@@ -189,14 +188,16 @@ TEST_F(KFDQMTest, SdmaConcurrentCopies) {
srcBuf.As<char *>()+COPY_SIZE*j, COPY_SIZE));
queue.SubmitPacket();
// Waste a variable amount of time. Submission timing
// while SDMA runs concurrently seems to be critical for
// reproducing the hang
/* Waste a variable amount of time. Submission timing
* while SDMA runs concurrently seems to be critical for
* reproducing the hang
*/
for (int k = 0; k < (i & 0xfff); k++)
memcpy(srcBuf.As<char *>()+PAGE_SIZE, srcBuf.As<char *>(), 1024);
// Wait for idle every 8 packets to allow the SDMA engine to
// run concurrently for a bit without getting too far ahead
/* Wait for idle every 8 packets to allow the SDMA engine to
* run concurrently for a bit without getting too far ahead
*/
if ((i & 0x7) == 0)
queue.Wait4PacketConsumption();
}
@@ -268,7 +269,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) {
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 1, 1));
// don't sync since we don't expect rptr to change when the queue is disabled.
// Don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -309,7 +310,7 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) {
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(destBuf.As<void*>(), 0));
// don't sync since we don't expect rptr to change when the queue is disabled.
// Don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -356,7 +357,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {
queue.PlaceAndSubmitPacket(packet2);
// don't sync since we don't expect rptr to change when the queue is disabled.
// Don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -388,7 +389,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
do {
// the following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
// The following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
unsigned int firstToCreate = (numIter % 2 != 0) ? 1 : 0;
unsigned int firstToDestroy = (numIter % 4 > 1) ? 1 : 0;
@@ -440,7 +441,7 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) {
unsigned int pktSizeDw = 0;
for (unsigned int i = 0; i < MAX_PACKETS; i++) {
PM4WriteDataPacket packet;
packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i); // two DWORDs per packet
packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i); // two dwords per packet
queues[qidx].PlacePacket(packet);
}
}
@@ -612,9 +613,9 @@ HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, u
return timeTotal / iterations;
}
/**
/*
* Apply CU masking in a linear fashion, adding 1 CU per iteration
* until all Shader Engines are full ...
* until all Shader Engines are full
*/
TEST_F(KFDQMTest, BasicCuMaskingLinear) {
TEST_START(TESTPROFILE_RUNALL);
@@ -1026,8 +1027,9 @@ TEST_F(KFDQMTest, CpuWriteCoherence) {
EXPECT_EQ(0, queue.Rptr());
// now that the GPU has cached the PQ contents, we modify them in CPU cache and
// ensure that the GPU sees the updated value:
/* Now that the GPU has cached the PQ contents, we modify them in CPU cache and
* ensure that the GPU sees the updated value:
*/
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x42, 0x42));
queue.Wait4PacketConsumption();
@@ -1130,10 +1132,10 @@ TEST_F(KFDQMTest, QueueLatency) {
if (i >= skip)
queue_latency_avg += queue_latency;
} while (++i < slots);
/*Calculate avg from packet[skip, slots-1]*/
/* Calculate avg from packet[skip, slots-1] */
queue_latency_avg /= (slots - skip);
/*workload of queue packet itself*/
/* Workload of queue packet itself */
i = 0;
do {
queue.PlacePacket(PM4ReleaseMemoryPacket(true,
@@ -1159,7 +1161,7 @@ TEST_F(KFDQMTest, QueueLatency) {
do {
/* The queue_latency is not that correct as the workload and overhead are average*/
queue_latency_arr[i] -= workload + overhead;
/* The First submit takes a HSAint64 time*/
/* The First submit takes an HSAint64 time*/
if (i < skip)
LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl;
} while (++i < slots);
@@ -1454,7 +1456,7 @@ TEST_F(KFDQMTest, P2PTest) {
mapFlags, nodes.size(), &nodes[0]));
#define MAGIC_NUM 0xdeadbeaf
/* First GPU fills mem with MAGIC_NUM*/
/* First GPU fills mem with MAGIC_NUM */
void *src, *dst;
HSAuint32 cur = nodes[0], next;
ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast<void**>(&src)));
@@ -1478,11 +1480,11 @@ TEST_F(KFDQMTest, P2PTest) {
}
LOG() << "Test " << cur << " -> " << next << std::endl;
/* copy to sysBuf and next GPU*/
/* Copy to sysBuf and next GPU*/
void *dst_array[] = {sysBuf, dst};
sdma_copy(cur, src, dst_array, n, size);
/* verify the data*/
/* Verify the data*/
ASSERT_EQ(sysBuf[0], MAGIC_NUM);
ASSERT_EQ(sysBuf[end], MAGIC_NUM);
@@ -1558,7 +1560,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
#ifdef DOORBELL_WRITE_USE_SDMA
/* Write the wptr and doorbell update using the GPU's SDMA
* engine. This should submit the PM4 packet on the first
* queue. */
* queue.
*/
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
pendingWptr));
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
@@ -1566,7 +1569,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
#else
/* Write the wptr and doorbell update using WRITE_DATA packets
* on a second PM4 queue. This should submit the PM4 packet on
* the first queue. */
* the first queue.
*/
otherQueue.PlacePacket(
PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
pendingWptr, false));
@@ -1582,7 +1586,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
#ifdef DOORBELL_WRITE_USE_SDMA
/* Write the wptr and doorbell update using the GPU's SDMA
* engine. This should submit the PM4 packet on the first
* queue. */
* queue.
*/
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
2, &pendingWptr64));
otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
@@ -1591,7 +1596,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
/* Write the 64-bit wptr and doorbell update using RELEASE_MEM
* packets without IRQs on a second PM4 queue. RELEASE_MEM
* should perform one atomic 64-bit access. This should submit
* the PM4 packet on the first queue. */
* the PM4 packet on the first queue.
*/
otherQueue.PlacePacket(
PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
pendingWptr64, true));
+1 -2
Näytä tiedosto
@@ -42,10 +42,9 @@ class KFDQMTest : public KFDBaseComponentTest {
virtual void TearDown();
void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1);
// void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf);
HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
protected: // members
protected: // Members
/* Acceptable performance for CU Masking should be within 5% of linearly-predicted performance */
const double CuVariance = 0.15;
const double CuNegVariance = 1.0 - CuVariance;
+5 -6
Näytä tiedosto
@@ -32,13 +32,12 @@ extern int g_TestDstNodeId;
extern bool g_IsChildProcess;
extern unsigned int g_TestGPUFamilyId;
// each test should call TEST_START with the test custome profile and HW scheduling
// Each test should call TEST_START with the test custom profile and HW scheduling
enum TESTPROFILE{
TESTPROFILE_DEV = 0x1,
TESTPROFILE_PROMO = 0x2,
// 0x4 - 0x8000 - unsed flags
// can add any flag that will mark only part of the tests to run
// 0x4 - 0x8000 - unused flags
// Can add any flag that will mark only part of the tests to run
TESTPROFILE_RUNALL = 0xFFFF
};
@@ -48,8 +47,8 @@ enum ENVCAPS{
ENVCAPS_16BITPASID = 0x2,
ENVCAPS_32BITLINUX = 0x4,
ENVCAPS_64BITLINUX = 0x8
// 0x8 - 0x8000 - unsed flags
// can add any flag that will mark specific hw limitation \ capability
// 0x8 - 0x8000 - unused flags
// Can add any flag that will mark specific hw limitation or capability
};
enum KfdFamilyId {
+3 -1
Näytä tiedosto
@@ -56,11 +56,13 @@ bool g_IsChildProcess;
unsigned int g_TestGPUFamilyId;
GTEST_API_ int main(int argc, char **argv) {
// default values for run parameters
// Default values for run parameters
g_TestRunProfile = TESTPROFILE_RUNALL;
g_TestENVCaps = ENVCAPS_NOADDEDCAPS | ENVCAPS_64BITLINUX;
g_TestTimeOut = KFD_TEST_DEFAULT_TIMEOUT;
// Every fatal fail ( = assert that failed ) will throw an exception
testing::GTEST_FLAG(throw_on_failure) = true;
testing::InitGoogleTest(&argc, argv);
CommandLineArguments args;
+11 -11
Näytä tiedosto
@@ -52,7 +52,7 @@ bool GetHwCapabilityHWS() {
unsigned int value = 0;
bool valExists = ReadDriverConfigValue(CONFIG_HWS, value);
/* HWS is enabled by default, so... */
/* HWS is enabled by default */
return ( (!valExists) || ( value > 0));
}
@@ -64,7 +64,7 @@ HSAKMT_STATUS CreateQueueTypeEvent(
) {
HsaEventDescriptor Descriptor;
// TODO Create per-OS header with this sort of definitions
// TODO: Create per-OS header with this sort of definitions
#ifdef _WIN32
Descriptor.EventType = HSA_EVENTTYPE_QUEUE_EVENT;
#else
@@ -369,7 +369,7 @@ void HsaMemoryBuffer::UnmapAllNodes() {
}
/*
* TODO: when thunk will be updated use hsaKmtRegisterToNodes. and then nodes will be used
* TODO: When thunk is updated, use hsaKmtRegisterToNodes. Then nodes will be used
*/
hsaKmtUnmapMemoryToGPU(m_pBuf);
hsaKmtDeregisterMemory(m_pBuf);
@@ -414,10 +414,10 @@ HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
HsaNodeInfo::HsaNodeInfo() {
}
// Init - Get and store information about all the HSA nodes from the Thunk Library.
// @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
// @Return - false: if no node information is available
//
/* Init - Get and store information about all the HSA nodes from the Thunk Library.
* @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
* @Return - false: if no node information is available
*/
bool HsaNodeInfo::Init(int NumOfNodes) {
HsaNodeProperties *nodeProperties;
_HSAKMT_STATUS status;
@@ -427,8 +427,9 @@ bool HsaNodeInfo::Init(int NumOfNodes) {
nodeProperties = new HsaNodeProperties();
status = hsaKmtGetNodeProperties(i, nodeProperties);
/* this is not a fatal test (not using assert), since even when it fails for one node
* we want to get information regarding others. */
/* This is not a fatal test (not using assert), since even when it fails for one node
* we want to get information regarding others.
*/
EXPECT_SUCCESS(status) << "Node index: " << i << "hsaKmtGetNodeProperties returned status " << status;
if (status == HSAKMT_STATUS_SUCCESS) {
@@ -476,8 +477,7 @@ const int HsaNodeInfo::HsaDefaultGPUNode() const {
return -1;
if (g_TestNodeId >= 0) {
// Check if this is a valid Id, if so use this else use first
// available
// Check if this is a valid Id, if so use this else use first available
for (unsigned int i = 0; i < m_NodesWithGPU.size(); i++) {
if (g_TestNodeId == m_NodesWithGPU.at(i))
return g_TestNodeId;
+11 -11
Näytä tiedosto
@@ -92,7 +92,7 @@ class HsaMemoryBuffer {
~HsaMemoryBuffer();
private:
// disable copy
// Disable copy
HsaMemoryBuffer(const HsaMemoryBuffer&);
const HsaMemoryBuffer& operator=(const HsaMemoryBuffer&);
@@ -130,7 +130,7 @@ class HsaInteropMemoryBuffer {
~HsaInteropMemoryBuffer();
private:
// disable copy
// Disable copy
HsaInteropMemoryBuffer(const HsaInteropMemoryBuffer&);
const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&);
@@ -141,8 +141,7 @@ class HsaInteropMemoryBuffer {
unsigned int m_Node;
};
// Class HsaNodeInfo - Gather and store all HSA node information from Thunk.
//
// @class HsaNodeInfo - Gather and store all HSA node information from Thunk.
class HsaNodeInfo {
// List containing HsaNodeProperties of all Nodes available
std::vector<HsaNodeProperties*> m_HsaNodeProps;
@@ -159,16 +158,17 @@ class HsaNodeInfo {
bool Init(int NumOfNodes);
// This function should be soon depricated. This for transistion purpose only
// Currently, KfdTest is designed to test only ONE node. This function acts
// as transistion.
/* This function should be deprecated soon. This for transistion purpose only
* Currently, KfdTest is designed to test only ONE node. This function acts
* as transition.
*/
const HsaNodeProperties* HsaDefaultGPUNodeProperties() const;
const int HsaDefaultGPUNode() const;
// Future use the following two functions to support multi-GPU.
// const std::vector<int>& GpuNodes = GetNodesWithGPU()
// for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
//
/* TODO: Use the following two functions to support multi-GPU.
* const std::vector<int>& GpuNodes = GetNodesWithGPU()
* for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
*/
const std::vector<int>& GetNodesWithGPU() const;
// @param node index of the node we are looking at
+13 -15
Näytä tiedosto
@@ -25,8 +25,6 @@
#include <vector>
#include <string>
// @todo complete topology test according to whats in: hsathk\source\windows\kmt_topology.cpp
const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);
@@ -35,17 +33,17 @@ TEST_F(KFDTopologyTest , BasicTest) {
const HsaNodeProperties *pNodeProperties;
// goes over all nodes in the sytem properties and check the basic info received
// Goes over all nodes in the sytem properties and check the basic info received
for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
pNodeProperties = m_NodeInfo.GetNodeProperties(node);
if (pNodeProperties != NULL) {
// checking for cpu core only if it's a cpu only node or if its KAVERY apu.
// Checking for cpu core only if it's a cpu only node or if its KAVERI apu.
if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) {
EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node
<< " No CPUs core are connected for node index";
}
// if it's not a cpu only node, look for a gpu core
// If it's not a cpu only node, look for a gpu core
if (pNodeProperties->DeviceId != 0) {
EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node
<< "No GPUs core are connected.";
@@ -64,7 +62,7 @@ TEST_F(KFDTopologyTest , BasicTest) {
TEST_END
}
// this test verify failure status on hsaKmtGetNodeProperties with invalid params
// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
TEST_START(TESTPROFILE_RUNALL)
@@ -73,7 +71,7 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
TEST_END
}
// this test verify failure status on hsaKmtGetNodeProperties with invalid params
// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
TEST_START(TESTPROFILE_RUNALL)
@@ -84,8 +82,8 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
TEST_END
}
// test that we can get memory property successfully per node
// @todo check validity of values returned
// Test that we can get memory properties successfully per node
// TODO: Check validity of values returned
TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
TEST_START(TESTPROFILE_RUNALL)
const HsaNodeProperties *pNodeProperties;
@@ -104,7 +102,7 @@ TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
}
// test the GPU local memory aperture is valid.
// Test that the GPU local memory aperture is valid.
TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
TEST_REQUIRE_NO_ENV_CAPABILITIES(ENVCAPS_32BITLINUX);
@@ -137,8 +135,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
TEST_END
}
// test that we can get cache property successfully per node
// @todo check validity of values returned
// Test that we can get cache property successfully per node
// TODO: Check validity of values returned
TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
TEST_START(TESTPROFILE_RUNALL)
@@ -179,8 +177,8 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
TEST_END
}
// test that we can get NodeIoLink property successfully per node
// @todo check validity of values returned
// Test that we can get NodeIoLink property successfully per node
// TODO: Check validity of values returned
// GetNodeIoLinkProperties is disabled for now, test fails due to bug in BIOS
TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
TEST_START(TESTPROFILE_RUNALL)
@@ -197,7 +195,7 @@ TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
HsaIoLinkProperties *IolinkProperties = new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties));
if (pNodeProperties->NumIOLinks == 0) {
// No io_links. Just Print the node
// No io_links. Just print the node
LOG() << "[" << node << "]" << std::endl;
continue;
}
+4 -3
Näytä tiedosto
@@ -26,9 +26,10 @@
#ifndef __KFD_TOPOLOGY_TEST__H__
#define __KFD_TOPOLOGY_TEST__H__
// @class KFDTopologyTest
// this class has no additional features to KFDBaseComponentTest
// the separation was made so we are able to goup all topology tests together
/* @class KFDTopologyTest
* This class has no additional features to KFDBaseComponentTest
* The separation was made so we are able to group all topology tests together
*/
class KFDTopologyTest : public KFDBaseComponentTest {
public:
KFDTopologyTest(void) {}
+8 -8
Näytä tiedosto
@@ -46,11 +46,11 @@ static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ |
PROT_EXEC | PROT_WRITE | PROT_READ};
void SetConsoleTextColor(TEXTCOLOR color) {
// TODO complete
// TODO: Complete
}
void Delay(int delayCount) {
// usleeps accept time in microseconds
// usleep accepts time in microseconds
usleep(delayCount * 1000);
}
@@ -76,7 +76,7 @@ HSAuint64 GetLastErrorNo() {
}
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) {
// TODO IMPLEMENT
// TODO: Implement
return false;
}
@@ -87,20 +87,20 @@ HSAuint64 GetSystemTickCountInMicroSec() {
}
bool SuspendAndWakeUp() {
// TODO IMPLEMENT
// TODO: Implement
return false;
}
void AcquirePrivilege(OS_PRIVILEGE priv) {
// TODO IMPLEMENT
// TODO: Implement
}
void DisableKfd() {
// TODO IMPLEMENT
// TODO: Implement
}
void EnableKfd() {
// TODO IMPLEMENT
// TODO: Implement
}
bool ReadDriverConfigValue(CONFIG_VALUE config, unsigned int& rValue) {
@@ -145,7 +145,7 @@ bool GetCommandLineArguments(int argc, char **argv, CommandLineArguments& rArgs)
if (c != 0)
break;
/* If this option set a flag, do nothing else now. */
/* If this option sets a flag, do nothing else. */
if (long_options[option_index].flag != 0)
continue;
+13 -14
Näytä tiedosto
@@ -72,35 +72,34 @@ struct CommandLineArguments {
#define MEM_WRITE 0x02
#define MEM_EXECUTE 0x4
// @brief change console text color
// @brief Change console text color
void SetConsoleTextColor(TEXTCOLOR color);
// @params delayCount : delay time in milliseconds
void Delay(int delayCount);
// @brief replacement for windows VirtualAlloc func
// @brief Replacement for windows VirtualAlloc func
void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = MEM_READ | MEM_WRITE);
// @brief replacement for windows FreeVirtual func
// @brief Replacement for windows FreeVirtual func
bool VirtualFreeMemory(void *address, unsigned int size);
// @brief retrieve the last error number
// @brief Retrieve the last error number
HSAuint64 GetLastErrorNo();
HSAint64 AtomicInc(volatile HSAint64* pValue);
void MemoryBarrier();
// @brief: runs the selected test case number of times required, each in a separate process
// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want
// to run all tests in a test case: TestCase.* and so on
// @params numOfProcesses : how many processes to run in parallel
// @params runsPerProcess : how many iteration a test should do per process, must be a positive number
/* @brief: Runs the selected test case number of times required, each in a separate process
* @params testToRun : Can be a specific test testcase like TestCase.TestName or if you want
* to run all tests in a test case: TestCase.* and so on
* @params numOfProcesses : How many processes to run in parallel
* @params runsPerProcess : How many iteration a test should do per process, must be a positive number
*/
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1);
HSAuint64 GetSystemTickCountInMicroSec();
/**Put the system to S3/S4 power state and bring it back to S0.
@return 'true' on success, 'false' on failure.
*/
/* Put the system to S3/S4 power state and bring it back to S0.
* @return 'true' on success, 'false' on failure.
*/
bool SuspendAndWakeUp();
void AcquirePrivilege(OS_PRIVILEGE priv);
+4 -4
Näytä tiedosto
@@ -53,7 +53,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const {
void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(calloc(1, SizeInBytes()));
// verify that the memory is allocated successfully, cannot use assert here
// Verify that the memory is allocated successfully, cannot use assert here
EXPECT_NOTNULL(m_pPacketData);
InitPM4Header(m_pPacketData->header, IT_WRITE_DATA);
@@ -249,8 +249,8 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i
// 1st register is a part of the packet struct.
m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);
/* allocating the size of the packet, since the packet is assembled from a struct
* followed by an additional DWORD data
/* Allocating the size of the packet, since the packet is assembled from a struct
* followed by an additional dword data
*/
m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(malloc(m_packetSize));
@@ -296,7 +296,7 @@ PM4PartialFlushPacket::PM4PartialFlushPacket(void) {
}
unsigned int PM4PartialFlushPacket::SizeInBytes() const {
// for PARTIAL_FLUSH_CS packets, the last 2 DWORDS don't exist.
// For PARTIAL_FLUSH_CS packets, the last 2 dwordS don't exist.
return sizeof(PM4EVENT_WRITE) - sizeof(uint32_t)*2;
}
+31 -31
Näytä tiedosto
@@ -31,7 +31,7 @@
#include "pm4_pkt_struct_ai.h"
#include "IndirectBuffer.hpp"
// @class PM4Packet: marks a group of all PM4 packets
// @class PM4Packet: Marks a group of all PM4 packets
class PM4Packet : public BasePacket {
public:
PM4Packet(void) {}
@@ -47,9 +47,9 @@ class PM4Packet : public BasePacket {
// @class PM4WriteDataPacket
class PM4WriteDataPacket : public PM4Packet {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
PM4WriteDataPacket(void): m_ndw(0), m_pPacketData(NULL) {}
// this contructor will also init the packet, no need for additional calls
// This contructor will also init the packet, no need for additional calls
PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1):
m_ndw(1), m_pPacketData(NULL) {InitPacket(destBuf, &data1);}
PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1, unsigned int data2):
@@ -59,11 +59,11 @@ class PM4WriteDataPacket : public PM4Packet {
}
virtual ~PM4WriteDataPacket(void);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return m_pPacketData; }
// @brief initialise the packet
// @brief Initialise the packet
void InitPacket(unsigned int *destBuf, unsigned int data1) {
m_ndw = 1;
InitPacket(destBuf, &data1);
@@ -77,27 +77,27 @@ class PM4WriteDataPacket : public PM4Packet {
protected:
unsigned int m_ndw;
// PM4WRITE_DATA_CI struct contains all the packets data
// PM4WRITE_DATA_CI struct contains all the packet's data
PM4WRITE_DATA_CI *m_pPacketData;
};
// @class PM4ReleaseMemoryPacket
class PM4ReleaseMemoryPacket : public PM4Packet {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
PM4ReleaseMemoryPacket(void): m_pPacketData(NULL) {}
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
PM4ReleaseMemoryPacket(bool isPolling, uint64_t address, uint64_t data,
bool is64bit = false, bool isTimeStamp = false): m_pPacketData(NULL) {
InitPacket(isPolling, address, data, is64bit, isTimeStamp);
}
virtual ~PM4ReleaseMemoryPacket(void);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return m_packetSize; }
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return m_pPacketData; }
// @brief initialise the packet
// @brief Initialise the packet
void InitPacket(bool isPolling, uint64_t address, uint64_t data,
bool is64bit = false, bool isTimeStamp = false);
@@ -109,21 +109,21 @@ class PM4ReleaseMemoryPacket : public PM4Packet {
// @class PM4IndirectBufPacket
class PM4IndirectBufPacket : public PM4Packet {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
PM4IndirectBufPacket(void) {}
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
explicit PM4IndirectBufPacket(IndirectBuffer *pIb);
virtual ~PM4IndirectBufPacket(void) {}
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &m_packetData; }
// @breif initialise the packet
// @breif Initialise the packet
void InitPacket(IndirectBuffer *pIb);
private:
// PM4MEC_INDIRECT_BUFFER struct contains all the packets data
// PM4MEC_INDIRECT_BUFFER struct contains all the packet's data
PM4MEC_INDIRECT_BUFFER m_packetData;
};
@@ -139,11 +139,11 @@ class PM4AcquireMemoryPacket : public PM4Packet {
virtual const void *GetPacket() const { return &m_packetData; }
private:
// PM4ACQUIRE_MEM struct contains all the packets data
// PM4ACQUIRE_MEM struct contains all the packet's data
PM4ACQUIRE_MEM m_packetData;
};
// @class PM4SetShaderRegPacket packet that writes to consecutive registers starting at baseOffset.
// @class PM4SetShaderRegPacket Packet that writes to consecutive registers starting at baseOffset.
class PM4SetShaderRegPacket : public PM4Packet {
public:
PM4SetShaderRegPacket(void);
@@ -152,9 +152,9 @@ class PM4SetShaderRegPacket : public PM4Packet {
virtual ~PM4SetShaderRegPacket(void);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return m_packetSize; }
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return m_pPacketData; }
void InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs);
@@ -162,7 +162,7 @@ class PM4SetShaderRegPacket : public PM4Packet {
private:
unsigned int m_packetSize;
bool m_packetDataAllocated;
// PM4SET_SH_REG struct contains all the packets data
// PM4SET_SH_REG struct contains all the packet's data
PM4SET_SH_REG *m_pPacketData;
};
@@ -175,15 +175,15 @@ class PM4DispatchDirectPacket : public PM4Packet {
virtual ~PM4DispatchDirectPacket(void) {}
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &m_packetData; }
void InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit);
private:
// PM4DISPATCH_DIRECT struct contains all the packets data
// PM4DISPATCH_DIRECT struct contains all the packet's data
PM4DISPATCH_DIRECT m_packetData;
};
@@ -193,13 +193,13 @@ class PM4PartialFlushPacket : public PM4Packet {
PM4PartialFlushPacket(void);
virtual ~PM4PartialFlushPacket(void) {}
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &m_packetData; }
private:
// PM4EVENT_WRITE struct contains all the packets data
// PM4EVENT_WRITE struct contains all the packet's data
PM4EVENT_WRITE m_packetData;
};
@@ -209,9 +209,9 @@ class PM4NopPacket : public PM4Packet {
PM4NopPacket(void);
virtual ~PM4NopPacket(void) {}
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const;
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &m_packetData; }
private:
+5 -3
Näytä tiedosto
@@ -36,7 +36,8 @@ PM4Queue::~PM4Queue(void) {
unsigned int PM4Queue::Wptr() {
/* Write pointer in dwords. Simulate 32-bit wptr that wraps at
* queue size even on Vega10 and later chips with 64-bit wptr. */
* queue size even on Vega10 and later chips with 64-bit wptr.
*/
return *m_Resources.Queue_write_ptr % (m_QueueBuf->Size() / 4);
}
@@ -48,12 +49,13 @@ unsigned int PM4Queue::Rptr() {
unsigned int PM4Queue::RptrWhenConsumed() {
/* On PM4 queues Rptr is always 32-bit in dword units and wraps at
* queue size. The expected value when all packets are consumed is
* exactly the value returned by Wptr(). */
* exactly the value returned by Wptr().
*/
return Wptr();
}
void PM4Queue::SubmitPacket() {
// m_pending Wptr is in DWORDs
// m_pending Wptr is in dwords
if (g_TestGPUFamilyId < FAMILY_AI) {
// Pre-Vega10 uses 32-bit wptr and doorbell
MemoryBarrier();
+2 -2
Näytä tiedosto
@@ -35,9 +35,9 @@ class PM4Queue : public BaseQueue {
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
virtual void SubmitPacket();
// @ return read pointer modulo queue size in DWORDs
// @ return read pointer modulo queue size in dwords
virtual unsigned int Rptr();
// @ return write pointer modulo queue size in DWORDs
// @ return write pointer modulo queue size in dwords
virtual unsigned int Wptr();
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
virtual unsigned int RptrWhenConsumed();
+1 -1
Näytä tiedosto
@@ -38,7 +38,7 @@ class RDMATest : public KFDBaseComponentTest {
virtual void SetUp();
virtual void TearDown();
protected: // members
protected: // Members
IsaGenerator* m_pIsaGen;
};
+5 -4
Näytä tiedosto
@@ -27,8 +27,9 @@
#include "SDMAPacket.hpp"
#include "KFDTestUtil.hpp"
/* Byte/dword cound in many SDMA packets is 1-based in AI, meaning a
* count of 1 is encoded as 0. */
/* Byte/dword count in many SDMA packets is 1-based in AI, meaning a
* count of 1 is encoded as 0.
*/
#define SDMA_COUNT(c) (g_TestGPUFamilyId < FAMILY_AI ? (c) : (c)-1)
SDMAWriteDataPacket::SDMAWriteDataPacket(void):
@@ -95,7 +96,7 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns
packetData = pSDMA;
while (surfsize > 0) {
/* sdma support maximum 0x3fffe0 byte in one copy, take 2M here */
/* SDMA support maximum 0x3fffe0 byte in one copy, take 2M here */
if (surfsize > TWO_MEG)
size = TWO_MEG;
else
@@ -150,7 +151,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in
pSDMA->HEADER_UNION.op = SDMA_OP_CONST_FILL;
pSDMA->HEADER_UNION.sub_op = 0;
/* If Both size and address are DW aligned, then use DW fill */
/* If both size and address are DW aligned, then use DW fill */
if (!(copy_size & 0x3) && !((HSAuint64)dst & 0x3))
pSDMA->HEADER_UNION.fillsize = 2; /* DW Fill */
else
+26 -26
Näytä tiedosto
@@ -27,7 +27,7 @@
#include "BasePacket.hpp"
#include "sdma_pkt_struct.h"
// @class SDMSPacket: marks a group of all SDMA packets
// @class SDMAPacket: Marks a group of all SDMA packets
class SDMAPacket : public BasePacket {
public:
SDMAPacket(void) {}
@@ -38,44 +38,44 @@ class SDMAPacket : public BasePacket {
class SDMAWriteDataPacket : public SDMAPacket {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
SDMAWriteDataPacket(void);
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
SDMAWriteDataPacket(void* destAddr, unsigned int data);
SDMAWriteDataPacket(void* destAddr, unsigned int ndw, void *data);
virtual ~SDMAWriteDataPacket(void);
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return packetData; }
// @breif initialise the packet
// @breif Initialise the packet
void InitPacket(void* destAddr, unsigned int data);
void InitPacket(void* destAddr, unsigned int ndw, void *data);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return packetSize; }
protected:
// SDMA_PKT_WRITE_UNTILED struct contains all the packets data
// SDMA_PKT_WRITE_UNTILED struct contains all the packet's data
SDMA_PKT_WRITE_UNTILED *packetData;
unsigned int packetSize;
};
class SDMACopyDataPacket : public SDMAPacket {
public:
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
SDMACopyDataPacket(void *dest, void *src, unsigned int size);
SDMACopyDataPacket(void *const dst[], void *src, int n, unsigned int surfsize);
virtual ~SDMACopyDataPacket(void);
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return packetData; }
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return packetSize; }
protected:
// SDMA_PKT_COPY_LINEAR struct contains all the packets data
// SDMA_PKT_COPY_LINEAR struct contains all the packet's data
SDMA_PKT_COPY_LINEAR *packetData;
unsigned int packetSize;
@@ -83,19 +83,19 @@ class SDMACopyDataPacket : public SDMAPacket {
class SDMAFillDataPacket : public SDMAPacket {
public:
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
SDMAFillDataPacket(void *dest, unsigned int data, unsigned int size);
virtual ~SDMAFillDataPacket(void);
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return m_PacketData; }
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return m_PacketSize; }
protected:
// SDMA_PKT_CONSTANT_FILL struct contains all the packets data
// SDMA_PKT_CONSTANT_FILL struct contains all the packet's data
SDMA_PKT_CONSTANT_FILL *m_PacketData;
unsigned int m_PacketSize;
@@ -103,41 +103,41 @@ class SDMAFillDataPacket : public SDMAPacket {
class SDMAFencePacket : public SDMAPacket {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
SDMAFencePacket(void);
// this contructor will also init the packet, no need for adittional calls
// This contructor will also init the packet, no need for additional calls
SDMAFencePacket(void* destAddr, unsigned int data);
virtual ~SDMAFencePacket(void);
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &packetData; }
// @brief initialise the packet
// @brief Initialise the packet
void InitPacket(void* destAddr, unsigned int data);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_FENCE ); }
protected:
// SDMA_PKT_FENCE struct contains all the packets data
// SDMA_PKT_FENCE struct contains all the packet's data
SDMA_PKT_FENCE packetData;
};
class SDMATrapPacket : public SDMAPacket {
public:
// empty constructor, befor using the packet call the init func
// Empty constructor, before using the packet call the init func
explicit SDMATrapPacket(unsigned int eventID = 0);
virtual ~SDMATrapPacket(void);
// @returns a pointer to the packet
// @returns Pointer to the packet
virtual const void *GetPacket() const { return &packetData; }
// @brief initialise the packet
// @brief Initialise the packet
void InitPacket(unsigned int eventID);
// @returns the packet size in bytes
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_TRAP); }
protected:
// SDMA_PKT_TRAP struct contains all the packets data
// SDMA_PKT_TRAP struct contains all the packet's data
SDMA_PKT_TRAP packetData;
};
+9 -6
Näytä tiedosto
@@ -32,20 +32,22 @@ SDMAQueue::~SDMAQueue(void) {
unsigned int SDMAQueue::Wptr() {
/* In SDMA queues write pointers are saved in bytes, convert the
* wptr value to DWORD to fit the way BaseQueue works. On Vega10
* wptr value to dword to fit the way BaseQueue works. On Vega10
* the write ptr is 64-bit. We only read the low 32 bit (assuming
* the queue buffer is smaller than 4GB) and modulo divide by the
* queue size to simulate a 32-bit read pointer. */
* queue size to simulate a 32-bit read pointer.
*/
return (*m_Resources.Queue_write_ptr % m_QueueBuf->Size()) /
sizeof(unsigned int);
}
unsigned int SDMAQueue::Rptr() {
/* In SDMA queues read pointers are saved in bytes, convert the
* read value to DWORD to fit the way BaseQueue works. On Vega10
* read value to dword to fit the way BaseQueue works. On Vega10
* the read ptr is 64-bit. We only read the low 32 bit (assuming
* the queue buffer is smaller than 4GB) and modulo divide by the
* queue size to simulate a 32-bit read pointer. */
* queue size to simulate a 32-bit read pointer.
*/
return (*m_Resources.Queue_read_ptr % m_QueueBuf->Size()) /
sizeof(unsigned int);
}
@@ -53,12 +55,13 @@ unsigned int SDMAQueue::Rptr() {
unsigned int SDMAQueue::RptrWhenConsumed() {
/* Rptr is same size and byte units as Wptr. Here we only care
* about the low 32-bits. When all packets are consumed, read and
* write pointers should have the same value. */
* write pointers should have the same value.
*/
return *m_Resources.Queue_write_ptr;
}
void SDMAQueue::SubmitPacket() {
// m_pending Wptr is in DWORDs
// m_pending Wptr is in dwords
if (g_TestGPUFamilyId < FAMILY_AI) {
// Pre-Vega10 uses 32-bit wptr and doorbell
unsigned int wPtrInBytes = m_pendingWptr * sizeof(unsigned int);
+4 -4
Näytä tiedosto
@@ -31,15 +31,15 @@ class SDMAQueue : public BaseQueue {
SDMAQueue(void);
virtual ~SDMAQueue(void);
// @brief update queue write pointer and sets the queue doorbell to the queue write pointer
// @brief Update queue write pointer and set the queue doorbell to the queue write pointer
virtual void SubmitPacket();
protected:
// @ return write pointer modulo queue size in DWORDs
// @ return Write pointer modulo queue size in dwords
virtual unsigned int Wptr();
// @ return read pointer modulo queue size in DWORDs
// @ return Read pointer modulo queue size in dwords
virtual unsigned int Rptr();
// @ return expected m_Resources.Queue_read_ptr when all packets consumed
// @ return Expected m_Resources.Queue_read_ptr when all packets are consumed
virtual unsigned int RptrWhenConsumed();
virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_SDMA; }