kfdtest: Clean up comments

Consolidate style (use /* */ for multi-line), fix typos, use dword instead of DWORD/DWord. Change-Id: I620e45c1687550db41127e45641b7d79d28223a1
2018-08-14 09:52:31 -04:00
commit 414042abf7
@@ -45,7 +45,7 @@ unsigned int AqlQueue::RptrWhenConsumed() {
 }

 void AqlQueue::SubmitPacket() {
-    // m_pending Wptr is in DWORDs
+    // m_pending Wptr is in dwords
    *m_Resources.Queue_write_ptr = m_pendingWptr;
    *(m_Resources.Queue_DoorBell) = Wptr();
 }
@@ -31,14 +31,14 @@ class AqlQueue : public BaseQueue {
    AqlQueue();
    virtual ~AqlQueue();

-    // @brief update queue write pointer and sets the queue doorbell to the queue write pointer
+    // @brief Updates queue write pointer and sets the queue doorbell to the queue write pointer
    virtual void SubmitPacket();

-    // @ return read pointer are in DWORDs
+    // @return Read pointer in dwords
    virtual unsigned int Rptr();
-    // @ return write pointer are in DWORDs
+    // @return Write pointer in dwords
    virtual unsigned int Wptr();
-    // @ return expected m_Resources.Queue_read_ptr when all packets consumed
+    // @return Expected m_Resources.Queue_read_ptr when all packets are consumed
    virtual unsigned int RptrWhenConsumed();

 protected:
@@ -47,4 +47,4 @@ class AqlQueue : public BaseQueue {
    virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE_AQL; }
 };

-#endif
+#endif  // __KFD_AQL_QUEUE__H__
@@ -25,8 +25,8 @@
 #define __KFD_BASE_PACKET__H__

 /**
- * all packets profiles must be defined here
- * every type defined here has sub-types
+ * All packet profiles must be defined here
+ * Every type defined here has sub-types
 */
 enum PACKETTYPE {
    PACKETTYPE_PM4,
@@ -40,13 +40,13 @@ class BasePacket {
    BasePacket(void) {}
    virtual ~BasePacket(void) {}

-    // @returns the packet type
+    // @returns Packet type
    virtual PACKETTYPE PacketType() const = 0;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const = 0;
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const = 0;
-    // @returns the packet size in DWORDS
+    // @returns Packet size in dwords
    unsigned int SizeInDWords() const { return SizeInBytes()/sizeof(unsigned int); }

    void Dump() const;
@@ -54,4 +54,4 @@ class BasePacket {
 protected:
 };

-#endif
+#endif  // __KFD_BASE_PACKET__H__
@@ -41,7 +41,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
    HSAKMT_STATUS status;

    if (m_QueueBuf != NULL) {
-        // queue already exist, one queue per object
+        // Queue already exists, one queue per object
        Destroy();
    }

@@ -78,7 +78,7 @@ HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint6
        status = HSAKMT_STATUS_ERROR;
    }

-    // needs to match the queue write ptr
+    // Needs to match the queue write ptr
    m_pendingWptr = 0;
    m_pendingWptr64 = 0;
    m_Node = NodeId;
@@ -137,7 +137,7 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
    unsigned int queueSizeInDWord = m_QueueBuf->Size() / sizeof(uint32_t);

    if (writePtr + packetSizeInDwords > queueSizeInDWord) {
-        // wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
+        // Wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
        dwordsRequired +=  queueSizeInDWord - writePtr;
    }

@@ -147,14 +147,14 @@ void BaseQueue::PlacePacket(const BasePacket &packet) {
    ASSERT_GE(queueSizeInDWord, packetSizeInDwords) << "Cannot add a packet, packet size too large";

    if (writePtr + packetSizeInDwords >= queueSizeInDWord) {
-        // wraparound
+        // Wraparound
        while (writePtr + packetSizeInDwords > queueSizeInDWord) {
            m_QueueBuf->As<unsigned int *>()[writePtr] = CMD_NOP;
            writePtr = (writePtr + 1) % queueSizeInDWord;
            writePtr64++;
        }

-        // not updating Wptr since we might want to do place packet without submission
+        // Not updating Wptr since we might want to place the packet without submission
        m_pendingWptr = (writePtr % queueSizeInDWord);
        m_pendingWptr64 = writePtr64;
    }
@@ -48,10 +48,10 @@ class BaseQueue {
                                 HSAuint64 *pointers = NULL);
    /** Update the queue.
     *  @see hsaKmtUpdateQueue
-     *  @param percent the new queue percentage
-     *  @param priority the new queue priority
+     *  @param percent New queue percentage
+     *  @param priority New queue priority
     *  @param nullifyBuffer
-     *      if 'true', set the new buffer address to NULL and the size to 0. Otherwise
+     *      If 'true', set the new buffer address to NULL and the size to 0. Otherwise
     *      don't change the queue buffer address/size.
     */
    virtual HSAKMT_STATUS Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer);
@@ -64,17 +64,17 @@ class BaseQueue {
     *  Note that all packets being consumed is not the same as all packets being processed.
     */
    virtual void Wait4PacketConsumption();
-    /** @brief place packet and submit it in one go
+    /** @brief Place packet and submit it in one function
     */
    virtual void PlaceAndSubmitPacket(const BasePacket &packet);
-    /** @brief copy packet to queue and update write pointer
+    /** @brief Copy packet to queue and update write pointer
     */
    virtual void PlacePacket(const BasePacket &packet);
-    /** @brief update queue write pointer and sets the queue doorbell to the queue write pointer
+    /** @brief Update queue write pointer and set the queue doorbell to the queue write pointer
     */
    virtual void SubmitPacket() = 0;
-    /** @brief checkes if all packets in queue already processed
-     *  compares queue read and write pointers
+    /** @brief Check if all packets in queue are already processed
+     *  Compare queue read and write pointers
     */
    bool AllPacketsSubmitted();

@@ -100,11 +100,11 @@ class BaseQueue {
    HsaMemoryBuffer *m_QueueBuf;
    unsigned int m_Node;

-    // @ return write pointer modulo queue size in DWORDs
+    // @return Write pointer modulo queue size in dwords
    virtual unsigned int Wptr() = 0;
-    // @ return read pointer modulo queue size in DWORDs
+    // @return Read pointer modulo queue size in dwords
    virtual unsigned int Rptr() = 0;
-    // @ return expected m_Resources.Queue_read_ptr when all packets consumed
+    // @return Expected m_Resources.Queue_read_ptr when all packets are consumed
    virtual unsigned int RptrWhenConsumed() = 0;
    virtual PACKETTYPE PacketTypeSupported() = 0;

@@ -131,4 +131,4 @@ class QueueArray {
    void Destroy();
 };

-#endif
+#endif  // __KFD_BASE_QUEUE__H__
@@ -90,7 +90,7 @@ void Dispatch::Sync(unsigned int timeout) {
    ASSERT_SUCCESS(hsaKmtWaitOnEvent(m_pEop, timeout));
 }

-// returning  with status in order to allow to take actions before proc termination
+// Returning with status in order to allow actions to be performed before process termination
 int Dispatch::SyncWithStatus(unsigned int timeout) {
    int stat;

@@ -103,7 +103,7 @@ void Dispatch::BuildIb() {
    SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
    SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);

-    // starts at COMPUTE_START_X
+    // Starts at COMPUTE_START_X
    const unsigned int COMPUTE_DISPATCH_DIMS_VALUES[] = {
        0,      // START_X
        0,      // START_Y
@@ -138,14 +138,14 @@ void Dispatch::BuildIb() {
        pgmRsrc2
    };

-    // starts at COMPUTE_PGM_LO
+    // Starts at COMPUTE_PGM_LO
    const unsigned int COMPUTE_PGM_VALUES_GFX8[] = {
        static_cast<uint32_t>(shiftedIsaAddr),                  // PGM_LO
        static_cast<uint32_t>(shiftedIsaAddr >> 32)             // PGM_HI
            | (is_dgpu() ? 0 : (1<<8))                          // including PGM_ATC=?
    };

-    // starts at COMPUTE_PGM_LO
+    // Starts at COMPUTE_PGM_LO
    const unsigned int COMPUTE_PGM_VALUES_GFX9[] = {
        static_cast<uint32_t>(shiftedIsaAddr),                  // PGM_LO
        static_cast<uint32_t>(shiftedIsaAddr >> 32)             // PGM_HI
@@ -156,17 +156,17 @@ void Dispatch::BuildIb() {
        static_cast<uint32_t>(m_scratch_base >> 40)
    };

-    // starts at COMPUTE_RESOURCE_LIMITS
+    // Starts at COMPUTE_RESOURCE_LIMITS
    const unsigned int COMPUTE_RESOURCE_LIMITS[] = {
        0,                      // COMPUTE_RESOURCE_LIMITS
    };

-    // starts at COMPUTE_TMPRING_SIZE
+    // Starts at COMPUTE_TMPRING_SIZE
    const unsigned int COMPUTE_TMPRING_SIZE[] = {
        m_ComputeTmpringSize,   // COMPUTE_TMPRING_SIZE
    };

-    // starts at COMPUTE_RESTART_X
+    // Starts at COMPUTE_RESTART_X
    const unsigned int COMPUTE_RESTART_VALUES[] = {
        0,                      // COMPUTE_RESTART_X
        0,                      // COMPUTE_RESTART_Y
@@ -174,13 +174,13 @@ void Dispatch::BuildIb() {
        0                       // COMPUTE_THREAD_TRACE_ENABLE
    };

-    // starts at COMPUTE_USER_DATA_0
+    // Starts at COMPUTE_USER_DATA_0
    const unsigned int COMPUTE_USER_DATA_VALUES[] = {
                // Reg name             - use in KFDtest - use in ABI
-        arg0,   // COMPUTE_USER_DATA_0  - arg0           - resource descriptor for the scratch buffer - 1st DWORD
-        arg1,   // COMPUTE_USER_DATA_1  - arg1           - resource descriptor for the scratch buffer - 2nd DWORD
-        arg2,   // COMPUTE_USER_DATA_2  - arg2           - resource descriptor for the scratch buffer - 3rd DWORD
-        arg3,   // COMPUTE_USER_DATA_3  - arg3           - resource descriptor for the scratch buffer - 4th DWORD
+        arg0,   // COMPUTE_USER_DATA_0  - arg0           - resource descriptor for the scratch buffer - 1st dword
+        arg1,   // COMPUTE_USER_DATA_1  - arg1           - resource descriptor for the scratch buffer - 2nd dword
+        arg2,   // COMPUTE_USER_DATA_2  - arg2           - resource descriptor for the scratch buffer - 3rd dword
+        arg3,   // COMPUTE_USER_DATA_3  - arg3           - resource descriptor for the scratch buffer - 4th dword
        static_cast<uint32_t>(m_scratch_base),  // COMPUTE_USER_DATA_4  - flat_scratch_lo
        static_cast<uint32_t>(m_scratch_base >> 32),  // COMPUTE_USER_DATA_4  - flat_scratch_hi
        0,      // COMPUTE_USER_DATA_6  -                - AQL queue address, low part
@@ -27,7 +27,6 @@
 bool Ok2Run(unsigned int testProfile) {
    bool testMatchProfile = true;
    if ((testProfile & g_TestRunProfile) == 0) {
-        // display msg to notify a test that is not running
        WARN() << "Test is skipped beacuse profile does not match current run mode" << std::endl;
        testMatchProfile = false;
    }
@@ -35,11 +34,10 @@ bool Ok2Run(unsigned int testProfile) {
    return testMatchProfile;
 }

-// This predication is used when specific HW capabilites must exist for the test to succeed.
+// This predication is used when specific HW capabilities must exist for the test to succeed.
 bool TestReqEnvCaps(unsigned int envCaps) {
    bool testMatchEnv = true;
    if ((envCaps & g_TestENVCaps) != envCaps) {
-        // display msg to notify a test that is not running
        WARN() << "Test is skipped due to HW capability issues" << std::endl;
        testMatchEnv = false;
    }
@@ -47,12 +45,11 @@ bool TestReqEnvCaps(unsigned int envCaps) {
    return testMatchEnv;
 }

-// This predication is used when specific HW capabilites must abscent for the test to succeed.
-// e.g testing capabilites not supported by HW scheduling
+// This predication is used when specific HW capabilities must be absent for the test to succeed.
+// e.g. testing capabilities not supported by HW scheduling
 bool TestReqNoEnvCaps(unsigned int envCaps) {
    bool testMatchEnv = true;
    if ((envCaps & g_TestENVCaps) != 0) {
-        // display msg to notify a test that is not running
        WARN() << "Test is skipped due to HW capability issues" << std::endl;
        testMatchEnv = false;
    }
@@ -29,24 +29,24 @@
 #include "KFDTestFlags.hpp"

 enum LOGTYPE {
-    LOGTYPE_INFO,            // msg header in green
+    LOGTYPE_INFO,      // msg header in green
    LOGTYPE_WARNING    // msg header in yellow
 };

 class KFDLog{};
 std::ostream& operator << (KFDLog log, LOGTYPE level);

-// @brief  log additional details, to be displayed in the same format as other google test outputs
-// currently not supported by google test
-// should be used like cout: LOG() << "message" << value << std::endl;
+// @brief  Log additional details, to be displayed in the same format as other google test outputs
+// Currently not supported by gtest
+// Should be used like cout: LOG() << "message" << value << std::endl;
 #define LOG()      KFDLog() << LOGTYPE_INFO
 #define WARN()     KFDLog() << LOGTYPE_WARNING

-// all test MUST be in a try catch since google test flag to throw exception on any fatal fail is on
+// All tests MUST be in a try catch since the gtest flag to throw an exception on any fatal failure is enabled
 #define TEST_START(testProfile)   if (Ok2Run(testProfile)) try {
 #define TEST_END       } catch (...) {}

-// used to wrape setup and teardown functions, anything that is build-in gtest  and is not a test
+// Used to wrap setup and teardown functions, anything that is built-in gtest and is not a test
 #define ROUTINE_START   try {
 #define ROUTINE_END       }catch(...) {}

@@ -59,13 +59,13 @@ std::ostream& operator << (KFDLog log, LOGTYPE level);
 #define ASSERT_NOTNULL(_val) ASSERT_NE((void *)NULL, _val)
 #define EXPECT_NOTNULL(_val) EXPECT_NE((void *)NULL, _val)

-// @brief  determines if its ok to run a test given input flags
+// @brief  Determines if it is ok to run a test given input flags
 bool Ok2Run(unsigned int testProfile);

-// @brief  checks if all HW capabilities needed for a test to run exist
+// @brief  Checks if all HW capabilities needed for a test to run exist
 bool TestReqEnvCaps(unsigned int hwCaps);

-// @brief  checks if all HW capabilities that prevents a test from running are non existing
+// @brief  Checks if all HW capabilities that prevent a test from running are absent
 bool TestReqNoEnvCaps(unsigned int hwCaps);

 #endif
@@ -28,32 +28,32 @@
 #include "KFDTestUtil.hpp"

 /** @class IndirectBuffer
- *  when working with indirect buffer, create IndirectBuffer, fill it with all the packets you want
+ *  When working with an indirect buffer, create IndirectBuffer, fill it with all the packets you want,
 *  create an indirect packet to point to it, and submit the packet to queue
 */
 class IndirectBuffer {
 public:
-    // @param[size] queue max size in DWords
-    // @param[type] packets type allowed in queue
+    // @param sizeInDWords Queue max size in dwords
+    // @param type Packet type allowed in queue
    IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId);
    ~IndirectBuffer(void);

-    // @brief add packet to queue, all validations are done with gtest ASSERT and EXPECT
+    // @brief Add packet to queue, all validations are done with gtest ASSERT and EXPECT
    void AddPacket(const BasePacket &packet);
-    // @returns the actual size of the indirect queue in DWord, equivalent to write pointer
+    // @returns Actual size of the indirect queue in dwords, equivalent to write pointer
    unsigned int SizeInDWord() { return m_ActualSize; }
-    // @returns indirect queue address
+    // @returns Indirect queue address
    unsigned int *Addr() { return m_IndirectBuf->As<unsigned int*>(); }

 protected:
-    // how many packets in queue
+    // Number of packets in the queue
    unsigned int m_NumOfPackets;
-    // max size of queue in DWords
+    // Max size of queue in dwords
    unsigned int m_MaxSize;
-    // current size of queue in DWords
+    // Current size of queue in dwords
    unsigned int m_ActualSize;
    HsaMemoryBuffer *m_IndirectBuf;
-    // defines what packets are supported in this queue
+    // What packets are supported in this queue
    PACKETTYPE m_PacketTypeAllowed;
 };

@@ -38,9 +38,9 @@ void KFDBaseComponentTest::SetUp() {
    memset( &m_SystemProperties, 0, sizeof(m_SystemProperties) );
    memset(m_RenderNodes, 0, sizeof(m_RenderNodes));

-    /** in order to be correctly testing the KFD interfaces and ensure
+    /** In order to be correctly testing the KFD interfaces and ensure
     *  that the KFD acknowledges relevant node parameters
-     *  for the rest of the tests and used for more specific topology tests
+     *  for the rest of the tests and used for more specific topology tests,
     *  call to GetSystemProperties for a system snapshot of the topology here
     */
    ASSERT_SUCCESS(hsaKmtAcquireSystemProperties(&m_SystemProperties));
@@ -53,8 +53,8 @@ void KFDBaseComponentTest::SetUp() {
    m_MemoryFlags.ui32.CachePolicy = HSA_CACHING_NONCACHED;  // Non cached
    m_MemoryFlags.ui32.ReadOnly = 0;                         // Read/Write
    m_MemoryFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;         // 4KB page
-    m_MemoryFlags.ui32.HostAccess = 1;                       // host accessible
-    m_MemoryFlags.ui32.NoSubstitute = 0;                     // fall back to node 0 if needed
+    m_MemoryFlags.ui32.HostAccess = 1;                       // Host accessible
+    m_MemoryFlags.ui32.NoSubstitute = 0;                     // Fall back to node 0 if needed
    m_MemoryFlags.ui32.GDSMemory = 0;
    m_MemoryFlags.ui32.Scratch = 0;

@@ -94,7 +94,7 @@ HSAuint64 KFDBaseComponentTest::GetSysMemSize() {
    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        nodeProps = m_NodeInfo.GetNodeProperties(node);
        if (nodeProps != NULL && nodeProps->NumCPUCores > 0 && nodeProps->NumMemoryBanks > 0) {
-            /* For NUMA nodes, memory is distributed among differnt nodes.
+            /* For NUMA nodes, memory is distributed among different nodes.
             * Compute total system memory size. KFD driver also computes
             * the system memory (si_meminfo) similarly
             */
@@ -63,17 +63,13 @@ class KFDBaseComponentTest : public testing::Test {
    HsaMemFlags m_MemoryFlags;
    HsaNodeInfo m_NodeInfo;

-    // @brief SetUpTestCase function run before the first test that uses
-    // KFDOpenCloseKFDTest class fixture, and opens KFD.
+    // @brief Executed before the first test that uses KFDOpenCloseKFDTest class and opens KFD.
    static  void SetUpTestCase();
-    // @brief TearDownTestCase function run after the last test from
-    // KFDOpenCloseKFDTest class fixture and calls close KFD.
+    // @brief Executed after the last test from KFDOpenCloseKFDTest class and closes KFD.
    static  void TearDownTestCase();
-    // @brief SetUp function run before every test that uses
-    // KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
+    // @brief Executed before every test that uses KFDOpenCloseKFDTest class and sets all common settings for the tests.
    virtual void SetUp();
-    // @brief TearDown function run after every test that uses
-    // KFDOpenCloseKFDTest class fixture.
+    // @brief Executed after every test that uses KFDOpenCloseKFDTest class.
    virtual void TearDown();
 };

@@ -89,9 +89,10 @@ void KFDCWSRTest::SetUp() {

    m_pIsaGen = IsaGenerator::Create(m_FamilyId);

-    // TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not
-    // set the destination based on workitem_id.
-    // Set the wave_num to 1 for now as a workarpound. Will set it to 8 or even 256 in the future.
+    /* TODO: In the ISA, the workitem_id is not obtained as expected, so the destination cannot
+     * be set based on workitem_id. Set the wave_num to 1 for now as a workaround.
+     * Will set it to 8 or even 256 in the future.
+     */
    wave_number = 1;

    ROUTINE_END
@@ -149,15 +150,15 @@ TEST_F(KFDCWSRTest, BasicTest) {
        iter[0] = 40000000;
        iter[1] = 20000000;

-        // submit the shader, queue1
+        // Submit the shader, queue1
        dispatch1->Submit(queue1);
        // Create queue2 during queue1 still running will trigger the CWSR
        EXPECT_SUCCESS(queue2.Create(defaultGPUNode));
-        // submit the shader
+        // Submit the shader
        dispatch2->Submit(queue2);
        dispatch1->Sync();
        dispatch2->Sync();
-        // ensure all the waves complete as expected
+        // Ensure all the waves complete as expected
        int i;
        for (i = 0 ; i < wave_number; ++i) {
             if (result[i] != iter[0]) {
@@ -39,7 +39,7 @@ class KFDCWSRTest : public KFDBaseComponentTest {
    virtual void SetUp();
    virtual void TearDown();

- protected:  // members
+ protected:  // Members
    unsigned wave_number;
    IsaGenerator* m_pIsaGen;
 };
@@ -79,7 +79,7 @@ void KFDDBGTest::TearDown() {
        delete m_pIsaGen;
    m_pIsaGen = NULL;

-    /* reset the user trap handler */
+    /* Reset the user trap handler */
    hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);

    KFDBaseComponentTest::TearDown();
@@ -118,7 +118,7 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
        ASSERT_SUCCESS(queue_flush.Create(defaultGPUNode));

        // Set Address Watch Params
-        // TODO: Set atchMode[1] to Atomic in case we want to test this mode.
+        // TODO: Set WatchMode[1] to Atomic in case we want to test this mode.

        HSA_DBG_WATCH_MODE  WatchMode[2];
        HSAuint64           WatchAddress[2];
@@ -153,9 +153,9 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
        dispatch.SetArgs(dstBuf.As<void*>(), reinterpret_cast<void *>(secDstBuf));
        dispatch.SetDim(1, 1, 1);

-        // TODO: use Memory ordering rules w/ atomics
-        //       for host-GPU memory syncs.
-        //       set to: std::memory_order_seq_cst
+        /* TODO: Use Memory ordering rules w/ atomics for host-GPU memory syncs.
+         * Set to std::memory_order_seq_cst
+         */

        dispatch.Submit(queue);

@@ -38,7 +38,7 @@ class KFDDBGTest : public KFDBaseComponentTest {
    virtual void SetUp();
    virtual void TearDown();

- protected:  // members
+ protected:  // Members
    IsaGenerator* m_pIsaGen;
 };

@@ -41,7 +41,7 @@ void KFDEventTest::SetUp() {
 void KFDEventTest::TearDown() {
    ROUTINE_START

-    // not all tests create event, destroy only if there is one
+    // Not all tests create an event, destroy only if there is one
    if (m_pHsaEvent != NULL) {
        // hsaKmtDestroyEvent moved to TearDown to make sure it is being called
        EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pHsaEvent));
@@ -58,7 +58,7 @@ TEST_F(KFDEventTest, CreateDestroyEvent) {
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_NodeInfo.HsaDefaultGPUNode(), &m_pHsaEvent));
    EXPECT_NE(0, m_pHsaEvent->EventData.HWData2);

-    // destroy event is being called in test TearDown
+    // Destroy event is being called in test TearDown
    TEST_END;
 }

@@ -31,9 +31,9 @@ class KFDEventTest :  public KFDBaseComponentTest {
    KFDEventTest(void) {}
    ~KFDEventTest(void) {}

-    // @brief SetUp function runs before every test in KFDEventTest.
+    // @brief Executed before every test in KFDEventTest.
    virtual void SetUp();
-    // @brief TearDown function runs after every test in KFDEventTest.
+    // @brief Executed after every test in KFDEventTest.
    virtual void TearDown();

 protected:
@@ -30,7 +30,7 @@
 #include "SDMAQueue.hpp"
 #include "Dispatch.hpp"

-#define N_PROCESSES             (8)     /* number of processes running in parallel, at least 2 */
+#define N_PROCESSES             (8)     /* Number of processes running in parallel, must be at least 2 */
 #define ALLOCATE_BUF_SIZE_MB    (64)
 #define ALLOCATE_RETRY_TIMES    (3)

@@ -70,7 +70,7 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui
                break;
            }

-            /* wait for 1 second to try allocate again */
+            /* Wait for 1 second before trying to allocate again */
            sleep(1);
        }
    }
@@ -268,7 +268,7 @@ void KFDEvictTest::ForkChildProcesses(int nprocesses) {

 void KFDEvictTest::WaitChildProcesses() {
    if (m_IsParent) {
-        /* only run by parent process */
+        /* Only run by parent process */
        int childStatus;
        int childExitOkNum = 0;
        int size = m_ChildPids.size();
@@ -286,7 +286,7 @@ void KFDEvictTest::WaitChildProcesses() {
        ASSERT_EQ(childExitOkNum, size);
    }

-    /* child process or parent process finished successfullly */
+    /* Child process or parent process finished successfully */
    m_ChildStatus = HSAKMT_STATUS_SUCCESS;
 }

@@ -296,19 +296,19 @@ void KFDEvictTest::WaitChildProcesses() {
 *
 * ALLOCATE_BUF_SIZE_MB buf allocation size
 *
- * number of buf is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
+ * Number of bufs is equal to (vramSizeMB / (vramBufSizeMB * N_PROCESSES) ) + 8
 * Total vram all processes allocated: 8GB for 4GB Fiji, and 20GB for 16GB Vega10
 *
- * many times of eviction and restore will happen:
- * ttm will evict buffers of another process if not enough free vram
+ * Eviction and restore will happen many times:
+ * ttm will evict buffers of another process if there is not enough free vram
 * process restore will evict buffers of another process
 *
- * Sometimes the allocate may fail (maybe that is normal)
+ * Sometimes the allocation may fail (maybe that is normal)
 * ALLOCATE_RETRY_TIMES max retry times to allocate
 *
- * This is basic test, no queue so vram are not used by GPU during test
+ * This is a basic test with no queue, so vram is not used by the GPU during the test
 *
- * Todo:
+ * TODO:
 *    - Synchronization between the processes, so they know for sure when
 *        they are done allocating memory
 */
@@ -345,7 +345,7 @@ TEST_F(KFDEvictTest, BasicTest) {
    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);

-    /* allocate gfx vram size of at most one third system memory */
+    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(rn, size, handle);
@@ -365,12 +365,12 @@ TEST_F(KFDEvictTest, BasicTest) {
 * until address buffer is filled with specific value 0x5678 by host program,
 * then each wavefront fills value 0x5678 at corresponding result buffer and quit
 *
- * initial state:
+ * Initial state:
 *   s[0:1] - address buffer base address
 *   s[2:3] - result buffer base address
 *   s4 - workgroup id
 *   v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
- * registers:
+ * Registers:
 *   v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
 *   v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
 *   v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
@@ -514,7 +514,7 @@ TEST_F(KFDEvictTest, QueueTest) {

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

-    /* Skip test for chip it doesn't have CWSR, which the test depends on */
+    /* Skip test for chip if it doesn't have CWSR, which the test depends on */
    if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties)) {
        LOG() << std::hex << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
@@ -538,7 +538,7 @@ TEST_F(KFDEvictTest, QueueTest) {
        LOG() << "Skipping test: Not enough system memory available." << std::endl;
        return;
    }
-    /* assert all buffer address can be stored within one page
+    /* Assert all buffer addresses can be stored within one page
     * because only one page host memory srcBuf is allocated
     */
    ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));
@@ -559,7 +559,7 @@ TEST_F(KFDEvictTest, QueueTest) {
    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);

-    /* allocate gfx vram size of at most one third system memory */
+    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = GetSysMemSize() / 3 < vramSize ? GetSysMemSize() / 3 : vramSize;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(rn, size, handle);
@@ -583,27 +583,32 @@ TEST_F(KFDEvictTest, QueueTest) {
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(localBufAddr, result);
    dispatch0.SetDim(wavefront_num, 1, 1);
-    /* submit the packet and start shader */
+    /* Submit the packet and start shader */
    dispatch0.Submit(pm4Queue);

-    /* doing evict/restore queue test for 5 seconds while queue is running */
+    /* Doing evict/restore queue test for 5 seconds while queue is running */
    sleep(5);

-    /* LOG() << m_psName << "notify shader to quit" << std::endl; */
-    /* fill address buffer so shader quits */
+    /* Uncomment this line for debugging */
+    // LOG() << m_psName << "notify shader to quit" << std::endl;
+
+    /* Fill address buffer so shader quits */
    addrBuffer.Fill(0x5678);

-    /* wait for shader to finish or timeout if shade has vm page fault */
+    /* Wait for shader to finish or timeout if shader has vm page fault */
    dispatch0.SyncWithStatus(120000);

    ASSERT_SUCCESS(pm4Queue.Destroy());

    FreeAmdgpuBo(handle);
-    /* LOG() << m_psName << "free buffer" << std::endl; */
-    /* cleanup */
+
+    /* Uncomment this line for debugging */
+    // LOG() << m_psName << "free buffer" << std::endl;
+
+    /* Cleanup */
    FreeBuffers(pBuffers, vramBufSize);

-    /* check if all wavefronts finish successfully */
+    /* Check if all wavefronts finished successfully */
    for (i = 0; i < wavefront_num; i++)
        ASSERT_EQ(0x5678, *(result + i));

@@ -38,7 +38,7 @@ class KFDEvictTest :  public KFDLocalMemoryTest {

    ~KFDEvictTest(void) {
        if (!m_IsParent) {
-            /* child process has to exit
+            /* Child process has to exit
             * otherwise gtest will continue other tests
             */
            exit(m_ChildStatus);
@@ -60,7 +60,7 @@ class KFDEvictTest :  public KFDLocalMemoryTest {
    void ForkChildProcesses(int nprocesses);
    void WaitChildProcesses();

- protected:  // members
+ protected:  // Members
    std::string     m_psName;
    std::vector<pid_t> m_ChildPids;
    HsaMemFlags     m_Flags;
@@ -28,8 +28,6 @@
 #include "SDMAQueue.hpp"
 #include "Dispatch.hpp"

-// All tests are marked by their serial number in the QCM FDD
-
 void KFDExceptionTest::SetUp() {
    ROUTINE_START

@@ -198,7 +196,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
        TestMemoryException(defaultGPUNode, srcBuffer.As<HSAuint64>(),
                            (HSAuint64)pDst);

-        /* Wait enough time here to ensure this process got killed by kernel
+        /* Wait for enough time here to ensure this process got killed by kernel
         * due to PPR exception.
         */
        sleep(5);
@@ -213,8 +211,7 @@ TEST_F(KFDExceptionTest, InvalidPPRWriteProtection) {
    TEST_END
 }

-/* TODO: Same as previous test InvalidPPRWriteProtection
- */
+/* TODO: Same as previous test InvalidPPRWriteProtection */
 TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);
@@ -249,7 +246,7 @@ TEST_F(KFDExceptionTest, InvalidPPRReadProtection) {
        TestMemoryException(defaultGPUNode, (HSAuint64)pSrc,
                            dstBuffer.As<HSAuint64>());

-        /* Wait enough time here to ensure this process got killed by kernel
+        /* Wait for enough time here to ensure this process got killed by kernel
         * due to PPR exception.
         */
        sleep(5);
@@ -55,7 +55,7 @@ class KFDExceptionTest : public KFDBaseComponentTest {
                             unsigned int dimX = 1, unsigned int dimY = 1,
                             unsigned int dimZ = 1);

- protected:  // members
+ protected:  // Members
    pid_t m_ChildPid;
    HSAKMT_STATUS m_ChildStatus;

@@ -83,8 +83,9 @@ TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
    ASSERT_SUCCESS(hsaKmtRegisterGraphicsHandleToNodes(dmabufFd, &info,
                                                       1, nodes));

-    // DMA buffer handle and GEM handle are no longer needed, KFD
-    // should have taken a reference to the BO
+    /* DMA buffer handle and GEM handle are no longer needed, KFD
+     * should have taken a reference to the BO
+     */
    ASSERT_EQ(0, close(dmabufFd));
    ASSERT_EQ(0, amdgpu_bo_free(handle));

@@ -60,7 +60,7 @@ KFDIPCTest::~KFDIPCTest(void) {
        exit(0);
 }

-/* Imort shared Local Memory from parent process. Check for the pattern
+/* Import shared Local Memory from parent process. Check for the pattern
 * filled in by the parent process. Then fill a new pattern.
 */
 void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
@@ -101,9 +101,9 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
    ASSERT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
 }

-/* Fill a pattern in to Local Memory and share with the child process.
+/* Fill a pattern into Local Memory and share with the child process.
 * Then wait until Child process to exit and check for the new pattern
- * fill in by the child process.
+ * filled in by the child process.
 */

 void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd) {
@@ -213,7 +213,7 @@ TEST_F(KFDIPCTest, BasicTest) {
 *                  dstBuf3[0x800-0x1000] is expected to be 0xAAAAAAAA
 *              and dstBuf4[0x0-0x1000] is expected to be 0xAAAAAAAA
 *
- * For this CMA test after copy only the first and the last of dstBuf is checked
+ * For this CMA test, after copying, only the first and the last of dstBuf are checked
 */

 static testMemoryDescriptor srcRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = {
@@ -490,7 +490,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachChildProcess(int defaultGPUNode, in
            break;
        }

-        /* Wait till the test is over */
+        /* Wait until the test is over */
        memset(msg, 0, sizeof(msg));
        if (read_non_block(readPipe, msg, 4) < 0) {
            status = CMA_IPC_PIPE_ERROR;
@@ -523,7 +523,7 @@ CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachParentProcess(int defaultGPUNode, p
    int testNo;
    CMA_TEST_STATUS status;

-    /* Recevie buffer array from child and then initialize and fill in Local Buffer Array.
+    /* Receive buffer array from child and then initialize and fill in Local Buffer Array.
     * READ_TEST: Copy remote buffer array into Local Buffer Array and then check
     *              for the new pattern.
     * WRITE_TEST: Write Local Buffer Array into remote buffer array. Notify child to
@@ -615,7 +615,6 @@ TEST_F(KFDIPCTest, CrossMemoryAttachTest) {
    ASSERT_EQ(pipe2(pipePtoC, O_NONBLOCK), 0);

    /* Create a child process and share the above Local Memory with it */
-
    m_ChildPid = fork();
    if (m_ChildPid == 0 && hsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS) {
        /* Child Process */
@@ -53,8 +53,9 @@ enum CMA_TEST_STATUS {
    CMA_TEST_HSA_WRITE_FAIL
 };

-// @struct testMemoryDescriptor
-//   Describes test buffers for Cross Memory Attach Test.
+/* @struct testMemoryDescriptor
+ * @brief Describes test buffers for Cross Memory Attach Test.
+ */
 struct testMemoryDescriptor {
    CMA_MEM_TYPE m_MemType;
    HSAuint64 m_MemSize;
@@ -76,9 +77,10 @@ struct testMemoryDescriptor {
    ~testMemoryDescriptor(){}
 };

-// @class KFDCMAArray
-//   Array of buffers that will be passed between the parent and child
-//   process for Cross memory read and write tests
+/* @class KFDCMAArray
+ * @brief Array of buffers that will be passed between the parent and child
+ *        process for Cross memory read and write tests
+ */
 class KFDCMAArray {
    /* Used to store the actual buffer array */
    HsaMemoryBuffer* m_MemArray[CMA_MEMORY_TEST_ARRAY_SIZE];
@@ -130,5 +132,4 @@ class KFDIPCTest :  public KFDBaseComponentTest {
    pid_t m_ChildPid;
 };

-#endif
-
+#endif  // __KFD_MEMORY_TEST__H__
@@ -28,8 +28,6 @@
 #include "SDMAQueue.hpp"
 #include "Dispatch.hpp"

-// All tests are marked by their serial number in the QCM FDD
-
 void KFDLocalMemoryTest::SetUp() {
    ROUTINE_START

@@ -285,7 +283,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
    unsigned value = 0;
    memset(pages, 0, sizeof(pages));
    for (order = 0; order <= maxOrder; order++) {
-        // At maxOrder, block sizes is 1/4 of available memory
+        // At maxOrder, block size is 1/4 of available memory
        pages[order].nPages = 1UL << (maxOrder - order + 2);
        // At order != 0, 1/2 the memory is already allocated
        if (order > 0)
@@ -467,7 +465,7 @@ TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
        }

        if (dst_node != defaultGPUNode) {
-            /* at least one node should be defaultGPUNode */
+            /* At least one node should be defaultGPUNode */
            src_node = defaultGPUNode;
        } else {
            for (auto node : gpuNodes) {
@@ -38,7 +38,7 @@ class KFDLocalMemoryTest : public KFDBaseComponentTest {
    virtual void SetUp();
    virtual void TearDown();

- protected:  // members
+ protected:  // Members
    IsaGenerator* m_pIsaGen;
 };

@@ -130,10 +130,10 @@ void KFDMemoryTest::TearDown() {
 #define GB(x) ((x) << 30)

 /*
- * try to map as much as possible system memory to gpu.
- * lets see if kfd support 1TB memory correctly or not.
- * And after this test case, we can observe if there is any sideeffect.
- * NOTICE: there are memory usage limit checks in hsa/kfd according to the total
+ * Try to map as much system memory as possible to the GPU
+ * to see if KFD supports 1TB memory correctly or not.
+ * After this test case, we can observe if there are any side effects.
+ * NOTICE: There are memory usage limit checks in hsa/kfd according to the total
 * physical system memory.
 */
 TEST_F(KFDMemoryTest, MMapLarge) {
@@ -187,19 +187,19 @@ TEST_F(KFDMemoryTest, MMapLarge) {
    TEST_END
 }

-/* keep memory mapped to default node
+/* Keep memory mapped to default node
 * Keep mapping/unmapping memory to/from non-default node
- * A shader running on default node  consistantly access
- * memory - make sure memory is always accessible on default,
- * i.e., there is no gpu vm fault.
+ * A shader running on default node consistently accesses
+ * memory - make sure memory is always accessible on the default node,
+ * i.e. there is no gpu vm fault.
 * Synchronization b/t host program and shader:
- * 1. host initialize src and dst buffer to 0
- * 2. shader keep reading src buffer and check value
- * 3. host write src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
- * 4. shader write dst buffer to 0x5678 after src changed to 0x5678, quit
- * 5. host program quit after dst becomes 0x5678
- * Need at least two gpu nodes to run the test. The defaut node has to be a gfx9 node.
- * Otherwise, test is skipped. Use kfdtest --node=$$ to specify the defaut node
+ * 1. Host initializes src and dst buffer to 0
+ * 2. Shader keeps reading src buffer and checks value
+ * 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
+ * 4. Shader writes dst buffer to 0x5678 after src changes to 0x5678, then quits
+ * 5. Host program quits after dst becomes 0x5678
+ * Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
+ * otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
 * This test case is introduced as a side-result of investigation of SWDEV-134798, which
 * is a gpu vm fault while running rocr conformance test. Here we try to simulate the
 * same test behaviour.
@@ -250,7 +250,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
        hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, (i>>5)&1+1, mapNodes);
    }

-    /* fill src buffer so shader quits */
+    /* Fill src buffer so shader quits */
    srcBuffer.Fill(0x5678);
    WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
    ASSERT_EQ(*dstBuffer.As<uint32_t *>(), 0x5678);
@@ -258,7 +258,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
    TEST_END
 }

-// basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
+// Basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
 TEST_F(KFDMemoryTest , MapMemoryToGPU) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -280,7 +280,7 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) {
    TEST_END
 }

-// following tests are for hsaKmtAllocMemory with invalid params
+// Following tests are for hsaKmtAllocMemory with invalid params
 TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -299,7 +299,7 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
    TEST_END
 }

-// basic test  for hsaKmtAllocMemory
+// Basic test for hsaKmtAllocMemory
 TEST_F(KFDMemoryTest, MemoryAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -381,7 +381,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
    HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));

    /* Create PM4 and SDMA queues before fork+COW to test queue
-     * eviction and restore */
+     * eviction and restore
+     */
    PM4Queue pm4Queue;
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
@@ -392,7 +393,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {

    /* First submit just so the queues are not empty, and to get the
     * TLB populated (in case we need to flush TLBs somewhere after
-     * updating the page tables) */
+     * updating the page tables)
+     */
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch0.Submit(pm4Queue);
@@ -410,7 +412,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
         * make any write access to the stack because we want the
         * parent to make the first write access and get a new copy. A
         * busy loop is the safest way to do that, since any function
-         * call (e.g. sleep) would write to the stack. */
+         * call (e.g. sleep) would write to the stack.
+         */
        while (1)
        {}
        WARN() << "Shouldn't get here!" << std::endl;
@@ -419,13 +422,15 @@ TEST_F(KFDMemoryTest, MemoryRegister) {

    /* Parent process writes to COW page(s) and gets a new copy. MMU
     * notifier needs to update the GPU mapping(s) for the test to
-     * pass. */
+     * pass.
+     */
    globalData = 0xD00BED00;
    stackData[dstOffset] = 0xdeadbeef;
    stackData[sdmaOffset] = 0xdeadbeef;

    /* Terminate the child process before a possible test failure that
-     * would leave it spinning in the background indefinitely. */
+     * would leave it spinning in the background indefinitely.
+     */
    int status;
    EXPECT_EQ(0, kill(pid, SIGTERM));
    EXPECT_EQ(pid, waitpid(pid, &status, 0));
@@ -516,10 +521,11 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
    TEST_END
 }

-// FlatScratchAccess
-// Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
-// will not work for multiple GPU nodes. For now test only one default GPU node.
-// TODO: Generalize it to support multiple nodes
+/* FlatScratchAccess
+ * Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
+ * will not work for multiple GPU nodes. For now test only one default GPU node.
+ * TODO: Generalize it to support multiple nodes
+ */

 #define SCRATCH_SLICE_SIZE 0x10000
 #define SCRATCH_SLICE_NUM 3
@@ -558,24 +564,23 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
    // Map everything for test below
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL));

-    // source & destination memory buffers
    HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, defaultGPUNode);

-
    // Initialize the srcBuffer to some fixed value
    srcMemBuffer.Fill(0x01010101);

-    // Initialize a buffer with a DWORD copy ISA
+    // Initialize a buffer with a dword copy ISA
    m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
            "ScratchCopyDword", isaBuffer);

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);

-    // TODO: Add support to all GPU Nodes.
-    // The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
-    // also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
-    // HsaMemoryBuffer is now associated with a Node.
+    /* TODO: Add support to all GPU Nodes.
+     * The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
+     * also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
+     * HsaMemoryBuffer is now associated with a Node.
+     */
    if (pNodeProperties != NULL) {
        // Get the aperture of the scratch buffer
        HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
@@ -585,7 +590,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
        for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
            if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
                int numWaves = 4;  // WAVES must be >= # SE
-                int waveSize = 1;  // amount of space used by each wave in units of 256 dwords...
+                int waveSize = 1;  // Amount of space used by each wave in units of 256 dwords

                PM4Queue queue;
                ASSERT_SUCCESS(queue.Create(defaultGPUNode));
@@ -595,25 +600,24 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
                // Create a dispatch packet to copy
                Dispatch dispatchSrcToScratch(isaBuffer);

-                // setup the dispatch packet
+                // Setup the dispatch packet
                // Copying from the source Memory Buffer to the scratch buffer
                dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
                dispatchSrcToScratch.SetDim(1, 1, 1);
                dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
-                // submit the packet
+                // Submit the packet
                dispatchSrcToScratch.Submit(queue);
                dispatchSrcToScratch.Sync();

                // Create another dispatch packet to copy scratch buffer contents to destination buffer.
                Dispatch dispatchScratchToDst(isaBuffer);

-                // set the arguments to copy from the scratch buffer
-                // to the destination buffer
+                // Set the arguments to copy from the scratch buffer to the destination buffer
                dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
                dispatchScratchToDst.SetDim(1, 1, 1);
                dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());

-                // submit the packet
+                // Submit the packet
                dispatchScratchToDst.Submit(queue);
                dispatchScratchToDst.Sync();

@@ -708,7 +712,7 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular
        lastTestedSize = sizeMB;
    }

-    /* Save the biggest allocated system buffer forsignal handling test */
+    /* Save the biggest allocated system buffer for signal handling test */
    LOG() << "The biggest allocated system buffer is " << std::dec
            << lastTestedSize << "MB" << std::endl;
    if (lastSize)
@@ -781,7 +785,8 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
 * is small. For example, on a typical Carrizo platform, the biggest allocated
 * system buffer could be more than 14G even though it only has 4G memory.
 * In that situation, it will take too much time to finish the test, because of
- * the onerous memory swap operation. So we limit the buffer size that way.*/
+ * the onerous memory swap operation. So we limit the buffer size that way.
+ */
 TEST_F(KFDMemoryTest, BigBufferStressTest) {
    if (!is_dgpu()) {
        LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
@@ -804,7 +809,8 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
    BigBufferVRAM(defaultGPUNode, granularityMB, NULL);

    /* Repeatedly allocate and map big buffers in system memory until it fails,
-     * then unmap and free them. */
+     * then unmap and free them.
+     */
 #define ARRAY_ENTRIES 2048

    int i = 0;
@@ -875,7 +881,8 @@ TEST_F(KFDMemoryTest, MMBench) {
    /* Two SDMA queues to interleave user mode SDMA with memory
     * management on either SDMA engine. Make the queues long enough
     * to buffer at least nBufs x WriteData packets (7 dwords per
-     * packet). */
+     * packet).
+     */
    SDMAQueue sdmaQueue[2];
    ASSERT_SUCCESS(sdmaQueue[0].Create(defaultGPUNode, PAGE_SIZE*8));
    ASSERT_SUCCESS(sdmaQueue[1].Create(defaultGPUNode, PAGE_SIZE*8));
@@ -1094,7 +1101,8 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
 * to access its memory like a debugger would. Child copies data in
 * the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
 * the child terminates, the parent checks that the copy was
- * successful. */
+ * successful.
+ */
 TEST_F(KFDMemoryTest, PtraceAccess) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -1108,13 +1116,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
    void *mem[2];
    unsigned i;

-    // Offset in the VRAM buffer to test crossing non-contiguous
-    // buffer boundaries. The second access starting from offset
-    // sizeof(HSAint64)+1 will cross a node boundary in a single access,
-    // for node sizes of 4MB or smaller.
+    /* Offset in the VRAM buffer to test crossing non-contiguous
+     * buffer boundaries. The second access starting from offset
+     * sizeof(HSAint64)+1 will cross a node boundary in a single access,
+     * for node sizes of 4MB or smaller.
+     */
    const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);

-    // alloc system memory from node 0 and initialize it
+    // Alloc system memory from node 0 and initialize it
    memFlags.ui32.NonPaged = 0;
    ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
    for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
@@ -1122,7 +1131,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
        (reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0;  // destination
    }

-    // try to alloc local memory from GPU node
+    // Try to alloc local memory from GPU node
    memFlags.ui32.NonPaged = 1;
    if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
        EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
@@ -1137,13 +1146,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
        mem[1] = NULL;
    }

-    // Allow any process to trace this one. If kernel is built without
-    // Yama, this is not needed, and this call will fail.
+    /* Allow any process to trace this one. If kernel is built without
+     * Yama, this is not needed, and this call will fail.
+     */
 #ifdef PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
 #endif

-    // Find out my pid so the child can trace it
+    // Find current pid so the child can trace it
    pid_t tracePid = getpid();

    // Fork the child
@@ -1168,8 +1178,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
            } while (!WIFSTOPPED(traceStatus));

            for (i = 0; i < 4; i++) {
-                // Test 4 different (mis-)alignments, leaving 1-byte
-                // gaps between longs
+                // Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
                HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
                errno = 0;
                long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
@@ -1264,7 +1273,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {

    ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, memFlags, &mem));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(mem, size, NULL));
-    /* set the word before 4M boundary to 0xdeadbeefdeadbeef
+    /* Set the word before 4M boundary to 0xdeadbeefdeadbeef
     * and the word after 4M boundary to 0xcafebabecafebabe
     */
    mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
@@ -1309,7 +1318,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
                waitpid(tracePid, &traceStatus, 0);
            } while (!WIFSTOPPED(traceStatus));

-            /* peek the memory */
+            /* Peek the memory */
            errno = 0;
            HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
            ASSERT_EQ(0, errno);
@@ -1318,7 +1327,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
            ASSERT_EQ(0, errno);
            ASSERT_EQ(data[1], data1);

-            /* swap mem0 and mem1 by poking */
+            /* Swap mem0 and mem1 by poking */
            ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
            ASSERT_EQ(0, errno);
            ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
@@ -1404,7 +1413,7 @@ TEST_F(KFDMemoryTest, SignalHandling) {
    size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);

    ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
-    // verify that pDb is not null before it's being used
+    // Verify that pDb is not null before it's being used
    ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";

    pid_t childPid = fork();
@@ -1473,7 +1482,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
            return;
        }

-        /* check the first 64 bit */
+        /* Check the first 64 bits */
        EXPECT_EQ(0, pDb[0]);
        pDb[0] = 1;

@@ -1495,7 +1504,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
 }

 static inline void access(volatile void *sd, int size, int rw) {
-    /* Most like sit in cache*/
+    /* Most likely sitting in cache*/
    static struct DUMMY {
        char dummy[1024];
    } dummy;
@@ -1509,8 +1518,8 @@ static inline void access(volatile void *sd, int size, int rw) {
 }

 /*
- * on large-ber system, test the visible vram access speed.
- * kfd is not allowd to alloc visible vram on non-largebar system.
+ * On large-bar system, test the visible vram access speed.
+ * KFD is not allowed to alloc visible vram on non-largebar system.
 */
 TEST_F(KFDMemoryTest, MMBandWidth) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
@@ -1571,7 +1580,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 0;
        } else {
-            /* alloc visible vram*/
+            /* Alloc visible vram*/
            allocNode = defaultGPUNode;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;
@@ -27,9 +27,10 @@
 #ifndef __KFD_MEMORY_TEST__H__
 #define __KFD_MEMORY_TEST__H__

-// @class KFDTopologyTest
-// this class has no additional features to KFDBaseComponentTest
-// the separation was made so we are able to goup all memeory tests together
+/* @class KFDMemoryTest
+ * This class has no additional features to KFDBaseComponentTest
+ * The separation was made so we are able to group all memory tests together
+ */
 class KFDMemoryTest :  public KFDBaseComponentTest {
 public:
    KFDMemoryTest(void) :m_pIsaGen(NULL) {}
@@ -24,7 +24,7 @@
 #include "KFDOpenCloseKFDTest.hpp"
 #include "KFDTestUtil.hpp"

-// before every test from this class fixture - open KFD
+// Before every test from this class fixture, open KFD
 void KFDOpenCloseKFDTest::SetUp() {
    ROUTINE_START

@@ -33,7 +33,7 @@ void KFDOpenCloseKFDTest::SetUp() {
    ROUTINE_END
 }

-// after every test from this class fixture - close KFD
+// After every test from this class fixture, close KFD
 void KFDOpenCloseKFDTest::TearDown() {
    ROUTINE_START

@@ -42,9 +42,10 @@ void KFDOpenCloseKFDTest::TearDown() {
    ROUTINE_END
 }

-// this test does not use class KFDOpenCloseKFDTest but is placed here
-// since it's testing same topic as other test
-// verify that calling hsaKmtCloseKFD on a closed KFD will return right status
+/* This test does not use class KFDOpenCloseKFDTest but is placed here
+ * since it's testing same topic as other test
+ * Verify that calling hsaKmtCloseKFD on a closed KFD will return right status
+ */
 TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -53,8 +54,7 @@ TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
    TEST_END
 }

-// verify that calling hsaKmtCloseKFD on an already opened KFD will return
-// right status
+// Verify that calling hsaKmtOpenKFD on an already opened KFD will return right status
 TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -65,8 +65,7 @@ TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
    TEST_END
 }

-// testing the normal scenario: open followed by close (done in the setup and
-// teardown functions)
+// Testing the normal scenario: open followed by close (done in the setup and teardown functions)
 TEST_F(KFDOpenCloseKFDTest, OpenCloseKFD ) {
 }

@@ -34,10 +34,9 @@ class KFDOpenCloseKFDTest : public testing::Test {
    ~KFDOpenCloseKFDTest(void) {}

 protected:
-    // @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture,
-    //        sets all common settings for the tests.
+    // @brief Executed before every test that uses KFDOpenCloseKFDTest class, sets all common settings for the tests.
    virtual void SetUp();
-    // @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture.
+    // @brief Executed after every test that uses KFDOpenCloseKFDTest class
    virtual void TearDown();
 };

@@ -35,7 +35,7 @@ void KFDPMTest::SetUpTestCase() {
    m_SetupSuccess = false;
    AcquirePrivilege(OS_SUSPEND);

-    // if AcquirePrivilege fails, it will throw and we will not reach here.
+    // If AcquirePrivilege fails, it will throw and we will not reach here.
    m_SetupSuccess = true;

    ROUTINE_END
@@ -119,4 +119,4 @@ TEST_F(KFDPMTest, SuspendWithIdleQueueAfterWork) {
    TEST_END
 }

-// TODO suspend while workload is being executed by a queue
+// TODO: Suspend while workload is being executed by a queue
@@ -34,7 +34,7 @@ void KFDPNPTest::SetUpTestCase() {

    AcquirePrivilege(OS_DRIVER_OPERATIONS);

-    // if AcquirePrivilege fails, it will throw and we will not reach here.
+    // If AcquirePrivilege fails, it will throw and we will not reach here.
    m_SetupSuccess = true;

    ROUTINE_END
@@ -32,8 +32,6 @@

 #include "Dispatch.hpp"

-// All tests are marked by their serial number in the QCM FDD
-
 void KFDQMTest::SetUp() {
    ROUTINE_START

@@ -155,7 +153,8 @@ TEST_F(KFDQMTest, CreateMultipleSdmaQueues) {
 * Fiji and other VI/Polaris GPUs. This test typically hangs in a few
 * seconds. According to analysis done by HW engineers, the culprit
 * seems to be PCIe speed switching. The problem can be worked around
- * by disabling the lowest DPM level on Fiji. */
+ * by disabling the lowest DPM level on Fiji.
+ */
 TEST_F(KFDQMTest, SdmaConcurrentCopies) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -189,14 +188,16 @@ TEST_F(KFDQMTest, SdmaConcurrentCopies) {
                                   srcBuf.As<char *>()+COPY_SIZE*j, COPY_SIZE));
        queue.SubmitPacket();

-        // Waste a variable amount of time. Submission timing
-        // while SDMA runs concurrently seems to be critical for
-        // reproducing the hang
+        /* Waste a variable amount of time. Submission timing
+         * while SDMA runs concurrently seems to be critical for
+         * reproducing the hang
+         */
        for (int k = 0; k < (i & 0xfff); k++)
            memcpy(srcBuf.As<char *>()+PAGE_SIZE, srcBuf.As<char *>(), 1024);

-        // Wait for idle every 8 packets to allow the SDMA engine to
-        // run concurrently for a bit without getting too far ahead
+        /* Wait for idle every 8 packets to allow the SDMA engine to
+         * run concurrently for a bit without getting too far ahead
+         */
        if ((i & 0x7) == 0)
            queue.Wait4PacketConsumption();
    }
@@ -268,7 +269,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) {

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 1, 1));

-    // don't sync since we don't expect rptr to change when the queue is disabled.
+    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -309,7 +310,7 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) {

    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(destBuf.As<void*>(), 0));

-    // don't sync since we don't expect rptr to change when the queue is disabled.
+    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -356,7 +357,7 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {

    queue.PlaceAndSubmitPacket(packet2);

-    // don't sync since we don't expect rptr to change when the queue is disabled.
+    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
@@ -388,7 +389,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    do {
-        // the following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
+        // The following means we'll get the order 0,0 => 0,1 => 1,0 => 1,1 so we cover all options.
        unsigned int firstToCreate = (numIter % 2 != 0) ? 1 : 0;
        unsigned int firstToDestroy = (numIter % 4 > 1) ? 1 : 0;

@@ -440,7 +441,7 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) {
        unsigned int pktSizeDw = 0;
        for (unsigned int i = 0; i < MAX_PACKETS; i++) {
            PM4WriteDataPacket packet;
-            packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i);  // two DWORDs per packet
+            packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i);  // two dwords per packet
            queues[qidx].PlacePacket(packet);
        }
    }
@@ -612,9 +613,9 @@ HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, u
    return timeTotal / iterations;
 }

-/**
+/*
 * Apply CU masking in a linear fashion, adding 1 CU per iteration
- * until all Shader Engines are full ...
+ * until all Shader Engines are full
 */
 TEST_F(KFDQMTest, BasicCuMaskingLinear) {
    TEST_START(TESTPROFILE_RUNALL);
@@ -1026,8 +1027,9 @@ TEST_F(KFDQMTest, CpuWriteCoherence) {

    EXPECT_EQ(0, queue.Rptr());

-    // now that the GPU has cached the PQ contents, we modify them in CPU cache and
-    // ensure that the GPU sees the updated value:
+    /* Now that the GPU has cached the PQ contents, we modify them in CPU cache and
+     * ensure that the GPU sees the updated value:
+     */
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x42, 0x42));

    queue.Wait4PacketConsumption();
@@ -1130,10 +1132,10 @@ TEST_F(KFDQMTest, QueueLatency) {
        if (i >= skip)
            queue_latency_avg += queue_latency;
    } while (++i < slots);
-    /*Calculate avg from packet[skip, slots-1]*/
+    /* Calculate avg from packet[skip, slots-1] */
    queue_latency_avg /= (slots - skip);

-    /*workload of queue packet itself*/
+    /* Workload of queue packet itself */
    i = 0;
    do {
        queue.PlacePacket(PM4ReleaseMemoryPacket(true,
@@ -1159,7 +1161,7 @@ TEST_F(KFDQMTest, QueueLatency) {
    do {
        /* The queue_latency is not that correct as the workload and overhead are average*/
        queue_latency_arr[i] -= workload + overhead;
-        /* The First submit takes a HSAint64 time*/
+        /* The first submit takes a long time */
        if (i < skip)
            LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl;
    } while (++i < slots);
@@ -1454,7 +1456,7 @@ TEST_F(KFDQMTest, P2PTest) {
                                             mapFlags, nodes.size(), &nodes[0]));
 #define MAGIC_NUM 0xdeadbeaf

-    /* First GPU fills mem with MAGIC_NUM*/
+    /* First GPU fills mem with MAGIC_NUM */
    void *src, *dst;
    HSAuint32 cur = nodes[0], next;
    ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast<void**>(&src)));
@@ -1478,11 +1480,11 @@ TEST_F(KFDQMTest, P2PTest) {
        }

        LOG() << "Test " << cur << " -> " << next << std::endl;
-        /* copy to sysBuf and next GPU*/
+        /* Copy to sysBuf and next GPU */
        void *dst_array[] = {sysBuf, dst};
        sdma_copy(cur, src, dst_array, n, size);

-        /* verify the data*/
+        /* Verify the data */
        ASSERT_EQ(sysBuf[0], MAGIC_NUM);
        ASSERT_EQ(sysBuf[end], MAGIC_NUM);

@@ -1558,7 +1560,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
 #ifdef DOORBELL_WRITE_USE_SDMA
        /* Write the wptr and doorbell update using the GPU's SDMA
         * engine. This should submit the PM4 packet on the first
-         * queue. */
+         * queue.
+         */
        otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
                                                   pendingWptr));
        otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
@@ -1566,7 +1569,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
 #else
        /* Write the wptr and doorbell update using WRITE_DATA packets
         * on a second PM4 queue. This should submit the PM4 packet on
-         * the first queue. */
+         * the first queue.
+         */
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
                                   pendingWptr, false));
@@ -1582,7 +1586,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
 #ifdef DOORBELL_WRITE_USE_SDMA
        /* Write the wptr and doorbell update using the GPU's SDMA
         * engine. This should submit the PM4 packet on the first
-         * queue. */
+         * queue.
+         */
        otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_write_ptr,
                                                   2, &pendingWptr64));
        otherQueue.PlacePacket(SDMAWriteDataPacket(qRes->Queue_DoorBell,
@@ -1591,7 +1596,8 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) {
        /* Write the 64-bit wptr and doorbell update using RELEASE_MEM
         * packets without IRQs on a second PM4 queue. RELEASE_MEM
         * should perform one atomic 64-bit access. This should submit
-         * the PM4 packet on the first queue. */
+         * the PM4 packet on the first queue.
+         */
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr,
                                   pendingWptr64, true));
@@ -42,10 +42,9 @@ class KFDQMTest : public KFDBaseComponentTest {
    virtual void TearDown();

    void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1);
-//    void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf);
    HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
    HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
- protected:  // members
+ protected:  // Members
    /* Acceptable performance for CU Masking should be within 5% of linearly-predicted performance */
    const double CuVariance = 0.15;
    const double CuNegVariance = 1.0 - CuVariance;
@@ -32,13 +32,12 @@ extern int g_TestDstNodeId;
 extern bool g_IsChildProcess;
 extern unsigned int g_TestGPUFamilyId;

-// each test should call TEST_START with the test custome profile and HW scheduling
-
+// Each test should call TEST_START with the test custom profile and HW scheduling
 enum TESTPROFILE{
    TESTPROFILE_DEV =          0x1,
    TESTPROFILE_PROMO =    0x2,
-    // 0x4 - 0x8000 - unsed flags
-    // can add any flag that will mark only part of the tests to run
+    // 0x4 - 0x8000 - unused flags
+    // Can add any flag that will mark only part of the tests to run
    TESTPROFILE_RUNALL = 0xFFFF
 };

@@ -48,8 +47,8 @@ enum ENVCAPS{
    ENVCAPS_16BITPASID             =  0x2,
    ENVCAPS_32BITLINUX              =  0x4,
    ENVCAPS_64BITLINUX              =  0x8
-    // 0x8 - 0x8000 - unsed flags
-    // can add any flag that will mark specific hw limitation \ capability
+    // 0x8 - 0x8000 - unused flags
+    // Can add any flag that will mark specific hw limitation or capability
 };

 enum KfdFamilyId {
@@ -56,11 +56,13 @@ bool g_IsChildProcess;
 unsigned int g_TestGPUFamilyId;

 GTEST_API_ int main(int argc, char **argv) {
-    // default values for run parameters
+    // Default values for run parameters
    g_TestRunProfile = TESTPROFILE_RUNALL;
    g_TestENVCaps = ENVCAPS_NOADDEDCAPS | ENVCAPS_64BITLINUX;
    g_TestTimeOut = KFD_TEST_DEFAULT_TIMEOUT;

+    // Every fatal fail ( = assert that failed ) will throw an exception
+    testing::GTEST_FLAG(throw_on_failure) = true;
    testing::InitGoogleTest(&argc, argv);

    CommandLineArguments args;
@@ -52,7 +52,7 @@ bool GetHwCapabilityHWS() {
    unsigned int value = 0;
    bool valExists = ReadDriverConfigValue(CONFIG_HWS, value);

-    /* HWS is enabled by default, so... */
+    /* HWS is enabled by default */
    return ( (!valExists) || ( value > 0));
 }

@@ -64,7 +64,7 @@ HSAKMT_STATUS CreateQueueTypeEvent(
    ) {
    HsaEventDescriptor Descriptor;

-// TODO Create per-OS header with this sort of definitions
+// TODO: Create per-OS header with this sort of definitions
 #ifdef _WIN32
    Descriptor.EventType = HSA_EVENTTYPE_QUEUE_EVENT;
 #else
@@ -369,7 +369,7 @@ void HsaMemoryBuffer::UnmapAllNodes() {
    }

    /*
-     * TODO: when thunk will be updated use hsaKmtRegisterToNodes. and then nodes will be used
+     * TODO: When thunk is updated, use hsaKmtRegisterToNodes. Then nodes will be used
     */
    hsaKmtUnmapMemoryToGPU(m_pBuf);
    hsaKmtDeregisterMemory(m_pBuf);
@@ -414,10 +414,10 @@ HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
 HsaNodeInfo::HsaNodeInfo() {
 }

-// Init - Get and store information about all the HSA nodes from the Thunk Library.
-// @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
-// @Return - false: if no node information is available
-//
+/* Init - Get and store information about all the HSA nodes from the Thunk Library.
+ * @NumOfNodes - Number of system nodes returned by hsaKmtAcquireSystemProperties
+ * @Return - false: if no node information is available
+ */
 bool HsaNodeInfo::Init(int NumOfNodes) {
    HsaNodeProperties *nodeProperties;
    _HSAKMT_STATUS status;
@@ -427,8 +427,9 @@ bool HsaNodeInfo::Init(int NumOfNodes) {
        nodeProperties = new HsaNodeProperties();

        status = hsaKmtGetNodeProperties(i, nodeProperties);
-        /* this is not a fatal test (not using assert), since even when it fails for one node
-         * we want to get information regarding others. */
+        /* This is not a fatal test (not using assert), since even when it fails for one node
+         * we want to get information regarding others.
+         */
        EXPECT_SUCCESS(status) << "Node index: " << i << "hsaKmtGetNodeProperties returned status " << status;

        if (status == HSAKMT_STATUS_SUCCESS) {
@@ -476,8 +477,7 @@ const int HsaNodeInfo::HsaDefaultGPUNode() const {
        return -1;

    if (g_TestNodeId >= 0) {
-        // Check if this is a valid Id, if so use this else use first
-        // available
+        // Check if this is a valid Id, if so use this else use first available
        for (unsigned int i = 0; i < m_NodesWithGPU.size(); i++) {
            if (g_TestNodeId == m_NodesWithGPU.at(i))
                return g_TestNodeId;
@@ -92,7 +92,7 @@ class HsaMemoryBuffer {
    ~HsaMemoryBuffer();

 private:
-    // disable copy
+    // Disable copy
    HsaMemoryBuffer(const HsaMemoryBuffer&);
    const HsaMemoryBuffer& operator=(const HsaMemoryBuffer&);

@@ -130,7 +130,7 @@ class HsaInteropMemoryBuffer {
    ~HsaInteropMemoryBuffer();

 private:
-    // disable copy
+    // Disable copy
    HsaInteropMemoryBuffer(const HsaInteropMemoryBuffer&);
    const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&);

@@ -141,8 +141,7 @@ class HsaInteropMemoryBuffer {
    unsigned int m_Node;
 };

-// Class HsaNodeInfo - Gather and store all HSA node information from Thunk.
-//
+// @class HsaNodeInfo - Gather and store all HSA node information from Thunk.
 class HsaNodeInfo {
    // List containing HsaNodeProperties of all Nodes available
    std::vector<HsaNodeProperties*> m_HsaNodeProps;
@@ -159,16 +158,17 @@ class HsaNodeInfo {

    bool Init(int NumOfNodes);

-    // This function should be soon depricated. This for transistion purpose only
-    // Currently, KfdTest is designed to test only ONE node. This function acts
-    // as transistion.
+    /* This function should be deprecated soon. This is for transition purposes only.
+     * Currently, KfdTest is designed to test only ONE node. This function acts
+     * as transition.
+     */
    const HsaNodeProperties* HsaDefaultGPUNodeProperties() const;
    const int HsaDefaultGPUNode() const;

-    // Future use the following two functions to support multi-GPU.
-    // const std::vector<int>& GpuNodes = GetNodesWithGPU()
-    // for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
-    //
+    /* TODO: Use the following two functions to support multi-GPU.
+     * const std::vector<int>& GpuNodes = GetNodesWithGPU()
+     * for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
+     */
    const std::vector<int>& GetNodesWithGPU() const;

    // @param node index of the node we are looking at
@@ -25,8 +25,6 @@
 #include <vector>
 #include <string>

-// @todo complete topology test according to whats in: hsathk\source\windows\kmt_topology.cpp
-
 const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
 const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);

@@ -35,17 +33,17 @@ TEST_F(KFDTopologyTest , BasicTest) {

    const HsaNodeProperties *pNodeProperties;

-    // goes over all nodes in the sytem properties and check the basic info received
+    // Goes over all nodes in the system properties and checks the basic info received
    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        pNodeProperties = m_NodeInfo.GetNodeProperties(node);
        if (pNodeProperties != NULL) {
-            // checking for cpu core only if it's a cpu only node or if its KAVERY apu.
+            // Checking for cpu core only if it's a cpu only node or if it's a KAVERI APU.
            if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) {
                EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node
                                                                      << " No CPUs core are connected for node index";
            }

-            // if it's not a cpu only node, look for a gpu core
+            // If it's not a cpu only node, look for a gpu core
            if (pNodeProperties->DeviceId != 0) {
                EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node
                                                                           << "No GPUs core are connected.";
@@ -64,7 +62,7 @@ TEST_F(KFDTopologyTest , BasicTest) {
    TEST_END
 }

-// this test verify failure status on hsaKmtGetNodeProperties with invalid params
+// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
 TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -73,7 +71,7 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
    TEST_END
 }

-// this test verify failure status on hsaKmtGetNodeProperties with invalid params
+// This test verifies failure status on hsaKmtGetNodeProperties with invalid params
 TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -84,8 +82,8 @@ TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
    TEST_END
 }

-// test that we can get memory property successfully per node
-// @todo check validity of values returned
+// Test that we can get memory properties successfully per node
+// TODO: Check validity of values returned
 TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
    TEST_START(TESTPROFILE_RUNALL)
    const HsaNodeProperties *pNodeProperties;
@@ -104,7 +102,7 @@ TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
 }


-// test the GPU local memory aperture is valid.
+// Test that the GPU local memory aperture is valid.
 TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
    TEST_REQUIRE_NO_ENV_CAPABILITIES(ENVCAPS_32BITLINUX);

@@ -137,8 +135,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
    TEST_END
 }

-// test that we can get cache property successfully per node
-// @todo check validity of values returned
+// Test that we can get cache property successfully per node
+// TODO: Check validity of values returned
 TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -179,8 +177,8 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
    TEST_END
 }

-// test that we can get NodeIoLink property successfully per node
-// @todo check validity of values returned
+// Test that we can get NodeIoLink property successfully per node
+// TODO: Check validity of values returned
 // GetNodeIoLinkProperties is disabled for now, test fails due to bug in BIOS
 TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
    TEST_START(TESTPROFILE_RUNALL)
@@ -197,7 +195,7 @@ TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
            HsaIoLinkProperties  *IolinkProperties =  new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
            EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties));
            if (pNodeProperties->NumIOLinks == 0) {
-                // No io_links. Just Print the node
+                // No io_links. Just print the node
                LOG() << "[" << node << "]" << std::endl;
                continue;
            }
@@ -26,9 +26,10 @@
 #ifndef __KFD_TOPOLOGY_TEST__H__
 #define __KFD_TOPOLOGY_TEST__H__

-// @class KFDTopologyTest
-// this class has no additional features to KFDBaseComponentTest
-// the separation was made so we are able to goup all topology tests together
+/* @class KFDTopologyTest
+ * This class has no additional features to KFDBaseComponentTest
+ * The separation was made so we are able to group all topology tests together
+ */
 class KFDTopologyTest : public KFDBaseComponentTest {
 public:
    KFDTopologyTest(void) {}
@@ -46,11 +46,11 @@ static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ |
                                  PROT_EXEC | PROT_WRITE | PROT_READ};

 void SetConsoleTextColor(TEXTCOLOR color) {
-    // TODO complete
+    // TODO: Complete
 }

 void Delay(int delayCount) {
-    // usleeps accept time in microseconds
+    // usleep accepts time in microseconds
    usleep(delayCount * 1000);
 }

@@ -76,7 +76,7 @@ HSAuint64 GetLastErrorNo() {
 }

 bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) {
-    // TODO IMPLEMENT
+    // TODO: Implement
    return false;
 }

@@ -87,20 +87,20 @@ HSAuint64 GetSystemTickCountInMicroSec() {
 }

 bool SuspendAndWakeUp() {
-    // TODO IMPLEMENT
+    // TODO: Implement
    return false;
 }

 void AcquirePrivilege(OS_PRIVILEGE priv) {
-    // TODO IMPLEMENT
+    // TODO: Implement
 }

 void DisableKfd() {
-    // TODO IMPLEMENT
+    // TODO: Implement
 }

 void EnableKfd() {
-    // TODO IMPLEMENT
+    // TODO: Implement
 }

 bool ReadDriverConfigValue(CONFIG_VALUE config, unsigned int& rValue) {
@@ -145,7 +145,7 @@ bool GetCommandLineArguments(int argc, char **argv, CommandLineArguments& rArgs)
        if (c != 0)
            break;

-        /* If this option set a flag, do nothing else now. */
+        /* If this option sets a flag, do nothing else. */
        if (long_options[option_index].flag != 0)
            continue;

@@ -72,35 +72,34 @@ struct CommandLineArguments {
 #define MEM_WRITE 0x02
 #define MEM_EXECUTE 0x4

-
-
-// @brief change console text color
+// @brief Change console text color
 void SetConsoleTextColor(TEXTCOLOR color);
 // @params delayCount : delay time in milliseconds
 void Delay(int delayCount);
-// @brief replacement for windows VirtualAlloc func
+// @brief Replacement for windows VirtualAlloc func
 void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = MEM_READ | MEM_WRITE);
-// @brief replacement for windows FreeVirtual func
+// @brief Replacement for windows FreeVirtual func
 bool VirtualFreeMemory(void *address, unsigned int size);
-// @brief retrieve the last error number
+// @brief Retrieve the last error number
 HSAuint64 GetLastErrorNo();

 HSAint64 AtomicInc(volatile HSAint64* pValue);

 void MemoryBarrier();

-// @brief: runs the selected test case number of times required, each in a separate process
-// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want
-// to run all tests in a test case: TestCase.* and so on
-// @params numOfProcesses : how many processes to run in parallel
-// @params runsPerProcess : how many iteration a test should do per process, must be a positive number
+/* @brief: Runs the selected test case number of times required, each in a separate process
+ * @params testToRun : Can be a specific test testcase like TestCase.TestName or if you want
+ *                     to run all tests in a test case: TestCase.* and so on
+ * @params numOfProcesses : How many processes to run in parallel
+ * @params runsPerProcess : How many iterations a test should do per process, must be a positive number
+ */
 bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1);

 HSAuint64 GetSystemTickCountInMicroSec();

-/**Put the system to S3/S4 power state and bring it back to S0.
-@return 'true' on success, 'false' on failure.
-*/
+/* Put the system to S3/S4 power state and bring it back to S0.
+ * @return 'true' on success, 'false' on failure.
+ */
 bool SuspendAndWakeUp();

 void AcquirePrivilege(OS_PRIVILEGE priv);
@@ -53,7 +53,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const {

 void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
    m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(calloc(1, SizeInBytes()));
-    // verify that the memory is allocated successfully, cannot use assert here
+    // Verify that the memory is allocated successfully, cannot use assert here
    EXPECT_NOTNULL(m_pPacketData);

    InitPM4Header(m_pPacketData->header, IT_WRITE_DATA);
@@ -249,8 +249,8 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i
    // 1st register is a part of the packet struct.
    m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);

-    /* allocating the size of the packet, since the packet is assembled from a struct
-     * followed by an additional DWORD data
+    /* Allocating the size of the packet, since the packet is assembled from a struct
+     * followed by an additional dword data
     */
    m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(malloc(m_packetSize));

@@ -296,7 +296,7 @@ PM4PartialFlushPacket::PM4PartialFlushPacket(void) {
 }

 unsigned int PM4PartialFlushPacket::SizeInBytes() const {
-    // for PARTIAL_FLUSH_CS packets, the last 2 DWORDS don't exist.
+    // For PARTIAL_FLUSH_CS packets, the last 2 dwords don't exist.
    return sizeof(PM4EVENT_WRITE) - sizeof(uint32_t)*2;
 }

@@ -31,7 +31,7 @@
 #include "pm4_pkt_struct_ai.h"
 #include "IndirectBuffer.hpp"

-// @class PM4Packet: marks a group of all PM4 packets
+// @class PM4Packet: Marks a group of all PM4 packets
 class PM4Packet : public BasePacket {
 public:
        PM4Packet(void) {}
@@ -47,9 +47,9 @@ class PM4Packet : public BasePacket {
 // @class PM4WriteDataPacket
 class PM4WriteDataPacket : public PM4Packet {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    PM4WriteDataPacket(void): m_ndw(0), m_pPacketData(NULL) {}
-    // this contructor will also init the packet, no need for additional calls
+    // This constructor will also init the packet, no need for additional calls
    PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1):
        m_ndw(1), m_pPacketData(NULL) {InitPacket(destBuf, &data1);}
    PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1, unsigned int data2):
@@ -59,11 +59,11 @@ class PM4WriteDataPacket : public PM4Packet {
    }

    virtual ~PM4WriteDataPacket(void);
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }
-    // @brief initialise the packet
+    // @brief Initialise the packet
    void InitPacket(unsigned int *destBuf, unsigned int data1) {
        m_ndw = 1;
        InitPacket(destBuf, &data1);
@@ -77,27 +77,27 @@ class PM4WriteDataPacket : public PM4Packet {

 protected:
    unsigned int m_ndw;
-    // PM4WRITE_DATA_CI struct contains all the packets data
+    // PM4WRITE_DATA_CI struct contains all the packet's data
    PM4WRITE_DATA_CI  *m_pPacketData;
 };

 // @class PM4ReleaseMemoryPacket
 class PM4ReleaseMemoryPacket : public PM4Packet {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    PM4ReleaseMemoryPacket(void): m_pPacketData(NULL) {}
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    PM4ReleaseMemoryPacket(bool isPolling, uint64_t address, uint64_t data,
                           bool is64bit = false, bool isTimeStamp = false): m_pPacketData(NULL) {
        InitPacket(isPolling, address, data, is64bit, isTimeStamp);
    }

    virtual ~PM4ReleaseMemoryPacket(void);
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }
-    // @brief initialise the packet
+    // @brief Initialise the packet
    void InitPacket(bool isPolling, uint64_t address, uint64_t data,
                    bool is64bit = false, bool isTimeStamp = false);

@@ -109,21 +109,21 @@ class PM4ReleaseMemoryPacket : public PM4Packet {
 // @class PM4IndirectBufPacket
 class PM4IndirectBufPacket : public PM4Packet {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    PM4IndirectBufPacket(void) {}
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    explicit PM4IndirectBufPacket(IndirectBuffer *pIb);

    virtual ~PM4IndirectBufPacket(void) {}
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }
-    // @breif initialise the packet
+    // @brief Initialise the packet
    void InitPacket(IndirectBuffer *pIb);

 private:
-    // PM4MEC_INDIRECT_BUFFER struct contains all the packets data
+    // PM4MEC_INDIRECT_BUFFER struct contains all the packet's data
    PM4MEC_INDIRECT_BUFFER  m_packetData;
 };

@@ -139,11 +139,11 @@ class PM4AcquireMemoryPacket : public PM4Packet {
    virtual const void *GetPacket() const { return &m_packetData; }

 private:
-    // PM4ACQUIRE_MEM struct contains all the packets data
+    // PM4ACQUIRE_MEM struct contains all the packet's data
    PM4ACQUIRE_MEM  m_packetData;
 };

-// @class PM4SetShaderRegPacket   packet that writes to consecutive registers starting at baseOffset.
+// @class PM4SetShaderRegPacket Packet that writes to consecutive registers starting at baseOffset.
 class PM4SetShaderRegPacket : public PM4Packet {
 public:
    PM4SetShaderRegPacket(void);
@@ -152,9 +152,9 @@ class PM4SetShaderRegPacket : public PM4Packet {

    virtual ~PM4SetShaderRegPacket(void);

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }

    void InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs);
@@ -162,7 +162,7 @@ class PM4SetShaderRegPacket : public PM4Packet {
 private:
    unsigned int m_packetSize;
    bool m_packetDataAllocated;
-    // PM4SET_SH_REG struct contains all the packets data
+    // PM4SET_SH_REG struct contains all the packet's data
    PM4SET_SH_REG  *m_pPacketData;
 };

@@ -175,15 +175,15 @@ class PM4DispatchDirectPacket : public PM4Packet {

    virtual ~PM4DispatchDirectPacket(void) {}

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

    void InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit);

 private:
-    // PM4DISPATCH_DIRECT struct contains all the packets data
+    // PM4DISPATCH_DIRECT struct contains all the packet's data
    PM4DISPATCH_DIRECT  m_packetData;
 };

@@ -193,13 +193,13 @@ class PM4PartialFlushPacket : public PM4Packet {
    PM4PartialFlushPacket(void);
    virtual ~PM4PartialFlushPacket(void) {}

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

 private:
-    // PM4EVENT_WRITE struct contains all the packets data
+    // PM4EVENT_WRITE struct contains all the packet's data
    PM4EVENT_WRITE  m_packetData;
 };

@@ -209,9 +209,9 @@ class PM4NopPacket : public PM4Packet {
    PM4NopPacket(void);
    virtual ~PM4NopPacket(void) {}

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

 private:
@@ -36,7 +36,8 @@ PM4Queue::~PM4Queue(void) {

 unsigned int PM4Queue::Wptr() {
    /* Write pointer in dwords. Simulate 32-bit wptr that wraps at
-     * queue size even on Vega10 and later chips with 64-bit wptr. */
+     * queue size even on Vega10 and later chips with 64-bit wptr.
+     */
    return *m_Resources.Queue_write_ptr % (m_QueueBuf->Size() / 4);
 }

@@ -48,12 +49,13 @@ unsigned int PM4Queue::Rptr() {
 unsigned int PM4Queue::RptrWhenConsumed() {
    /* On PM4 queues Rptr is always 32-bit in dword units and wraps at
     * queue size. The expected value when all packets are consumed is
-     * exactly the value returned by Wptr(). */
+     * exactly the value returned by Wptr().
+     */
    return Wptr();
 }

 void PM4Queue::SubmitPacket() {
-    // m_pending Wptr is in DWORDs
+    // m_pending Wptr is in dwords
    if (g_TestGPUFamilyId < FAMILY_AI) {
        // Pre-Vega10 uses 32-bit wptr and doorbell
        MemoryBarrier();
@@ -35,9 +35,9 @@ class PM4Queue : public BaseQueue {
    // @brief update queue write pointer and sets the queue doorbell to the queue write pointer
    virtual void SubmitPacket();

-    // @ return read pointer modulo queue size in DWORDs
+    // @return Read pointer modulo queue size in dwords
    virtual unsigned int Rptr();
-    // @ return write pointer modulo queue size in DWORDs
+    // @return Write pointer modulo queue size in dwords
    virtual unsigned int Wptr();
    // @ return expected m_Resources.Queue_read_ptr when all packets consumed
    virtual unsigned int RptrWhenConsumed();
@@ -38,7 +38,7 @@ class RDMATest : public KFDBaseComponentTest {
    virtual void SetUp();
    virtual void TearDown();

- protected:  // members
+ protected:  // Members
    IsaGenerator* m_pIsaGen;
 };

@@ -27,8 +27,9 @@
 #include "SDMAPacket.hpp"
 #include "KFDTestUtil.hpp"

-/* Byte/dword cound in many SDMA packets is 1-based in AI, meaning a
- * count of 1 is encoded as 0. */
+/* Byte/dword count in many SDMA packets is 1-based in AI, meaning a
+ * count of 1 is encoded as 0.
+ */
 #define SDMA_COUNT(c) (g_TestGPUFamilyId < FAMILY_AI ? (c) : (c)-1)

 SDMAWriteDataPacket::SDMAWriteDataPacket(void):
@@ -95,7 +96,7 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns
    packetData = pSDMA;

    while (surfsize > 0) {
-        /* sdma support maximum 0x3fffe0 byte in one copy, take 2M here */
+        /* SDMA supports a maximum of 0x3fffe0 bytes in one copy, take 2M here */
        if (surfsize > TWO_MEG)
            size = TWO_MEG;
        else
@@ -150,7 +151,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in
        pSDMA->HEADER_UNION.op = SDMA_OP_CONST_FILL;
        pSDMA->HEADER_UNION.sub_op = 0;

-        /* If Both size and address are DW aligned, then use DW fill */
+        /* If both size and address are DW aligned, then use DW fill */
        if (!(copy_size & 0x3) && !((HSAuint64)dst & 0x3))
            pSDMA->HEADER_UNION.fillsize = 2; /* DW Fill */
        else
@@ -27,7 +27,7 @@
 #include "BasePacket.hpp"
 #include "sdma_pkt_struct.h"

-// @class SDMSPacket: marks a group of all SDMA packets
+// @class SDMAPacket: Marks a group of all SDMA packets
 class SDMAPacket : public BasePacket {
 public:
        SDMAPacket(void) {}
@@ -38,44 +38,44 @@ class SDMAPacket : public BasePacket {

 class SDMAWriteDataPacket : public SDMAPacket {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    SDMAWriteDataPacket(void);
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    SDMAWriteDataPacket(void* destAddr, unsigned int data);
    SDMAWriteDataPacket(void* destAddr, unsigned int ndw, void *data);

    virtual ~SDMAWriteDataPacket(void);

-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const  { return packetData; }
-    // @breif initialise the packet
+    // @brief Initialise the packet
    void InitPacket(void* destAddr, unsigned int data);
    void InitPacket(void* destAddr, unsigned int ndw, void *data);
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return packetSize; }

 protected:
-    // SDMA_PKT_WRITE_UNTILED struct contains all the packets data
+    // SDMA_PKT_WRITE_UNTILED struct contains all the packet's data
    SDMA_PKT_WRITE_UNTILED *packetData;
    unsigned int packetSize;
 };

 class SDMACopyDataPacket : public SDMAPacket {
 public:
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    SDMACopyDataPacket(void *dest, void *src, unsigned int size);
    SDMACopyDataPacket(void *const dst[], void *src, int n, unsigned int surfsize);

    virtual ~SDMACopyDataPacket(void);

-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const  { return packetData; }

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return packetSize; }

 protected:
-    // SDMA_PKT_COPY_LINEAR struct contains all the packets data
+    // SDMA_PKT_COPY_LINEAR struct contains all the packet's data
    SDMA_PKT_COPY_LINEAR  *packetData;

    unsigned int packetSize;
@@ -83,19 +83,19 @@ class SDMACopyDataPacket : public SDMAPacket {

 class SDMAFillDataPacket : public SDMAPacket {
 public:
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    SDMAFillDataPacket(void *dest, unsigned int data, unsigned int size);

    virtual ~SDMAFillDataPacket(void);

-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const  { return m_PacketData; }

-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_PacketSize; }

 protected:
-    // SDMA_PKT_CONSTANT_FILL struct contains all the packets data
+    // SDMA_PKT_CONSTANT_FILL struct contains all the packet's data
    SDMA_PKT_CONSTANT_FILL  *m_PacketData;

    unsigned int m_PacketSize;
@@ -103,41 +103,41 @@ class SDMAFillDataPacket : public SDMAPacket {

 class SDMAFencePacket : public SDMAPacket {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    SDMAFencePacket(void);
-    // this contructor will also init the packet, no need for adittional calls
+    // This constructor will also init the packet, no need for additional calls
    SDMAFencePacket(void* destAddr, unsigned int data);

    virtual ~SDMAFencePacket(void);

-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const  { return &packetData; }
-    // @brief initialise the packet
+    // @brief Initialise the packet
    void InitPacket(void* destAddr, unsigned int data);
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_FENCE ); }

 protected:
-    // SDMA_PKT_FENCE struct contains all the packets data
+    // SDMA_PKT_FENCE struct contains all the packet's data
    SDMA_PKT_FENCE  packetData;
 };

 class SDMATrapPacket : public SDMAPacket {
 public:
-    // empty constructor, befor using the packet call the init func
+    // Empty constructor, before using the packet call the init func
    explicit SDMATrapPacket(unsigned int eventID = 0);

    virtual ~SDMATrapPacket(void);

-    // @returns a pointer to the packet
+    // @returns Pointer to the packet
    virtual const void *GetPacket() const  { return &packetData; }
-    // @brief initialise the packet
+    // @brief Initialise the packet
    void InitPacket(unsigned int eventID);
-    // @returns the packet size in bytes
+    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_TRAP); }

 protected:
-    // SDMA_PKT_TRAP struct contains all the packets data
+    // SDMA_PKT_TRAP struct contains all the packet's data
    SDMA_PKT_TRAP  packetData;
 };

@@ -32,20 +32,22 @@ SDMAQueue::~SDMAQueue(void) {

 unsigned int SDMAQueue::Wptr() {
    /* In SDMA queues write pointers are saved in bytes, convert the
-     * wptr value to DWORD to fit the way BaseQueue works. On Vega10
+     * wptr value to dword to fit the way BaseQueue works. On Vega10
     * the write ptr is 64-bit. We only read the low 32 bit (assuming
     * the queue buffer is smaller than 4GB) and modulo divide by the
-     * queue size to simulate a 32-bit read pointer. */
+     * queue size to simulate a 32-bit write pointer.
+     */
    return (*m_Resources.Queue_write_ptr % m_QueueBuf->Size()) /
        sizeof(unsigned int);
 }

 unsigned int SDMAQueue::Rptr() {
    /* In SDMA queues read pointers are saved in bytes, convert the
-     * read value to DWORD to fit the way BaseQueue works. On Vega10
+     * read value to dword to fit the way BaseQueue works. On Vega10
     * the read ptr is 64-bit. We only read the low 32 bit (assuming
     * the queue buffer is smaller than 4GB) and modulo divide by the
-     * queue size to simulate a 32-bit read pointer. */
+     * queue size to simulate a 32-bit read pointer.
+     */
    return (*m_Resources.Queue_read_ptr % m_QueueBuf->Size()) /
        sizeof(unsigned int);
 }
@@ -53,12 +55,13 @@ unsigned int SDMAQueue::Rptr() {
 unsigned int SDMAQueue::RptrWhenConsumed() {
    /* Rptr is same size and byte units as Wptr. Here we only care
     * about the low 32-bits. When all packets are consumed, read and
-     * write pointers should have the same value. */
+     * write pointers should have the same value.
+     */
    return *m_Resources.Queue_write_ptr;
 }

 void SDMAQueue::SubmitPacket() {
-    // m_pending Wptr is in DWORDs
+    // m_pending Wptr is in dwords
    if (g_TestGPUFamilyId < FAMILY_AI) {
        // Pre-Vega10 uses 32-bit wptr and doorbell
        unsigned int wPtrInBytes = m_pendingWptr * sizeof(unsigned int);
@@ -31,15 +31,15 @@ class SDMAQueue : public BaseQueue {
    SDMAQueue(void);
    virtual ~SDMAQueue(void);

-    // @brief update queue write pointer and sets the queue doorbell to the queue write pointer
+    // @brief Update queue write pointer and set the queue doorbell to the queue write pointer
    virtual void SubmitPacket();

 protected:
-    // @ return write pointer modulo queue size in DWORDs
+    // @return Write pointer modulo queue size in dwords
    virtual unsigned int Wptr();
-    // @ return read pointer modulo queue size in DWORDs
+    // @return Read pointer modulo queue size in dwords
    virtual unsigned int Rptr();
-    // @ return expected m_Resources.Queue_read_ptr when all packets consumed
+    // @return Expected m_Resources.Queue_read_ptr when all packets are consumed
    virtual unsigned int RptrWhenConsumed();

    virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_SDMA; }