kfdtest: Clean up comments

Consolidate style (use /* */ for multi-line), fix typos, use dword instead of DWORD/DWord. Change-Id: I620e45c1687550db41127e45641b7d79d28223a1
2018-08-14 09:52:31 -04:00
@@ -130,10 +130,10 @@ void KFDMemoryTest::TearDown() {
 #define GB(x) ((x) << 30)

 /*
- * try to map as much as possible system memory to gpu.
- * lets see if kfd support 1TB memory correctly or not.
- * And after this test case, we can observe if there is any sideeffect.
- * NOTICE: there are memory usage limit checks in hsa/kfd according to the total
+ * Try to map as much system memory as possible to the GPU
+ * to see whether KFD supports 1TB of memory correctly.
+ * After this test case, we can observe if there are any side effects.
+ * NOTICE: There are memory usage limit checks in hsa/kfd according to the total
 * physical system memory.
 */
 TEST_F(KFDMemoryTest, MMapLarge) {
@@ -187,19 +187,19 @@ TEST_F(KFDMemoryTest, MMapLarge) {
    TEST_END
 }

-/* keep memory mapped to default node
+/* Keep memory mapped to default node
 * Keep mapping/unmapping memory to/from non-default node
- * A shader running on default node  consistantly access
- * memory - make sure memory is always accessible on default,
- * i.e., there is no gpu vm fault.
+ * A shader running on default node consistently accesses
+ * memory - make sure memory is always accessible on the default node,
+ * i.e. there is no gpu vm fault.
 * Synchronization b/t host program and shader:
- * 1. host initialize src and dst buffer to 0
- * 2. shader keep reading src buffer and check value
- * 3. host write src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
- * 4. shader write dst buffer to 0x5678 after src changed to 0x5678, quit
- * 5. host program quit after dst becomes 0x5678
- * Need at least two gpu nodes to run the test. The defaut node has to be a gfx9 node.
- * Otherwise, test is skipped. Use kfdtest --node=$$ to specify the defaut node
+ * 1. Host initializes src and dst buffer to 0
+ * 2. Shader keeps reading src buffer and checks value
+ * 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
+ * 4. Shader writes dst buffer to 0x5678 after src changes to 0x5678, then quits
+ * 5. Host program quits after dst becomes 0x5678
+ * Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
+ * otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
 * This test case is introduced as a side-result of investigation of SWDEV-134798, which
 * is a gpu vm fault while running rocr conformance test. Here we try to simulate the
 * same test behaviour.
@@ -250,7 +250,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
        hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, (i>>5)&1+1, mapNodes);
    }

-    /* fill src buffer so shader quits */
+    /* Fill src buffer so shader quits */
    srcBuffer.Fill(0x5678);
    WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
    ASSERT_EQ(*dstBuffer.As<uint32_t *>(), 0x5678);
@@ -258,7 +258,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
    TEST_END
 }

-// basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
+// Basic test of hsaKmtMapMemoryToGPU and hsaKmtUnmapMemoryToGPU
 TEST_F(KFDMemoryTest , MapMemoryToGPU) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -280,7 +280,7 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) {
    TEST_END
 }

-// following tests are for hsaKmtAllocMemory with invalid params
+// Following tests are for hsaKmtAllocMemory with invalid params
 TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -299,7 +299,7 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
    TEST_END
 }

-// basic test  for hsaKmtAllocMemory
+// Basic test for hsaKmtAllocMemory
 TEST_F(KFDMemoryTest, MemoryAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -381,7 +381,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
    HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));

    /* Create PM4 and SDMA queues before fork+COW to test queue
-     * eviction and restore */
+     * eviction and restore
+     */
    PM4Queue pm4Queue;
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
@@ -392,7 +393,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {

    /* First submit just so the queues are not empty, and to get the
     * TLB populated (in case we need to flush TLBs somewhere after
-     * updating the page tables) */
+     * updating the page tables)
+     */
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch0.Submit(pm4Queue);
@@ -410,7 +412,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
         * make any write access to the stack because we want the
         * parent to make the first write access and get a new copy. A
         * busy loop is the safest way to do that, since any function
-         * call (e.g. sleep) would write to the stack. */
+         * call (e.g. sleep) would write to the stack.
+         */
        while (1)
        {}
        WARN() << "Shouldn't get here!" << std::endl;
@@ -419,13 +422,15 @@ TEST_F(KFDMemoryTest, MemoryRegister) {

    /* Parent process writes to COW page(s) and gets a new copy. MMU
     * notifier needs to update the GPU mapping(s) for the test to
-     * pass. */
+     * pass.
+     */
    globalData = 0xD00BED00;
    stackData[dstOffset] = 0xdeadbeef;
    stackData[sdmaOffset] = 0xdeadbeef;

    /* Terminate the child process before a possible test failure that
-     * would leave it spinning in the background indefinitely. */
+     * would leave it spinning in the background indefinitely.
+     */
    int status;
    EXPECT_EQ(0, kill(pid, SIGTERM));
    EXPECT_EQ(pid, waitpid(pid, &status, 0));
@@ -516,10 +521,11 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
    TEST_END
 }

-// FlatScratchAccess
-// Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
-// will not work for multiple GPU nodes. For now test only one default GPU node.
-// TODO: Generalize it to support multiple nodes
+/* FlatScratchAccess
+ * Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
+ * will not work for multiple GPU nodes. For now test only one default GPU node.
+ * TODO: Generalize it to support multiple nodes
+ */

 #define SCRATCH_SLICE_SIZE 0x10000
 #define SCRATCH_SLICE_NUM 3
@@ -558,24 +564,23 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
    // Map everything for test below
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL));

-    // source & destination memory buffers
    HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, defaultGPUNode);

-
    // Initialize the srcBuffer to some fixed value
    srcMemBuffer.Fill(0x01010101);

-    // Initialize a buffer with a DWORD copy ISA
+    // Initialize a buffer with a dword copy ISA
    m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
            "ScratchCopyDword", isaBuffer);

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);

-    // TODO: Add support to all GPU Nodes.
-    // The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
-    // also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
-    // HsaMemoryBuffer is now associated with a Node.
+    /* TODO: Add support to all GPU Nodes.
+     * The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
+     * also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
+     * HsaMemoryBuffer is now associated with a Node.
+     */
    if (pNodeProperties != NULL) {
        // Get the aperture of the scratch buffer
        HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
@@ -585,7 +590,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
        for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
            if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
                int numWaves = 4;  // WAVES must be >= # SE
-                int waveSize = 1;  // amount of space used by each wave in units of 256 dwords...
+                int waveSize = 1;  // Amount of space used by each wave in units of 256 dwords

                PM4Queue queue;
                ASSERT_SUCCESS(queue.Create(defaultGPUNode));
@@ -595,25 +600,24 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
                // Create a dispatch packet to copy
                Dispatch dispatchSrcToScratch(isaBuffer);

-                // setup the dispatch packet
+                // Setup the dispatch packet
                // Copying from the source Memory Buffer to the scratch buffer
                dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
                dispatchSrcToScratch.SetDim(1, 1, 1);
                dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
-                // submit the packet
+                // Submit the packet
                dispatchSrcToScratch.Submit(queue);
                dispatchSrcToScratch.Sync();

                // Create another dispatch packet to copy scratch buffer contents to destination buffer.
                Dispatch dispatchScratchToDst(isaBuffer);

-                // set the arguments to copy from the scratch buffer
-                // to the destination buffer
+                // Set the arguments to copy from the scratch buffer to the destination buffer
                dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
                dispatchScratchToDst.SetDim(1, 1, 1);
                dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());

-                // submit the packet
+                // Submit the packet
                dispatchScratchToDst.Submit(queue);
                dispatchScratchToDst.Sync();

@@ -708,7 +712,7 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular
        lastTestedSize = sizeMB;
    }

-    /* Save the biggest allocated system buffer forsignal handling test */
+    /* Save the biggest allocated system buffer for signal handling test */
    LOG() << "The biggest allocated system buffer is " << std::dec
            << lastTestedSize << "MB" << std::endl;
    if (lastSize)
@@ -781,7 +785,8 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
 * is small. For example, on a typical Carrizo platform, the biggest allocated
 * system buffer could be more than 14G even though it only has 4G memory.
 * In that situation, it will take too much time to finish the test, because of
- * the onerous memory swap operation. So we limit the buffer size that way.*/
+ * the onerous memory swap operation. So we limit the buffer size that way.
+ */
 TEST_F(KFDMemoryTest, BigBufferStressTest) {
    if (!is_dgpu()) {
        LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
@@ -804,7 +809,8 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
    BigBufferVRAM(defaultGPUNode, granularityMB, NULL);

    /* Repeatedly allocate and map big buffers in system memory until it fails,
-     * then unmap and free them. */
+     * then unmap and free them.
+     */
 #define ARRAY_ENTRIES 2048

    int i = 0;
@@ -875,7 +881,8 @@ TEST_F(KFDMemoryTest, MMBench) {
    /* Two SDMA queues to interleave user mode SDMA with memory
     * management on either SDMA engine. Make the queues long enough
     * to buffer at least nBufs x WriteData packets (7 dwords per
-     * packet). */
+     * packet).
+     */
    SDMAQueue sdmaQueue[2];
    ASSERT_SUCCESS(sdmaQueue[0].Create(defaultGPUNode, PAGE_SIZE*8));
    ASSERT_SUCCESS(sdmaQueue[1].Create(defaultGPUNode, PAGE_SIZE*8));
@@ -1094,7 +1101,8 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
 * to access its memory like a debugger would. Child copies data in
 * the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
 * the child terminates, the parent checks that the copy was
- * successful. */
+ * successful.
+ */
 TEST_F(KFDMemoryTest, PtraceAccess) {
    TEST_START(TESTPROFILE_RUNALL)

@@ -1108,13 +1116,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
    void *mem[2];
    unsigned i;

-    // Offset in the VRAM buffer to test crossing non-contiguous
-    // buffer boundaries. The second access starting from offset
-    // sizeof(HSAint64)+1 will cross a node boundary in a single access,
-    // for node sizes of 4MB or smaller.
+    /* Offset in the VRAM buffer to test crossing non-contiguous
+     * buffer boundaries. The second access starting from offset
+     * sizeof(HSAint64)+1 will cross a node boundary in a single access,
+     * for node sizes of 4MB or smaller.
+     */
    const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);

-    // alloc system memory from node 0 and initialize it
+    // Alloc system memory from node 0 and initialize it
    memFlags.ui32.NonPaged = 0;
    ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
    for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
@@ -1122,7 +1131,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
        (reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0;  // destination
    }

-    // try to alloc local memory from GPU node
+    // Try to alloc local memory from GPU node
    memFlags.ui32.NonPaged = 1;
    if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
        EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
@@ -1137,13 +1146,14 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
        mem[1] = NULL;
    }

-    // Allow any process to trace this one. If kernel is built without
-    // Yama, this is not needed, and this call will fail.
+    /* Allow any process to trace this one. If kernel is built without
+     * Yama, this is not needed, and this call will fail.
+     */
 #ifdef PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
 #endif

-    // Find out my pid so the child can trace it
+    // Find current pid so the child can trace it
    pid_t tracePid = getpid();

    // Fork the child
@@ -1168,8 +1178,7 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
            } while (!WIFSTOPPED(traceStatus));

            for (i = 0; i < 4; i++) {
-                // Test 4 different (mis-)alignments, leaving 1-byte
-                // gaps between longs
+                // Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
                HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
                errno = 0;
                long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
@@ -1264,7 +1273,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {

    ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, memFlags, &mem));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(mem, size, NULL));
-    /* set the word before 4M boundary to 0xdeadbeefdeadbeef
+    /* Set the word before 4M boundary to 0xdeadbeefdeadbeef
     * and the word after 4M boundary to 0xcafebabecafebabe
     */
    mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
@@ -1309,7 +1318,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
                waitpid(tracePid, &traceStatus, 0);
            } while (!WIFSTOPPED(traceStatus));

-            /* peek the memory */
+            /* Peek the memory */
            errno = 0;
            HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
            ASSERT_EQ(0, errno);
@@ -1318,7 +1327,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
            ASSERT_EQ(0, errno);
            ASSERT_EQ(data[1], data1);

-            /* swap mem0 and mem1 by poking */
+            /* Swap mem0 and mem1 by poking */
            ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
            ASSERT_EQ(0, errno);
            ASSERT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
@@ -1404,7 +1413,7 @@ TEST_F(KFDMemoryTest, SignalHandling) {
    size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);

    ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
-    // verify that pDb is not null before it's being used
+    // Verify that pDb is not null before it's being used
    ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";

    pid_t childPid = fork();
@@ -1473,7 +1482,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
            return;
        }

-        /* check the first 64 bit */
+        /* Check the first 64 bits */
        EXPECT_EQ(0, pDb[0]);
        pDb[0] = 1;

@@ -1495,7 +1504,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
 }

 static inline void access(volatile void *sd, int size, int rw) {
-    /* Most like sit in cache*/
+    /* Most likely sitting in cache*/
    static struct DUMMY {
        char dummy[1024];
    } dummy;
@@ -1509,8 +1518,8 @@ static inline void access(volatile void *sd, int size, int rw) {
 }

 /*
- * on large-ber system, test the visible vram access speed.
- * kfd is not allowd to alloc visible vram on non-largebar system.
+ * On large-bar system, test the visible vram access speed.
+ * KFD is not allowed to alloc visible vram on non-largebar system.
 */
 TEST_F(KFDMemoryTest, MMBandWidth) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
@@ -1571,7 +1580,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 0;
        } else {
-            /* alloc visible vram*/
+            /* Alloc visible vram*/
            allocNode = defaultGPUNode;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;