diff --git a/tests/kfdtest/src/BaseQueue.cpp b/tests/kfdtest/src/BaseQueue.cpp index f64a02fc9f..3af41aebe3 100644 --- a/tests/kfdtest/src/BaseQueue.cpp +++ b/tests/kfdtest/src/BaseQueue.cpp @@ -125,7 +125,8 @@ bool BaseQueue::AllPacketsSubmitted() { } void BaseQueue::PlacePacket(const BasePacket &packet) { - ASSERT_EQ(packet.PacketType(), PacketTypeSupported()) << "Cannot add a packet since packet type doesn't match queue"; + ASSERT_EQ(packet.PacketType(), PacketTypeSupported()) + << "Cannot add a packet since packet type doesn't match queue"; unsigned int readPtr = Rptr(); unsigned int writePtr = m_pendingWptr; diff --git a/tests/kfdtest/src/Dispatch.cpp b/tests/kfdtest/src/Dispatch.cpp index d3c50f7e60..e17911d65a 100644 --- a/tests/kfdtest/src/Dispatch.cpp +++ b/tests/kfdtest/src/Dispatch.cpp @@ -57,7 +57,7 @@ void Dispatch::SetDim(unsigned int x, unsigned int y, unsigned int z) { m_DimZ = z; } -void Dispatch::SetScratch(int numWaves, int waveSize, unsigned long long scratch_base) { +void Dispatch::SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base) { m_ComputeTmpringSize = ((waveSize << 12) | (numWaves)); m_ScratchEn = true; m_scratch_base = scratch_base; @@ -98,7 +98,7 @@ int Dispatch::SyncWithStatus(unsigned int timeout) { } void Dispatch::BuildIb() { - unsigned long long shiftedIsaAddr = m_IsaBuf.As() >> 8; + HSAuint64 shiftedIsaAddr = m_IsaBuf.As() >> 8; unsigned int arg0, arg1, arg2, arg3; SplitU64(reinterpret_cast(m_pArg1), arg0, arg1); SplitU64(reinterpret_cast(m_pArg2), arg2, arg3); @@ -118,7 +118,7 @@ void Dispatch::BuildIb() { unsigned int pgmRsrc2 = 0; pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT) & COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK; - pgmRsrc2 |= ((m_scratch_base ? 6 : 4 ) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT) + pgmRsrc2 |= ((m_scratch_base ? 6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT) & COMPUTE_PGM_RSRC2__USER_SGPR_MASK; pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT) & COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK; @@ -132,7 +132,9 @@ void Dispatch::BuildIb() { & COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK; const unsigned int COMPUTE_PGM_RSRC[] = { - 0x000c0084 | ((m_SpiPriority & 3) << 10), // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0 DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 } + // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0 + // DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 } + 0x000c0084 | ((m_SpiPriority & 3) << 10), pgmRsrc2 }; @@ -200,18 +202,24 @@ void Dispatch::BuildIb() { m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket()); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, + ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES))); m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO, (g_TestGPUFamilyId >= FAMILY_AI) ? COMPUTE_PGM_VALUES_GFX9 : COMPUTE_PGM_VALUES_GFX8, (g_TestGPUFamilyId >= FAMILY_AI) ? ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX9) : ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX8))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC, ARRAY_SIZE(COMPUTE_PGM_RSRC))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC, + ARRAY_SIZE(COMPUTE_PGM_RSRC))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS, ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE, ARRAY_SIZE(COMPUTE_TMPRING_SIZE))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES, ARRAY_SIZE(COMPUTE_RESTART_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS, + ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE, + ARRAY_SIZE(COMPUTE_TMPRING_SIZE))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES, + ARRAY_SIZE(COMPUTE_RESTART_VALUES))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES, ARRAY_SIZE(COMPUTE_USER_DATA_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES, + ARRAY_SIZE(COMPUTE_USER_DATA_VALUES))); m_IndirectBuf.AddPacket(PM4DispatchDirectPacket(m_DimX, m_DimY, m_DimZ, DISPATCH_INIT_VALUE)); diff --git a/tests/kfdtest/src/Dispatch.hpp b/tests/kfdtest/src/Dispatch.hpp index 36983554fb..b89306c02d 100644 --- a/tests/kfdtest/src/Dispatch.hpp +++ b/tests/kfdtest/src/Dispatch.hpp @@ -42,7 +42,7 @@ class Dispatch { int SyncWithStatus(unsigned int timeout); - void SetScratch(int numWaves, int waveSize, unsigned long long scratch_base); + void SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base); void SetSpiPriority(unsigned int priority); @@ -68,7 +68,7 @@ class Dispatch { bool m_ScratchEn; unsigned int m_ComputeTmpringSize; - unsigned long long m_scratch_base; + HSAuint64 m_scratch_base; unsigned int m_SpiPriority; }; diff --git a/tests/kfdtest/src/GoogleTestExtension.hpp b/tests/kfdtest/src/GoogleTestExtension.hpp index 5633a156bf..7b888b1677 100644 --- a/tests/kfdtest/src/GoogleTestExtension.hpp +++ b/tests/kfdtest/src/GoogleTestExtension.hpp @@ -34,7 +34,7 @@ enum LOGTYPE { }; class KFDLog{}; -std::ostream& operator << (KFDLog log ,LOGTYPE level); +std::ostream& operator << (KFDLog log, LOGTYPE level); // @brief log additional details, to be displayed in the same format as other google test outputs // currently not supported by google test diff --git a/tests/kfdtest/src/IndirectBuffer.cpp b/tests/kfdtest/src/IndirectBuffer.cpp index fb31a9203c..3fa70f27f6 100644 --- a/tests/kfdtest/src/IndirectBuffer.cpp +++ b/tests/kfdtest/src/IndirectBuffer.cpp @@ -29,7 +29,8 @@ IndirectBuffer::IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId) :m_NumOfPackets(0), m_MaxSize(sizeInDWords), m_ActualSize(0), m_PacketTypeAllowed(type) { - m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/, false/*local*/, true/*exec*/); + m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/, + false/*local*/, true/*exec*/); } IndirectBuffer::~IndirectBuffer(void) { diff --git a/tests/kfdtest/src/IsaGenerator.hpp b/tests/kfdtest/src/IsaGenerator.hpp index 11f1d0ca5b..4b9c49ad9e 100644 --- a/tests/kfdtest/src/IsaGenerator.hpp +++ b/tests/kfdtest/src/IsaGenerator.hpp @@ -28,8 +28,7 @@ /* isa generation class - interface */ class IsaGenerator { - -public: + public: static IsaGenerator* Create(unsigned int familyId); virtual ~IsaGenerator() {} @@ -43,11 +42,11 @@ public: void CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf); -protected: + protected: virtual const std::string& GetAsicName() = 0; -private: + private: static const std::string ADDRESS_WATCH_SP3; }; -#endif //_ISAGENERATOR_H_ +#endif // _ISAGENERATOR_H_ diff --git a/tests/kfdtest/src/KFDBaseComponentTest.hpp b/tests/kfdtest/src/KFDBaseComponentTest.hpp index 9a12e545ce..f7a2852e65 100644 --- a/tests/kfdtest/src/KFDBaseComponentTest.hpp +++ b/tests/kfdtest/src/KFDBaseComponentTest.hpp @@ -24,9 +24,6 @@ #define __KFD_BASE_COMPONENT_TEST__H__ #include -#include "hsakmt.h" -#include "OSWrapper.hpp" -#include "KFDTestUtil.hpp" #include #include #include @@ -34,6 +31,9 @@ #include #include #include +#include "hsakmt.h" +#include "OSWrapper.hpp" +#include "KFDTestUtil.hpp" // @class KFDBaseComponentTest class KFDBaseComponentTest : public testing::Test { @@ -63,13 +63,17 @@ class KFDBaseComponentTest : public testing::Test { HsaMemFlags m_MemoryFlags; HsaNodeInfo m_NodeInfo; - // @brief SetUpTestCase function run before the first test that uses KFDOpenCloseKFDTest class fixture, and opens KFD. + // @brief SetUpTestCase function run before the first test that uses + // KFDOpenCloseKFDTest class fixture, and opens KFD. static void SetUpTestCase(); - // @brief TearDownTestCase function run after the last test from KFDOpenCloseKFDTest class fixture and calls close KFD. + // @brief TearDownTestCase function run after the last test from + // KFDOpenCloseKFDTest class fixture and calls close KFD. static void TearDownTestCase(); - // @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture, sets all common settings for the tests. + // @brief SetUp function run before every test that uses + // KFDOpenCloseKFDTest class fixture, sets all common settings for the tests. virtual void SetUp(); - // @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture. + // @brief TearDown function run after every test that uses + // KFDOpenCloseKFDTest class fixture. virtual void TearDown(); }; diff --git a/tests/kfdtest/src/KFDCWSRTest.cpp b/tests/kfdtest/src/KFDCWSRTest.cpp index 594500c032..ace560e5f7 100644 --- a/tests/kfdtest/src/KFDCWSRTest.cpp +++ b/tests/kfdtest/src/KFDCWSRTest.cpp @@ -89,7 +89,8 @@ void KFDCWSRTest::SetUp() { m_pIsaGen = IsaGenerator::Create(m_FamilyId); - // TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not set the destination based on workitem_id. + // TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not + // set the destination based on workitem_id. // Set the wave_num to 1 for now as a workarpound. Will set it to 8 or even 256 in the future. wave_number = 1; diff --git a/tests/kfdtest/src/KFDDBGTest.cpp b/tests/kfdtest/src/KFDDBGTest.cpp index abff218160..52d904ef74 100644 --- a/tests/kfdtest/src/KFDDBGTest.cpp +++ b/tests/kfdtest/src/KFDDBGTest.cpp @@ -140,18 +140,17 @@ TEST_F(KFDDBGTest, BasicAddressWatch) { ASSERT_SUCCESS(hsaKmtDbgRegister(defaultGPUNode)); AddressWatchSuccess = hsaKmtDbgAddressWatch( - defaultGPUNode, // IN - 2, // # watch points - &WatchMode[0], // IN - (void **) &WatchAddress[0], // IN - &WatchMask[0], // IN, optional - NULL // IN, optional - ); + defaultGPUNode, // IN + 2, // # watch points + &WatchMode[0], // IN + reinterpret_cast(&WatchAddress[0]), // IN + &WatchMask[0], // IN, optional + NULL); // IN, optional EXPECT_EQ(AddressWatchSuccess, HSAKMT_STATUS_SUCCESS); Dispatch dispatch(isaBuf); - dispatch.SetArgs(dstBuf.As(), (void *)secDstBuf); + dispatch.SetArgs(dstBuf.As(), reinterpret_cast(secDstBuf)); dispatch.SetDim(1, 1, 1); // TODO: use Memory ordering rules w/ atomics diff --git a/tests/kfdtest/src/KFDEventTest.cpp b/tests/kfdtest/src/KFDEventTest.cpp index 200fe638b4..7d266046bc 100644 --- a/tests/kfdtest/src/KFDEventTest.cpp +++ b/tests/kfdtest/src/KFDEventTest.cpp @@ -136,7 +136,7 @@ class QueueAndSignalBenchmark { uint64_t startTime; PM4Queue queue; - HsaEvent** pHsaEvent = (HsaEvent**) calloc(eventCount, sizeof(HsaEvent*)); + HsaEvent** pHsaEvent = reinterpret_cast(calloc(eventCount, sizeof(HsaEvent*))); size_t packetSize = PM4ReleaseMemoryPacket(false, 0, 0).SizeInBytes(); int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1)); @@ -268,7 +268,8 @@ TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) { unsigned int pktSizeDwords = 0; for (i = 0; i < EVENT_NUMBER; i++) { - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId)); + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, + pHsaEvent[i]->EventId)); queue.Wait4PacketConsumption(); Delay(WAIT_BETWEEN_SUBMISSIONS_MS); diff --git a/tests/kfdtest/src/KFDEvictTest.cpp b/tests/kfdtest/src/KFDEvictTest.cpp index 70b733b566..3f7e3537a1 100644 --- a/tests/kfdtest/src/KFDEvictTest.cpp +++ b/tests/kfdtest/src/KFDEvictTest.cpp @@ -40,7 +40,8 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui totalMB = N_PROCESSES*count*(vramBufSize>>20); if (m_IsParent) { - LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="<< totalMB << ")MB VRAM in KFD" << std::endl; + LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(=" + << totalMB << ")MB VRAM in KFD" << std::endl; } HSAKMT_STATUS ret; @@ -95,7 +96,8 @@ void KFDEvictTest::AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle alloc.flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; if (m_IsParent) { - LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(=" << (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl; + LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(=" + << (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl; } ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle)); } @@ -104,79 +106,72 @@ void KFDEvictTest::FreeAmdgpuBo(amdgpu_bo_handle handle) { ASSERT_EQ(0, amdgpu_bo_free(handle)); } -static int -amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size, - unsigned alignment, unsigned heap, uint64_t flags, - amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address, - amdgpu_va_handle *va_handle) -{ - struct amdgpu_bo_alloc_request request = {}; - amdgpu_bo_handle buf_handle; - amdgpu_va_handle handle; - uint64_t vmc_addr; - int r; +static int amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size, + unsigned alignment, unsigned heap, uint64_t flags, + amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address, + amdgpu_va_handle *va_handle) { + struct amdgpu_bo_alloc_request request = {}; + amdgpu_bo_handle buf_handle; + amdgpu_va_handle handle; + uint64_t vmc_addr; + int r; - request.alloc_size = size; - request.phys_alignment = alignment; - request.preferred_heap = heap; - request.flags = flags; + request.alloc_size = size; + request.phys_alignment = alignment; + request.preferred_heap = heap; + request.flags = flags; - r = amdgpu_bo_alloc(dev, &request, &buf_handle); - if (r) - return r; + r = amdgpu_bo_alloc(dev, &request, &buf_handle); + if (r) + return r; - r = amdgpu_va_range_alloc(dev, - amdgpu_gpu_va_range_general, - size, alignment, 0, &vmc_addr, - &handle, 0); - if (r) - goto error_va_alloc; + r = amdgpu_va_range_alloc(dev, + amdgpu_gpu_va_range_general, + size, alignment, 0, &vmc_addr, + &handle, 0); + if (r) + goto error_va_alloc; - r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP); - if (r) - goto error_va_map; + r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP); + if (r) + goto error_va_map; - r = amdgpu_bo_cpu_map(buf_handle, cpu); - if (r) - goto error_cpu_map; + r = amdgpu_bo_cpu_map(buf_handle, cpu); + if (r) + goto error_cpu_map; - *bo = buf_handle; - *mc_address = vmc_addr; - *va_handle = handle; + *bo = buf_handle; + *mc_address = vmc_addr; + *va_handle = handle; - return 0; + return 0; error_cpu_map: - amdgpu_bo_cpu_unmap(buf_handle); + amdgpu_bo_cpu_unmap(buf_handle); error_va_map: - amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); error_va_alloc: - amdgpu_bo_free(buf_handle); - return r; + amdgpu_bo_free(buf_handle); + return r; } -static inline int -amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle, - uint64_t mc_addr, uint64_t size) -{ - amdgpu_bo_cpu_unmap(bo); - amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP); - amdgpu_va_range_free(va_handle); - amdgpu_bo_free(bo); - - return 0; +static inline int amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle, + uint64_t mc_addr, uint64_t size) { + amdgpu_bo_cpu_unmap(bo); + amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(va_handle); + amdgpu_bo_free(bo); + return 0; } -static inline int -amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1, - amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) -{ - amdgpu_bo_handle resources[] = {bo1, bo2}; +static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1, + amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) { + amdgpu_bo_handle resources[] = {bo1, bo2}; - return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list); + return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list); } void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) { @@ -204,7 +199,7 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) { &boList)); /* Fill Nop cammands in IB */ - ptr = (uint32_t *)ibResultCpu; + ptr = reinterpret_cast(ibResultCpu); for (int i = 0; i < 16; i++) ptr[i] = 0xffff1000; diff --git a/tests/kfdtest/src/KFDExceptionTest.cpp b/tests/kfdtest/src/KFDExceptionTest.cpp index 1a4a715d97..22ea893fe0 100644 --- a/tests/kfdtest/src/KFDExceptionTest.cpp +++ b/tests/kfdtest/src/KFDExceptionTest.cpp @@ -85,7 +85,7 @@ void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, } dispatch.SetDim(dimX, dimY, dimZ); - dispatch.SetArgs((void *)pSrc, (void *)pDst); + dispatch.SetArgs(reinterpret_cast(pSrc), reinterpret_cast(pDst)); dispatch.Submit(queue); m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut); diff --git a/tests/kfdtest/src/KFDGraphicsInterop.hpp b/tests/kfdtest/src/KFDGraphicsInterop.hpp index 260044cebf..3c4001ed41 100644 --- a/tests/kfdtest/src/KFDGraphicsInterop.hpp +++ b/tests/kfdtest/src/KFDGraphicsInterop.hpp @@ -28,11 +28,10 @@ // @class KFDGraphicsInteropTest // Adds access to graphics device for interoperability testing -class KFDGraphicsInterop : public KFDMemoryTest -{ -public: - KFDGraphicsInterop(void) {}; - ~KFDGraphicsInterop(void) {}; +class KFDGraphicsInterop : public KFDMemoryTest { + public: + KFDGraphicsInterop(void) {} + ~KFDGraphicsInterop(void) {} }; #endif diff --git a/tests/kfdtest/src/KFDIPCTest.cpp b/tests/kfdtest/src/KFDIPCTest.cpp index 4f7e90adac..6a95f51c9a 100644 --- a/tests/kfdtest/src/KFDIPCTest.cpp +++ b/tests/kfdtest/src/KFDIPCTest.cpp @@ -77,10 +77,10 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) { HSAuint32 *sharedLocalBuffer = NULL; /* Read from Pipe the shared Handle. Import shared Local Memory */ - ASSERT_GE(read(pipefd[0], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0); + ASSERT_GE(read(pipefd[0], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM, - (void**)&sharedLocalBuffer, &sharedSize)); + reinterpret_cast(&sharedLocalBuffer), &sharedSize)); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(sharedLocalBuffer, sharedSize, NULL)); /* Check for pattern in the shared Local Memory */ @@ -128,7 +128,7 @@ void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pip /* Share it with the child process */ ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer.As(), size, &sharedHandleLM)); - ASSERT_GE(write(pipefd[1], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0); + ASSERT_GE(write(pipefd[1], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); /* Wait for the child to finish */ waitpid(cpid, &status, 0); @@ -413,7 +413,7 @@ static int read_non_block(int fd, void *buf, int size) { int total_bytes = 0, cur_bytes = 0; int retries = 5; struct timespec tm = { 0, 100000000ULL }; - char *ptr = (char *)buf; + char *ptr = reinterpret_cast(buf); do { cur_bytes = read(fd, ptr, (size - total_bytes)); @@ -439,7 +439,7 @@ static int read_non_block(int fd, void *buf, int size) { /* Send HsaMemoryRange to another process that is connected via writePipe */ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) { - if (write_non_block(writePipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) != + if (write_non_block(writePipe, reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; return CMA_TEST_SUCCESS; @@ -449,7 +449,7 @@ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) { CMA_TEST_STATUS KFDCMAArray::recvCMAArray(int readPipe) { int i; - if (read_non_block(readPipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) != + if (read_non_block(readPipe, reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; @@ -704,10 +704,13 @@ TEST_F(KFDIPCTest, CMABasicTest) { HSAuint32 expected_pattern; srcRange.MemoryAddress = testLocalBuffer.As(); - srcRange.SizeInBytes = size; /* Deliberately set to value > unaligned_size. Only unaligned_size - * should be copied since dstRange.SizeInBytes == unaligned_size - */ - dstRange.MemoryAddress = (void *)(testLocalBuffer.As() + (size / 2) + unaligned_offset); + + /* Deliberately set to value > unaligned_size. Only unaligned_size + * should be copied since dstRange.SizeInBytes == unaligned_size + */ + srcRange.SizeInBytes = size; + + dstRange.MemoryAddress = reinterpret_cast(testLocalBuffer.As() + (size / 2) + unaligned_offset); dstRange.SizeInBytes = unaligned_size; ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied)); ASSERT_EQ(copied, unaligned_size); @@ -719,7 +722,7 @@ TEST_F(KFDIPCTest, CMABasicTest) { /* Test3. Test overflow and expect failure */ srcRange.MemoryAddress = testLocalBuffer.As(); srcRange.SizeInBytes = size; - dstRange.MemoryAddress = (void *)(testLocalBuffer.As() + 4); + dstRange.MemoryAddress = reinterpret_cast(testLocalBuffer.As() + 4); dstRange.SizeInBytes = size; /* This should overflow since offset is VA + 4 */ status = hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied); EXPECT_NE(status, HSAKMT_STATUS_SUCCESS); diff --git a/tests/kfdtest/src/KFDLocalMemoryTest.cpp b/tests/kfdtest/src/KFDLocalMemoryTest.cpp index c6119156bb..986ee4a85c 100644 --- a/tests/kfdtest/src/KFDLocalMemoryTest.cpp +++ b/tests/kfdtest/src/KFDLocalMemoryTest.cpp @@ -316,7 +316,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { break; } - void *bufferEnd = (void *)((unsigned long)pages[order].pointers[p] + void *bufferEnd = reinterpret_cast(reinterpret_cast(pages[order].pointers[p]) + size - sizeof(unsigned)); sysBuffer.As()[0] = ++value; @@ -340,7 +340,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { Dispatch dispatch3(isaBuffer); dispatch3.SetArgs(bufferEnd, - (void *)&(sysBuffer.As()[1])); + reinterpret_cast(&(sysBuffer.As()[1]))); dispatch3.Submit(queue); dispatch3.Sync(g_TestTimeOut); EXPECT_EQ(value, sysBuffer.As()[1]); @@ -349,7 +349,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { } LOG() << " Got " << pages[order].nPages << ", end of last block addr: " - << (void *)((unsigned long)pages[order].pointers[p-1] + size - 1) + << reinterpret_cast(reinterpret_cast(pages[order].pointers[p-1]) + size - 1) << std::endl; // Now free half the memory diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 8282106420..15a1c20051 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -94,14 +94,14 @@ shader ReadMemory\n\ asic(GFX9)\n\ type(CS)\n\ /* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - s_movk_i32 s18, 0x5678\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - s_store_dword s18, s[2:3], 0x0 glc\n\ - s_endpgm\n\ - end\n\ + s_movk_i32 s18, 0x5678\n\ + LOOP:\n\ + s_load_dword s16, s[0:1], 0x0 glc\n\ + s_cmp_eq_i32 s16, s18\n\ + s_cbranch_scc0 LOOP\n\ + s_store_dword s18, s[2:3], 0x0 glc\n\ + s_endpgm\n\ + end\n\ "; void KFDMemoryTest::SetUp() { @@ -127,7 +127,7 @@ void KFDMemoryTest::TearDown() { } #include -#define GB(x) ((x)<<30) +#define GB(x) ((x) << 30) /* * try to map as much as possible system memory to gpu. @@ -147,17 +147,17 @@ TEST_F(KFDMemoryTest, MMapLarge) { HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - const unsigned long nObjects = 1<<14; + const HSAuint64 nObjects = 1<<14; HSAuint64 *AlternateVAGPU = new HSAuint64[nObjects]; - ASSERT_NE((unsigned long)AlternateVAGPU, 0); + ASSERT_NE((HSAuint64)AlternateVAGPU, 0); HsaMemMapFlags mapFlags = {0}; - unsigned long s; + HSAuint64 s; char *addr; - unsigned long flags = MAP_ANONYMOUS | MAP_PRIVATE; + HSAuint64 flags = MAP_ANONYMOUS | MAP_PRIVATE; /* Test up to 1TB memory*/ s = GB(1024ULL) / nObjects; - addr = (char*)mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0); + addr = reinterpret_cast(mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0)); ASSERT_NE(addr, MAP_FAILED); memset(addr, 0, s); @@ -167,7 +167,7 @@ TEST_F(KFDMemoryTest, MMapLarge) { if (hsaKmtRegisterMemory(addr + i, s - i)) break; if (hsaKmtMapMemoryToGPUNodes(addr + i, s - i, - &AlternateVAGPU[i], mapFlags, 1, (HSAuint32 *)&defaultGPUNode)) { + &AlternateVAGPU[i], mapFlags, 1, reinterpret_cast(&defaultGPUNode))) { hsaKmtDeregisterMemory(addr + i); break; } @@ -177,8 +177,8 @@ TEST_F(KFDMemoryTest, MMapLarge) { << "GB system memory to gpu" << std::endl; while (i--) { - ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU((void*)AlternateVAGPU[i])); - ASSERT_SUCCESS(hsaKmtDeregisterMemory((void*)AlternateVAGPU[i])); + ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(AlternateVAGPU[i]))); + ASSERT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(AlternateVAGPU[i]))); } munmap(addr, s); @@ -268,7 +268,8 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) { int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb)); + ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, + reinterpret_cast(&pDb))); // verify that pDb is not null before it's being used ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer"; ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL)); @@ -292,7 +293,8 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) { TEST_START(TESTPROFILE_RUNALL) unsigned int* pDb = NULL; - EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags, (void**)&pDb)); + EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags, + reinterpret_cast(&pDb))); TEST_END } @@ -302,7 +304,7 @@ TEST_F(KFDMemoryTest, MemoryAlloc) { TEST_START(TESTPROFILE_RUNALL) unsigned int* pDb = NULL; - EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb)); + EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast(&pDb))); TEST_END } @@ -340,7 +342,7 @@ TEST_F(KFDMemoryTest, AccessPPRMem) { * consumed by IOMMU HW. Because of that, a kernel driver workaround * is put in place to address that, so we don't need to wait here. */ - //sleep(5); + // sleep(5); VirtualFreeMemory(destBuf, PAGE_SIZE); @@ -472,10 +474,10 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) { EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32), &gpuva2)); EXPECT_TRUE(gpuva1 != gpuva2); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva2))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva2))); /* Same address, same size */ HsaMemMapFlags memFlags = {0}; @@ -496,20 +498,20 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) { sizeof(HSAuint32) * 2, &gpuva2, memFlags, nGPU, nodes)); EXPECT_EQ(gpuva1, gpuva2); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva1))); /* Confirm that we still have access to the memory, mem[2] */ PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); mem[2] = 0x0; - queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)gpuva2, + queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast(gpuva2), 0xdeadbeef)); queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0)); queue.Wait4PacketConsumption(); - EXPECT_EQ(true, WaitOnValue((unsigned int *)&mem[2], 0xdeadbeef)); + EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef)); EXPECT_SUCCESS(queue.Destroy()); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva2))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva2))); TEST_END } @@ -535,7 +537,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) { ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/, false/*exec*/, true /*scratch*/); + HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/, + false/*exec*/, true /*scratch*/); // Unmap scratch for sub-allocation mapping tests ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As())); @@ -576,7 +579,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) { if (pNodeProperties != NULL) { // Get the aperture of the scratch buffer HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks]; - EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks, memoryProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks, + memoryProperties)); for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) { if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) { @@ -684,14 +688,14 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular sizeMB = (lowMB + highMB) / 2; size = sizeMB * 1024 * 1024; ret = hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { highMB = sizeMB; continue; } ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU, - mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size)); highMB = sizeMB; @@ -740,14 +744,14 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, sizeMB = (lowMB + highMB) / 2; size = sizeMB * 1024 * 1024; ret = hsaKmtAllocMemory(defaultGPUNode, size, memFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { highMB = sizeMB; continue; } ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU, - mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size)); highMB = sizeMB; @@ -810,13 +814,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { do { ret = hsaKmtAllocMemory(0 /* system */, block_size, m_MemoryFlags, - (void**)&pDb_array[i]); + reinterpret_cast(&pDb_array[i])); if (ret) { break; } ret = hsaKmtMapMemoryToGPUNodes(pDb_array[i], block_size, - &AlternateVAGPU, mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + &AlternateVAGPU, mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb_array[i], block_size)); break; @@ -849,7 +853,7 @@ TEST_F(KFDMemoryTest, MMBench) { #define TEST_SDMA(index) (((index / nSizes) >> 1) & 0x1) void *bufs[nBufs]; - unsigned long long start, end; + HSAuint64 start, end; unsigned i; HSAKMT_STATUS ret; HsaMemFlags memFlags = {0}; @@ -898,7 +902,7 @@ TEST_F(KFDMemoryTest, MMBench) { unsigned bufSize = TEST_BUFSIZE(testIndex); unsigned memType = TEST_MEMTYPE(testIndex); bool interleaveSDMA = TEST_SDMA(testIndex); - unsigned long long allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime; + HSAuint64 allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime; HSAuint32 allocNode; if ((testIndex & (nSizes-1)) == 0) @@ -1033,16 +1037,16 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) { EXPECT_EQ(ptrInfo.SizeInBytes, (HSAuint64)localBuffer.Size()); HSAuint32 *addr = localBuffer.As() + 4; - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)addr, &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast(addr), &ptrInfo)); EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As()); } /** Registered memory: user pointer */ static volatile HSAuint32 mem[4]; // 8 bytes for register only and // 8 bytes for register to nodes - HsaMemoryBuffer hsaBuffer((void *)&mem[0], sizeof(HSAuint32)*2); + HsaMemoryBuffer hsaBuffer((void *)(&mem[0]), sizeof(HSAuint32)*2); if (is_dgpu()) { // APU doesn't use userptr - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[0], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[0]), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER); EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]); EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)hsaBuffer.As()); @@ -1053,29 +1057,29 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) { HSAuint32 nodes[nGPU]; for (unsigned int i = 0; i < nGPU; i++) nodes[i] = gpuNodes.at(i); - EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2], + EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)(&mem[2]), sizeof(HSAuint32)*2, nGPU, nodes)); - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[2], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[2]), &ptrInfo)); EXPECT_EQ(ptrInfo.NRegisteredNodes, nGPU); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)&mem[2])); + EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)(&mem[2]))); } /* Not a starting address, but an address inside the memory range * should also get the memory information */ HSAuint32 *address = hostBuffer.As() + 1; - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)address, &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast(address), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED); EXPECT_EQ(ptrInfo.CPUAddress, hostBuffer.As()); if (is_dgpu()) { - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[1], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[1]), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER); EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]); } /*** Set user data ***/ char userData[16] = "This is a test."; - EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As(), (void *)userData)); + EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As(), reinterpret_cast(userData))); EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As(), &ptrInfo)); EXPECT_EQ(ptrInfo.UserData, (void *)userData); @@ -1106,16 +1110,16 @@ TEST_F(KFDMemoryTest, PtraceAccess) { // Offset in the VRAM buffer to test crossing non-contiguous // buffer boundaries. The second access starting from offset - // sizeof(long)+1 will cross a node boundary in a single access, + // sizeof(HSAint64)+1 will cross a node boundary in a single access, // for node sizes of 4MB or smaller. - const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(long); + const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64); // alloc system memory from node 0 and initialize it memFlags.ui32.NonPaged = 0; ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0])); - for (i = 0; i < 4*sizeof(long) + 4; i++) { - ((HSAuint8 *)mem[0])[i] = i; // source - ((HSAuint8 *)mem[0])[PAGE_SIZE+i] = 0; // destination + for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) { + (reinterpret_cast(mem[0]))[i] = i; // source + (reinterpret_cast(mem[0]))[PAGE_SIZE+i] = 0; // destination } // try to alloc local memory from GPU node @@ -1123,10 +1127,10 @@ TEST_F(KFDMemoryTest, PtraceAccess) { if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) { EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20), memFlags, &mem[1])); - mem[1] = (void *)((HSAuint8 *)mem[1] + VRAM_OFFSET); - for (i = 0; i < 4*sizeof(long) + 4; i++) { - ((HSAuint8 *)mem[1])[i] = i; - ((HSAuint8 *)mem[1])[PAGE_SIZE+i] = 0; + mem[1] = reinterpret_cast(reinterpret_cast(mem[1]) + VRAM_OFFSET); + for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) { + (reinterpret_cast(mem[1]))[i] = i; + (reinterpret_cast(mem[1]))[PAGE_SIZE+i] = 0; } } else { LOG() << "Not testing local memory, it's invisible" << std::endl; @@ -1168,22 +1172,22 @@ TEST_F(KFDMemoryTest, PtraceAccess) { for (i = 0; i < 4; i++) { // Test 4 different (mis-)alignments, leaving 1-byte // gaps between longs - HSAuint8 *addr = (HSAuint8 *)((long *)mem[0] + i) + i; + HSAuint8 *addr = reinterpret_cast(reinterpret_cast(mem[0]) + i) + i; errno = 0; long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE, - (void *)data)); + reinterpret_cast(data))); if (mem[1] == NULL) continue; - addr = (HSAuint8 *)((long *)mem[1] + i) + i; + addr = reinterpret_cast(reinterpret_cast(mem[1]) + i) + i; errno = 0; data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE, - (void *)data)); + reinterpret_cast(data))); } } catch (...) { err = 1; @@ -1204,32 +1208,31 @@ TEST_F(KFDMemoryTest, PtraceAccess) { } // Clear gaps in the source that should not have been copied - ((uint8_t*)mem[0])[ sizeof(long) ] = 0; - ((uint8_t*)mem[0])[2*sizeof(long) + 1] = 0; - ((uint8_t*)mem[0])[3*sizeof(long) + 2] = 0; - ((uint8_t*)mem[0])[4*sizeof(long) + 3] = 0; + (reinterpret_cast(mem[0]))[ sizeof(long) ] = 0; + (reinterpret_cast(mem[0]))[2*sizeof(long) + 1] = 0; + (reinterpret_cast(mem[0]))[3*sizeof(long) + 2] = 0; + (reinterpret_cast(mem[0]))[4*sizeof(long) + 3] = 0; // Check results - EXPECT_EQ(0, memcmp(mem[0], (HSAuint8 *)mem[0] + PAGE_SIZE, + EXPECT_EQ(0, memcmp(mem[0], reinterpret_cast(mem[0]) + PAGE_SIZE, sizeof(long)*4 + 4)); // Free memory EXPECT_SUCCESS(hsaKmtFreeMemory(mem[0], PAGE_SIZE*2)); if (mem[1]) { - ((uint8_t*)mem[1])[ sizeof(long) ] = 0; - ((uint8_t*)mem[1])[2*sizeof(long) + 1] = 0; - ((uint8_t*)mem[1])[3*sizeof(long) + 2] = 0; - ((uint8_t*)mem[1])[4*sizeof(long) + 3] = 0; - EXPECT_EQ(0, memcmp(mem[1], (HSAuint8 *)mem[1] + PAGE_SIZE, - sizeof(long)*4 + 4)); - mem[1] = (void *)((HSAuint8 *)mem[1] - VRAM_OFFSET); + (reinterpret_cast(mem[1]))[ sizeof(HSAint64) ] = 0; + (reinterpret_cast(mem[1]))[2*sizeof(HSAint64) + 1] = 0; + (reinterpret_cast(mem[1]))[3*sizeof(HSAint64) + 2] = 0; + (reinterpret_cast(mem[1]))[4*sizeof(HSAint64) + 3] = 0; + EXPECT_EQ(0, memcmp(mem[1], reinterpret_cast(mem[1]) + PAGE_SIZE, + sizeof(HSAint64)*4 + 4)); + mem[1] = reinterpret_cast(reinterpret_cast(mem[1]) - VRAM_OFFSET); EXPECT_SUCCESS(hsaKmtFreeMemory(mem[1], PAGE_SIZE*2)); } TEST_END } -TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) -{ +TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) { char *hsaDebug = getenv("HSA_DEBUG"); if (!is_dgpu()) { @@ -1266,8 +1269,8 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* set the word before 4M boundary to 0xdeadbeefdeadbeef * and the word after 4M boundary to 0xcafebabecafebabe */ - mem0 = (void *)((HSAuint8 *)mem + VRAM_OFFSET); - mem1 = (void *)((HSAuint8 *)mem + VRAM_OFFSET + sizeof(HSAuint64)); + mem0 = reinterpret_cast(reinterpret_cast(mem) + VRAM_OFFSET); + mem1 = reinterpret_cast(reinterpret_cast(mem) + VRAM_OFFSET + sizeof(HSAuint64)); PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem0, @@ -1313,17 +1316,17 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* peek the memory */ errno = 0; - long data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL); + HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(data[0], data0); - long data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL); + HSAint64 data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(data[1], data1); /* swap mem0 and mem1 by poking */ - EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, (void *)data[1])); + EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast(data[1]))); EXPECT_EQ(0, errno); - EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, (void *)data[0])); + EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast(data[0]))); EXPECT_EQ(0, errno); } catch (...) { err = 1; @@ -1345,10 +1348,10 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* Use shader to read back data to check poke results */ HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - //dstBuffer is cpu accessible gtt memory + // dstBuffer is cpu accessible gtt memory HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode); m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword, - "ScratchCopyDword",isaBuffer); + "ScratchCopyDword", isaBuffer); Dispatch dispatch0(isaBuffer); dispatch0.SetArgs(mem0, dstBuffer.As()); dispatch0.Submit(queue); @@ -1405,7 +1408,7 @@ TEST_F(KFDMemoryTest, SignalHandling) { */ size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1); - ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, (void**)&pDb)); + ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast(&pDb))); // verify that pDb is not null before it's being used ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer"; @@ -1468,7 +1471,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) { while (count--) { ret = hsaKmtAllocMemory(0 /* system */, sysBufSize, m_MemoryFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { LOG() << "Failed to allocate system buffer of" << std::dec << sysBufSizeMB << "MB" << std::endl; @@ -1496,8 +1499,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) { TEST_END } -static inline void access(volatile void *sd, int size, int rw) -{ +static inline void access(volatile void *sd, int size, int rw) { /* Most like sit in cache*/ static struct DUMMY { char dummy[1024]; @@ -1531,7 +1533,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) { #define _TEST_MEMTYPE(index) ((index / nSizes) % nMemTypes) void *bufs[nBufs]; - unsigned long long start; + HSAuint64 start; unsigned i; HSAKMT_STATUS ret; HsaMemFlags memFlags = {0}; @@ -1545,7 +1547,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) { LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl; if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode) || !vramSizeMB) { - LOG() << "not a largebar system, skip!"<NumCounters << " counter IDs" << std::endl; - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } TEST_END @@ -161,7 +162,7 @@ TEST_F(KFDPerfCountersTest, RegisterTrace) { priv_block_found = true; break; } - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } if (!priv_block_found) { @@ -202,7 +203,7 @@ TEST_F(KFDPerfCountersTest, StartStopQueryTrace) { priv_block_found = true; break; } - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } if (!priv_block_found) { diff --git a/tests/kfdtest/src/KFDQMTest.cpp b/tests/kfdtest/src/KFDQMTest.cpp index 25e4c8df73..c1dab79916 100644 --- a/tests/kfdtest/src/KFDQMTest.cpp +++ b/tests/kfdtest/src/KFDQMTest.cpp @@ -271,7 +271,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) { // don't sync since we don't expect rptr to change when the queue is disabled. Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -311,7 +312,8 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) { // don't sync since we don't expect rptr to change when the queue is disabled. Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -357,7 +359,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) { // don't sync since we don't expect rptr to change when the queue is disabled. Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -373,13 +376,13 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) { TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) { TEST_START(TESTPROFILE_RUNALL) - static const unsigned long long TEST_TIME_SEC = 15; + static const HSAuint64 TEST_TIME_SEC = 15; - unsigned long long initialTime = GetSystemTickCountInMicroSec(); + HSAuint64 initialTime = GetSystemTickCountInMicroSec(); unsigned int numIter = 0; - unsigned long long timePassed = 0; + HSAuint64 timePassed = 0; int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; @@ -404,7 +407,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) { delete queues[1]; ++numIter; - unsigned long long curTime = GetSystemTickCountInMicroSec(); + HSAuint64 curTime = GetSystemTickCountInMicroSec(); timePassed = (curTime - initialTime) / 1000000; } while (timePassed < TEST_TIME_SEC); @@ -553,7 +556,7 @@ s_waitcnt lgkmcnt(0)\n\ end\n\ "; -long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) { +HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) { HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/); HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false); HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false); @@ -580,9 +583,9 @@ long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t m } /* To cover for outliers, allow us to get the Average time based on a specified number of iterations */ -long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) { - long long timeArray[iterations]; - long long timeTotal = 0; +HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) { + HSAint64 timeArray[iterations]; + HSAint64 timeTotal = 0; if (iterations < 1) { LOG() << "ERROR: At least 1 iteration must be performed" << std::endl; return 0; @@ -599,9 +602,11 @@ long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, } for (int x = 0; x < iterations; x++) { - long long variance = timeArray[x] / (timeTotal / iterations); + HSAint64 variance = timeArray[x] / (timeTotal / iterations); if (variance < CuNegVariance || variance > CuPosVariance) - LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x] << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")" << std::endl; + LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x] + << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")" + << std::endl; } return timeTotal / iterations; @@ -625,7 +630,7 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) { LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl; LOG() << std::hex << "# Shader engines: 0x" << numSEs << std::endl; LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl; - long long TimewithCU1, TimewithCU; + HSAint64 TimewithCU1, TimewithCU; uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */ uint32_t maskNumBits = maskNumDwords * 32; uint32_t mask[maskNumDwords]; @@ -646,10 +651,11 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) { mask[maskIndex] |= 1 << ((nCUs - 1) % 32); TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits); - ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs); + ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs); LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl; - LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; + LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) + << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance)); } @@ -685,7 +691,7 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) { LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl; LOG() << std::hex << "# Shader engines: 0x" << numShaderEngines << std::endl; LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl; - long long TimewithCU1, TimewithCU; + HSAint64 TimewithCU1, TimewithCU; uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */ uint32_t maskNumBits = maskNumDwords * 32; uint32_t mask[maskNumDwords]; @@ -716,10 +722,11 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) { int nCUs = numShaderEngines * (x + 1); TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits); - ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs); + ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs); LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl; - LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; + LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) + << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance)); } @@ -945,10 +952,10 @@ TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) { unsigned int* src = srcBuffer.As(); unsigned int* dst = destBuffer.As(); - static const unsigned long long TEST_TIME_SEC = 15; - unsigned long long initialTime, curTime; + static const HSAuint64 TEST_TIME_SEC = 15; + HSAuint64 initialTime, curTime; unsigned int numIter = 0; - unsigned long long timePassed = 0; + HSAuint64 timePassed = 0; unsigned int i; PM4Queue queues[MAX_CP_QUEUES]; @@ -1019,7 +1026,8 @@ TEST_F(KFDQMTest, CpuWriteCoherence) { EXPECT_EQ(0, queue.Rptr()); - // now that the GPU has cached the PQ contents, we modify them in CPU cache and ensure that the GPU sees the updated value: + // now that the GPU has cached the PQ contents, we modify them in CPU cache and + // ensure that the GPU sees the updated value: queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As(), 0x42, 0x42)); queue.Wait4PacketConsumption(); @@ -1046,7 +1054,7 @@ TEST_F(KFDQMTest, CreateAqlCpQueue) { TEST_END } -#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1)) +#define ALIGN_UP(x, align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1)) #define CounterToNanoSec(x) ((x) * 1000 / (is_dgpu() ? 27 : 100)) #include @@ -1056,7 +1064,7 @@ TEST_F(KFDQMTest, QueueLatency) { PM4Queue queue; const int queueSize = PAGE_SIZE * 2; - const int packetSize = PM4ReleaseMemoryPacket(0,0,0,0,0).SizeInBytes(); + const int packetSize = PM4ReleaseMemoryPacket(0, 0, 0, 0, 0).SizeInBytes(); /* We always leave one NOP(dword) empty after packet which is required by ring itself. * We also place NOPs when queue wraparound to avoid crossing buffer end. See PlacePacket(). * So the worst case is that we need two packetSize space to place one packet. @@ -1067,16 +1075,16 @@ TEST_F(KFDQMTest, QueueLatency) { */ const int reservedSpace = packetSize + queueSize % packetSize; const int slots = (queueSize - reservedSpace) / packetSize; - long queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med; - long overhead, workload; - long *queue_latency_arr = (long*)calloc(slots, sizeof(long)); + HSAint64 queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med; + HSAint64 overhead, workload; + HSAint64 *queue_latency_arr = reinterpret_cast(calloc(slots, sizeof(HSAint64))); const int skip = 2; const char *fs[skip] = {"1st", "2nd"}; HsaClockCounters *ts; HSAuint64 *qts; int i = 0; - ASSERT_NE((unsigned long)queue_latency_arr, 0); + ASSERT_NE((unsigned HSAint64)queue_latency_arr, 0); int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; @@ -1102,7 +1110,7 @@ TEST_F(KFDQMTest, QueueLatency) { i = 0; do { queue.PlacePacket(PM4ReleaseMemoryPacket(true, - (unsigned long)&qts[i], + (unsigned HSAint64)&qts[i], 0, true, 1)); @@ -1114,7 +1122,7 @@ TEST_F(KFDQMTest, QueueLatency) { /* Calculate timing which includes workload and overhead*/ i = 0; do { - long queue_latency = qts[i] - ts[i].GPUClockCounter; + HSAint64 queue_latency = qts[i] - ts[i].GPUClockCounter; ASSERT_GE(queue_latency, 0); @@ -1129,7 +1137,7 @@ TEST_F(KFDQMTest, QueueLatency) { i = 0; do { queue.PlacePacket(PM4ReleaseMemoryPacket(true, - (unsigned long)&qts[i], + (unsigned HSAint64)&qts[i], 0, true, 1)); @@ -1151,7 +1159,7 @@ TEST_F(KFDQMTest, QueueLatency) { do { /* The queue_latency is not that correct as the workload and overhead are average*/ queue_latency_arr[i] -= workload + overhead; - /* The First submit takes a long time*/ + /* The First submit takes a HSAint64 time*/ if (i < skip) LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl; } while (++i < slots); @@ -1243,13 +1251,13 @@ TEST_F(KFDQMTest, SdmaQueueWraparound) { } struct AtomicIncThreadParams { - long* pDest; + HSAint64* pDest; volatile unsigned int count; volatile bool stop; }; unsigned int AtomicIncThread(void* pCtx) { - AtomicIncThreadParams* pArgs = (AtomicIncThreadParams*)pCtx; + AtomicIncThreadParams* pArgs = reinterpret_cast(pCtx); while (pArgs->stop) {} @@ -1288,7 +1296,7 @@ TEST_F(KFDQMTest, Atomics) { ASSERT_SUCCESS(queue.Create(defaultGPUNode)); AtomicIncThreadParams params; - params.pDest = destBuf.As(); + params.pDest = destBuf.As(); params.stop = true; params.count = 0; @@ -1441,7 +1449,7 @@ TEST_F(KFDQMTest, P2PTest) { /* 1. Allocate a system buffer and allow the access to GPUs */ EXPECT_SUCCESS(hsaKmtAllocMemory(0, size, memFlags, - (void **)&sysBuf)); + reinterpret_cast(&sysBuf))); EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sysBuf, size, NULL, mapFlags, nodes.size(), &nodes[0])); #define MAGIC_NUM 0xdeadbeaf @@ -1449,7 +1457,7 @@ TEST_F(KFDQMTest, P2PTest) { /* First GPU fills mem with MAGIC_NUM*/ void *src, *dst; HSAuint32 cur = nodes[0], next; - ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, (void**)&src)); + ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast(&src))); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(src, size, NULL)); sdma_fill(cur, src, MAGIC_NUM, size); @@ -1465,7 +1473,7 @@ TEST_F(KFDQMTest, P2PTest) { } else { n = 2; next = nodes[i]; - ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, (void**)&dst)); + ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, reinterpret_cast(&dst))); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dst, size, NULL)); } @@ -1506,7 +1514,7 @@ TEST_F(KFDQMTest, SdmaEventInterrupt) { ASSERT_SUCCESS(queue.Create(defaultGPUNode)); - queue.PlaceAndSubmitPacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId)); + queue.PlaceAndSubmitPacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); queue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); diff --git a/tests/kfdtest/src/KFDQMTest.hpp b/tests/kfdtest/src/KFDQMTest.hpp index 1834d4b506..9f120f83cf 100644 --- a/tests/kfdtest/src/KFDQMTest.hpp +++ b/tests/kfdtest/src/KFDQMTest.hpp @@ -43,8 +43,8 @@ class KFDQMTest : public KFDBaseComponentTest { void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1); // void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf); - long long TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count); - long long GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations); + HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count); + HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations); protected: // members /* Acceptable performance for CU Masking should be within 5% of linearly-predicted performance */ const double CuVariance = 0.15; diff --git a/tests/kfdtest/src/KFDTestFlags.hpp b/tests/kfdtest/src/KFDTestFlags.hpp index 11321f9b88..d98d99262a 100644 --- a/tests/kfdtest/src/KFDTestFlags.hpp +++ b/tests/kfdtest/src/KFDTestFlags.hpp @@ -54,12 +54,12 @@ enum ENVCAPS{ enum KfdFamilyId { FAMILY_UNKNOWN = 0, - FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M) - FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi - FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M) - FAMILY_CZ, // Carrizo, Nolan, Amur - FAMILY_AI, // Arctic Islands - FAMILY_RV, // Raven + FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M) + FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi + FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M) + FAMILY_CZ, // Carrizo, Nolan, Amur + FAMILY_AI, // Arctic Islands + FAMILY_RV, // Raven }; #endif // __KFD_TEST_FLAGS__H__ diff --git a/tests/kfdtest/src/KFDTestMain.cpp b/tests/kfdtest/src/KFDTestMain.cpp index 7dfd24674e..847582c2bc 100644 --- a/tests/kfdtest/src/KFDTestMain.cpp +++ b/tests/kfdtest/src/KFDTestMain.cpp @@ -42,7 +42,7 @@ std::ostream& operator << (std::ostream& out, TESTPROFILE profile) { break; default: out << "INVALID"; - }; + } return out; } @@ -71,7 +71,8 @@ GTEST_API_ int main(int argc, char **argv) { bool success = GetCommandLineArguments(argc, argv, args); if (success) { - if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) && (args.HwsEnabled != HWCAP__FORCE_DISABLED)) + if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) && + (args.HwsEnabled != HWCAP__FORCE_DISABLED)) g_TestENVCaps |= ENVCAPS_HWSCHEDULING; g_TestRunProfile = args.TestProfile; diff --git a/tests/kfdtest/src/KFDTestUtil.cpp b/tests/kfdtest/src/KFDTestUtil.cpp index f8b36c01f0..2e357583b4 100644 --- a/tests/kfdtest/src/KFDTestUtil.cpp +++ b/tests/kfdtest/src/KFDTestUtil.cpp @@ -43,7 +43,7 @@ bool WaitOnValue(const volatile unsigned int *buf, unsigned int value) { return *buf == value; } -void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart) { +void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart) { rLoPart = static_cast(value); rHiPart = static_cast(value >> 32); } @@ -125,7 +125,8 @@ bool isTonga(const HsaNodeProperties *props) { const HsaMemoryBuffer HsaMemoryBuffer::Null; -HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec, bool isScratch, bool isReadOnly) +HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec, + bool isScratch, bool isReadOnly) :m_Size(size), m_pUser(NULL), m_pBuf(NULL), @@ -153,7 +154,7 @@ HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, b if (isReadOnly) m_Flags.ui32.ReadOnly = 1; - EXPECT_SUCCESS(hsaKmtAllocMemory( m_Node, m_Size, m_Flags, &m_pBuf)); + EXPECT_SUCCESS(hsaKmtAllocMemory(m_Node, m_Size, m_Flags, &m_pBuf)); if (is_dgpu()) { EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL)); m_MappedNodes = 1 << m_Node; @@ -189,9 +190,9 @@ void HsaMemoryBuffer::Fill(unsigned char value, HSAuint64 offset, HSAuint64 size ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; if (m_pUser != NULL) - memset((char *)m_pUser + offset, value, size); + memset(reinterpret_cast(m_pUser) + offset, value, size); else if (m_pBuf != NULL) - memset((char *)m_pBuf + offset, value, size); + memset(reinterpret_cast(m_pBuf) + offset, value, size); else ASSERT_TRUE(0) << "Invalid HsaMemoryBuffer"; } @@ -207,9 +208,9 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) { ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; if (m_pUser != NULL) - ptr = (HSAuint32 *)((char *)m_pUser + offset); + ptr = reinterpret_cast(reinterpret_cast(m_pUser) + offset); else if (m_pBuf != NULL) - ptr = (HSAuint32 *)((char *)m_pBuf + offset); + ptr = reinterpret_cast(reinterpret_cast(m_pBuf) + offset); ASSERT_NOTNULL(ptr); @@ -229,8 +230,8 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, BaseQueue& baseQueue, HSAuint64 offs size = size ? size : m_Size; ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; - baseQueue.PlacePacket(SDMAFillDataPacket((void *)(this->As() + offset), value, size)); - baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId)); + baseQueue.PlacePacket(SDMAFillDataPacket((reinterpret_cast(this->As() + offset)), value, size)); + baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); ASSERT_SUCCESS(hsaKmtWaitOnEvent(event, g_TestTimeOut)); @@ -250,9 +251,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern) { return false; if (m_pUser != NULL) - ptr = (HSAuint32 *)m_pUser; + ptr = reinterpret_cast(m_pUser); else if (m_pBuf != NULL) - ptr = (HSAuint32 *)m_pBuf; + ptr = reinterpret_cast(m_pBuf); else return false; @@ -284,9 +285,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern, BaseQueue *tmp = ~pattern; baseQueue.PlacePacket(SDMACopyDataPacket((void *)tmp, - (void *)(this->As() + location), + reinterpret_cast(this->As() + location), sizeof(HSAuint32))); - baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2, + baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); @@ -394,14 +395,15 @@ HsaMemoryBuffer::~HsaMemoryBuffer() { m_pBuf = NULL; } -HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node) +HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, + HSAuint64 size, unsigned int node) :m_Size(0), m_pBuf(NULL), m_graphic_handle(0), m_Node(node) { HSAuint64 flat_address; EXPECT_SUCCESS(hsaKmtMapGraphicHandle(m_Node, device_handle, buffer_handle, 0, size, &flat_address)); - m_pBuf = (void*)flat_address; + m_pBuf = reinterpret_cast(flat_address); } HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() { diff --git a/tests/kfdtest/src/KFDTestUtil.hpp b/tests/kfdtest/src/KFDTestUtil.hpp index 8f71b7ecb6..241c1a0fd3 100644 --- a/tests/kfdtest/src/KFDTestUtil.hpp +++ b/tests/kfdtest/src/KFDTestUtil.hpp @@ -36,7 +36,7 @@ class BaseQueue; // @brief: waits until the value is written to the buffer or until time out if received through args bool WaitOnValue(const volatile unsigned int *buf, unsigned int value); -void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart); +void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart); bool GetHwCapabilityHWS(); @@ -106,14 +106,14 @@ class HsaMemoryBuffer { void* m_pBuf; bool m_Local; unsigned int m_Node; - unsigned short m_MappedNodes; + HSAuint64 m_MappedNodes; }; class HsaInteropMemoryBuffer { public: - HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node); + HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, HSAuint64 size, unsigned int node); template RetType As() { @@ -135,9 +135,9 @@ class HsaInteropMemoryBuffer { const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&); private: - unsigned long long m_Size; + HSAuint64 m_Size; void* m_pBuf; - unsigned long long m_graphic_handle; + HSAuint64 m_graphic_handle; unsigned int m_Node; }; diff --git a/tests/kfdtest/src/KFDTopologyTest.cpp b/tests/kfdtest/src/KFDTopologyTest.cpp index 877899dcaa..b302f166b3 100644 --- a/tests/kfdtest/src/KFDTopologyTest.cpp +++ b/tests/kfdtest/src/KFDTopologyTest.cpp @@ -27,8 +27,8 @@ // @todo complete topology test according to whats in: hsathk\source\windows\kmt_topology.cpp -const unsigned long long KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1; -const unsigned long long KFDTopologyTest::c_40BitAddressSpace = (1ull << 40); +const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1; +const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40); TEST_F(KFDTopologyTest , BasicTest) { TEST_START(TESTPROFILE_RUNALL) @@ -41,12 +41,14 @@ TEST_F(KFDTopologyTest , BasicTest) { if (pNodeProperties != NULL) { // checking for cpu core only if it's a cpu only node or if its KAVERY apu. if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) { - EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node << " No CPUs core are connected for node index"; + EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node + << " No CPUs core are connected for node index"; } // if it's not a cpu only node, look for a gpu core if (pNodeProperties->DeviceId != 0) { - EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node << "No GPUs core are connected."; + EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node + << "No GPUs core are connected."; // EngineId only applies to GPU, not CPU-only nodes EXPECT_GT(pNodeProperties->EngineId.ui32.uCode, 0) << "uCode version is 0"; EXPECT_GE(pNodeProperties->EngineId.ui32.Major, 7) << "Major Version is less than 7"; @@ -118,7 +120,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) { return; } HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks]; - EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks, memoryProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks, + memoryProperties)); bool GpuVMHeapFound = false; for (unsigned int bank = 0 ; bank < pNodeProperties->NumMemoryBanks ; bank++) { // Check for either private (small-bar/APU) or public (large-bar) @@ -145,9 +148,11 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) { pNodeProperties = m_NodeInfo.GetNodeProperties(node); if (pNodeProperties != NULL) { HsaCacheProperties *cacheProperties = new HsaCacheProperties[pNodeProperties->NumCaches]; - EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo, pNodeProperties->NumCaches, cacheProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo, + pNodeProperties->NumCaches, cacheProperties)); if (pNodeProperties->NumCPUCores > 0) { // this is a CPU node - LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches" << std::endl; + LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches" + << std::endl; for (unsigned n = 0; n < pNodeProperties->NumCaches; n++) { LOG()<< n << " - Level " << cacheProperties[n].CacheLevel << " Type " << cacheProperties[n].CacheType.Value << diff --git a/tests/kfdtest/src/KFDTopologyTest.hpp b/tests/kfdtest/src/KFDTopologyTest.hpp index 2ab631f8a4..005de1c04d 100644 --- a/tests/kfdtest/src/KFDTopologyTest.hpp +++ b/tests/kfdtest/src/KFDTopologyTest.hpp @@ -33,8 +33,8 @@ class KFDTopologyTest : public KFDBaseComponentTest { public: KFDTopologyTest(void) {} ~KFDTopologyTest(void) {} - static const unsigned long long c_4Gigabyte; - static const unsigned long long c_40BitAddressSpace; + static const HSAuint64 c_4Gigabyte; + static const HSAuint64 c_40BitAddressSpace; }; #endif // __KFD_TOPOLOGY_TEST__H__ diff --git a/tests/kfdtest/src/LinuxOSWrapper.cpp b/tests/kfdtest/src/LinuxOSWrapper.cpp index 90ea87b88f..74a0e65201 100644 --- a/tests/kfdtest/src/LinuxOSWrapper.cpp +++ b/tests/kfdtest/src/LinuxOSWrapper.cpp @@ -41,7 +41,9 @@ #include #include -static int protection_flags[8] = {int(PROT_NONE), int(PROT_READ), int(PROT_WRITE), int(PROT_READ | PROT_WRITE), int(PROT_EXEC), int(PROT_EXEC | PROT_READ), int(PROT_EXEC | PROT_WRITE), int(PROT_EXEC | PROT_WRITE | PROT_READ)}; +static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ | PROT_WRITE, + PROT_EXEC, PROT_EXEC | PROT_READ, PROT_EXEC | PROT_WRITE, + PROT_EXEC | PROT_WRITE | PROT_READ}; void SetConsoleTextColor(TEXTCOLOR color) { // TODO complete @@ -69,8 +71,8 @@ bool VirtualFreeMemory(void *address, unsigned int size) { return false; } -unsigned long GetLastErrorNo() { - return errno; +HSAuint64 GetLastErrorNo() { + return errno; } bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) { @@ -78,7 +80,7 @@ bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProc return false; } -unsigned long long GetSystemTickCountInMicroSec() { +HSAuint64 GetSystemTickCountInMicroSec() { struct timeval t; gettimeofday(&t, 0); return t.tv_sec * 1000000ULL + t.tv_usec; @@ -229,7 +231,7 @@ bool WaitForThread(uint64_t threadId) { return 0 == pthread_join((pthread_t)threadId, NULL); } -long AtomicInc(volatile long* pValue) { +HSAint64 AtomicInc(volatile HSAint64* pValue) { return __sync_add_and_fetch(pValue, 1); } diff --git a/tests/kfdtest/src/OSWrapper.hpp b/tests/kfdtest/src/OSWrapper.hpp index bcba47bb63..6b2f500258 100644 --- a/tests/kfdtest/src/OSWrapper.hpp +++ b/tests/kfdtest/src/OSWrapper.hpp @@ -26,6 +26,7 @@ #include #include "KFDTestFlags.hpp" +#include "hsakmt.h" #ifndef __OS__WRAPPER__H__ #define __OS__WRAPPER__H__ @@ -82,19 +83,20 @@ void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = M // @brief replacement for windows FreeVirtual func bool VirtualFreeMemory(void *address, unsigned int size); // @brief retrieve the last error number -unsigned long GetLastErrorNo(); +HSAuint64 GetLastErrorNo(); -long AtomicInc(volatile long* pValue); +HSAint64 AtomicInc(volatile HSAint64* pValue); void MemoryBarrier(); // @brief: runs the selected test case number of times required, each in a separate process -// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want to run all tests in a test case: TestCase.* and so on +// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want +// to run all tests in a test case: TestCase.* and so on // @params numOfProcesses : how many processes to run in parallel // @params runsPerProcess : how many iteration a test should do per process, must be a positive number bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1); -unsigned long long GetSystemTickCountInMicroSec(); +HSAuint64 GetSystemTickCountInMicroSec(); /**Put the system to S3/S4 power state and bring it back to S0. @return 'true' on success, 'false' on failure. diff --git a/tests/kfdtest/src/PM4Packet.cpp b/tests/kfdtest/src/PM4Packet.cpp index c0c48337a5..f8e57aa4ed 100644 --- a/tests/kfdtest/src/PM4Packet.cpp +++ b/tests/kfdtest/src/PM4Packet.cpp @@ -21,11 +21,11 @@ * */ -#include "PM4Packet.hpp" -#include "hsakmttypes.h" #include #include #include +#include "PM4Packet.hpp" +#include "hsakmttypes.h" #include "asic_reg/gfx_7_2_enum.h" @@ -52,7 +52,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const { } void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) { - m_pPacketData = (PM4WRITE_DATA_CI *)calloc(1, SizeInBytes()); + m_pPacketData = reinterpret_cast(calloc(1, SizeInBytes())); // verify that the memory is allocated successfully, cannot use assert here EXPECT_NOTNULL(m_pPacketData); @@ -84,7 +84,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, PM4_RELEASE_MEM_CI *pkt; m_packetSize = sizeof(PM4_RELEASE_MEM_CI); - pkt = (PM4_RELEASE_MEM_CI *)calloc(1, m_packetSize); + pkt = reinterpret_cast(calloc(1, m_packetSize)); m_pPacketData = pkt; EXPECT_NOTNULL(m_pPacketData); @@ -147,7 +147,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, PM4MEC_RELEASE_MEM_AI *pkt; m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI); - pkt = (PM4MEC_RELEASE_MEM_AI *)calloc(1, m_packetSize); + pkt = reinterpret_cast(calloc(1, m_packetSize)); m_pPacketData = pkt; EXPECT_NOTNULL(m_pPacketData); @@ -233,7 +233,8 @@ PM4SetShaderRegPacket::PM4SetShaderRegPacket(void) : m_packetDataAllocated(false) { } -PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs) +PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], + unsigned int numRegs) : m_packetDataAllocated(false) { InitPacket(baseOffset, regValues, numRegs); } @@ -243,11 +244,15 @@ PM4SetShaderRegPacket::~PM4SetShaderRegPacket(void) { free(m_pPacketData); } -void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs) { - m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t); // 1st register is a part of the packet struct. +void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[], + unsigned int numRegs) { + // 1st register is a part of the packet struct. + m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t); - // allocating the size of the packet, since the packet is assembled from a struct followed by an additional DWORD data - m_pPacketData = (PM4SET_SH_REG *)malloc(m_packetSize); + /* allocating the size of the packet, since the packet is assembled from a struct + * followed by an additional DWORD data + */ + m_pPacketData = reinterpret_cast(malloc(m_packetSize)); ASSERT_NOTNULL(m_pPacketData); @@ -262,11 +267,13 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i memcpy(m_pPacketData->reg_data, regValues, numRegs*sizeof(uint32_t)); } -PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) { +PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, + unsigned int dimZ, unsigned int dispatchInit) { InitPacket(dimX, dimY, dimZ, dispatchInit); } -void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) { +void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, + unsigned int dispatchInit) { memset(&m_packetData, 0, SizeInBytes()); InitPM4Header(m_packetData.header, IT_DISPATCH_DIRECT); diff --git a/tests/kfdtest/src/PM4Packet.hpp b/tests/kfdtest/src/PM4Packet.hpp index cc09d43b68..a7ab06ab5a 100644 --- a/tests/kfdtest/src/PM4Packet.hpp +++ b/tests/kfdtest/src/PM4Packet.hpp @@ -112,7 +112,7 @@ class PM4IndirectBufPacket : public PM4Packet { // empty constructor, befor using the packet call the init func PM4IndirectBufPacket(void) {} // this contructor will also init the packet, no need for adittional calls - PM4IndirectBufPacket(IndirectBuffer *pIb); + explicit PM4IndirectBufPacket(IndirectBuffer *pIb); virtual ~PM4IndirectBufPacket(void) {} // @returns the packet size in bytes diff --git a/tests/kfdtest/src/SDMAPacket.cpp b/tests/kfdtest/src/SDMAPacket.cpp index 45c7a6cd3e..d1120590ab 100644 --- a/tests/kfdtest/src/SDMAPacket.cpp +++ b/tests/kfdtest/src/SDMAPacket.cpp @@ -59,12 +59,12 @@ void SDMAWriteDataPacket::InitPacket(void* destAddr, unsigned int ndw, void *data) { packetSize = sizeof(SDMA_PKT_WRITE_UNTILED) + (ndw - 1) * sizeof(unsigned int); - packetData = (SDMA_PKT_WRITE_UNTILED *)calloc(1, packetSize); + packetData = reinterpret_cast(calloc(1, packetSize)); packetData->HEADER_UNION.op = SDMA_OP_WRITE; packetData->HEADER_UNION.sub_op = SDMA_SUBOP_WRITE_LINEAR; - SplitU64(reinterpret_cast(destAddr), + SplitU64(reinterpret_cast(destAddr), packetData->DST_ADDR_LO_UNION.DW_1_DATA, // dst_addr_31_0 packetData->DST_ADDR_HI_UNION.DW_2_DATA); // dst_addr_63_32 @@ -80,7 +80,7 @@ SDMACopyDataPacket::~SDMACopyDataPacket(void) { SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, unsigned int surfsize) { int32_t size = 0, i; - void **dst = (void**)malloc(sizeof(void*) * n); + void **dst = reinterpret_cast(malloc(sizeof(void*) * n)); const int singlePacketSize = sizeof(SDMA_PKT_COPY_LINEAR) + sizeof(SDMA_PKT_COPY_LINEAR::DST_ADDR[0]) * n; @@ -91,7 +91,7 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns packetSize = ((surfsize + TWO_MEG - 1) >> BITS) * singlePacketSize; - SDMA_PKT_COPY_LINEAR *pSDMA = (SDMA_PKT_COPY_LINEAR *)malloc(packetSize); + SDMA_PKT_COPY_LINEAR *pSDMA = reinterpret_cast(malloc(packetSize)); packetData = pSDMA; while (surfsize > 0) { @@ -106,19 +106,19 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns pSDMA->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR; pSDMA->HEADER_UNION.broadcast = n > 1 ? 1 : 0; pSDMA->COUNT_UNION.count = SDMA_COUNT(size); - SplitU64(reinterpret_cast(src), + SplitU64(reinterpret_cast(src), pSDMA->SRC_ADDR_LO_UNION.DW_3_DATA, // src_addr_31_0 pSDMA->SRC_ADDR_HI_UNION.DW_4_DATA); // src_addr_63_32 for (i = 0; i < n; i++) - SplitU64(reinterpret_cast(dst[i]), + SplitU64(reinterpret_cast(dst[i]), pSDMA->DST_ADDR[i].DST_ADDR_LO_UNION.DW_5_DATA, // dst_addr_31_0 pSDMA->DST_ADDR[i].DST_ADDR_HI_UNION.DW_6_DATA); // dst_addr_63_32 - pSDMA = (SDMA_PKT_COPY_LINEAR *)((char *)pSDMA + singlePacketSize); + pSDMA = reinterpret_cast(reinterpret_cast(pSDMA) + singlePacketSize); for (i = 0; i < n; i++) - dst[i] = (char *)dst[i] + size; - src = (char *)src + size; + dst[i] = reinterpret_cast(dst[i]) + size; + src = reinterpret_cast(src) + size; surfsize -= size; } free(dst); @@ -138,7 +138,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in /* SDMA support maximum 0x3fffe0 byte in one copy. Use 2M copy_size */ m_PacketSize = ((size + TWO_MEG - 1) >> BITS) * sizeof(SDMA_PKT_CONSTANT_FILL); - pSDMA = (SDMA_PKT_CONSTANT_FILL *)calloc(1, m_PacketSize); + pSDMA = reinterpret_cast(calloc(1, m_PacketSize)); m_PacketData = pSDMA; while (size > 0) { @@ -158,14 +158,14 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in pSDMA->COUNT_UNION.count = SDMA_COUNT(copy_size); - SplitU64(reinterpret_cast(dst), + SplitU64(reinterpret_cast(dst), pSDMA->DST_ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/ pSDMA->DST_ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/ pSDMA->DATA_UNION.DW_3_DATA = data; pSDMA++; - dst = (char *)dst + copy_size; + dst = reinterpret_cast(dst) + copy_size; size -= copy_size; } } @@ -185,7 +185,7 @@ void SDMAFencePacket::InitPacket(void* destAddr, unsigned int data) { packetData.HEADER_UNION.op = SDMA_OP_FENCE; - SplitU64(reinterpret_cast(destAddr), + SplitU64(reinterpret_cast(destAddr), packetData.ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/ packetData.ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/ diff --git a/tests/kfdtest/src/SDMAPacket.hpp b/tests/kfdtest/src/SDMAPacket.hpp index f7ef53f733..17b9027b5e 100644 --- a/tests/kfdtest/src/SDMAPacket.hpp +++ b/tests/kfdtest/src/SDMAPacket.hpp @@ -125,7 +125,7 @@ class SDMAFencePacket : public SDMAPacket { class SDMATrapPacket : public SDMAPacket { public: // empty constructor, befor using the packet call the init func - SDMATrapPacket(unsigned int eventID = 0); + explicit SDMATrapPacket(unsigned int eventID = 0); virtual ~SDMATrapPacket(void);