From dffac0a97eada398dcc68dce3df00a6b35a499d8 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 13 Aug 2018 09:03:31 -0400 Subject: [PATCH] kfdtest: Style cleanup Clean up the KFDTest style via CPPLint. Some warnings remain regarding volatile variables being cast to void*. This is the command used: cpplint.py --linelength=120 --filter=-readability/multiline_string,-readability/todo,-build/include,-runtime/references Filter notes: multiline_string is filtered because the ISA code is written as multi-line strings; todo is filtered to avoid errors about using TODO instead of TODO(username); include is filtered because it is about including the folder in the header includes; references is filtered because it is about non-const '&' references needing to be const or replaced by pointers, which can be addressed later. Change-Id: I3c6622da0a13dd33ab29b2bfff48be25e763b750 --- tests/kfdtest/src/BaseQueue.cpp | 3 +- tests/kfdtest/src/Dispatch.cpp | 28 ++-- tests/kfdtest/src/Dispatch.hpp | 4 +- tests/kfdtest/src/GoogleTestExtension.hpp | 2 +- tests/kfdtest/src/IndirectBuffer.cpp | 3 +- tests/kfdtest/src/IsaGenerator.hpp | 9 +- tests/kfdtest/src/KFDBaseComponentTest.hpp | 18 +- tests/kfdtest/src/KFDCWSRTest.cpp | 3 +- tests/kfdtest/src/KFDDBGTest.cpp | 15 +- tests/kfdtest/src/KFDEventTest.cpp | 5 +- tests/kfdtest/src/KFDEvictTest.cpp | 109 ++++++------ tests/kfdtest/src/KFDExceptionTest.cpp | 2 +- tests/kfdtest/src/KFDGraphicsInterop.hpp | 9 +- tests/kfdtest/src/KFDIPCTest.cpp | 25 +-- tests/kfdtest/src/KFDLocalMemoryTest.cpp | 6 +- tests/kfdtest/src/KFDMemoryTest.cpp | 186 +++++++++++---------- tests/kfdtest/src/KFDOpenCloseKFDTest.hpp | 3 +- tests/kfdtest/src/KFDPNPTest.cpp | 3 +- tests/kfdtest/src/KFDPerfCounters.cpp | 9 +- tests/kfdtest/src/KFDQMTest.cpp | 88 +++++----- tests/kfdtest/src/KFDQMTest.hpp | 4 +- tests/kfdtest/src/KFDTestFlags.hpp | 12 +- tests/kfdtest/src/KFDTestMain.cpp | 5 +- tests/kfdtest/src/KFDTestUtil.cpp | 32 ++-- tests/kfdtest/src/KFDTestUtil.hpp | 10 +- tests/kfdtest/src/KFDTopologyTest.cpp | 19 ++- tests/kfdtest/src/KFDTopologyTest.hpp | 4 +- 
tests/kfdtest/src/LinuxOSWrapper.cpp | 12 +- tests/kfdtest/src/OSWrapper.hpp | 10 +- tests/kfdtest/src/PM4Packet.cpp | 31 ++-- tests/kfdtest/src/PM4Packet.hpp | 2 +- tests/kfdtest/src/SDMAPacket.cpp | 26 +-- tests/kfdtest/src/SDMAPacket.hpp | 2 +- 33 files changed, 371 insertions(+), 328 deletions(-) diff --git a/tests/kfdtest/src/BaseQueue.cpp b/tests/kfdtest/src/BaseQueue.cpp index f64a02fc9f..3af41aebe3 100644 --- a/tests/kfdtest/src/BaseQueue.cpp +++ b/tests/kfdtest/src/BaseQueue.cpp @@ -125,7 +125,8 @@ bool BaseQueue::AllPacketsSubmitted() { } void BaseQueue::PlacePacket(const BasePacket &packet) { - ASSERT_EQ(packet.PacketType(), PacketTypeSupported()) << "Cannot add a packet since packet type doesn't match queue"; + ASSERT_EQ(packet.PacketType(), PacketTypeSupported()) + << "Cannot add a packet since packet type doesn't match queue"; unsigned int readPtr = Rptr(); unsigned int writePtr = m_pendingWptr; diff --git a/tests/kfdtest/src/Dispatch.cpp b/tests/kfdtest/src/Dispatch.cpp index d3c50f7e60..e17911d65a 100644 --- a/tests/kfdtest/src/Dispatch.cpp +++ b/tests/kfdtest/src/Dispatch.cpp @@ -57,7 +57,7 @@ void Dispatch::SetDim(unsigned int x, unsigned int y, unsigned int z) { m_DimZ = z; } -void Dispatch::SetScratch(int numWaves, int waveSize, unsigned long long scratch_base) { +void Dispatch::SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base) { m_ComputeTmpringSize = ((waveSize << 12) | (numWaves)); m_ScratchEn = true; m_scratch_base = scratch_base; @@ -98,7 +98,7 @@ int Dispatch::SyncWithStatus(unsigned int timeout) { } void Dispatch::BuildIb() { - unsigned long long shiftedIsaAddr = m_IsaBuf.As() >> 8; + HSAuint64 shiftedIsaAddr = m_IsaBuf.As() >> 8; unsigned int arg0, arg1, arg2, arg3; SplitU64(reinterpret_cast(m_pArg1), arg0, arg1); SplitU64(reinterpret_cast(m_pArg2), arg2, arg3); @@ -118,7 +118,7 @@ void Dispatch::BuildIb() { unsigned int pgmRsrc2 = 0; pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT) & 
COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK; - pgmRsrc2 |= ((m_scratch_base ? 6 : 4 ) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT) + pgmRsrc2 |= ((m_scratch_base ? 6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT) & COMPUTE_PGM_RSRC2__USER_SGPR_MASK; pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT) & COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK; @@ -132,7 +132,9 @@ void Dispatch::BuildIb() { & COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK; const unsigned int COMPUTE_PGM_RSRC[] = { - 0x000c0084 | ((m_SpiPriority & 3) << 10), // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0 DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 } + // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0 + // DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 } + 0x000c0084 | ((m_SpiPriority & 3) << 10), pgmRsrc2 }; @@ -200,18 +202,24 @@ void Dispatch::BuildIb() { m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket()); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, + ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES))); m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO, (g_TestGPUFamilyId >= FAMILY_AI) ? COMPUTE_PGM_VALUES_GFX9 : COMPUTE_PGM_VALUES_GFX8, (g_TestGPUFamilyId >= FAMILY_AI) ? 
ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX9) : ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX8))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC, ARRAY_SIZE(COMPUTE_PGM_RSRC))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC, + ARRAY_SIZE(COMPUTE_PGM_RSRC))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS, ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE, ARRAY_SIZE(COMPUTE_TMPRING_SIZE))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES, ARRAY_SIZE(COMPUTE_RESTART_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS, + ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE, + ARRAY_SIZE(COMPUTE_TMPRING_SIZE))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES, + ARRAY_SIZE(COMPUTE_RESTART_VALUES))); - m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES, ARRAY_SIZE(COMPUTE_USER_DATA_VALUES))); + m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES, + ARRAY_SIZE(COMPUTE_USER_DATA_VALUES))); m_IndirectBuf.AddPacket(PM4DispatchDirectPacket(m_DimX, m_DimY, m_DimZ, DISPATCH_INIT_VALUE)); diff --git a/tests/kfdtest/src/Dispatch.hpp b/tests/kfdtest/src/Dispatch.hpp index 36983554fb..b89306c02d 100644 --- a/tests/kfdtest/src/Dispatch.hpp +++ b/tests/kfdtest/src/Dispatch.hpp @@ -42,7 +42,7 @@ class Dispatch { int SyncWithStatus(unsigned int timeout); - void SetScratch(int numWaves, int waveSize, unsigned long long scratch_base); + void SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base); void SetSpiPriority(unsigned int priority); @@ -68,7 +68,7 @@ class Dispatch { bool 
m_ScratchEn; unsigned int m_ComputeTmpringSize; - unsigned long long m_scratch_base; + HSAuint64 m_scratch_base; unsigned int m_SpiPriority; }; diff --git a/tests/kfdtest/src/GoogleTestExtension.hpp b/tests/kfdtest/src/GoogleTestExtension.hpp index 5633a156bf..7b888b1677 100644 --- a/tests/kfdtest/src/GoogleTestExtension.hpp +++ b/tests/kfdtest/src/GoogleTestExtension.hpp @@ -34,7 +34,7 @@ enum LOGTYPE { }; class KFDLog{}; -std::ostream& operator << (KFDLog log ,LOGTYPE level); +std::ostream& operator << (KFDLog log, LOGTYPE level); // @brief log additional details, to be displayed in the same format as other google test outputs // currently not supported by google test diff --git a/tests/kfdtest/src/IndirectBuffer.cpp b/tests/kfdtest/src/IndirectBuffer.cpp index fb31a9203c..3fa70f27f6 100644 --- a/tests/kfdtest/src/IndirectBuffer.cpp +++ b/tests/kfdtest/src/IndirectBuffer.cpp @@ -29,7 +29,8 @@ IndirectBuffer::IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId) :m_NumOfPackets(0), m_MaxSize(sizeInDWords), m_ActualSize(0), m_PacketTypeAllowed(type) { - m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/, false/*local*/, true/*exec*/); + m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/, + false/*local*/, true/*exec*/); } IndirectBuffer::~IndirectBuffer(void) { diff --git a/tests/kfdtest/src/IsaGenerator.hpp b/tests/kfdtest/src/IsaGenerator.hpp index 11f1d0ca5b..4b9c49ad9e 100644 --- a/tests/kfdtest/src/IsaGenerator.hpp +++ b/tests/kfdtest/src/IsaGenerator.hpp @@ -28,8 +28,7 @@ /* isa generation class - interface */ class IsaGenerator { - -public: + public: static IsaGenerator* Create(unsigned int familyId); virtual ~IsaGenerator() {} @@ -43,11 +42,11 @@ public: void CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf); -protected: + protected: virtual const std::string& GetAsicName() = 0; -private: + private: static const 
std::string ADDRESS_WATCH_SP3; }; -#endif //_ISAGENERATOR_H_ +#endif // _ISAGENERATOR_H_ diff --git a/tests/kfdtest/src/KFDBaseComponentTest.hpp b/tests/kfdtest/src/KFDBaseComponentTest.hpp index 9a12e545ce..f7a2852e65 100644 --- a/tests/kfdtest/src/KFDBaseComponentTest.hpp +++ b/tests/kfdtest/src/KFDBaseComponentTest.hpp @@ -24,9 +24,6 @@ #define __KFD_BASE_COMPONENT_TEST__H__ #include -#include "hsakmt.h" -#include "OSWrapper.hpp" -#include "KFDTestUtil.hpp" #include #include #include @@ -34,6 +31,9 @@ #include #include #include +#include "hsakmt.h" +#include "OSWrapper.hpp" +#include "KFDTestUtil.hpp" // @class KFDBaseComponentTest class KFDBaseComponentTest : public testing::Test { @@ -63,13 +63,17 @@ class KFDBaseComponentTest : public testing::Test { HsaMemFlags m_MemoryFlags; HsaNodeInfo m_NodeInfo; - // @brief SetUpTestCase function run before the first test that uses KFDOpenCloseKFDTest class fixture, and opens KFD. + // @brief SetUpTestCase function run before the first test that uses + // KFDOpenCloseKFDTest class fixture, and opens KFD. static void SetUpTestCase(); - // @brief TearDownTestCase function run after the last test from KFDOpenCloseKFDTest class fixture and calls close KFD. + // @brief TearDownTestCase function run after the last test from + // KFDOpenCloseKFDTest class fixture and calls close KFD. static void TearDownTestCase(); - // @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture, sets all common settings for the tests. + // @brief SetUp function run before every test that uses + // KFDOpenCloseKFDTest class fixture, sets all common settings for the tests. virtual void SetUp(); - // @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture. + // @brief TearDown function run after every test that uses + // KFDOpenCloseKFDTest class fixture. 
virtual void TearDown(); }; diff --git a/tests/kfdtest/src/KFDCWSRTest.cpp b/tests/kfdtest/src/KFDCWSRTest.cpp index 594500c032..ace560e5f7 100644 --- a/tests/kfdtest/src/KFDCWSRTest.cpp +++ b/tests/kfdtest/src/KFDCWSRTest.cpp @@ -89,7 +89,8 @@ void KFDCWSRTest::SetUp() { m_pIsaGen = IsaGenerator::Create(m_FamilyId); - // TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not set the destination based on workitem_id. + // TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not + // set the destination based on workitem_id. // Set the wave_num to 1 for now as a workarpound. Will set it to 8 or even 256 in the future. wave_number = 1; diff --git a/tests/kfdtest/src/KFDDBGTest.cpp b/tests/kfdtest/src/KFDDBGTest.cpp index abff218160..52d904ef74 100644 --- a/tests/kfdtest/src/KFDDBGTest.cpp +++ b/tests/kfdtest/src/KFDDBGTest.cpp @@ -140,18 +140,17 @@ TEST_F(KFDDBGTest, BasicAddressWatch) { ASSERT_SUCCESS(hsaKmtDbgRegister(defaultGPUNode)); AddressWatchSuccess = hsaKmtDbgAddressWatch( - defaultGPUNode, // IN - 2, // # watch points - &WatchMode[0], // IN - (void **) &WatchAddress[0], // IN - &WatchMask[0], // IN, optional - NULL // IN, optional - ); + defaultGPUNode, // IN + 2, // # watch points + &WatchMode[0], // IN + reinterpret_cast(&WatchAddress[0]), // IN + &WatchMask[0], // IN, optional + NULL); // IN, optional EXPECT_EQ(AddressWatchSuccess, HSAKMT_STATUS_SUCCESS); Dispatch dispatch(isaBuf); - dispatch.SetArgs(dstBuf.As(), (void *)secDstBuf); + dispatch.SetArgs(dstBuf.As(), reinterpret_cast(secDstBuf)); dispatch.SetDim(1, 1, 1); // TODO: use Memory ordering rules w/ atomics diff --git a/tests/kfdtest/src/KFDEventTest.cpp b/tests/kfdtest/src/KFDEventTest.cpp index 200fe638b4..7d266046bc 100644 --- a/tests/kfdtest/src/KFDEventTest.cpp +++ b/tests/kfdtest/src/KFDEventTest.cpp @@ -136,7 +136,7 @@ class QueueAndSignalBenchmark { uint64_t startTime; PM4Queue queue; - HsaEvent** pHsaEvent = (HsaEvent**) calloc(eventCount, 
sizeof(HsaEvent*)); + HsaEvent** pHsaEvent = reinterpret_cast(calloc(eventCount, sizeof(HsaEvent*))); size_t packetSize = PM4ReleaseMemoryPacket(false, 0, 0).SizeInBytes(); int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1)); @@ -268,7 +268,8 @@ TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) { unsigned int pktSizeDwords = 0; for (i = 0; i < EVENT_NUMBER; i++) { - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId)); + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, + pHsaEvent[i]->EventId)); queue.Wait4PacketConsumption(); Delay(WAIT_BETWEEN_SUBMISSIONS_MS); diff --git a/tests/kfdtest/src/KFDEvictTest.cpp b/tests/kfdtest/src/KFDEvictTest.cpp index 70b733b566..3f7e3537a1 100644 --- a/tests/kfdtest/src/KFDEvictTest.cpp +++ b/tests/kfdtest/src/KFDEvictTest.cpp @@ -40,7 +40,8 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui totalMB = N_PROCESSES*count*(vramBufSize>>20); if (m_IsParent) { - LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="<< totalMB << ")MB VRAM in KFD" << std::endl; + LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(=" + << totalMB << ")MB VRAM in KFD" << std::endl; } HSAKMT_STATUS ret; @@ -95,7 +96,8 @@ void KFDEvictTest::AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle alloc.flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; if (m_IsParent) { - LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(=" << (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl; + LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(=" + << (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl; } ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle)); } @@ -104,79 +106,72 @@ void KFDEvictTest::FreeAmdgpuBo(amdgpu_bo_handle handle) { 
ASSERT_EQ(0, amdgpu_bo_free(handle)); } -static int -amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size, - unsigned alignment, unsigned heap, uint64_t flags, - amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address, - amdgpu_va_handle *va_handle) -{ - struct amdgpu_bo_alloc_request request = {}; - amdgpu_bo_handle buf_handle; - amdgpu_va_handle handle; - uint64_t vmc_addr; - int r; +static int amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size, + unsigned alignment, unsigned heap, uint64_t flags, + amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address, + amdgpu_va_handle *va_handle) { + struct amdgpu_bo_alloc_request request = {}; + amdgpu_bo_handle buf_handle; + amdgpu_va_handle handle; + uint64_t vmc_addr; + int r; - request.alloc_size = size; - request.phys_alignment = alignment; - request.preferred_heap = heap; - request.flags = flags; + request.alloc_size = size; + request.phys_alignment = alignment; + request.preferred_heap = heap; + request.flags = flags; - r = amdgpu_bo_alloc(dev, &request, &buf_handle); - if (r) - return r; + r = amdgpu_bo_alloc(dev, &request, &buf_handle); + if (r) + return r; - r = amdgpu_va_range_alloc(dev, - amdgpu_gpu_va_range_general, - size, alignment, 0, &vmc_addr, - &handle, 0); - if (r) - goto error_va_alloc; + r = amdgpu_va_range_alloc(dev, + amdgpu_gpu_va_range_general, + size, alignment, 0, &vmc_addr, + &handle, 0); + if (r) + goto error_va_alloc; - r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP); - if (r) - goto error_va_map; + r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP); + if (r) + goto error_va_map; - r = amdgpu_bo_cpu_map(buf_handle, cpu); - if (r) - goto error_cpu_map; + r = amdgpu_bo_cpu_map(buf_handle, cpu); + if (r) + goto error_cpu_map; - *bo = buf_handle; - *mc_address = vmc_addr; - *va_handle = handle; + *bo = buf_handle; + *mc_address = vmc_addr; + *va_handle = handle; - return 0; + return 0; error_cpu_map: - 
amdgpu_bo_cpu_unmap(buf_handle); + amdgpu_bo_cpu_unmap(buf_handle); error_va_map: - amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); error_va_alloc: - amdgpu_bo_free(buf_handle); - return r; + amdgpu_bo_free(buf_handle); + return r; } -static inline int -amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle, - uint64_t mc_addr, uint64_t size) -{ - amdgpu_bo_cpu_unmap(bo); - amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP); - amdgpu_va_range_free(va_handle); - amdgpu_bo_free(bo); - - return 0; +static inline int amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle, + uint64_t mc_addr, uint64_t size) { + amdgpu_bo_cpu_unmap(bo); + amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(va_handle); + amdgpu_bo_free(bo); + return 0; } -static inline int -amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1, - amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) -{ - amdgpu_bo_handle resources[] = {bo1, bo2}; +static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1, + amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) { + amdgpu_bo_handle resources[] = {bo1, bo2}; - return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list); + return amdgpu_bo_list_create(dev, bo2 ? 
2 : 1, resources, NULL, list); } void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) { @@ -204,7 +199,7 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) { &boList)); /* Fill Nop cammands in IB */ - ptr = (uint32_t *)ibResultCpu; + ptr = reinterpret_cast(ibResultCpu); for (int i = 0; i < 16; i++) ptr[i] = 0xffff1000; diff --git a/tests/kfdtest/src/KFDExceptionTest.cpp b/tests/kfdtest/src/KFDExceptionTest.cpp index 1a4a715d97..22ea893fe0 100644 --- a/tests/kfdtest/src/KFDExceptionTest.cpp +++ b/tests/kfdtest/src/KFDExceptionTest.cpp @@ -85,7 +85,7 @@ void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, } dispatch.SetDim(dimX, dimY, dimZ); - dispatch.SetArgs((void *)pSrc, (void *)pDst); + dispatch.SetArgs(reinterpret_cast(pSrc), reinterpret_cast(pDst)); dispatch.Submit(queue); m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut); diff --git a/tests/kfdtest/src/KFDGraphicsInterop.hpp b/tests/kfdtest/src/KFDGraphicsInterop.hpp index 260044cebf..3c4001ed41 100644 --- a/tests/kfdtest/src/KFDGraphicsInterop.hpp +++ b/tests/kfdtest/src/KFDGraphicsInterop.hpp @@ -28,11 +28,10 @@ // @class KFDGraphicsInteropTest // Adds access to graphics device for interoperability testing -class KFDGraphicsInterop : public KFDMemoryTest -{ -public: - KFDGraphicsInterop(void) {}; - ~KFDGraphicsInterop(void) {}; +class KFDGraphicsInterop : public KFDMemoryTest { + public: + KFDGraphicsInterop(void) {} + ~KFDGraphicsInterop(void) {} }; #endif diff --git a/tests/kfdtest/src/KFDIPCTest.cpp b/tests/kfdtest/src/KFDIPCTest.cpp index 4f7e90adac..6a95f51c9a 100644 --- a/tests/kfdtest/src/KFDIPCTest.cpp +++ b/tests/kfdtest/src/KFDIPCTest.cpp @@ -77,10 +77,10 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) { HSAuint32 *sharedLocalBuffer = NULL; /* Read from Pipe the shared Handle. 
Import shared Local Memory */ - ASSERT_GE(read(pipefd[0], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0); + ASSERT_GE(read(pipefd[0], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM, - (void**)&sharedLocalBuffer, &sharedSize)); + reinterpret_cast(&sharedLocalBuffer), &sharedSize)); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(sharedLocalBuffer, sharedSize, NULL)); /* Check for pattern in the shared Local Memory */ @@ -128,7 +128,7 @@ void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pip /* Share it with the child process */ ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer.As(), size, &sharedHandleLM)); - ASSERT_GE(write(pipefd[1], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0); + ASSERT_GE(write(pipefd[1], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); /* Wait for the child to finish */ waitpid(cpid, &status, 0); @@ -413,7 +413,7 @@ static int read_non_block(int fd, void *buf, int size) { int total_bytes = 0, cur_bytes = 0; int retries = 5; struct timespec tm = { 0, 100000000ULL }; - char *ptr = (char *)buf; + char *ptr = reinterpret_cast(buf); do { cur_bytes = read(fd, ptr, (size - total_bytes)); @@ -439,7 +439,7 @@ static int read_non_block(int fd, void *buf, int size) { /* Send HsaMemoryRange to another process that is connected via writePipe */ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) { - if (write_non_block(writePipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) != + if (write_non_block(writePipe, reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; return CMA_TEST_SUCCESS; @@ -449,7 +449,7 @@ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) { CMA_TEST_STATUS KFDCMAArray::recvCMAArray(int readPipe) { int i; - if (read_non_block(readPipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) != + if (read_non_block(readPipe, 
reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; @@ -704,10 +704,13 @@ TEST_F(KFDIPCTest, CMABasicTest) { HSAuint32 expected_pattern; srcRange.MemoryAddress = testLocalBuffer.As(); - srcRange.SizeInBytes = size; /* Deliberately set to value > unaligned_size. Only unaligned_size - * should be copied since dstRange.SizeInBytes == unaligned_size - */ - dstRange.MemoryAddress = (void *)(testLocalBuffer.As() + (size / 2) + unaligned_offset); + + /* Deliberately set to value > unaligned_size. Only unaligned_size + * should be copied since dstRange.SizeInBytes == unaligned_size + */ + srcRange.SizeInBytes = size; + + dstRange.MemoryAddress = reinterpret_cast(testLocalBuffer.As() + (size / 2) + unaligned_offset); dstRange.SizeInBytes = unaligned_size; ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied)); ASSERT_EQ(copied, unaligned_size); @@ -719,7 +722,7 @@ TEST_F(KFDIPCTest, CMABasicTest) { /* Test3. 
Test overflow and expect failure */ srcRange.MemoryAddress = testLocalBuffer.As(); srcRange.SizeInBytes = size; - dstRange.MemoryAddress = (void *)(testLocalBuffer.As() + 4); + dstRange.MemoryAddress = reinterpret_cast(testLocalBuffer.As() + 4); dstRange.SizeInBytes = size; /* This should overflow since offset is VA + 4 */ status = hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied); EXPECT_NE(status, HSAKMT_STATUS_SUCCESS); diff --git a/tests/kfdtest/src/KFDLocalMemoryTest.cpp b/tests/kfdtest/src/KFDLocalMemoryTest.cpp index c6119156bb..986ee4a85c 100644 --- a/tests/kfdtest/src/KFDLocalMemoryTest.cpp +++ b/tests/kfdtest/src/KFDLocalMemoryTest.cpp @@ -316,7 +316,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { break; } - void *bufferEnd = (void *)((unsigned long)pages[order].pointers[p] + void *bufferEnd = reinterpret_cast(reinterpret_cast(pages[order].pointers[p]) + size - sizeof(unsigned)); sysBuffer.As()[0] = ++value; @@ -340,7 +340,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { Dispatch dispatch3(isaBuffer); dispatch3.SetArgs(bufferEnd, - (void *)&(sysBuffer.As()[1])); + reinterpret_cast(&(sysBuffer.As()[1]))); dispatch3.Submit(queue); dispatch3.Sync(g_TestTimeOut); EXPECT_EQ(value, sysBuffer.As()[1]); @@ -349,7 +349,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { } LOG() << " Got " << pages[order].nPages << ", end of last block addr: " - << (void *)((unsigned long)pages[order].pointers[p-1] + size - 1) + << reinterpret_cast(reinterpret_cast(pages[order].pointers[p-1]) + size - 1) << std::endl; // Now free half the memory diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 8282106420..15a1c20051 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -94,14 +94,14 @@ shader ReadMemory\n\ asic(GFX9)\n\ type(CS)\n\ /* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - s_movk_i32 s18, 0x5678\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 
s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - s_store_dword s18, s[2:3], 0x0 glc\n\ - s_endpgm\n\ - end\n\ + s_movk_i32 s18, 0x5678\n\ + LOOP:\n\ + s_load_dword s16, s[0:1], 0x0 glc\n\ + s_cmp_eq_i32 s16, s18\n\ + s_cbranch_scc0 LOOP\n\ + s_store_dword s18, s[2:3], 0x0 glc\n\ + s_endpgm\n\ + end\n\ "; void KFDMemoryTest::SetUp() { @@ -127,7 +127,7 @@ void KFDMemoryTest::TearDown() { } #include -#define GB(x) ((x)<<30) +#define GB(x) ((x) << 30) /* * try to map as much as possible system memory to gpu. @@ -147,17 +147,17 @@ TEST_F(KFDMemoryTest, MMapLarge) { HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - const unsigned long nObjects = 1<<14; + const HSAuint64 nObjects = 1<<14; HSAuint64 *AlternateVAGPU = new HSAuint64[nObjects]; - ASSERT_NE((unsigned long)AlternateVAGPU, 0); + ASSERT_NE((HSAuint64)AlternateVAGPU, 0); HsaMemMapFlags mapFlags = {0}; - unsigned long s; + HSAuint64 s; char *addr; - unsigned long flags = MAP_ANONYMOUS | MAP_PRIVATE; + HSAuint64 flags = MAP_ANONYMOUS | MAP_PRIVATE; /* Test up to 1TB memory*/ s = GB(1024ULL) / nObjects; - addr = (char*)mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0); + addr = reinterpret_cast(mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0)); ASSERT_NE(addr, MAP_FAILED); memset(addr, 0, s); @@ -167,7 +167,7 @@ TEST_F(KFDMemoryTest, MMapLarge) { if (hsaKmtRegisterMemory(addr + i, s - i)) break; if (hsaKmtMapMemoryToGPUNodes(addr + i, s - i, - &AlternateVAGPU[i], mapFlags, 1, (HSAuint32 *)&defaultGPUNode)) { + &AlternateVAGPU[i], mapFlags, 1, reinterpret_cast(&defaultGPUNode))) { hsaKmtDeregisterMemory(addr + i); break; } @@ -177,8 +177,8 @@ TEST_F(KFDMemoryTest, MMapLarge) { << "GB system memory to gpu" << std::endl; while (i--) { - ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU((void*)AlternateVAGPU[i])); - ASSERT_SUCCESS(hsaKmtDeregisterMemory((void*)AlternateVAGPU[i])); + ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(AlternateVAGPU[i]))); + 
ASSERT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(AlternateVAGPU[i]))); } munmap(addr, s); @@ -268,7 +268,8 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) { int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb)); + ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, + reinterpret_cast(&pDb))); // verify that pDb is not null before it's being used ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer"; ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL)); @@ -292,7 +293,8 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) { TEST_START(TESTPROFILE_RUNALL) unsigned int* pDb = NULL; - EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags, (void**)&pDb)); + EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags, + reinterpret_cast(&pDb))); TEST_END } @@ -302,7 +304,7 @@ TEST_F(KFDMemoryTest, MemoryAlloc) { TEST_START(TESTPROFILE_RUNALL) unsigned int* pDb = NULL; - EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb)); + EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast(&pDb))); TEST_END } @@ -340,7 +342,7 @@ TEST_F(KFDMemoryTest, AccessPPRMem) { * consumed by IOMMU HW. Because of that, a kernel driver workaround * is put in place to address that, so we don't need to wait here. 
*/ - //sleep(5); + // sleep(5); VirtualFreeMemory(destBuf, PAGE_SIZE); @@ -472,10 +474,10 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) { EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32), &gpuva2)); EXPECT_TRUE(gpuva1 != gpuva2); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva2))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva2))); /* Same address, same size */ HsaMemMapFlags memFlags = {0}; @@ -496,20 +498,20 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) { sizeof(HSAuint32) * 2, &gpuva2, memFlags, nGPU, nodes)); EXPECT_EQ(gpuva1, gpuva2); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva1))); + EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva1))); /* Confirm that we still have access to the memory, mem[2] */ PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); mem[2] = 0x0; - queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)gpuva2, + queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast(gpuva2), 0xdeadbeef)); queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0)); queue.Wait4PacketConsumption(); - EXPECT_EQ(true, WaitOnValue((unsigned int *)&mem[2], 0xdeadbeef)); + EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef)); EXPECT_SUCCESS(queue.Destroy()); - EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2)); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2)); + EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast(gpuva2))); + 
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast(gpuva2))); TEST_END } @@ -535,7 +537,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) { ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/, false/*exec*/, true /*scratch*/); + HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/, + false/*exec*/, true /*scratch*/); // Unmap scratch for sub-allocation mapping tests ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As())); @@ -576,7 +579,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) { if (pNodeProperties != NULL) { // Get the aperture of the scratch buffer HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks]; - EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks, memoryProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks, + memoryProperties)); for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) { if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) { @@ -684,14 +688,14 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular sizeMB = (lowMB + highMB) / 2; size = sizeMB * 1024 * 1024; ret = hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { highMB = sizeMB; continue; } ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU, - mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size)); highMB = sizeMB; @@ -740,14 +744,14 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, sizeMB = (lowMB + highMB) / 2; size = sizeMB * 1024 * 1024; ret = 
hsaKmtAllocMemory(defaultGPUNode, size, memFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { highMB = sizeMB; continue; } ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU, - mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size)); highMB = sizeMB; @@ -810,13 +814,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { do { ret = hsaKmtAllocMemory(0 /* system */, block_size, m_MemoryFlags, - (void**)&pDb_array[i]); + reinterpret_cast(&pDb_array[i])); if (ret) { break; } ret = hsaKmtMapMemoryToGPUNodes(pDb_array[i], block_size, - &AlternateVAGPU, mapFlags, 1, (HSAuint32 *)&defaultGPUNode); + &AlternateVAGPU, mapFlags, 1, reinterpret_cast(&defaultGPUNode)); if (ret) { ASSERT_SUCCESS(hsaKmtFreeMemory(pDb_array[i], block_size)); break; @@ -849,7 +853,7 @@ TEST_F(KFDMemoryTest, MMBench) { #define TEST_SDMA(index) (((index / nSizes) >> 1) & 0x1) void *bufs[nBufs]; - unsigned long long start, end; + HSAuint64 start, end; unsigned i; HSAKMT_STATUS ret; HsaMemFlags memFlags = {0}; @@ -898,7 +902,7 @@ TEST_F(KFDMemoryTest, MMBench) { unsigned bufSize = TEST_BUFSIZE(testIndex); unsigned memType = TEST_MEMTYPE(testIndex); bool interleaveSDMA = TEST_SDMA(testIndex); - unsigned long long allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime; + HSAuint64 allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime; HSAuint32 allocNode; if ((testIndex & (nSizes-1)) == 0) @@ -1033,16 +1037,16 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) { EXPECT_EQ(ptrInfo.SizeInBytes, (HSAuint64)localBuffer.Size()); HSAuint32 *addr = localBuffer.As() + 4; - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)addr, &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast(addr), &ptrInfo)); EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As()); } /** Registered memory: user pointer */ static volatile HSAuint32 mem[4]; // 8 bytes for register only and // 8 bytes 
for register to nodes - HsaMemoryBuffer hsaBuffer((void *)&mem[0], sizeof(HSAuint32)*2); + HsaMemoryBuffer hsaBuffer((void *)(&mem[0]), sizeof(HSAuint32)*2); if (is_dgpu()) { // APU doesn't use userptr - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[0], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[0]), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER); EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]); EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)hsaBuffer.As()); @@ -1053,29 +1057,29 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) { HSAuint32 nodes[nGPU]; for (unsigned int i = 0; i < nGPU; i++) nodes[i] = gpuNodes.at(i); - EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2], + EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)(&mem[2]), sizeof(HSAuint32)*2, nGPU, nodes)); - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[2], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[2]), &ptrInfo)); EXPECT_EQ(ptrInfo.NRegisteredNodes, nGPU); - EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)&mem[2])); + EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)(&mem[2]))); } /* Not a starting address, but an address inside the memory range * should also get the memory information */ HSAuint32 *address = hostBuffer.As() + 1; - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)address, &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast(address), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED); EXPECT_EQ(ptrInfo.CPUAddress, hostBuffer.As()); if (is_dgpu()) { - EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[1], &ptrInfo)); + EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[1]), &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER); EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]); } /*** Set user data ***/ char userData[16] = "This is a test."; - EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As(), (void *)userData)); + EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As(), reinterpret_cast(userData))); 
EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As(), &ptrInfo)); EXPECT_EQ(ptrInfo.UserData, (void *)userData); @@ -1106,16 +1110,16 @@ TEST_F(KFDMemoryTest, PtraceAccess) { // Offset in the VRAM buffer to test crossing non-contiguous // buffer boundaries. The second access starting from offset - // sizeof(long)+1 will cross a node boundary in a single access, + // sizeof(HSAint64)+1 will cross a node boundary in a single access, // for node sizes of 4MB or smaller. - const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(long); + const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64); // alloc system memory from node 0 and initialize it memFlags.ui32.NonPaged = 0; ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0])); - for (i = 0; i < 4*sizeof(long) + 4; i++) { - ((HSAuint8 *)mem[0])[i] = i; // source - ((HSAuint8 *)mem[0])[PAGE_SIZE+i] = 0; // destination + for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) { + (reinterpret_cast(mem[0]))[i] = i; // source + (reinterpret_cast(mem[0]))[PAGE_SIZE+i] = 0; // destination } // try to alloc local memory from GPU node @@ -1123,10 +1127,10 @@ TEST_F(KFDMemoryTest, PtraceAccess) { if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) { EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20), memFlags, &mem[1])); - mem[1] = (void *)((HSAuint8 *)mem[1] + VRAM_OFFSET); - for (i = 0; i < 4*sizeof(long) + 4; i++) { - ((HSAuint8 *)mem[1])[i] = i; - ((HSAuint8 *)mem[1])[PAGE_SIZE+i] = 0; + mem[1] = reinterpret_cast(reinterpret_cast(mem[1]) + VRAM_OFFSET); + for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) { + (reinterpret_cast(mem[1]))[i] = i; + (reinterpret_cast(mem[1]))[PAGE_SIZE+i] = 0; } } else { LOG() << "Not testing local memory, it's invisible" << std::endl; @@ -1168,22 +1172,22 @@ TEST_F(KFDMemoryTest, PtraceAccess) { for (i = 0; i < 4; i++) { // Test 4 different (mis-)alignments, leaving 1-byte // gaps between longs - HSAuint8 *addr = (HSAuint8 *)((long *)mem[0] + i) + i; + HSAuint8 *addr = 
reinterpret_cast(reinterpret_cast(mem[0]) + i) + i; errno = 0; long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE, - (void *)data)); + reinterpret_cast(data))); if (mem[1] == NULL) continue; - addr = (HSAuint8 *)((long *)mem[1] + i) + i; + addr = reinterpret_cast(reinterpret_cast(mem[1]) + i) + i; errno = 0; data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE, - (void *)data)); + reinterpret_cast(data))); } } catch (...) { err = 1; @@ -1204,32 +1208,31 @@ TEST_F(KFDMemoryTest, PtraceAccess) { } // Clear gaps in the source that should not have been copied - ((uint8_t*)mem[0])[ sizeof(long) ] = 0; - ((uint8_t*)mem[0])[2*sizeof(long) + 1] = 0; - ((uint8_t*)mem[0])[3*sizeof(long) + 2] = 0; - ((uint8_t*)mem[0])[4*sizeof(long) + 3] = 0; + (reinterpret_cast(mem[0]))[ sizeof(long) ] = 0; + (reinterpret_cast(mem[0]))[2*sizeof(long) + 1] = 0; + (reinterpret_cast(mem[0]))[3*sizeof(long) + 2] = 0; + (reinterpret_cast(mem[0]))[4*sizeof(long) + 3] = 0; // Check results - EXPECT_EQ(0, memcmp(mem[0], (HSAuint8 *)mem[0] + PAGE_SIZE, + EXPECT_EQ(0, memcmp(mem[0], reinterpret_cast(mem[0]) + PAGE_SIZE, sizeof(long)*4 + 4)); // Free memory EXPECT_SUCCESS(hsaKmtFreeMemory(mem[0], PAGE_SIZE*2)); if (mem[1]) { - ((uint8_t*)mem[1])[ sizeof(long) ] = 0; - ((uint8_t*)mem[1])[2*sizeof(long) + 1] = 0; - ((uint8_t*)mem[1])[3*sizeof(long) + 2] = 0; - ((uint8_t*)mem[1])[4*sizeof(long) + 3] = 0; - EXPECT_EQ(0, memcmp(mem[1], (HSAuint8 *)mem[1] + PAGE_SIZE, - sizeof(long)*4 + 4)); - mem[1] = (void *)((HSAuint8 *)mem[1] - VRAM_OFFSET); + (reinterpret_cast(mem[1]))[ sizeof(HSAint64) ] = 0; + (reinterpret_cast(mem[1]))[2*sizeof(HSAint64) + 1] = 0; + (reinterpret_cast(mem[1]))[3*sizeof(HSAint64) + 2] = 0; + (reinterpret_cast(mem[1]))[4*sizeof(HSAint64) + 3] = 0; + EXPECT_EQ(0, memcmp(mem[1], reinterpret_cast(mem[1]) + 
PAGE_SIZE, + sizeof(HSAint64)*4 + 4)); + mem[1] = reinterpret_cast(reinterpret_cast(mem[1]) - VRAM_OFFSET); EXPECT_SUCCESS(hsaKmtFreeMemory(mem[1], PAGE_SIZE*2)); } TEST_END } -TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) -{ +TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) { char *hsaDebug = getenv("HSA_DEBUG"); if (!is_dgpu()) { @@ -1266,8 +1269,8 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* set the word before 4M boundary to 0xdeadbeefdeadbeef * and the word after 4M boundary to 0xcafebabecafebabe */ - mem0 = (void *)((HSAuint8 *)mem + VRAM_OFFSET); - mem1 = (void *)((HSAuint8 *)mem + VRAM_OFFSET + sizeof(HSAuint64)); + mem0 = reinterpret_cast(reinterpret_cast(mem) + VRAM_OFFSET); + mem1 = reinterpret_cast(reinterpret_cast(mem) + VRAM_OFFSET + sizeof(HSAuint64)); PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem0, @@ -1313,17 +1316,17 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* peek the memory */ errno = 0; - long data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL); + HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(data[0], data0); - long data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL); + HSAint64 data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL); EXPECT_EQ(0, errno); EXPECT_EQ(data[1], data1); /* swap mem0 and mem1 by poking */ - EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, (void *)data[1])); + EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast(data[1]))); EXPECT_EQ(0, errno); - EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, (void *)data[0])); + EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast(data[0]))); EXPECT_EQ(0, errno); } catch (...) 
{ err = 1; @@ -1345,10 +1348,10 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) /* Use shader to read back data to check poke results */ HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - //dstBuffer is cpu accessible gtt memory + // dstBuffer is cpu accessible gtt memory HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode); m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword, - "ScratchCopyDword",isaBuffer); + "ScratchCopyDword", isaBuffer); Dispatch dispatch0(isaBuffer); dispatch0.SetArgs(mem0, dstBuffer.As()); dispatch0.Submit(queue); @@ -1405,7 +1408,7 @@ TEST_F(KFDMemoryTest, SignalHandling) { */ size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1); - ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, (void**)&pDb)); + ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast(&pDb))); // verify that pDb is not null before it's being used ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer"; @@ -1468,7 +1471,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) { while (count--) { ret = hsaKmtAllocMemory(0 /* system */, sysBufSize, m_MemoryFlags, - (void**)&pDb); + reinterpret_cast(&pDb)); if (ret) { LOG() << "Failed to allocate system buffer of" << std::dec << sysBufSizeMB << "MB" << std::endl; @@ -1496,8 +1499,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) { TEST_END } -static inline void access(volatile void *sd, int size, int rw) -{ +static inline void access(volatile void *sd, int size, int rw) { /* Most like sit in cache*/ static struct DUMMY { char dummy[1024]; @@ -1531,7 +1533,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) { #define _TEST_MEMTYPE(index) ((index / nSizes) % nMemTypes) void *bufs[nBufs]; - unsigned long long start; + HSAuint64 start; unsigned i; HSAKMT_STATUS ret; HsaMemFlags memFlags = {0}; @@ -1545,7 +1547,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) { LOG() << "Found 
VRAM of " << std::dec << vramSizeMB << "MB." << std::endl; if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode) || !vramSizeMB) { - LOG() << "not a largebar system, skip!"<NumCounters << " counter IDs" << std::endl; - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } TEST_END @@ -161,7 +162,7 @@ TEST_F(KFDPerfCountersTest, RegisterTrace) { priv_block_found = true; break; } - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } if (!priv_block_found) { @@ -202,7 +203,7 @@ TEST_F(KFDPerfCountersTest, StartStopQueryTrace) { priv_block_found = true; break; } - block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters]; + block = reinterpret_cast(&block->Counters[block->NumCounters]); } if (!priv_block_found) { diff --git a/tests/kfdtest/src/KFDQMTest.cpp b/tests/kfdtest/src/KFDQMTest.cpp index 25e4c8df73..c1dab79916 100644 --- a/tests/kfdtest/src/KFDQMTest.cpp +++ b/tests/kfdtest/src/KFDQMTest.cpp @@ -271,7 +271,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) { // don't sync since we don't expect rptr to change when the queue is disabled. Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -311,7 +312,8 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) { // don't sync since we don't expect rptr to change when the queue is disabled. 
Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -357,7 +359,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) { // don't sync since we don't expect rptr to change when the queue is disabled. Delay(2000); - ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!"; + ASSERT_EQ(destBuf.As()[0], 0xFFFFFFFF) + << "Packet executed even though the queue is supposed to be disabled!"; ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false)); @@ -373,13 +376,13 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) { TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) { TEST_START(TESTPROFILE_RUNALL) - static const unsigned long long TEST_TIME_SEC = 15; + static const HSAuint64 TEST_TIME_SEC = 15; - unsigned long long initialTime = GetSystemTickCountInMicroSec(); + HSAuint64 initialTime = GetSystemTickCountInMicroSec(); unsigned int numIter = 0; - unsigned long long timePassed = 0; + HSAuint64 timePassed = 0; int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; @@ -404,7 +407,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) { delete queues[1]; ++numIter; - unsigned long long curTime = GetSystemTickCountInMicroSec(); + HSAuint64 curTime = GetSystemTickCountInMicroSec(); timePassed = (curTime - initialTime) / 1000000; } while (timePassed < TEST_TIME_SEC); @@ -553,7 +556,7 @@ s_waitcnt lgkmcnt(0)\n\ end\n\ "; -long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) { +HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) { 
HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/); HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false); HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false); @@ -580,9 +583,9 @@ long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t m } /* To cover for outliers, allow us to get the Average time based on a specified number of iterations */ -long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) { - long long timeArray[iterations]; - long long timeTotal = 0; +HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) { + HSAint64 timeArray[iterations]; + HSAint64 timeTotal = 0; if (iterations < 1) { LOG() << "ERROR: At least 1 iteration must be performed" << std::endl; return 0; @@ -599,9 +602,11 @@ long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, } for (int x = 0; x < iterations; x++) { - long long variance = timeArray[x] / (timeTotal / iterations); + HSAint64 variance = timeArray[x] / (timeTotal / iterations); if (variance < CuNegVariance || variance > CuPosVariance) - LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x] << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")" << std::endl; + LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x] + << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")" + << std::endl; } return timeTotal / iterations; @@ -625,7 +630,7 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) { LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl; LOG() << std::hex << "# Shader engines: 0x" << numSEs << std::endl; LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl; - long long TimewithCU1, TimewithCU; + HSAint64 TimewithCU1, 
TimewithCU; uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */ uint32_t maskNumBits = maskNumDwords * 32; uint32_t mask[maskNumDwords]; @@ -646,10 +651,11 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) { mask[maskIndex] |= 1 << ((nCUs - 1) % 32); TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits); - ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs); + ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs); LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl; - LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; + LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) + << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance)); } @@ -685,7 +691,7 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) { LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl; LOG() << std::hex << "# Shader engines: 0x" << numShaderEngines << std::endl; LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl; - long long TimewithCU1, TimewithCU; + HSAint64 TimewithCU1, TimewithCU; uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */ uint32_t maskNumBits = maskNumDwords * 32; uint32_t mask[maskNumDwords]; @@ -716,10 +722,11 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) { int nCUs = numShaderEngines * (x + 1); TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits); - ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs); + ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs); LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl; - LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << 
std::setprecision(2) << CuPosVariance << std::endl; + LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) + << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl; ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance)); } @@ -945,10 +952,10 @@ TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) { unsigned int* src = srcBuffer.As(); unsigned int* dst = destBuffer.As(); - static const unsigned long long TEST_TIME_SEC = 15; - unsigned long long initialTime, curTime; + static const HSAuint64 TEST_TIME_SEC = 15; + HSAuint64 initialTime, curTime; unsigned int numIter = 0; - unsigned long long timePassed = 0; + HSAuint64 timePassed = 0; unsigned int i; PM4Queue queues[MAX_CP_QUEUES]; @@ -1019,7 +1026,8 @@ TEST_F(KFDQMTest, CpuWriteCoherence) { EXPECT_EQ(0, queue.Rptr()); - // now that the GPU has cached the PQ contents, we modify them in CPU cache and ensure that the GPU sees the updated value: + // now that the GPU has cached the PQ contents, we modify them in CPU cache and + // ensure that the GPU sees the updated value: queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As(), 0x42, 0x42)); queue.Wait4PacketConsumption(); @@ -1046,7 +1054,7 @@ TEST_F(KFDQMTest, CreateAqlCpQueue) { TEST_END } -#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1)) +#define ALIGN_UP(x, align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1)) #define CounterToNanoSec(x) ((x) * 1000 / (is_dgpu() ? 27 : 100)) #include @@ -1056,7 +1064,7 @@ TEST_F(KFDQMTest, QueueLatency) { PM4Queue queue; const int queueSize = PAGE_SIZE * 2; - const int packetSize = PM4ReleaseMemoryPacket(0,0,0,0,0).SizeInBytes(); + const int packetSize = PM4ReleaseMemoryPacket(0, 0, 0, 0, 0).SizeInBytes(); /* We always leave one NOP(dword) empty after packet which is required by ring itself. * We also place NOPs when queue wraparound to avoid crossing buffer end. See PlacePacket(). 
* So the worst case is that we need two packetSize space to place one packet. @@ -1067,16 +1075,16 @@ TEST_F(KFDQMTest, QueueLatency) { */ const int reservedSpace = packetSize + queueSize % packetSize; const int slots = (queueSize - reservedSpace) / packetSize; - long queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med; - long overhead, workload; - long *queue_latency_arr = (long*)calloc(slots, sizeof(long)); + HSAint64 queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med; + HSAint64 overhead, workload; + HSAint64 *queue_latency_arr = reinterpret_cast(calloc(slots, sizeof(HSAint64))); const int skip = 2; const char *fs[skip] = {"1st", "2nd"}; HsaClockCounters *ts; HSAuint64 *qts; int i = 0; - ASSERT_NE((unsigned long)queue_latency_arr, 0); + ASSERT_NE((unsigned HSAint64)queue_latency_arr, 0); int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; @@ -1102,7 +1110,7 @@ TEST_F(KFDQMTest, QueueLatency) { i = 0; do { queue.PlacePacket(PM4ReleaseMemoryPacket(true, - (unsigned long)&qts[i], + (unsigned HSAint64)&qts[i], 0, true, 1)); @@ -1114,7 +1122,7 @@ TEST_F(KFDQMTest, QueueLatency) { /* Calculate timing which includes workload and overhead*/ i = 0; do { - long queue_latency = qts[i] - ts[i].GPUClockCounter; + HSAint64 queue_latency = qts[i] - ts[i].GPUClockCounter; ASSERT_GE(queue_latency, 0); @@ -1129,7 +1137,7 @@ TEST_F(KFDQMTest, QueueLatency) { i = 0; do { queue.PlacePacket(PM4ReleaseMemoryPacket(true, - (unsigned long)&qts[i], + (unsigned HSAint64)&qts[i], 0, true, 1)); @@ -1151,7 +1159,7 @@ TEST_F(KFDQMTest, QueueLatency) { do { /* The queue_latency is not that correct as the workload and overhead are average*/ queue_latency_arr[i] -= workload + overhead; - /* The First submit takes a long time*/ + /* The First submit takes a long time*/ if (i < skip) LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) <<
std::endl; } while (++i < slots); @@ -1243,13 +1251,13 @@ TEST_F(KFDQMTest, SdmaQueueWraparound) { } struct AtomicIncThreadParams { - long* pDest; + HSAint64* pDest; volatile unsigned int count; volatile bool stop; }; unsigned int AtomicIncThread(void* pCtx) { - AtomicIncThreadParams* pArgs = (AtomicIncThreadParams*)pCtx; + AtomicIncThreadParams* pArgs = reinterpret_cast(pCtx); while (pArgs->stop) {} @@ -1288,7 +1296,7 @@ TEST_F(KFDQMTest, Atomics) { ASSERT_SUCCESS(queue.Create(defaultGPUNode)); AtomicIncThreadParams params; - params.pDest = destBuf.As(); + params.pDest = destBuf.As(); params.stop = true; params.count = 0; @@ -1441,7 +1449,7 @@ TEST_F(KFDQMTest, P2PTest) { /* 1. Allocate a system buffer and allow the access to GPUs */ EXPECT_SUCCESS(hsaKmtAllocMemory(0, size, memFlags, - (void **)&sysBuf)); + reinterpret_cast(&sysBuf))); EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sysBuf, size, NULL, mapFlags, nodes.size(), &nodes[0])); #define MAGIC_NUM 0xdeadbeaf @@ -1449,7 +1457,7 @@ TEST_F(KFDQMTest, P2PTest) { /* First GPU fills mem with MAGIC_NUM*/ void *src, *dst; HSAuint32 cur = nodes[0], next; - ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, (void**)&src)); + ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast(&src))); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(src, size, NULL)); sdma_fill(cur, src, MAGIC_NUM, size); @@ -1465,7 +1473,7 @@ TEST_F(KFDQMTest, P2PTest) { } else { n = 2; next = nodes[i]; - ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, (void**)&dst)); + ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, reinterpret_cast(&dst))); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dst, size, NULL)); } @@ -1506,7 +1514,7 @@ TEST_F(KFDQMTest, SdmaEventInterrupt) { ASSERT_SUCCESS(queue.Create(defaultGPUNode)); - queue.PlaceAndSubmitPacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId)); + queue.PlaceAndSubmitPacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); 
queue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); diff --git a/tests/kfdtest/src/KFDQMTest.hpp b/tests/kfdtest/src/KFDQMTest.hpp index 1834d4b506..9f120f83cf 100644 --- a/tests/kfdtest/src/KFDQMTest.hpp +++ b/tests/kfdtest/src/KFDQMTest.hpp @@ -43,8 +43,8 @@ class KFDQMTest : public KFDBaseComponentTest { void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1); // void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf); - long long TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count); - long long GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations); + HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count); + HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations); protected: // members /* Acceptable performance for CU Masking should be within 5% of linearly-predicted performance */ const double CuVariance = 0.15; diff --git a/tests/kfdtest/src/KFDTestFlags.hpp b/tests/kfdtest/src/KFDTestFlags.hpp index 11321f9b88..d98d99262a 100644 --- a/tests/kfdtest/src/KFDTestFlags.hpp +++ b/tests/kfdtest/src/KFDTestFlags.hpp @@ -54,12 +54,12 @@ enum ENVCAPS{ enum KfdFamilyId { FAMILY_UNKNOWN = 0, - FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M) - FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi - FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M) - FAMILY_CZ, // Carrizo, Nolan, Amur - FAMILY_AI, // Arctic Islands - FAMILY_RV, // Raven + FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M) + FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi + FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M) + FAMILY_CZ, // Carrizo, Nolan, Amur + FAMILY_AI, // Arctic Islands + FAMILY_RV, // Raven }; #endif // __KFD_TEST_FLAGS__H__ diff --git a/tests/kfdtest/src/KFDTestMain.cpp b/tests/kfdtest/src/KFDTestMain.cpp 
index 7dfd24674e..847582c2bc 100644 --- a/tests/kfdtest/src/KFDTestMain.cpp +++ b/tests/kfdtest/src/KFDTestMain.cpp @@ -42,7 +42,7 @@ std::ostream& operator << (std::ostream& out, TESTPROFILE profile) { break; default: out << "INVALID"; - }; + } return out; } @@ -71,7 +71,8 @@ GTEST_API_ int main(int argc, char **argv) { bool success = GetCommandLineArguments(argc, argv, args); if (success) { - if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) && (args.HwsEnabled != HWCAP__FORCE_DISABLED)) + if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) && + (args.HwsEnabled != HWCAP__FORCE_DISABLED)) g_TestENVCaps |= ENVCAPS_HWSCHEDULING; g_TestRunProfile = args.TestProfile; diff --git a/tests/kfdtest/src/KFDTestUtil.cpp b/tests/kfdtest/src/KFDTestUtil.cpp index f8b36c01f0..2e357583b4 100644 --- a/tests/kfdtest/src/KFDTestUtil.cpp +++ b/tests/kfdtest/src/KFDTestUtil.cpp @@ -43,7 +43,7 @@ bool WaitOnValue(const volatile unsigned int *buf, unsigned int value) { return *buf == value; } -void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart) { +void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart) { rLoPart = static_cast(value); rHiPart = static_cast(value >> 32); } @@ -125,7 +125,8 @@ bool isTonga(const HsaNodeProperties *props) { const HsaMemoryBuffer HsaMemoryBuffer::Null; -HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec, bool isScratch, bool isReadOnly) +HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec, + bool isScratch, bool isReadOnly) :m_Size(size), m_pUser(NULL), m_pBuf(NULL), @@ -153,7 +154,7 @@ HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, b if (isReadOnly) m_Flags.ui32.ReadOnly = 1; - EXPECT_SUCCESS(hsaKmtAllocMemory( m_Node, m_Size, m_Flags, &m_pBuf)); + EXPECT_SUCCESS(hsaKmtAllocMemory(m_Node, m_Size, m_Flags, &m_pBuf)); 
if (is_dgpu()) { EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL)); m_MappedNodes = 1 << m_Node; @@ -189,9 +190,9 @@ void HsaMemoryBuffer::Fill(unsigned char value, HSAuint64 offset, HSAuint64 size ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; if (m_pUser != NULL) - memset((char *)m_pUser + offset, value, size); + memset(reinterpret_cast(m_pUser) + offset, value, size); else if (m_pBuf != NULL) - memset((char *)m_pBuf + offset, value, size); + memset(reinterpret_cast(m_pBuf) + offset, value, size); else ASSERT_TRUE(0) << "Invalid HsaMemoryBuffer"; } @@ -207,9 +208,9 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) { ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; if (m_pUser != NULL) - ptr = (HSAuint32 *)((char *)m_pUser + offset); + ptr = reinterpret_cast(reinterpret_cast(m_pUser) + offset); else if (m_pBuf != NULL) - ptr = (HSAuint32 *)((char *)m_pBuf + offset); + ptr = reinterpret_cast(reinterpret_cast(m_pBuf) + offset); ASSERT_NOTNULL(ptr); @@ -229,8 +230,8 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, BaseQueue& baseQueue, HSAuint64 offs size = size ? 
size : m_Size; ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl; - baseQueue.PlacePacket(SDMAFillDataPacket((void *)(this->As() + offset), value, size)); - baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId)); + baseQueue.PlacePacket(SDMAFillDataPacket((reinterpret_cast(this->As() + offset)), value, size)); + baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); ASSERT_SUCCESS(hsaKmtWaitOnEvent(event, g_TestTimeOut)); @@ -250,9 +251,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern) { return false; if (m_pUser != NULL) - ptr = (HSAuint32 *)m_pUser; + ptr = reinterpret_cast(m_pUser); else if (m_pBuf != NULL) - ptr = (HSAuint32 *)m_pBuf; + ptr = reinterpret_cast(m_pBuf); else return false; @@ -284,9 +285,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern, BaseQueue *tmp = ~pattern; baseQueue.PlacePacket(SDMACopyDataPacket((void *)tmp, - (void *)(this->As() + location), + reinterpret_cast(this->As() + location), sizeof(HSAuint32))); - baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2, + baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast(event->EventData.HWData2), event->EventId)); baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId)); @@ -394,14 +395,15 @@ HsaMemoryBuffer::~HsaMemoryBuffer() { m_pBuf = NULL; } -HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node) +HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, + HSAuint64 size, unsigned int node) :m_Size(0), m_pBuf(NULL), m_graphic_handle(0), m_Node(node) { HSAuint64 flat_address; EXPECT_SUCCESS(hsaKmtMapGraphicHandle(m_Node, device_handle, buffer_handle, 0, size, &flat_address)); - m_pBuf = (void*)flat_address; + m_pBuf 
= reinterpret_cast(flat_address); } HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() { diff --git a/tests/kfdtest/src/KFDTestUtil.hpp b/tests/kfdtest/src/KFDTestUtil.hpp index 8f71b7ecb6..241c1a0fd3 100644 --- a/tests/kfdtest/src/KFDTestUtil.hpp +++ b/tests/kfdtest/src/KFDTestUtil.hpp @@ -36,7 +36,7 @@ class BaseQueue; // @brief: waits until the value is written to the buffer or until time out if received through args bool WaitOnValue(const volatile unsigned int *buf, unsigned int value); -void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart); +void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart); bool GetHwCapabilityHWS(); @@ -106,14 +106,14 @@ class HsaMemoryBuffer { void* m_pBuf; bool m_Local; unsigned int m_Node; - unsigned short m_MappedNodes; + HSAuint64 m_MappedNodes; }; class HsaInteropMemoryBuffer { public: - HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node); + HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, HSAuint64 size, unsigned int node); template RetType As() { @@ -135,9 +135,9 @@ class HsaInteropMemoryBuffer { const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&); private: - unsigned long long m_Size; + HSAuint64 m_Size; void* m_pBuf; - unsigned long long m_graphic_handle; + HSAuint64 m_graphic_handle; unsigned int m_Node; }; diff --git a/tests/kfdtest/src/KFDTopologyTest.cpp b/tests/kfdtest/src/KFDTopologyTest.cpp index 877899dcaa..b302f166b3 100644 --- a/tests/kfdtest/src/KFDTopologyTest.cpp +++ b/tests/kfdtest/src/KFDTopologyTest.cpp @@ -27,8 +27,8 @@ // @todo complete topology test according to whats in: hsathk\source\windows\kmt_topology.cpp -const unsigned long long KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1; -const unsigned long long KFDTopologyTest::c_40BitAddressSpace = (1ull << 40); +const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) 
- 1; +const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40); TEST_F(KFDTopologyTest , BasicTest) { TEST_START(TESTPROFILE_RUNALL) @@ -41,12 +41,14 @@ TEST_F(KFDTopologyTest , BasicTest) { if (pNodeProperties != NULL) { // checking for cpu core only if it's a cpu only node or if its KAVERY apu. if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) { - EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node << " No CPUs core are connected for node index"; + EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node + << " No CPUs core are connected for node index"; } // if it's not a cpu only node, look for a gpu core if (pNodeProperties->DeviceId != 0) { - EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node << "No GPUs core are connected."; + EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node + << "No GPUs core are connected."; // EngineId only applies to GPU, not CPU-only nodes EXPECT_GT(pNodeProperties->EngineId.ui32.uCode, 0) << "uCode version is 0"; EXPECT_GE(pNodeProperties->EngineId.ui32.Major, 7) << "Major Version is less than 7"; @@ -118,7 +120,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) { return; } HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks]; - EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks, memoryProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks, + memoryProperties)); bool GpuVMHeapFound = false; for (unsigned int bank = 0 ; bank < pNodeProperties->NumMemoryBanks ; bank++) { // Check for either private (small-bar/APU) or public (large-bar) @@ -145,9 +148,11 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) { pNodeProperties = m_NodeInfo.GetNodeProperties(node); if (pNodeProperties != NULL) { HsaCacheProperties *cacheProperties = new 
HsaCacheProperties[pNodeProperties->NumCaches]; - EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo, pNodeProperties->NumCaches, cacheProperties)); + EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo, + pNodeProperties->NumCaches, cacheProperties)); if (pNodeProperties->NumCPUCores > 0) { // this is a CPU node - LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches" << std::endl; + LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches" + << std::endl; for (unsigned n = 0; n < pNodeProperties->NumCaches; n++) { LOG()<< n << " - Level " << cacheProperties[n].CacheLevel << " Type " << cacheProperties[n].CacheType.Value << diff --git a/tests/kfdtest/src/KFDTopologyTest.hpp b/tests/kfdtest/src/KFDTopologyTest.hpp index 2ab631f8a4..005de1c04d 100644 --- a/tests/kfdtest/src/KFDTopologyTest.hpp +++ b/tests/kfdtest/src/KFDTopologyTest.hpp @@ -33,8 +33,8 @@ class KFDTopologyTest : public KFDBaseComponentTest { public: KFDTopologyTest(void) {} ~KFDTopologyTest(void) {} - static const unsigned long long c_4Gigabyte; - static const unsigned long long c_40BitAddressSpace; + static const HSAuint64 c_4Gigabyte; + static const HSAuint64 c_40BitAddressSpace; }; #endif // __KFD_TOPOLOGY_TEST__H__ diff --git a/tests/kfdtest/src/LinuxOSWrapper.cpp b/tests/kfdtest/src/LinuxOSWrapper.cpp index 90ea87b88f..74a0e65201 100644 --- a/tests/kfdtest/src/LinuxOSWrapper.cpp +++ b/tests/kfdtest/src/LinuxOSWrapper.cpp @@ -41,7 +41,9 @@ #include #include -static int protection_flags[8] = {int(PROT_NONE), int(PROT_READ), int(PROT_WRITE), int(PROT_READ | PROT_WRITE), int(PROT_EXEC), int(PROT_EXEC | PROT_READ), int(PROT_EXEC | PROT_WRITE), int(PROT_EXEC | PROT_WRITE | PROT_READ)}; +static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ | PROT_WRITE, + PROT_EXEC, PROT_EXEC | PROT_READ, PROT_EXEC | PROT_WRITE, + PROT_EXEC | PROT_WRITE | PROT_READ}; 
void SetConsoleTextColor(TEXTCOLOR color) { // TODO complete @@ -69,8 +71,8 @@ bool VirtualFreeMemory(void *address, unsigned int size) { return false; } -unsigned long GetLastErrorNo() { - return errno; +HSAuint64 GetLastErrorNo() { + return errno; } bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) { @@ -78,7 +80,7 @@ bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProc return false; } -unsigned long long GetSystemTickCountInMicroSec() { +HSAuint64 GetSystemTickCountInMicroSec() { struct timeval t; gettimeofday(&t, 0); return t.tv_sec * 1000000ULL + t.tv_usec; @@ -229,7 +231,7 @@ bool WaitForThread(uint64_t threadId) { return 0 == pthread_join((pthread_t)threadId, NULL); } -long AtomicInc(volatile long* pValue) { +HSAint64 AtomicInc(volatile HSAint64* pValue) { return __sync_add_and_fetch(pValue, 1); } diff --git a/tests/kfdtest/src/OSWrapper.hpp b/tests/kfdtest/src/OSWrapper.hpp index bcba47bb63..6b2f500258 100644 --- a/tests/kfdtest/src/OSWrapper.hpp +++ b/tests/kfdtest/src/OSWrapper.hpp @@ -26,6 +26,7 @@ #include #include "KFDTestFlags.hpp" +#include "hsakmt.h" #ifndef __OS__WRAPPER__H__ #define __OS__WRAPPER__H__ @@ -82,19 +83,20 @@ void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = M // @brief replacement for windows FreeVirtual func bool VirtualFreeMemory(void *address, unsigned int size); // @brief retrieve the last error number -unsigned long GetLastErrorNo(); +HSAuint64 GetLastErrorNo(); -long AtomicInc(volatile long* pValue); +HSAint64 AtomicInc(volatile HSAint64* pValue); void MemoryBarrier(); // @brief: runs the selected test case number of times required, each in a separate process -// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want to run all tests in a test case: TestCase.* and so on +// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want +// to run all tests in a test case: 
TestCase.* and so on // @params numOfProcesses : how many processes to run in parallel // @params runsPerProcess : how many iteration a test should do per process, must be a positive number bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1); -unsigned long long GetSystemTickCountInMicroSec(); +HSAuint64 GetSystemTickCountInMicroSec(); /**Put the system to S3/S4 power state and bring it back to S0. @return 'true' on success, 'false' on failure. diff --git a/tests/kfdtest/src/PM4Packet.cpp b/tests/kfdtest/src/PM4Packet.cpp index c0c48337a5..f8e57aa4ed 100644 --- a/tests/kfdtest/src/PM4Packet.cpp +++ b/tests/kfdtest/src/PM4Packet.cpp @@ -21,11 +21,11 @@ * */ -#include "PM4Packet.hpp" -#include "hsakmttypes.h" #include #include #include +#include "PM4Packet.hpp" +#include "hsakmttypes.h" #include "asic_reg/gfx_7_2_enum.h" @@ -52,7 +52,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const { } void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) { - m_pPacketData = (PM4WRITE_DATA_CI *)calloc(1, SizeInBytes()); + m_pPacketData = reinterpret_cast(calloc(1, SizeInBytes())); // verify that the memory is allocated successfully, cannot use assert here EXPECT_NOTNULL(m_pPacketData); @@ -84,7 +84,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, PM4_RELEASE_MEM_CI *pkt; m_packetSize = sizeof(PM4_RELEASE_MEM_CI); - pkt = (PM4_RELEASE_MEM_CI *)calloc(1, m_packetSize); + pkt = reinterpret_cast(calloc(1, m_packetSize)); m_pPacketData = pkt; EXPECT_NOTNULL(m_pPacketData); @@ -147,7 +147,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, PM4MEC_RELEASE_MEM_AI *pkt; m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI); - pkt = (PM4MEC_RELEASE_MEM_AI *)calloc(1, m_packetSize); + pkt = reinterpret_cast(calloc(1, m_packetSize)); m_pPacketData = pkt; EXPECT_NOTNULL(m_pPacketData); @@ -233,7 +233,8 @@ PM4SetShaderRegPacket::PM4SetShaderRegPacket(void) : m_packetDataAllocated(false) { 
} -PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs) +PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], + unsigned int numRegs) : m_packetDataAllocated(false) { InitPacket(baseOffset, regValues, numRegs); } @@ -243,11 +244,15 @@ PM4SetShaderRegPacket::~PM4SetShaderRegPacket(void) { free(m_pPacketData); } -void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs) { - m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t); // 1st register is a part of the packet struct. +void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[], + unsigned int numRegs) { + // 1st register is a part of the packet struct. + m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t); - // allocating the size of the packet, since the packet is assembled from a struct followed by an additional DWORD data - m_pPacketData = (PM4SET_SH_REG *)malloc(m_packetSize); + /* allocating the size of the packet, since the packet is assembled from a struct + * followed by an additional DWORD data + */ + m_pPacketData = reinterpret_cast(malloc(m_packetSize)); ASSERT_NOTNULL(m_pPacketData); @@ -262,11 +267,13 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i memcpy(m_pPacketData->reg_data, regValues, numRegs*sizeof(uint32_t)); } -PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) { +PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, + unsigned int dimZ, unsigned int dispatchInit) { InitPacket(dimX, dimY, dimZ, dispatchInit); } -void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) { +void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, 
unsigned int dimY, unsigned int dimZ, + unsigned int dispatchInit) { memset(&m_packetData, 0, SizeInBytes()); InitPM4Header(m_packetData.header, IT_DISPATCH_DIRECT); diff --git a/tests/kfdtest/src/PM4Packet.hpp b/tests/kfdtest/src/PM4Packet.hpp index cc09d43b68..a7ab06ab5a 100644 --- a/tests/kfdtest/src/PM4Packet.hpp +++ b/tests/kfdtest/src/PM4Packet.hpp @@ -112,7 +112,7 @@ class PM4IndirectBufPacket : public PM4Packet { // empty constructor, befor using the packet call the init func PM4IndirectBufPacket(void) {} // this contructor will also init the packet, no need for adittional calls - PM4IndirectBufPacket(IndirectBuffer *pIb); + explicit PM4IndirectBufPacket(IndirectBuffer *pIb); virtual ~PM4IndirectBufPacket(void) {} // @returns the packet size in bytes diff --git a/tests/kfdtest/src/SDMAPacket.cpp b/tests/kfdtest/src/SDMAPacket.cpp index 45c7a6cd3e..d1120590ab 100644 --- a/tests/kfdtest/src/SDMAPacket.cpp +++ b/tests/kfdtest/src/SDMAPacket.cpp @@ -59,12 +59,12 @@ void SDMAWriteDataPacket::InitPacket(void* destAddr, unsigned int ndw, void *data) { packetSize = sizeof(SDMA_PKT_WRITE_UNTILED) + (ndw - 1) * sizeof(unsigned int); - packetData = (SDMA_PKT_WRITE_UNTILED *)calloc(1, packetSize); + packetData = reinterpret_cast(calloc(1, packetSize)); packetData->HEADER_UNION.op = SDMA_OP_WRITE; packetData->HEADER_UNION.sub_op = SDMA_SUBOP_WRITE_LINEAR; - SplitU64(reinterpret_cast(destAddr), + SplitU64(reinterpret_cast(destAddr), packetData->DST_ADDR_LO_UNION.DW_1_DATA, // dst_addr_31_0 packetData->DST_ADDR_HI_UNION.DW_2_DATA); // dst_addr_63_32 @@ -80,7 +80,7 @@ SDMACopyDataPacket::~SDMACopyDataPacket(void) { SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, unsigned int surfsize) { int32_t size = 0, i; - void **dst = (void**)malloc(sizeof(void*) * n); + void **dst = reinterpret_cast(malloc(sizeof(void*) * n)); const int singlePacketSize = sizeof(SDMA_PKT_COPY_LINEAR) + sizeof(SDMA_PKT_COPY_LINEAR::DST_ADDR[0]) * n; @@ -91,7 +91,7 @@ 
SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns packetSize = ((surfsize + TWO_MEG - 1) >> BITS) * singlePacketSize; - SDMA_PKT_COPY_LINEAR *pSDMA = (SDMA_PKT_COPY_LINEAR *)malloc(packetSize); + SDMA_PKT_COPY_LINEAR *pSDMA = reinterpret_cast(malloc(packetSize)); packetData = pSDMA; while (surfsize > 0) { @@ -106,19 +106,19 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns pSDMA->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR; pSDMA->HEADER_UNION.broadcast = n > 1 ? 1 : 0; pSDMA->COUNT_UNION.count = SDMA_COUNT(size); - SplitU64(reinterpret_cast(src), + SplitU64(reinterpret_cast(src), pSDMA->SRC_ADDR_LO_UNION.DW_3_DATA, // src_addr_31_0 pSDMA->SRC_ADDR_HI_UNION.DW_4_DATA); // src_addr_63_32 for (i = 0; i < n; i++) - SplitU64(reinterpret_cast(dst[i]), + SplitU64(reinterpret_cast(dst[i]), pSDMA->DST_ADDR[i].DST_ADDR_LO_UNION.DW_5_DATA, // dst_addr_31_0 pSDMA->DST_ADDR[i].DST_ADDR_HI_UNION.DW_6_DATA); // dst_addr_63_32 - pSDMA = (SDMA_PKT_COPY_LINEAR *)((char *)pSDMA + singlePacketSize); + pSDMA = reinterpret_cast(reinterpret_cast(pSDMA) + singlePacketSize); for (i = 0; i < n; i++) - dst[i] = (char *)dst[i] + size; - src = (char *)src + size; + dst[i] = reinterpret_cast(dst[i]) + size; + src = reinterpret_cast(src) + size; surfsize -= size; } free(dst); @@ -138,7 +138,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in /* SDMA support maximum 0x3fffe0 byte in one copy. 
Use 2M copy_size */ m_PacketSize = ((size + TWO_MEG - 1) >> BITS) * sizeof(SDMA_PKT_CONSTANT_FILL); - pSDMA = (SDMA_PKT_CONSTANT_FILL *)calloc(1, m_PacketSize); + pSDMA = reinterpret_cast(calloc(1, m_PacketSize)); m_PacketData = pSDMA; while (size > 0) { @@ -158,14 +158,14 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in pSDMA->COUNT_UNION.count = SDMA_COUNT(copy_size); - SplitU64(reinterpret_cast(dst), + SplitU64(reinterpret_cast(dst), pSDMA->DST_ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/ pSDMA->DST_ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/ pSDMA->DATA_UNION.DW_3_DATA = data; pSDMA++; - dst = (char *)dst + copy_size; + dst = reinterpret_cast(dst) + copy_size; size -= copy_size; } } @@ -185,7 +185,7 @@ void SDMAFencePacket::InitPacket(void* destAddr, unsigned int data) { packetData.HEADER_UNION.op = SDMA_OP_FENCE; - SplitU64(reinterpret_cast(destAddr), + SplitU64(reinterpret_cast(destAddr), packetData.ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/ packetData.ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/ diff --git a/tests/kfdtest/src/SDMAPacket.hpp b/tests/kfdtest/src/SDMAPacket.hpp index f7ef53f733..17b9027b5e 100644 --- a/tests/kfdtest/src/SDMAPacket.hpp +++ b/tests/kfdtest/src/SDMAPacket.hpp @@ -125,7 +125,7 @@ class SDMAFencePacket : public SDMAPacket { class SDMATrapPacket : public SDMAPacket { public: // empty constructor, befor using the packet call the init func - SDMATrapPacket(unsigned int eventID = 0); + explicit SDMATrapPacket(unsigned int eventID = 0); virtual ~SDMATrapPacket(void);