Clean up the KFDTest style via CPPLint. Some warnings remain regarding
volatile variables being cast to void*. This is the command used:
cpplint.py --linelength=120
--filter=-readability/multiline_string,-readability/todo,-build/include,-runtime/references

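readability/multiline_string is filtered out because the ISA code is written as multi-line strings.
readability/todo is filtered out to avoid errors about using a plain TODO instead of TODO(username).
build/include is filtered out because it is about including the folder name in the header includes.
runtime/references is filtered out because it is about non-const reference ('&') parameters,
which cpplint wants to be const or replaced with pointers. That can be addressed later.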

Change-Id: I3c6622da0a13dd33ab29b2bfff48be25e763b750
Этот коммит содержится в:
Kent Russell
2018-08-13 09:03:31 -04:00
родитель 3f7b6356fd
Коммит dffac0a97e
33 изменённых файлов: 371 добавлений и 328 удалений
+2 -1
Просмотреть файл
@@ -125,7 +125,8 @@ bool BaseQueue::AllPacketsSubmitted() {
}
void BaseQueue::PlacePacket(const BasePacket &packet) {
ASSERT_EQ(packet.PacketType(), PacketTypeSupported()) << "Cannot add a packet since packet type doesn't match queue";
ASSERT_EQ(packet.PacketType(), PacketTypeSupported())
<< "Cannot add a packet since packet type doesn't match queue";
unsigned int readPtr = Rptr();
unsigned int writePtr = m_pendingWptr;
+18 -10
Просмотреть файл
@@ -57,7 +57,7 @@ void Dispatch::SetDim(unsigned int x, unsigned int y, unsigned int z) {
m_DimZ = z;
}
void Dispatch::SetScratch(int numWaves, int waveSize, unsigned long long scratch_base) {
void Dispatch::SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base) {
m_ComputeTmpringSize = ((waveSize << 12) | (numWaves));
m_ScratchEn = true;
m_scratch_base = scratch_base;
@@ -98,7 +98,7 @@ int Dispatch::SyncWithStatus(unsigned int timeout) {
}
void Dispatch::BuildIb() {
unsigned long long shiftedIsaAddr = m_IsaBuf.As<uint64_t>() >> 8;
HSAuint64 shiftedIsaAddr = m_IsaBuf.As<uint64_t>() >> 8;
unsigned int arg0, arg1, arg2, arg3;
SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);
@@ -118,7 +118,7 @@ void Dispatch::BuildIb() {
unsigned int pgmRsrc2 = 0;
pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT)
& COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK;
pgmRsrc2 |= ((m_scratch_base ? 6 : 4 ) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT)
pgmRsrc2 |= ((m_scratch_base ? 6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT)
& COMPUTE_PGM_RSRC2__USER_SGPR_MASK;
pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT)
& COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK;
@@ -132,7 +132,9 @@ void Dispatch::BuildIb() {
& COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK;
const unsigned int COMPUTE_PGM_RSRC[] = {
0x000c0084 | ((m_SpiPriority & 3) << 10), // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0 DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 }
// PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 PRIV: 0
// DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 }
0x000c0084 | ((m_SpiPriority & 3) << 10),
pgmRsrc2
};
@@ -200,18 +202,24 @@ void Dispatch::BuildIb() {
m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket());
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES,
ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO,
(g_TestGPUFamilyId >= FAMILY_AI) ? COMPUTE_PGM_VALUES_GFX9 : COMPUTE_PGM_VALUES_GFX8,
(g_TestGPUFamilyId >= FAMILY_AI) ? ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX9) : ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX8)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC, ARRAY_SIZE(COMPUTE_PGM_RSRC)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC,
ARRAY_SIZE(COMPUTE_PGM_RSRC)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS, ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE, ARRAY_SIZE(COMPUTE_TMPRING_SIZE)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES, ARRAY_SIZE(COMPUTE_RESTART_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS,
ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE,
ARRAY_SIZE(COMPUTE_TMPRING_SIZE)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES,
ARRAY_SIZE(COMPUTE_RESTART_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES, ARRAY_SIZE(COMPUTE_USER_DATA_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES,
ARRAY_SIZE(COMPUTE_USER_DATA_VALUES)));
m_IndirectBuf.AddPacket(PM4DispatchDirectPacket(m_DimX, m_DimY, m_DimZ, DISPATCH_INIT_VALUE));
+2 -2
Просмотреть файл
@@ -42,7 +42,7 @@ class Dispatch {
int SyncWithStatus(unsigned int timeout);
void SetScratch(int numWaves, int waveSize, unsigned long long scratch_base);
void SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base);
void SetSpiPriority(unsigned int priority);
@@ -68,7 +68,7 @@ class Dispatch {
bool m_ScratchEn;
unsigned int m_ComputeTmpringSize;
unsigned long long m_scratch_base;
HSAuint64 m_scratch_base;
unsigned int m_SpiPriority;
};
+1 -1
Просмотреть файл
@@ -34,7 +34,7 @@ enum LOGTYPE {
};
class KFDLog{};
std::ostream& operator << (KFDLog log ,LOGTYPE level);
std::ostream& operator << (KFDLog log, LOGTYPE level);
// @brief log additional details, to be displayed in the same format as other google test outputs
// currently not supported by google test
+2 -1
Просмотреть файл
@@ -29,7 +29,8 @@
// Constructs an IndirectBuffer.
//   type         - stored as the packet type allowed in this buffer (m_PacketTypeAllowed).
//   sizeInDWords - stored as m_MaxSize; the backing allocation is
//                  sizeInDWords * sizeof(unsigned int) bytes.
//   NodeId       - node passed to the HsaMemoryBuffer allocation.
// The packet count and actual size start at 0. The backing HsaMemoryBuffer is
// created with the zero and exec arguments true and the local argument false.
// (The allocation statement is listed twice below: the original single-line form
// and its line-wrapped replacement.)
IndirectBuffer::IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId)
:m_NumOfPackets(0), m_MaxSize(sizeInDWords), m_ActualSize(0), m_PacketTypeAllowed(type) {
m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/, false/*local*/, true/*exec*/);
m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, true/*zero*/,
false/*local*/, true/*exec*/);
}
IndirectBuffer::~IndirectBuffer(void) {
+4 -5
Просмотреть файл
@@ -28,8 +28,7 @@
/* isa generation class - interface */
class IsaGenerator {
public:
public:
static IsaGenerator* Create(unsigned int familyId);
virtual ~IsaGenerator() {}
@@ -43,11 +42,11 @@ public:
void CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf);
protected:
protected:
virtual const std::string& GetAsicName() = 0;
private:
private:
static const std::string ADDRESS_WATCH_SP3;
};
#endif //_ISAGENERATOR_H_
#endif // _ISAGENERATOR_H_
+11 -7
Просмотреть файл
@@ -24,9 +24,6 @@
#define __KFD_BASE_COMPONENT_TEST__H__
#include <gtest/gtest.h>
#include "hsakmt.h"
#include "OSWrapper.hpp"
#include "KFDTestUtil.hpp"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -34,6 +31,9 @@
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <sys/param.h>
#include "hsakmt.h"
#include "OSWrapper.hpp"
#include "KFDTestUtil.hpp"
// @class KFDBaseComponentTest
class KFDBaseComponentTest : public testing::Test {
@@ -63,13 +63,17 @@ class KFDBaseComponentTest : public testing::Test {
HsaMemFlags m_MemoryFlags;
HsaNodeInfo m_NodeInfo;
// @brief SetUpTestCase function run before the first test that uses KFDOpenCloseKFDTest class fixture, and opens KFD.
// @brief SetUpTestCase function run before the first test that uses
// KFDOpenCloseKFDTest class fixture, and opens KFD.
static void SetUpTestCase();
// @brief TearDownTestCase function run after the last test from KFDOpenCloseKFDTest class fixture and calls close KFD.
// @brief TearDownTestCase function run after the last test from
// KFDOpenCloseKFDTest class fixture and calls close KFD.
static void TearDownTestCase();
// @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
// @brief SetUp function run before every test that uses
// KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
virtual void SetUp();
// @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture.
// @brief TearDown function run after every test that uses
// KFDOpenCloseKFDTest class fixture.
virtual void TearDown();
};
+2 -1
Просмотреть файл
@@ -89,7 +89,8 @@ void KFDCWSRTest::SetUp() {
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
// TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not set the destination based on workitem_id.
// TODO: Seems in the ISA, I can not get the workitem_id as expected, so I can not
// set the destination based on workitem_id.
// Set the wave_num to 1 for now as a workaround. Will set it to 8 or even 256 in the future.
wave_number = 1;
+7 -8
Просмотреть файл
@@ -140,18 +140,17 @@ TEST_F(KFDDBGTest, BasicAddressWatch) {
ASSERT_SUCCESS(hsaKmtDbgRegister(defaultGPUNode));
AddressWatchSuccess = hsaKmtDbgAddressWatch(
defaultGPUNode, // IN
2, // # watch points
&WatchMode[0], // IN
(void **) &WatchAddress[0], // IN
&WatchMask[0], // IN, optional
NULL // IN, optional
);
defaultGPUNode, // IN
2, // # watch points
&WatchMode[0], // IN
reinterpret_cast<void **>(&WatchAddress[0]), // IN
&WatchMask[0], // IN, optional
NULL); // IN, optional
EXPECT_EQ(AddressWatchSuccess, HSAKMT_STATUS_SUCCESS);
Dispatch dispatch(isaBuf);
dispatch.SetArgs(dstBuf.As<void*>(), (void *)secDstBuf);
dispatch.SetArgs(dstBuf.As<void*>(), reinterpret_cast<void *>(secDstBuf));
dispatch.SetDim(1, 1, 1);
// TODO: use Memory ordering rules w/ atomics
+3 -2
Просмотреть файл
@@ -136,7 +136,7 @@ class QueueAndSignalBenchmark {
uint64_t startTime;
PM4Queue queue;
HsaEvent** pHsaEvent = (HsaEvent**) calloc(eventCount, sizeof(HsaEvent*));
HsaEvent** pHsaEvent = reinterpret_cast<HsaEvent**>(calloc(eventCount, sizeof(HsaEvent*)));
size_t packetSize = PM4ReleaseMemoryPacket(false, 0, 0).SizeInBytes();
int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1));
@@ -268,7 +268,8 @@ TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) {
unsigned int pktSizeDwords = 0;
for (i = 0; i < EVENT_NUMBER; i++) {
queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId));
queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2,
pHsaEvent[i]->EventId));
queue.Wait4PacketConsumption();
Delay(WAIT_BETWEEN_SUBMISSIONS_MS);
+52 -57
Просмотреть файл
@@ -40,7 +40,8 @@ void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAui
totalMB = N_PROCESSES*count*(vramBufSize>>20);
if (m_IsParent) {
LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="<< totalMB << ")MB VRAM in KFD" << std::endl;
LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="
<< totalMB << ")MB VRAM in KFD" << std::endl;
}
HSAKMT_STATUS ret;
@@ -95,7 +96,8 @@ void KFDEvictTest::AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle
alloc.flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
if (m_IsParent) {
LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(=" << (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl;
LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(="
<< (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl;
}
ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));
}
@@ -104,79 +106,72 @@ void KFDEvictTest::FreeAmdgpuBo(amdgpu_bo_handle handle) {
ASSERT_EQ(0, amdgpu_bo_free(handle));
}
// Allocates an amdgpu buffer object (BO), maps it into a GPU virtual address
// range and maps it for CPU access.
//
//   dev        - device handle used for the BO and VA range allocations
//   size       - used as the BO alloc_size and as the VA range / mapping size
//   alignment  - used as the BO phys_alignment and the VA range alignment
//   heap       - stored in request.preferred_heap
//   flags      - stored in request.flags
//   bo         - out: receives the BO handle on success
//   cpu        - out: passed to amdgpu_bo_cpu_map
//   mc_address - out: receives the GPU virtual address on success
//   va_handle  - out: receives the VA range handle on success
//
// Returns 0 on success; otherwise the non-zero result of the first call that
// failed. On failure *bo, *mc_address and *va_handle are not written.
//
// Each statement is listed twice below: the original formatting followed by the
// reformatted version. The two copies are the same statement.
static int
amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
unsigned alignment, unsigned heap, uint64_t flags,
amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
amdgpu_va_handle *va_handle)
{
struct amdgpu_bo_alloc_request request = {};
amdgpu_bo_handle buf_handle;
amdgpu_va_handle handle;
uint64_t vmc_addr;
int r;
static int amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
unsigned alignment, unsigned heap, uint64_t flags,
amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
amdgpu_va_handle *va_handle) {
struct amdgpu_bo_alloc_request request = {};
amdgpu_bo_handle buf_handle;
amdgpu_va_handle handle;
uint64_t vmc_addr;
int r;
// Describe the allocation; fields not set here stay zero-initialized.
request.alloc_size = size;
request.phys_alignment = alignment;
request.preferred_heap = heap;
request.flags = flags;
request.alloc_size = size;
request.phys_alignment = alignment;
request.preferred_heap = heap;
request.flags = flags;
// Step 1: allocate the BO. Nothing to undo if this fails.
r = amdgpu_bo_alloc(dev, &request, &buf_handle);
if (r)
return r;
r = amdgpu_bo_alloc(dev, &request, &buf_handle);
if (r)
return r;
// Step 2: reserve a GPU virtual address range of the same size and alignment.
r = amdgpu_va_range_alloc(dev,
amdgpu_gpu_va_range_general,
size, alignment, 0, &vmc_addr,
&handle, 0);
if (r)
goto error_va_alloc;
r = amdgpu_va_range_alloc(dev,
amdgpu_gpu_va_range_general,
size, alignment, 0, &vmc_addr,
&handle, 0);
if (r)
goto error_va_alloc;
// Step 3: map the BO at the reserved GPU virtual address.
r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
// Step 4: map the BO for CPU access; the CPU pointer is written through cpu.
r = amdgpu_bo_cpu_map(buf_handle, cpu);
if (r)
goto error_cpu_map;
r = amdgpu_bo_cpu_map(buf_handle, cpu);
if (r)
goto error_cpu_map;
// Success: hand the BO handle, GPU address and VA handle back to the caller.
*bo = buf_handle;
*mc_address = vmc_addr;
*va_handle = handle;
*bo = buf_handle;
*mc_address = vmc_addr;
*va_handle = handle;
return 0;
return 0;
// Error unwinding: the labels fall through from the latest step to the earliest.
// NOTE(review): each label undoes the step that just failed rather than the
// previous one - error_cpu_map is only reached when amdgpu_bo_cpu_map failed,
// and error_va_map is also reached when the MAP operation itself failed.
// Confirm the unmap calls are harmless in that state.
error_cpu_map:
amdgpu_bo_cpu_unmap(buf_handle);
amdgpu_bo_cpu_unmap(buf_handle);
error_va_map:
amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
// NOTE(review): no error path calls amdgpu_va_range_free(handle), so the VA
// range reserved in step 2 looks leaked when step 3 or 4 fails - confirm.
error_va_alloc:
amdgpu_bo_free(buf_handle);
return r;
amdgpu_bo_free(buf_handle);
return r;
}
// Tears down a buffer object: removes the CPU mapping, unmaps the GPU virtual
// address range [mc_addr, mc_addr + size), frees the VA range handle and
// finally frees the BO itself.
//
//   bo        - buffer object to unmap and free
//   va_handle - VA range handle to free
//   mc_addr   - GPU virtual address passed to the unmap operation
//   size      - size passed to the unmap operation
//
// Always returns 0: the return values of the four amdgpu calls are ignored, so
// a failure in any of them is not reported to the caller.
//
// Each statement is listed twice below: the original formatting followed by the
// reformatted version.
static inline int
amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
uint64_t mc_addr, uint64_t size)
{
amdgpu_bo_cpu_unmap(bo);
amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(va_handle);
amdgpu_bo_free(bo);
return 0;
static inline int amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
uint64_t mc_addr, uint64_t size) {
amdgpu_bo_cpu_unmap(bo);
amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(va_handle);
amdgpu_bo_free(bo);
return 0;
}
// Creates a BO list from one or two buffer objects.
//
//   dev  - device handle passed to amdgpu_bo_list_create
//   bo1  - first buffer object; always included
//   bo2  - optional second buffer object; when it is null only bo1 is listed
//          (the element count passed is 1 instead of 2)
//   list - out: passed through to amdgpu_bo_list_create
//
// Returns the result of amdgpu_bo_list_create unchanged. NULL is passed as that
// call's fourth argument.
//
// Each statement is listed twice below: the original formatting followed by the
// reformatted version.
static inline int
amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list)
{
amdgpu_bo_handle resources[] = {bo1, bo2};
static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) {
amdgpu_bo_handle resources[] = {bo1, bo2};
return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
}
void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
@@ -204,7 +199,7 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
&boList));
/* Fill Nop commands in IB */
ptr = (uint32_t *)ibResultCpu;
ptr = reinterpret_cast<uint32_t *>(ibResultCpu);
for (int i = 0; i < 16; i++)
ptr[i] = 0xffff1000;
+1 -1
Просмотреть файл
@@ -85,7 +85,7 @@ void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc,
}
dispatch.SetDim(dimX, dimY, dimZ);
dispatch.SetArgs((void *)pSrc, (void *)pDst);
dispatch.SetArgs(reinterpret_cast<void *>(pSrc), reinterpret_cast<void *>(pDst));
dispatch.Submit(queue);
m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
+4 -5
Просмотреть файл
@@ -28,11 +28,10 @@
// @class KFDGraphicsInterop
// Adds access to graphics device for interoperability testing.
// Derives from KFDMemoryTest; the constructor and destructor declared here are
// empty and the class adds no members of its own in this listing.
// (The declaration is listed twice below: the original formatting followed by
// the reformatted version.)
class KFDGraphicsInterop : public KFDMemoryTest
{
public:
KFDGraphicsInterop(void) {};
~KFDGraphicsInterop(void) {};
class KFDGraphicsInterop : public KFDMemoryTest {
public:
KFDGraphicsInterop(void) {}
~KFDGraphicsInterop(void) {}
};
#endif
+14 -11
Просмотреть файл
@@ -77,10 +77,10 @@ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
HSAuint32 *sharedLocalBuffer = NULL;
/* Read from Pipe the shared Handle. Import shared Local Memory */
ASSERT_GE(read(pipefd[0], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0);
ASSERT_GE(read(pipefd[0], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM,
(void**)&sharedLocalBuffer, &sharedSize));
reinterpret_cast<void**>(&sharedLocalBuffer), &sharedSize));
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(sharedLocalBuffer, sharedSize, NULL));
/* Check for pattern in the shared Local Memory */
@@ -128,7 +128,7 @@ void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pip
/* Share it with the child process */
ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer.As<void*>(), size, &sharedHandleLM));
ASSERT_GE(write(pipefd[1], (void*)&sharedHandleLM, sizeof(sharedHandleLM)), 0);
ASSERT_GE(write(pipefd[1], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
/* Wait for the child to finish */
waitpid(cpid, &status, 0);
@@ -413,7 +413,7 @@ static int read_non_block(int fd, void *buf, int size) {
int total_bytes = 0, cur_bytes = 0;
int retries = 5;
struct timespec tm = { 0, 100000000ULL };
char *ptr = (char *)buf;
char *ptr = reinterpret_cast<char *>(buf);
do {
cur_bytes = read(fd, ptr, (size - total_bytes));
@@ -439,7 +439,7 @@ static int read_non_block(int fd, void *buf, int size) {
/* Send HsaMemoryRange to another process that is connected via writePipe */
CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) {
if (write_non_block(writePipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) !=
if (write_non_block(writePipe, reinterpret_cast<void*>(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) !=
sizeof(m_HsaMemoryRange))
return CMA_IPC_PIPE_ERROR;
return CMA_TEST_SUCCESS;
@@ -449,7 +449,7 @@ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) {
CMA_TEST_STATUS KFDCMAArray::recvCMAArray(int readPipe) {
int i;
if (read_non_block(readPipe, (void*)&m_HsaMemoryRange, sizeof(m_HsaMemoryRange)) !=
if (read_non_block(readPipe, reinterpret_cast<void*>(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) !=
sizeof(m_HsaMemoryRange))
return CMA_IPC_PIPE_ERROR;
@@ -704,10 +704,13 @@ TEST_F(KFDIPCTest, CMABasicTest) {
HSAuint32 expected_pattern;
srcRange.MemoryAddress = testLocalBuffer.As<void*>();
srcRange.SizeInBytes = size; /* Deliberately set to value > unaligned_size. Only unaligned_size
* should be copied since dstRange.SizeInBytes == unaligned_size
*/
dstRange.MemoryAddress = (void *)(testLocalBuffer.As<char*>() + (size / 2) + unaligned_offset);
/* Deliberately set to value > unaligned_size. Only unaligned_size
* should be copied since dstRange.SizeInBytes == unaligned_size
*/
srcRange.SizeInBytes = size;
dstRange.MemoryAddress = reinterpret_cast<void *>(testLocalBuffer.As<char*>() + (size / 2) + unaligned_offset);
dstRange.SizeInBytes = unaligned_size;
ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied));
ASSERT_EQ(copied, unaligned_size);
@@ -719,7 +722,7 @@ TEST_F(KFDIPCTest, CMABasicTest) {
/* Test3. Test overflow and expect failure */
srcRange.MemoryAddress = testLocalBuffer.As<void*>();
srcRange.SizeInBytes = size;
dstRange.MemoryAddress = (void *)(testLocalBuffer.As<char*>() + 4);
dstRange.MemoryAddress = reinterpret_cast<void *>(testLocalBuffer.As<char*>() + 4);
dstRange.SizeInBytes = size; /* This should overflow since offset is VA + 4 */
status = hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied);
EXPECT_NE(status, HSAKMT_STATUS_SUCCESS);
+3 -3
Просмотреть файл
@@ -316,7 +316,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
break;
}
void *bufferEnd = (void *)((unsigned long)pages[order].pointers[p]
void *bufferEnd = reinterpret_cast<void *>(reinterpret_cast<unsigned long>(pages[order].pointers[p])
+ size - sizeof(unsigned));
sysBuffer.As<unsigned *>()[0] = ++value;
@@ -340,7 +340,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
Dispatch dispatch3(isaBuffer);
dispatch3.SetArgs(bufferEnd,
(void *)&(sysBuffer.As<unsigned*>()[1]));
reinterpret_cast<void *>(&(sysBuffer.As<unsigned*>()[1])));
dispatch3.Submit(queue);
dispatch3.Sync(g_TestTimeOut);
EXPECT_EQ(value, sysBuffer.As<unsigned *>()[1]);
@@ -349,7 +349,7 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
}
LOG() << " Got " << pages[order].nPages
<< ", end of last block addr: "
<< (void *)((unsigned long)pages[order].pointers[p-1] + size - 1)
<< reinterpret_cast<void *>(reinterpret_cast<unsigned long>(pages[order].pointers[p-1]) + size - 1)
<< std::endl;
// Now free half the memory
+94 -92
Просмотреть файл
@@ -94,14 +94,14 @@ shader ReadMemory\n\
asic(GFX9)\n\
type(CS)\n\
/* Assume src address in s0, s1 and dst address in s2, s3*/\n\
s_movk_i32 s18, 0x5678\n\
LOOP:\n\
s_load_dword s16, s[0:1], 0x0 glc\n\
s_cmp_eq_i32 s16, s18\n\
s_cbranch_scc0 LOOP\n\
s_store_dword s18, s[2:3], 0x0 glc\n\
s_endpgm\n\
end\n\
s_movk_i32 s18, 0x5678\n\
LOOP:\n\
s_load_dword s16, s[0:1], 0x0 glc\n\
s_cmp_eq_i32 s16, s18\n\
s_cbranch_scc0 LOOP\n\
s_store_dword s18, s[2:3], 0x0 glc\n\
s_endpgm\n\
end\n\
";
void KFDMemoryTest::SetUp() {
@@ -127,7 +127,7 @@ void KFDMemoryTest::TearDown() {
}
#include <sys/mman.h>
#define GB(x) ((x)<<30)
#define GB(x) ((x) << 30)
/*
* Try to map as much system memory as possible to the GPU.
@@ -147,17 +147,17 @@ TEST_F(KFDMemoryTest, MMapLarge) {
HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
const unsigned long nObjects = 1<<14;
const HSAuint64 nObjects = 1<<14;
HSAuint64 *AlternateVAGPU = new HSAuint64[nObjects];
ASSERT_NE((unsigned long)AlternateVAGPU, 0);
ASSERT_NE((HSAuint64)AlternateVAGPU, 0);
HsaMemMapFlags mapFlags = {0};
unsigned long s;
HSAuint64 s;
char *addr;
unsigned long flags = MAP_ANONYMOUS | MAP_PRIVATE;
HSAuint64 flags = MAP_ANONYMOUS | MAP_PRIVATE;
/* Test up to 1TB memory*/
s = GB(1024ULL) / nObjects;
addr = (char*)mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0);
addr = reinterpret_cast<char*>(mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0));
ASSERT_NE(addr, MAP_FAILED);
memset(addr, 0, s);
@@ -167,7 +167,7 @@ TEST_F(KFDMemoryTest, MMapLarge) {
if (hsaKmtRegisterMemory(addr + i, s - i))
break;
if (hsaKmtMapMemoryToGPUNodes(addr + i, s - i,
&AlternateVAGPU[i], mapFlags, 1, (HSAuint32 *)&defaultGPUNode)) {
&AlternateVAGPU[i], mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode))) {
hsaKmtDeregisterMemory(addr + i);
break;
}
@@ -177,8 +177,8 @@ TEST_F(KFDMemoryTest, MMapLarge) {
<< "GB system memory to gpu" << std::endl;
while (i--) {
ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU((void*)AlternateVAGPU[i]));
ASSERT_SUCCESS(hsaKmtDeregisterMemory((void*)AlternateVAGPU[i]));
ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void*>(AlternateVAGPU[i])));
ASSERT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void*>(AlternateVAGPU[i])));
}
munmap(addr, s);
@@ -268,7 +268,8 @@ TEST_F(KFDMemoryTest , MapMemoryToGPU) {
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb));
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags,
reinterpret_cast<void**>(&pDb)));
// verify that pDb is not null before it's being used
ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL));
@@ -292,7 +293,8 @@ TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
TEST_START(TESTPROFILE_RUNALL)
unsigned int* pDb = NULL;
EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags, (void**)&pDb));
EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtAllocMemory(0 /* system */, 0, m_MemoryFlags,
reinterpret_cast<void**>(&pDb)));
TEST_END
}
@@ -302,7 +304,7 @@ TEST_F(KFDMemoryTest, MemoryAlloc) {
TEST_START(TESTPROFILE_RUNALL)
unsigned int* pDb = NULL;
EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, (void**)&pDb));
EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
TEST_END
}
@@ -340,7 +342,7 @@ TEST_F(KFDMemoryTest, AccessPPRMem) {
* consumed by IOMMU HW. Because of that, a kernel driver workaround
* is put in place to address that, so we don't need to wait here.
*/
//sleep(5);
// sleep(5);
VirtualFreeMemory(destBuf, PAGE_SIZE);
@@ -472,10 +474,10 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32),
&gpuva2));
EXPECT_TRUE(gpuva1 != gpuva2);
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1));
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1));
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2));
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2));
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));
/* Same address, same size */
HsaMemMapFlags memFlags = {0};
@@ -496,20 +498,20 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
sizeof(HSAuint32) * 2,
&gpuva2, memFlags, nGPU, nodes));
EXPECT_EQ(gpuva1, gpuva2);
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva1));
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva1));
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));
/* Confirm that we still have access to the memory, mem[2] */
PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
mem[2] = 0x0;
queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)gpuva2,
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast<unsigned int *>(gpuva2),
0xdeadbeef));
queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0));
queue.Wait4PacketConsumption();
EXPECT_EQ(true, WaitOnValue((unsigned int *)&mem[2], 0xdeadbeef));
EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef));
EXPECT_SUCCESS(queue.Destroy());
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU((void *)gpuva2));
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)gpuva2));
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));
TEST_END
}
@@ -535,7 +537,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/, false/*exec*/, true /*scratch*/);
HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, defaultGPUNode, false/*zero*/, false/*local*/,
false/*exec*/, true /*scratch*/);
// Unmap scratch for sub-allocation mapping tests
ASSERT_SUCCESS(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<void*>()));
@@ -576,7 +579,8 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
if (pNodeProperties != NULL) {
// Get the aperture of the scratch buffer
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks, memoryProperties));
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, pNodeProperties->NumMemoryBanks,
memoryProperties));
for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_GPU_SCRATCH) {
@@ -684,14 +688,14 @@ void KFDMemoryTest::BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granular
sizeMB = (lowMB + highMB) / 2;
size = sizeMB * 1024 * 1024;
ret = hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags,
(void**)&pDb);
reinterpret_cast<void**>(&pDb));
if (ret) {
highMB = sizeMB;
continue;
}
ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU,
mapFlags, 1, (HSAuint32 *)&defaultGPUNode);
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
if (ret) {
ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size));
highMB = sizeMB;
@@ -740,14 +744,14 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
sizeMB = (lowMB + highMB) / 2;
size = sizeMB * 1024 * 1024;
ret = hsaKmtAllocMemory(defaultGPUNode, size, memFlags,
(void**)&pDb);
reinterpret_cast<void**>(&pDb));
if (ret) {
highMB = sizeMB;
continue;
}
ret = hsaKmtMapMemoryToGPUNodes(pDb, size, &AlternateVAGPU,
mapFlags, 1, (HSAuint32 *)&defaultGPUNode);
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
if (ret) {
ASSERT_SUCCESS(hsaKmtFreeMemory(pDb, size));
highMB = sizeMB;
@@ -810,13 +814,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
do {
ret = hsaKmtAllocMemory(0 /* system */, block_size, m_MemoryFlags,
(void**)&pDb_array[i]);
reinterpret_cast<void**>(&pDb_array[i]));
if (ret) {
break;
}
ret = hsaKmtMapMemoryToGPUNodes(pDb_array[i], block_size,
&AlternateVAGPU, mapFlags, 1, (HSAuint32 *)&defaultGPUNode);
&AlternateVAGPU, mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode));
if (ret) {
ASSERT_SUCCESS(hsaKmtFreeMemory(pDb_array[i], block_size));
break;
@@ -849,7 +853,7 @@ TEST_F(KFDMemoryTest, MMBench) {
#define TEST_SDMA(index) (((index / nSizes) >> 1) & 0x1)
void *bufs[nBufs];
unsigned long long start, end;
HSAuint64 start, end;
unsigned i;
HSAKMT_STATUS ret;
HsaMemFlags memFlags = {0};
@@ -898,7 +902,7 @@ TEST_F(KFDMemoryTest, MMBench) {
unsigned bufSize = TEST_BUFSIZE(testIndex);
unsigned memType = TEST_MEMTYPE(testIndex);
bool interleaveSDMA = TEST_SDMA(testIndex);
unsigned long long allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime;
HSAuint64 allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime;
HSAuint32 allocNode;
if ((testIndex & (nSizes-1)) == 0)
@@ -1033,16 +1037,16 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
EXPECT_EQ(ptrInfo.SizeInBytes, (HSAuint64)localBuffer.Size());
HSAuint32 *addr = localBuffer.As<HSAuint32 *>() + 4;
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)addr, &ptrInfo));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(addr), &ptrInfo));
EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As<void*>());
}
/** Registered memory: user pointer */
static volatile HSAuint32 mem[4]; // 8 bytes for register only and
// 8 bytes for register to nodes
HsaMemoryBuffer hsaBuffer((void *)&mem[0], sizeof(HSAuint32)*2);
HsaMemoryBuffer hsaBuffer((void *)(&mem[0]), sizeof(HSAuint32)*2);
if (is_dgpu()) { // APU doesn't use userptr
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[0], &ptrInfo));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[0]), &ptrInfo));
EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER);
EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]);
EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)hsaBuffer.As<void*>());
@@ -1053,29 +1057,29 @@ TEST_F(KFDMemoryTest, QueryPointerInfo) {
HSAuint32 nodes[nGPU];
for (unsigned int i = 0; i < nGPU; i++)
nodes[i] = gpuNodes.at(i);
EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2],
EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)(&mem[2]),
sizeof(HSAuint32)*2, nGPU, nodes));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[2], &ptrInfo));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[2]), &ptrInfo));
EXPECT_EQ(ptrInfo.NRegisteredNodes, nGPU);
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)&mem[2]));
EXPECT_SUCCESS(hsaKmtDeregisterMemory((void *)(&mem[2])));
}
/* Not a starting address, but an address inside the memory range
* should also get the memory information
*/
HSAuint32 *address = hostBuffer.As<HSAuint32 *>() + 1;
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)address, &ptrInfo));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(address), &ptrInfo));
EXPECT_EQ(ptrInfo.Type, HSA_POINTER_ALLOCATED);
EXPECT_EQ(ptrInfo.CPUAddress, hostBuffer.As<void*>());
if (is_dgpu()) {
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)&mem[1], &ptrInfo));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo((void *)(&mem[1]), &ptrInfo));
EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_USER);
EXPECT_EQ(ptrInfo.CPUAddress, &mem[0]);
}
/*** Set user data ***/
char userData[16] = "This is a test.";
EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As<HSAuint32 *>(), (void *)userData));
EXPECT_SUCCESS(hsaKmtSetMemoryUserData(hostBuffer.As<HSAuint32 *>(), reinterpret_cast<void *>(userData)));
EXPECT_SUCCESS(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo));
EXPECT_EQ(ptrInfo.UserData, (void *)userData);
@@ -1106,16 +1110,16 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
// Offset in the VRAM buffer to test crossing non-contiguous
// buffer boundaries. The second access starting from offset
// sizeof(long)+1 will cross a node boundary in a single access,
// sizeof(HSAint64)+1 will cross a node boundary in a single access,
// for node sizes of 4MB or smaller.
const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(long);
const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);
// alloc system memory from node 0 and initialize it
memFlags.ui32.NonPaged = 0;
ASSERT_SUCCESS(hsaKmtAllocMemory(0, PAGE_SIZE*2, memFlags, &mem[0]));
for (i = 0; i < 4*sizeof(long) + 4; i++) {
((HSAuint8 *)mem[0])[i] = i; // source
((HSAuint8 *)mem[0])[PAGE_SIZE+i] = 0; // destination
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
(reinterpret_cast<HSAuint8 *>(mem[0]))[i] = i; // source
(reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0; // destination
}
// try to alloc local memory from GPU node
@@ -1123,10 +1127,10 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
if (m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode)) {
EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE*2 + (4 << 20),
memFlags, &mem[1]));
mem[1] = (void *)((HSAuint8 *)mem[1] + VRAM_OFFSET);
for (i = 0; i < 4*sizeof(long) + 4; i++) {
((HSAuint8 *)mem[1])[i] = i;
((HSAuint8 *)mem[1])[PAGE_SIZE+i] = 0;
mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) + VRAM_OFFSET);
for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
(reinterpret_cast<HSAuint8 *>(mem[1]))[i] = i;
(reinterpret_cast<HSAuint8 *>(mem[1]))[PAGE_SIZE+i] = 0;
}
} else {
LOG() << "Not testing local memory, it's invisible" << std::endl;
@@ -1168,22 +1172,22 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
for (i = 0; i < 4; i++) {
// Test 4 different (mis-)alignments, leaving 1-byte
// gaps between longs
HSAuint8 *addr = (HSAuint8 *)((long *)mem[0] + i) + i;
HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
errno = 0;
long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
EXPECT_EQ(0, errno);
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
(void *)data));
reinterpret_cast<void *>(data)));
if (mem[1] == NULL)
continue;
addr = (HSAuint8 *)((long *)mem[1] + i) + i;
addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[1]) + i) + i;
errno = 0;
data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
EXPECT_EQ(0, errno);
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
(void *)data));
reinterpret_cast<void *>(data)));
}
} catch (...) {
err = 1;
@@ -1204,32 +1208,31 @@ TEST_F(KFDMemoryTest, PtraceAccess) {
}
// Clear gaps in the source that should not have been copied
((uint8_t*)mem[0])[ sizeof(long) ] = 0;
((uint8_t*)mem[0])[2*sizeof(long) + 1] = 0;
((uint8_t*)mem[0])[3*sizeof(long) + 2] = 0;
((uint8_t*)mem[0])[4*sizeof(long) + 3] = 0;
(reinterpret_cast<uint8_t*>(mem[0]))[ sizeof(long) ] = 0;
(reinterpret_cast<uint8_t*>(mem[0]))[2*sizeof(long) + 1] = 0;
(reinterpret_cast<uint8_t*>(mem[0]))[3*sizeof(long) + 2] = 0;
(reinterpret_cast<uint8_t*>(mem[0]))[4*sizeof(long) + 3] = 0;
// Check results
EXPECT_EQ(0, memcmp(mem[0], (HSAuint8 *)mem[0] + PAGE_SIZE,
EXPECT_EQ(0, memcmp(mem[0], reinterpret_cast<HSAuint8 *>(mem[0]) + PAGE_SIZE,
sizeof(long)*4 + 4));
// Free memory
EXPECT_SUCCESS(hsaKmtFreeMemory(mem[0], PAGE_SIZE*2));
if (mem[1]) {
((uint8_t*)mem[1])[ sizeof(long) ] = 0;
((uint8_t*)mem[1])[2*sizeof(long) + 1] = 0;
((uint8_t*)mem[1])[3*sizeof(long) + 2] = 0;
((uint8_t*)mem[1])[4*sizeof(long) + 3] = 0;
EXPECT_EQ(0, memcmp(mem[1], (HSAuint8 *)mem[1] + PAGE_SIZE,
sizeof(long)*4 + 4));
mem[1] = (void *)((HSAuint8 *)mem[1] - VRAM_OFFSET);
(reinterpret_cast<uint8_t*>(mem[1]))[ sizeof(HSAint64) ] = 0;
(reinterpret_cast<uint8_t*>(mem[1]))[2*sizeof(HSAint64) + 1] = 0;
(reinterpret_cast<uint8_t*>(mem[1]))[3*sizeof(HSAint64) + 2] = 0;
(reinterpret_cast<uint8_t*>(mem[1]))[4*sizeof(HSAint64) + 3] = 0;
EXPECT_EQ(0, memcmp(mem[1], reinterpret_cast<HSAuint8 *>(mem[1]) + PAGE_SIZE,
sizeof(HSAint64)*4 + 4));
mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) - VRAM_OFFSET);
EXPECT_SUCCESS(hsaKmtFreeMemory(mem[1], PAGE_SIZE*2));
}
TEST_END
}
TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram)
{
TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
char *hsaDebug = getenv("HSA_DEBUG");
if (!is_dgpu()) {
@@ -1266,8 +1269,8 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram)
/* set the word before 4M boundary to 0xdeadbeefdeadbeef
* and the word after 4M boundary to 0xcafebabecafebabe
*/
mem0 = (void *)((HSAuint8 *)mem + VRAM_OFFSET);
mem1 = (void *)((HSAuint8 *)mem + VRAM_OFFSET + sizeof(HSAuint64));
mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
mem1 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET + sizeof(HSAuint64));
PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem0,
@@ -1313,17 +1316,17 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram)
/* peek the memory */
errno = 0;
long data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
HSAint64 data0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
EXPECT_EQ(0, errno);
EXPECT_EQ(data[0], data0);
long data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL);
HSAint64 data1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL);
EXPECT_EQ(0, errno);
EXPECT_EQ(data[1], data1);
/* swap mem0 and mem1 by poking */
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, (void *)data[1]));
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1])));
EXPECT_EQ(0, errno);
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, (void *)data[0]));
EXPECT_EQ(0, ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0])));
EXPECT_EQ(0, errno);
} catch (...) {
err = 1;
@@ -1345,10 +1348,10 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram)
/* Use shader to read back data to check poke results */
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
//dstBuffer is cpu accessible gtt memory
// dstBuffer is cpu accessible gtt memory
HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode);
m_pIsaGen->CompileShader((m_FamilyId >= FAMILY_AI) ? gfx9_ScratchCopyDword : gfx8_ScratchCopyDword,
"ScratchCopyDword",isaBuffer);
"ScratchCopyDword", isaBuffer);
Dispatch dispatch0(isaBuffer);
dispatch0.SetArgs(mem0, dstBuffer.As<void*>());
dispatch0.Submit(queue);
@@ -1405,7 +1408,7 @@ TEST_F(KFDMemoryTest, SignalHandling) {
*/
size = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);
ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, (void**)&pDb));
ASSERT_SUCCESS(hsaKmtAllocMemory(0 /* system */, size, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));
// verify that pDb is not null before it's being used
ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
@@ -1468,7 +1471,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
while (count--) {
ret = hsaKmtAllocMemory(0 /* system */, sysBufSize, m_MemoryFlags,
(void**)&pDb);
reinterpret_cast<void**>(&pDb));
if (ret) {
LOG() << "Failed to allocate system buffer of" << std::dec << sysBufSizeMB
<< "MB" << std::endl;
@@ -1496,8 +1499,7 @@ TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
TEST_END
}
static inline void access(volatile void *sd, int size, int rw)
{
static inline void access(volatile void *sd, int size, int rw) {
/* Most likely sits in cache */
static struct DUMMY {
char dummy[1024];
@@ -1531,7 +1533,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
#define _TEST_MEMTYPE(index) ((index / nSizes) % nMemTypes)
void *bufs[nBufs];
unsigned long long start;
HSAuint64 start;
unsigned i;
HSAKMT_STATUS ret;
HsaMemFlags memFlags = {0};
@@ -1545,7 +1547,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;
if (!m_NodeInfo.IsGPUNodeLargeBar(defaultGPUNode) || !vramSizeMB) {
LOG() << "not a largebar system, skip!"<<std::endl;
LOG() << "not a largebar system, skip!" << std::endl;
return;
}
@@ -1562,7 +1564,7 @@ TEST_F(KFDMemoryTest, MMBandWidth) {
for (testIndex = 0; testIndex < nTests; testIndex++) {
unsigned bufSize = _TEST_BUFSIZE(testIndex);
unsigned memType = _TEST_MEMTYPE(testIndex);
unsigned long long mcpRTime, mcpWTime, accessRTime, accessWTime;
HSAuint64 mcpRTime, mcpWTime, accessRTime, accessWTime;
HSAuint32 allocNode;
if ((testIndex & (nSizes-1)) == 0)
+2 -1
Просмотреть файл
@@ -34,7 +34,8 @@ class KFDOpenCloseKFDTest : public testing::Test {
~KFDOpenCloseKFDTest(void) {}
protected:
// @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture, sets all common settings for the tests.
// @brief SetUp function run before every test that uses KFDOpenCloseKFDTest class fixture,
// sets all common settings for the tests.
virtual void SetUp();
// @brief TearDown function run after every test that uses KFDOpenCloseKFDTest class fixture.
virtual void TearDown();
+2 -1
Просмотреть файл
@@ -93,7 +93,8 @@ TEST_F(KFDPNPTest, DisableAndCreateQueue) {
DisableKfd();
EnableKfd();
ASSERT_NE(HSAKMT_STATUS_SUCCESS, queue.Create(defaultGPUNode)) << "Queue creation should fail after a topology change.";
ASSERT_NE(HSAKMT_STATUS_SUCCESS, queue.Create(defaultGPUNode))
<< "Queue creation should fail after a topology change.";
TEST_END
}
+5 -4
Просмотреть файл
@@ -70,7 +70,8 @@ static struct block_name_table {
{"DRIVER ", {0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa}}
};
void KFDPerfCountersTest::GetBlockName(HSA_UUID uuid, char *name, uint32_t name_len, char *uuid_str, uint32_t uuid_str_len) {
void KFDPerfCountersTest::GetBlockName(HSA_UUID uuid, char *name, uint32_t name_len,
char *uuid_str, uint32_t uuid_str_len) {
uint32_t i, table_size;
table_size = sizeof(block_lookup_table) / sizeof(struct block_name_table);
@@ -134,7 +135,7 @@ TEST_F(KFDPerfCountersTest, GetCounterProperties) {
LOG() << name << " (" << uuid_string << "): " << type << ", " <<
block->NumCounters << " counter IDs" << std::endl;
block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
block = reinterpret_cast<HsaCounterBlockProperties *>(&block->Counters[block->NumCounters]);
}
TEST_END
@@ -161,7 +162,7 @@ TEST_F(KFDPerfCountersTest, RegisterTrace) {
priv_block_found = true;
break;
}
block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
block = reinterpret_cast<HsaCounterBlockProperties *>(&block->Counters[block->NumCounters]);
}
if (!priv_block_found) {
@@ -202,7 +203,7 @@ TEST_F(KFDPerfCountersTest, StartStopQueryTrace) {
priv_block_found = true;
break;
}
block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
block = reinterpret_cast<HsaCounterBlockProperties *>(&block->Counters[block->NumCounters]);
}
if (!priv_block_found) {
+48 -40
Просмотреть файл
@@ -271,7 +271,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) {
// don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!";
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
<< "Packet executed even though the queue is supposed to be disabled!";
ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false));
@@ -311,7 +312,8 @@ TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) {
// don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!";
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
<< "Packet executed even though the queue is supposed to be disabled!";
ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false));
@@ -357,7 +359,8 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {
// don't sync since we don't expect rptr to change when the queue is disabled.
Delay(2000);
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF) << "Packet executed even though the queue is supposed to be disabled!";
ASSERT_EQ(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF)
<< "Packet executed even though the queue is supposed to be disabled!";
ASSERT_SUCCESS(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false));
@@ -373,13 +376,13 @@ TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {
TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
TEST_START(TESTPROFILE_RUNALL)
static const unsigned long long TEST_TIME_SEC = 15;
static const HSAuint64 TEST_TIME_SEC = 15;
unsigned long long initialTime = GetSystemTickCountInMicroSec();
HSAuint64 initialTime = GetSystemTickCountInMicroSec();
unsigned int numIter = 0;
unsigned long long timePassed = 0;
HSAuint64 timePassed = 0;
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
@@ -404,7 +407,7 @@ TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
delete queues[1];
++numIter;
unsigned long long curTime = GetSystemTickCountInMicroSec();
HSAuint64 curTime = GetSystemTickCountInMicroSec();
timePassed = (curTime - initialTime) / 1000000;
} while (timePassed < TEST_TIME_SEC);
@@ -553,7 +556,7 @@ s_waitcnt lgkmcnt(0)\n\
end\n\
";
long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) {
HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) {
HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/);
HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false);
HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false);
@@ -580,9 +583,9 @@ long long KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t m
}
/* To cover for outliers, allow us to get the Average time based on a specified number of iterations */
long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) {
long long timeArray[iterations];
long long timeTotal = 0;
HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) {
HSAint64 timeArray[iterations];
HSAint64 timeTotal = 0;
if (iterations < 1) {
LOG() << "ERROR: At least 1 iteration must be performed" << std::endl;
return 0;
@@ -599,9 +602,11 @@ long long KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask,
}
for (int x = 0; x < iterations; x++) {
long long variance = timeArray[x] / (timeTotal / iterations);
HSAint64 variance = timeArray[x] / (timeTotal / iterations);
if (variance < CuNegVariance || variance > CuPosVariance)
LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x] << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")" << std::endl;
LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x]
<< ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")"
<< std::endl;
}
return timeTotal / iterations;
@@ -625,7 +630,7 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) {
LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl;
LOG() << std::hex << "# Shader engines: 0x" << numSEs << std::endl;
LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl;
long long TimewithCU1, TimewithCU;
HSAint64 TimewithCU1, TimewithCU;
uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */
uint32_t maskNumBits = maskNumDwords * 32;
uint32_t mask[maskNumDwords];
@@ -646,10 +651,11 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) {
mask[maskIndex] |= 1 << ((nCUs - 1) % 32);
TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits);
ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs);
ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs);
LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl;
LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl;
LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8)
<< ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl;
ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance));
}
@@ -685,7 +691,7 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) {
LOG() << std::hex << "# SIMDs per CPU: 0x" << pNodeProperties->NumSIMDPerCU << std::endl;
LOG() << std::hex << "# Shader engines: 0x" << numShaderEngines << std::endl;
LOG() << std::hex << "# Active CUs: 0x" << ActiveCU << std::endl;
long long TimewithCU1, TimewithCU;
HSAint64 TimewithCU1, TimewithCU;
uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */
uint32_t maskNumBits = maskNumDwords * 32;
uint32_t mask[maskNumDwords];
@@ -716,10 +722,11 @@ TEST_F(KFDQMTest, BasicCuMaskingEven) {
int nCUs = numShaderEngines * (x + 1);
TimewithCU = TimeConsumedwithCUMask(defaultGPUNode, mask, maskNumBits);
ratio = (double)TimewithCU1 / ((double)TimewithCU * nCUs);
ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs);
LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl;
LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8) << ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl;
LOG() << std::setprecision(2) << CuNegVariance << " <= " << std::fixed << std::setprecision(8)
<< ratio << " <= " << std::setprecision(2) << CuPosVariance << std::endl;
ASSERT_TRUE((ratio >= CuNegVariance) && (ratio <= CuPosVariance));
}
@@ -945,10 +952,10 @@ TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) {
unsigned int* src = srcBuffer.As<unsigned int*>();
unsigned int* dst = destBuffer.As<unsigned int*>();
static const unsigned long long TEST_TIME_SEC = 15;
unsigned long long initialTime, curTime;
static const HSAuint64 TEST_TIME_SEC = 15;
HSAuint64 initialTime, curTime;
unsigned int numIter = 0;
unsigned long long timePassed = 0;
HSAuint64 timePassed = 0;
unsigned int i;
PM4Queue queues[MAX_CP_QUEUES];
@@ -1019,7 +1026,8 @@ TEST_F(KFDQMTest, CpuWriteCoherence) {
EXPECT_EQ(0, queue.Rptr());
// now that the GPU has cached the PQ contents, we modify them in CPU cache and ensure that the GPU sees the updated value:
// now that the GPU has cached the PQ contents, we modify them in CPU cache and
// ensure that the GPU sees the updated value:
queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x42, 0x42));
queue.Wait4PacketConsumption();
@@ -1046,7 +1054,7 @@ TEST_F(KFDQMTest, CreateAqlCpQueue) {
TEST_END
}
#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
#define ALIGN_UP(x, align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
#define CounterToNanoSec(x) ((x) * 1000 / (is_dgpu() ? 27 : 100))
#include<algorithm>
@@ -1056,7 +1064,7 @@ TEST_F(KFDQMTest, QueueLatency) {
PM4Queue queue;
const int queueSize = PAGE_SIZE * 2;
const int packetSize = PM4ReleaseMemoryPacket(0,0,0,0,0).SizeInBytes();
const int packetSize = PM4ReleaseMemoryPacket(0, 0, 0, 0, 0).SizeInBytes();
/* We always leave one NOP(dword) empty after packet which is required by ring itself.
* We also place NOPs when the queue wraps around to avoid crossing the buffer end. See PlacePacket().
* So the worst case is that we need two packetSize space to place one packet.
@@ -1067,16 +1075,16 @@ TEST_F(KFDQMTest, QueueLatency) {
*/
const int reservedSpace = packetSize + queueSize % packetSize;
const int slots = (queueSize - reservedSpace) / packetSize;
long queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med;
long overhead, workload;
long *queue_latency_arr = (long*)calloc(slots, sizeof(long));
HSAint64 queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med;
HSAint64 overhead, workload;
HSAint64 *queue_latency_arr = reinterpret_cast<HSAint64*>(calloc(slots, sizeof(HSAint64)));
const int skip = 2;
const char *fs[skip] = {"1st", "2nd"};
HsaClockCounters *ts;
HSAuint64 *qts;
int i = 0;
ASSERT_NE((unsigned long)queue_latency_arr, 0);
// Abort if calloc() returned NULL. 'unsigned' cannot be combined with the HSAint64
// typedef, so the address is converted to the unsigned 64-bit typedef HSAuint64 instead.
ASSERT_NE(reinterpret_cast<HSAuint64>(queue_latency_arr), 0);
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
@@ -1102,7 +1110,7 @@ TEST_F(KFDQMTest, QueueLatency) {
i = 0;
do {
queue.PlacePacket(PM4ReleaseMemoryPacket(true,
(unsigned long)&qts[i],
(unsigned HSAint64)&qts[i],
0,
true,
1));
@@ -1114,7 +1122,7 @@ TEST_F(KFDQMTest, QueueLatency) {
/* Calculate timing which includes workload and overhead*/
i = 0;
do {
long queue_latency = qts[i] - ts[i].GPUClockCounter;
HSAint64 queue_latency = qts[i] - ts[i].GPUClockCounter;
ASSERT_GE(queue_latency, 0);
@@ -1129,7 +1137,7 @@ TEST_F(KFDQMTest, QueueLatency) {
i = 0;
do {
queue.PlacePacket(PM4ReleaseMemoryPacket(true,
(unsigned long)&qts[i],
(unsigned HSAint64)&qts[i],
0,
true,
1));
@@ -1151,7 +1159,7 @@ TEST_F(KFDQMTest, QueueLatency) {
do {
/* The queue_latency is not that correct as the workload and overhead are average*/
queue_latency_arr[i] -= workload + overhead;
/* The First submit takes a long time*/
/* The first submit takes a long time */
if (i < skip)
LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl;
} while (++i < slots);
@@ -1243,13 +1251,13 @@ TEST_F(KFDQMTest, SdmaQueueWraparound) {
}
struct AtomicIncThreadParams {
long* pDest;
HSAint64* pDest;
volatile unsigned int count;
volatile bool stop;
};
unsigned int AtomicIncThread(void* pCtx) {
AtomicIncThreadParams* pArgs = (AtomicIncThreadParams*)pCtx;
AtomicIncThreadParams* pArgs = reinterpret_cast<AtomicIncThreadParams*>(pCtx);
while (pArgs->stop)
{}
@@ -1288,7 +1296,7 @@ TEST_F(KFDQMTest, Atomics) {
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
AtomicIncThreadParams params;
params.pDest = destBuf.As<long*>();
params.pDest = destBuf.As<HSAint64*>();
params.stop = true;
params.count = 0;
@@ -1441,7 +1449,7 @@ TEST_F(KFDQMTest, P2PTest) {
/* 1. Allocate a system buffer and allow the access to GPUs */
EXPECT_SUCCESS(hsaKmtAllocMemory(0, size, memFlags,
(void **)&sysBuf));
reinterpret_cast<void **>(&sysBuf)));
EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sysBuf, size, NULL,
mapFlags, nodes.size(), &nodes[0]));
#define MAGIC_NUM 0xdeadbeaf
@@ -1449,7 +1457,7 @@ TEST_F(KFDQMTest, P2PTest) {
/* First GPU fills mem with MAGIC_NUM*/
void *src, *dst;
HSAuint32 cur = nodes[0], next;
ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, (void**)&src));
ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast<void**>(&src)));
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(src, size, NULL));
sdma_fill(cur, src, MAGIC_NUM, size);
@@ -1465,7 +1473,7 @@ TEST_F(KFDQMTest, P2PTest) {
} else {
n = 2;
next = nodes[i];
ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, (void**)&dst));
ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, reinterpret_cast<void**>(&dst)));
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dst, size, NULL));
}
@@ -1506,7 +1514,7 @@ TEST_F(KFDQMTest, SdmaEventInterrupt) {
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
queue.PlaceAndSubmitPacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId));
queue.PlaceAndSubmitPacket(SDMAFencePacket(reinterpret_cast<void*>(event->EventData.HWData2), event->EventId));
queue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));
+2 -2
Просмотреть файл
@@ -43,8 +43,8 @@ class KFDQMTest : public KFDBaseComponentTest {
void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1);
// void SyncDispatchWithSleep(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf);
long long TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
long long GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
protected: // members
/* Acceptable performance for CU Masking should be within 15% (CuVariance) of linearly-predicted performance */
const double CuVariance = 0.15;
+6 -6
Просмотреть файл
@@ -54,12 +54,12 @@ enum ENVCAPS{
enum KfdFamilyId {
FAMILY_UNKNOWN = 0,
FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M)
FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi
FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M)
FAMILY_CZ, // Carrizo, Nolan, Amur
FAMILY_AI, // Arctic Islands
FAMILY_RV, // Raven
FAMILY_CI, // Sea Islands: Hawaii (P), Maui (P), Bonaire (M)
FAMILY_KV, // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi
FAMILY_VI, // Volcanic Islands: Iceland (V), Tonga (M)
FAMILY_CZ, // Carrizo, Nolan, Amur
FAMILY_AI, // Arctic Islands
FAMILY_RV, // Raven
};
#endif // __KFD_TEST_FLAGS__H__
+3 -2
Просмотреть файл
@@ -42,7 +42,7 @@ std::ostream& operator << (std::ostream& out, TESTPROFILE profile) {
break;
default:
out << "INVALID";
};
}
return out;
}
@@ -71,7 +71,8 @@ GTEST_API_ int main(int argc, char **argv) {
bool success = GetCommandLineArguments(argc, argv, args);
if (success) {
if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) && (args.HwsEnabled != HWCAP__FORCE_DISABLED))
if ((GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED) &&
(args.HwsEnabled != HWCAP__FORCE_DISABLED))
g_TestENVCaps |= ENVCAPS_HWSCHEDULING;
g_TestRunProfile = args.TestProfile;
+17 -15
Просмотреть файл
@@ -43,7 +43,7 @@ bool WaitOnValue(const volatile unsigned int *buf, unsigned int value) {
return *buf == value;
}
void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart) {
void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart) {
rLoPart = static_cast<unsigned int>(value);
rHiPart = static_cast<unsigned int>(value >> 32);
}
@@ -125,7 +125,8 @@ bool isTonga(const HsaNodeProperties *props) {
const HsaMemoryBuffer HsaMemoryBuffer::Null;
HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec, bool isScratch, bool isReadOnly)
HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec,
bool isScratch, bool isReadOnly)
:m_Size(size),
m_pUser(NULL),
m_pBuf(NULL),
@@ -153,7 +154,7 @@ HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, b
if (isReadOnly)
m_Flags.ui32.ReadOnly = 1;
EXPECT_SUCCESS(hsaKmtAllocMemory( m_Node, m_Size, m_Flags, &m_pBuf));
EXPECT_SUCCESS(hsaKmtAllocMemory(m_Node, m_Size, m_Flags, &m_pBuf));
if (is_dgpu()) {
EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL));
m_MappedNodes = 1 << m_Node;
@@ -189,9 +190,9 @@ void HsaMemoryBuffer::Fill(unsigned char value, HSAuint64 offset, HSAuint64 size
ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl;
if (m_pUser != NULL)
memset((char *)m_pUser + offset, value, size);
memset(reinterpret_cast<char *>(m_pUser) + offset, value, size);
else if (m_pBuf != NULL)
memset((char *)m_pBuf + offset, value, size);
memset(reinterpret_cast<char *>(m_pBuf) + offset, value, size);
else
ASSERT_TRUE(0) << "Invalid HsaMemoryBuffer";
}
@@ -207,9 +208,9 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) {
ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl;
if (m_pUser != NULL)
ptr = (HSAuint32 *)((char *)m_pUser + offset);
ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pUser) + offset);
else if (m_pBuf != NULL)
ptr = (HSAuint32 *)((char *)m_pBuf + offset);
ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pBuf) + offset);
ASSERT_NOTNULL(ptr);
@@ -229,8 +230,8 @@ void HsaMemoryBuffer::Fill(HSAuint32 value, BaseQueue& baseQueue, HSAuint64 offs
size = size ? size : m_Size;
ASSERT_TRUE(size + offset <= m_Size) << "Buffer Overflow" << std::endl;
baseQueue.PlacePacket(SDMAFillDataPacket((void *)(this->As<char*>() + offset), value, size));
baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2, event->EventId));
baseQueue.PlacePacket(SDMAFillDataPacket((reinterpret_cast<void *>(this->As<char*>() + offset)), value, size));
baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast<void*>(event->EventData.HWData2), event->EventId));
baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));
ASSERT_SUCCESS(hsaKmtWaitOnEvent(event, g_TestTimeOut));
@@ -250,9 +251,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern) {
return false;
if (m_pUser != NULL)
ptr = (HSAuint32 *)m_pUser;
ptr = reinterpret_cast<HSAuint32 *>(m_pUser);
else if (m_pBuf != NULL)
ptr = (HSAuint32 *)m_pBuf;
ptr = reinterpret_cast<HSAuint32 *>(m_pBuf);
else
return false;
@@ -284,9 +285,9 @@ bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern, BaseQueue
*tmp = ~pattern;
baseQueue.PlacePacket(SDMACopyDataPacket((void *)tmp,
(void *)(this->As<HSAuint64>() + location),
reinterpret_cast<void *>(this->As<HSAuint64>() + location),
sizeof(HSAuint32)));
baseQueue.PlacePacket(SDMAFencePacket((void*)event->EventData.HWData2,
baseQueue.PlacePacket(SDMAFencePacket(reinterpret_cast<void*>(event->EventData.HWData2),
event->EventId));
baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));
@@ -394,14 +395,15 @@ HsaMemoryBuffer::~HsaMemoryBuffer() {
m_pBuf = NULL;
}
HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node)
HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle,
HSAuint64 size, unsigned int node)
:m_Size(0),
m_pBuf(NULL),
m_graphic_handle(0),
m_Node(node) {
HSAuint64 flat_address;
EXPECT_SUCCESS(hsaKmtMapGraphicHandle(m_Node, device_handle, buffer_handle, 0, size, &flat_address));
m_pBuf = (void*)flat_address;
m_pBuf = reinterpret_cast<void*>(flat_address);
}
HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
+5 -5
Просмотреть файл
@@ -36,7 +36,7 @@ class BaseQueue;
// @brief: waits until the value is written to the buffer or until time out if received through args
bool WaitOnValue(const volatile unsigned int *buf, unsigned int value);
void SplitU64(const unsigned long long value, unsigned int& rLoPart, unsigned int& rHiPart);
void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart);
bool GetHwCapabilityHWS();
@@ -106,14 +106,14 @@ class HsaMemoryBuffer {
void* m_pBuf;
bool m_Local;
unsigned int m_Node;
unsigned short m_MappedNodes;
HSAuint64 m_MappedNodes;
};
class HsaInteropMemoryBuffer {
public:
HsaInteropMemoryBuffer(unsigned long long device_handle, unsigned long long buffer_handle, unsigned long long size, unsigned int node);
HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, HSAuint64 size, unsigned int node);
template<typename RetType>
RetType As() {
@@ -135,9 +135,9 @@ class HsaInteropMemoryBuffer {
const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&);
private:
unsigned long long m_Size;
HSAuint64 m_Size;
void* m_pBuf;
unsigned long long m_graphic_handle;
HSAuint64 m_graphic_handle;
unsigned int m_Node;
};
+12 -7
Просмотреть файл
@@ -27,8 +27,8 @@
// @todo complete topology test according to what's in: hsathk\source\windows\kmt_topology.cpp
const unsigned long long KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
const unsigned long long KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);
const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);
TEST_F(KFDTopologyTest , BasicTest) {
TEST_START(TESTPROFILE_RUNALL)
@@ -41,12 +41,14 @@ TEST_F(KFDTopologyTest , BasicTest) {
if (pNodeProperties != NULL) {
// checking for cpu core only if it's a cpu only node or if it's a Kaveri APU.
if (pNodeProperties->DeviceId == 0 || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) {
EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node << " No CPUs core are connected for node index";
EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node
<< " No CPUs core are connected for node index";
}
// if it's not a cpu only node, look for a gpu core
if (pNodeProperties->DeviceId != 0) {
EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node << "No GPUs core are connected.";
EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node
<< "No GPUs core are connected.";
// EngineId only applies to GPU, not CPU-only nodes
EXPECT_GT(pNodeProperties->EngineId.ui32.uCode, 0) << "uCode version is 0";
EXPECT_GE(pNodeProperties->EngineId.ui32.Major, 7) << "Major Version is less than 7";
@@ -118,7 +120,8 @@ TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
return;
}
HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks, memoryProperties));
EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks,
memoryProperties));
bool GpuVMHeapFound = false;
for (unsigned int bank = 0 ; bank < pNodeProperties->NumMemoryBanks ; bank++) {
// Check for either private (small-bar/APU) or public (large-bar)
@@ -145,9 +148,11 @@ TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
pNodeProperties = m_NodeInfo.GetNodeProperties(node);
if (pNodeProperties != NULL) {
HsaCacheProperties *cacheProperties = new HsaCacheProperties[pNodeProperties->NumCaches];
EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo, pNodeProperties->NumCaches, cacheProperties));
EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo,
pNodeProperties->NumCaches, cacheProperties));
if (pNodeProperties->NumCPUCores > 0) { // this is a CPU node
LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches" << std::endl;
LOG() << "CPU Node " << std::dec << node << ": " << pNodeProperties->NumCaches << " caches"
<< std::endl;
for (unsigned n = 0; n < pNodeProperties->NumCaches; n++) {
LOG()<< n << " - Level " << cacheProperties[n].CacheLevel <<
" Type " << cacheProperties[n].CacheType.Value <<
+2 -2
Просмотреть файл
@@ -33,8 +33,8 @@ class KFDTopologyTest : public KFDBaseComponentTest {
public:
KFDTopologyTest(void) {}
~KFDTopologyTest(void) {}
static const unsigned long long c_4Gigabyte;
static const unsigned long long c_40BitAddressSpace;
static const HSAuint64 c_4Gigabyte;
static const HSAuint64 c_40BitAddressSpace;
};
#endif // __KFD_TOPOLOGY_TEST__H__
+7 -5
Просмотреть файл
@@ -41,7 +41,9 @@
#include <sys/ioctl.h>
#include <sys/time.h>
static int protection_flags[8] = {int(PROT_NONE), int(PROT_READ), int(PROT_WRITE), int(PROT_READ | PROT_WRITE), int(PROT_EXEC), int(PROT_EXEC | PROT_READ), int(PROT_EXEC | PROT_WRITE), int(PROT_EXEC | PROT_WRITE | PROT_READ)};
static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ | PROT_WRITE,
PROT_EXEC, PROT_EXEC | PROT_READ, PROT_EXEC | PROT_WRITE,
PROT_EXEC | PROT_WRITE | PROT_READ};
void SetConsoleTextColor(TEXTCOLOR color) {
// TODO complete
@@ -69,8 +71,8 @@ bool VirtualFreeMemory(void *address, unsigned int size) {
return false;
}
unsigned long GetLastErrorNo() {
return errno;
HSAuint64 GetLastErrorNo() {
return errno;
}
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) {
@@ -78,7 +80,7 @@ bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProc
return false;
}
unsigned long long GetSystemTickCountInMicroSec() {
HSAuint64 GetSystemTickCountInMicroSec() {
struct timeval t;
gettimeofday(&t, 0);
return t.tv_sec * 1000000ULL + t.tv_usec;
@@ -229,7 +231,7 @@ bool WaitForThread(uint64_t threadId) {
return 0 == pthread_join((pthread_t)threadId, NULL);
}
long AtomicInc(volatile long* pValue) {
HSAint64 AtomicInc(volatile HSAint64* pValue) {
return __sync_add_and_fetch(pValue, 1);
}
+6 -4
Просмотреть файл
@@ -26,6 +26,7 @@
#include <string>
#include "KFDTestFlags.hpp"
#include "hsakmt.h"
#ifndef __OS__WRAPPER__H__
#define __OS__WRAPPER__H__
@@ -82,19 +83,20 @@ void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = M
// @brief replacement for windows FreeVirtual func
bool VirtualFreeMemory(void *address, unsigned int size);
// @brief retrieve the last error number
unsigned long GetLastErrorNo();
HSAuint64 GetLastErrorNo();
long AtomicInc(volatile long* pValue);
HSAint64 AtomicInc(volatile HSAint64* pValue);
void MemoryBarrier();
// @brief: runs the selected test case number of times required, each in a separate process
// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want to run all tests in a test case: TestCase.* and so on
// @params testToRun : can be a specific test testcase like TestCase.TestName or if you want
// to run all tests in a test case: TestCase.* and so on
// @params numOfProcesses : how many processes to run in parallel
// @params runsPerProcess : how many iterations a test should do per process, must be a positive number
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1);
unsigned long long GetSystemTickCountInMicroSec();
HSAuint64 GetSystemTickCountInMicroSec();
/**Put the system to S3/S4 power state and bring it back to S0.
@return 'true' on success, 'false' on failure.
+19 -12
Просмотреть файл
@@ -21,11 +21,11 @@
*
*/
#include "PM4Packet.hpp"
#include "hsakmttypes.h"
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "PM4Packet.hpp"
#include "hsakmttypes.h"
#include "asic_reg/gfx_7_2_enum.h"
@@ -52,7 +52,7 @@ unsigned int PM4WriteDataPacket::SizeInBytes() const {
}
void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
m_pPacketData = (PM4WRITE_DATA_CI *)calloc(1, SizeInBytes());
m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(calloc(1, SizeInBytes()));
// verify that the memory is allocated successfully, cannot use assert here
EXPECT_NOTNULL(m_pPacketData);
@@ -84,7 +84,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address,
PM4_RELEASE_MEM_CI *pkt;
m_packetSize = sizeof(PM4_RELEASE_MEM_CI);
pkt = (PM4_RELEASE_MEM_CI *)calloc(1, m_packetSize);
pkt = reinterpret_cast<PM4_RELEASE_MEM_CI *>(calloc(1, m_packetSize));
m_pPacketData = pkt;
EXPECT_NOTNULL(m_pPacketData);
@@ -147,7 +147,7 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address,
PM4MEC_RELEASE_MEM_AI *pkt;
m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI);
pkt = (PM4MEC_RELEASE_MEM_AI *)calloc(1, m_packetSize);
pkt = reinterpret_cast<PM4MEC_RELEASE_MEM_AI *>(calloc(1, m_packetSize));
m_pPacketData = pkt;
EXPECT_NOTNULL(m_pPacketData);
@@ -233,7 +233,8 @@ PM4SetShaderRegPacket::PM4SetShaderRegPacket(void)
: m_packetDataAllocated(false) {
}
PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs)
PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[],
unsigned int numRegs)
: m_packetDataAllocated(false) {
InitPacket(baseOffset, regValues, numRegs);
}
@@ -243,11 +244,15 @@ PM4SetShaderRegPacket::~PM4SetShaderRegPacket(void) {
free(m_pPacketData);
}
void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs) {
m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t); // 1st register is a part of the packet struct.
void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[],
unsigned int numRegs) {
// 1st register is a part of the packet struct.
m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);
// allocating the size of the packet, since the packet is assembled from a struct followed by an additional DWORD data
m_pPacketData = (PM4SET_SH_REG *)malloc(m_packetSize);
/* allocating the size of the packet, since the packet is assembled from a struct
* followed by an additional DWORD data
*/
m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(malloc(m_packetSize));
ASSERT_NOTNULL(m_pPacketData);
@@ -262,11 +267,13 @@ void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned i
memcpy(m_pPacketData->reg_data, regValues, numRegs*sizeof(uint32_t));
}
PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) {
PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY,
unsigned int dimZ, unsigned int dispatchInit) {
InitPacket(dimX, dimY, dimZ, dispatchInit);
}
void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit) {
void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ,
unsigned int dispatchInit) {
memset(&m_packetData, 0, SizeInBytes());
InitPM4Header(m_packetData.header, IT_DISPATCH_DIRECT);
+1 -1
Просмотреть файл
@@ -112,7 +112,7 @@ class PM4IndirectBufPacket : public PM4Packet {
// empty constructor, before using the packet call the init func
PM4IndirectBufPacket(void) {}
// this constructor will also init the packet, no need for additional calls
PM4IndirectBufPacket(IndirectBuffer *pIb);
explicit PM4IndirectBufPacket(IndirectBuffer *pIb);
virtual ~PM4IndirectBufPacket(void) {}
// @returns the packet size in bytes
+13 -13
Просмотреть файл
@@ -59,12 +59,12 @@ void SDMAWriteDataPacket::InitPacket(void* destAddr, unsigned int ndw,
void *data) {
packetSize = sizeof(SDMA_PKT_WRITE_UNTILED) +
(ndw - 1) * sizeof(unsigned int);
packetData = (SDMA_PKT_WRITE_UNTILED *)calloc(1, packetSize);
packetData = reinterpret_cast<SDMA_PKT_WRITE_UNTILED *>(calloc(1, packetSize));
packetData->HEADER_UNION.op = SDMA_OP_WRITE;
packetData->HEADER_UNION.sub_op = SDMA_SUBOP_WRITE_LINEAR;
SplitU64(reinterpret_cast<unsigned long long>(destAddr),
SplitU64(reinterpret_cast<HSAuint64>(destAddr),
packetData->DST_ADDR_LO_UNION.DW_1_DATA, // dst_addr_31_0
packetData->DST_ADDR_HI_UNION.DW_2_DATA); // dst_addr_63_32
@@ -80,7 +80,7 @@ SDMACopyDataPacket::~SDMACopyDataPacket(void) {
SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, unsigned int surfsize) {
int32_t size = 0, i;
void **dst = (void**)malloc(sizeof(void*) * n);
void **dst = reinterpret_cast<void**>(malloc(sizeof(void*) * n));
const int singlePacketSize = sizeof(SDMA_PKT_COPY_LINEAR) +
sizeof(SDMA_PKT_COPY_LINEAR::DST_ADDR[0]) * n;
@@ -91,7 +91,7 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns
packetSize = ((surfsize + TWO_MEG - 1) >> BITS) * singlePacketSize;
SDMA_PKT_COPY_LINEAR *pSDMA = (SDMA_PKT_COPY_LINEAR *)malloc(packetSize);
SDMA_PKT_COPY_LINEAR *pSDMA = reinterpret_cast<SDMA_PKT_COPY_LINEAR *>(malloc(packetSize));
packetData = pSDMA;
while (surfsize > 0) {
@@ -106,19 +106,19 @@ SDMACopyDataPacket::SDMACopyDataPacket(void *const dsts[], void *src, int n, uns
pSDMA->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR;
pSDMA->HEADER_UNION.broadcast = n > 1 ? 1 : 0;
pSDMA->COUNT_UNION.count = SDMA_COUNT(size);
SplitU64(reinterpret_cast<unsigned long long>(src),
SplitU64(reinterpret_cast<HSAuint64>(src),
pSDMA->SRC_ADDR_LO_UNION.DW_3_DATA, // src_addr_31_0
pSDMA->SRC_ADDR_HI_UNION.DW_4_DATA); // src_addr_63_32
for (i = 0; i < n; i++)
SplitU64(reinterpret_cast<unsigned long long>(dst[i]),
SplitU64(reinterpret_cast<HSAuint64>(dst[i]),
pSDMA->DST_ADDR[i].DST_ADDR_LO_UNION.DW_5_DATA, // dst_addr_31_0
pSDMA->DST_ADDR[i].DST_ADDR_HI_UNION.DW_6_DATA); // dst_addr_63_32
pSDMA = (SDMA_PKT_COPY_LINEAR *)((char *)pSDMA + singlePacketSize);
pSDMA = reinterpret_cast<SDMA_PKT_COPY_LINEAR *>(reinterpret_cast<char *>(pSDMA) + singlePacketSize);
for (i = 0; i < n; i++)
dst[i] = (char *)dst[i] + size;
src = (char *)src + size;
dst[i] = reinterpret_cast<char *>(dst[i]) + size;
src = reinterpret_cast<char *>(src) + size;
surfsize -= size;
}
free(dst);
@@ -138,7 +138,7 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in
/* SDMA support maximum 0x3fffe0 byte in one copy. Use 2M copy_size */
m_PacketSize = ((size + TWO_MEG - 1) >> BITS) * sizeof(SDMA_PKT_CONSTANT_FILL);
pSDMA = (SDMA_PKT_CONSTANT_FILL *)calloc(1, m_PacketSize);
pSDMA = reinterpret_cast<SDMA_PKT_CONSTANT_FILL *>(calloc(1, m_PacketSize));
m_PacketData = pSDMA;
while (size > 0) {
@@ -158,14 +158,14 @@ SDMAFillDataPacket::SDMAFillDataPacket(void *dst, unsigned int data, unsigned in
pSDMA->COUNT_UNION.count = SDMA_COUNT(copy_size);
SplitU64(reinterpret_cast<unsigned long long>(dst),
SplitU64(reinterpret_cast<HSAuint64>(dst),
pSDMA->DST_ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/
pSDMA->DST_ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/
pSDMA->DATA_UNION.DW_3_DATA = data;
pSDMA++;
dst = (char *)dst + copy_size;
dst = reinterpret_cast<char *>(dst) + copy_size;
size -= copy_size;
}
}
@@ -185,7 +185,7 @@ void SDMAFencePacket::InitPacket(void* destAddr, unsigned int data) {
packetData.HEADER_UNION.op = SDMA_OP_FENCE;
SplitU64(reinterpret_cast<unsigned long long>(destAddr),
SplitU64(reinterpret_cast<HSAuint64>(destAddr),
packetData.ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/
packetData.ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/
+1 -1
Просмотреть файл
@@ -125,7 +125,7 @@ class SDMAFencePacket : public SDMAPacket {
class SDMATrapPacket : public SDMAPacket {
public:
// empty constructor, before using the packet call the init func
SDMATrapPacket(unsigned int eventID = 0);
explicit SDMATrapPacket(unsigned int eventID = 0);
virtual ~SDMATrapPacket(void);