From e9882daf11a9d2e6c7a30e708e61d8b06d289015 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 17 Jul 2019 14:25:02 -0400 Subject: [PATCH] KFDTest : Add gfx1xxx release_mem and acquire_mem packet support use family ID as parameter when construct the packets Change-Id: I6c1706954ab7b8cbb8bef2aab16edf21f5e1abf0 Signed-off-by: shaoyunl --- tests/kfdtest/src/Dispatch.cpp | 6 +- tests/kfdtest/src/KFDEventTest.cpp | 8 +- tests/kfdtest/src/KFDMemoryTest.cpp | 4 +- tests/kfdtest/src/KFDQMTest.cpp | 18 +- tests/kfdtest/src/KFDRASTest.cpp | 2 +- tests/kfdtest/src/KFDTestUtilQueue.cpp | 4 +- tests/kfdtest/src/PM4Packet.cpp | 231 ++++++++++++++++--------- tests/kfdtest/src/PM4Packet.hpp | 34 ++-- tests/kfdtest/src/PM4Queue.cpp | 2 +- 9 files changed, 196 insertions(+), 113 deletions(-) diff --git a/tests/kfdtest/src/Dispatch.cpp b/tests/kfdtest/src/Dispatch.cpp index 6fc9ffe56c..802841060c 100644 --- a/tests/kfdtest/src/Dispatch.cpp +++ b/tests/kfdtest/src/Dispatch.cpp @@ -80,7 +80,7 @@ void Dispatch::Submit(BaseQueue& queue) { EventData.EventData.SyncVar.SyncVar.UserData, m_pEop->EventId)); } - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, m_pEop->EventData.HWData2, m_pEop->EventId)); + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(g_TestGPUFamilyId, false, m_pEop->EventData.HWData2, m_pEop->EventId)); if (!queue.GetSkipWaitConsump()) queue.Wait4PacketConsumption(); @@ -200,7 +200,7 @@ void Dispatch::BuildIb() { // ORDERED_APPEND_MODE=0, USE_THREAD_DIMENSIONS=1, ORDER_MODE=0, DISPATCH_CACHE_CNTL=0, // SCALAR_L1_INV_VOL=0, VECTOR_L1_INV_VOL=0, DATA_ATC=?, RESTORE=0} - m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket()); + m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket(g_TestGPUFamilyId)); m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES, ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES))); @@ -225,5 +225,5 @@ void Dispatch::BuildIb() { m_IndirectBuf.AddPacket(PM4PartialFlushPacket()); - m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket()); + m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket(g_TestGPUFamilyId)); } diff --git a/tests/kfdtest/src/KFDEventTest.cpp b/tests/kfdtest/src/KFDEventTest.cpp index 16e53613fc..9aebb53b46 100644 --- a/tests/kfdtest/src/KFDEventTest.cpp +++ b/tests/kfdtest/src/KFDEventTest.cpp @@ -103,7 +103,7 @@ TEST_F(KFDEventTest, SignalEvent) { /* From gfx9 onward, m_pHsaEvent->EventId will also be passed to int_ctxid in * the Release Mem packet, which is used as context id in ISR. */ - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false, m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId)); queue.Wait4PacketConsumption(); @@ -148,7 +148,7 @@ class QueueAndSignalBenchmark { PM4Queue queue; HsaEvent** pHsaEvent = reinterpret_cast(calloc(eventCount, sizeof(HsaEvent*))); - size_t packetSize = PM4ReleaseMemoryPacket(false, 0, 0).SizeInBytes(); + size_t packetSize = PM4ReleaseMemoryPacket(g_TestGPUFamilyId, false, 0, 0).SizeInBytes(); int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1)); time = 0; @@ -162,7 +162,7 @@ class QueueAndSignalBenchmark { if (r != HSAKMT_STATUS_SUCCESS) goto exit; - queue.PlacePacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId)); + queue.PlacePacket(PM4ReleaseMemoryPacket(g_TestGPUFamilyId, false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId)); } startTime = gettime(); @@ -278,7 +278,7 @@ TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) { unsigned int pktSizeDwords = 0; for (i = 0; i < EVENT_NUMBER; i++) { - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, pHsaEvent[i]->EventData.HWData2, + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId)); queue.Wait4PacketConsumption(); diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 7fe844c14d..69b0e5e477 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -564,7 +564,7 @@ TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) { mem[2] = 0x0; queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast(gpuva2), 0xdeadbeef)); - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0)); + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, 0, 0)); queue.Wait4PacketConsumption(); EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef)); EXPECT_SUCCESS(queue.Destroy()); @@ -1450,7 +1450,7 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) { data0[0], data0[1])); queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem1, data1[0], data1[1])); - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(true, 0, 0)); + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, 0, 0)); queue.Wait4PacketConsumption(); /* Allow any process to trace this one. If kernel is built without diff --git a/tests/kfdtest/src/KFDQMTest.cpp b/tests/kfdtest/src/KFDQMTest.cpp index 5f9dada9f0..07b83c6dea 100644 --- a/tests/kfdtest/src/KFDQMTest.cpp +++ b/tests/kfdtest/src/KFDQMTest.cpp @@ -1092,7 +1092,7 @@ TEST_F(KFDQMTest, QueueLatency) { PM4Queue queue; const int queueSize = PAGE_SIZE * 2; - const int packetSize = PM4ReleaseMemoryPacket(0, 0, 0, 0, 0).SizeInBytes(); + const int packetSize = PM4ReleaseMemoryPacket(m_FamilyId, 0, 0, 0, 0, 0).SizeInBytes(); /* We always leave one NOP(dword) empty after packet which is required by ring itself. * We also place NOPs when queue wraparound to avoid crossing buffer end. See PlacePacket(). * So the worst case is that we need two packetSize space to place one packet. @@ -1141,7 +1141,7 @@ TEST_F(KFDQMTest, QueueLatency) { /* Submit packets serially*/ i = 0; do { - queue.PlacePacket(PM4ReleaseMemoryPacket(true, + queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)&qts[i], 0, true, @@ -1168,7 +1168,7 @@ TEST_F(KFDQMTest, QueueLatency) { /* Workload of queue packet itself */ i = 0; do { - queue.PlacePacket(PM4ReleaseMemoryPacket(true, + queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)&qts[i], 0, true, @@ -1549,7 +1549,7 @@ TEST_F(KFDQMTest, PM4EventInterrupt) { const HSAuint64 bufSize = PAGE_SIZE; const int packetCount = bufSize / sizeof(unsigned int); const int totalPacketSize = packetCount * PM4WriteDataPacket(0, 0).SizeInBytes() + - PM4ReleaseMemoryPacket(0, 0, 0).SizeInBytes(); + PM4ReleaseMemoryPacket(m_FamilyId, 0, 0, 0).SizeInBytes(); const int queueSize = RoundToPowerOf2(totalPacketSize); /* 4 PM4 queues will be running at same time.*/ @@ -1575,7 +1575,7 @@ TEST_F(KFDQMTest, PM4EventInterrupt) { queue[i].PlacePacket(PM4WriteDataPacket(buf[i] + index, 0xdeadbeaf)); /* releaseMemory packet makes sure all previous written data is visible.*/ - queue[i].PlacePacket(PM4ReleaseMemoryPacket(0, + queue[i].PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, 0, reinterpret_cast(event[i]->EventData.HWData2), event[i]->EventId, true)); @@ -1748,10 +1748,10 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) { * the first queue. */ otherQueue.PlacePacket( - PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr, + PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)qRes->Queue_write_ptr, pendingWptr, false)); otherQueue.PlacePacket( - PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_DoorBell, + PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)qRes->Queue_DoorBell, pendingWptr, false)); #endif @@ -1775,10 +1775,10 @@ TEST_F(KFDQMTest, GPUDoorbellWrite) { * the PM4 packet on the first queue. */ otherQueue.PlacePacket( - PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_write_ptr, + PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)qRes->Queue_write_ptr, pendingWptr64, true)); otherQueue.PlacePacket( - PM4ReleaseMemoryPacket(true, (HSAuint64)qRes->Queue_DoorBell, + PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)qRes->Queue_DoorBell, pendingWptr64, true)); #endif diff --git a/tests/kfdtest/src/KFDRASTest.cpp b/tests/kfdtest/src/KFDRASTest.cpp index 09d61a03fa..428db0e16d 100644 --- a/tests/kfdtest/src/KFDRASTest.cpp +++ b/tests/kfdtest/src/KFDRASTest.cpp @@ -164,7 +164,7 @@ TEST_F(KFDRASTest, MixEventsTest) { ASSERT_SUCCESS(queue.Create(m_defaultGPUNode)); - queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(false, + queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false, pHsaEvent->EventData.HWData2, pHsaEvent->EventId)); queue.Wait4PacketConsumption(); diff --git a/tests/kfdtest/src/KFDTestUtilQueue.cpp b/tests/kfdtest/src/KFDTestUtilQueue.cpp index 8369e5a9f8..991485fbd4 100644 --- a/tests/kfdtest/src/KFDTestUtilQueue.cpp +++ b/tests/kfdtest/src/KFDTestUtilQueue.cpp @@ -103,7 +103,7 @@ class AsyncMPSQ { if (m_queueType == HSA_QUEUE_SDMA) return SDMATimePacket(0).SizeInBytes(); else if (m_queueType == HSA_QUEUE_COMPUTE) - return PM4ReleaseMemoryPacket(0, 0, 0, 0, 0).SizeInBytes(); + return PM4ReleaseMemoryPacket(g_TestGPUFamilyId, 0, 0, 0, 0, 0).SizeInBytes(); return 0; } @@ -126,7 +126,7 @@ class AsyncMPSQ { PlacePacket(SDMATimePacket(addr)); else if (m_queueType == HSA_QUEUE_COMPUTE) PlacePacket( - PM4ReleaseMemoryPacket(true, (HSAuint64)addr, 0, true, true)); + PM4ReleaseMemoryPacket(g_TestGPUFamilyId, true, (HSAuint64)addr, 0, true, true)); else WARN() << "Unsupported queue type!" << std::endl; } diff --git a/tests/kfdtest/src/PM4Packet.cpp b/tests/kfdtest/src/PM4Packet.cpp index 71e919d1b7..503e1b799c 100644 --- a/tests/kfdtest/src/PM4Packet.cpp +++ b/tests/kfdtest/src/PM4Packet.cpp @@ -73,41 +73,46 @@ void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) { memcpy(m_pPacketData->data, data, m_ndw * sizeof(uint32_t)); } -PM4ReleaseMemoryPacket::~PM4ReleaseMemoryPacket(void) { - if (m_pPacketData) - free(m_pPacketData); +PM4ReleaseMemoryPacket::PM4ReleaseMemoryPacket(unsigned int familyId, bool isPolling, + uint64_t address, uint64_t data, bool is64bit, bool isTimeStamp):m_pPacketData(NULL) { + + if (familyId < FAMILY_AI) + InitPacketCI(isPolling, address, data, is64bit, isTimeStamp); + else if (familyId < FAMILY_NV) + InitPacketAI(isPolling, address, data, is64bit, isTimeStamp); + else + InitPacketNV(isPolling, address, data, is64bit, isTimeStamp); } -void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, - uint64_t data, bool is64bit, bool isTimeStamp) { - if (g_TestGPUFamilyId < FAMILY_AI) { - PM4_RELEASE_MEM_CI *pkt; +void PM4ReleaseMemoryPacket::InitPacketCI(bool isPolling, uint64_t address, + uint64_t data, bool is64bit, bool isTimeStamp) { + PM4_RELEASE_MEM_CI *pkt; - m_packetSize = sizeof(PM4_RELEASE_MEM_CI); - pkt = reinterpret_cast(calloc(1, m_packetSize)); - m_pPacketData = pkt; - EXPECT_NOTNULL(m_pPacketData); + m_packetSize = sizeof(PM4_RELEASE_MEM_CI); + pkt = reinterpret_cast(calloc(1, m_packetSize)); + m_pPacketData = pkt; + EXPECT_NOTNULL(m_pPacketData); - InitPM4Header(pkt->header, IT_RELEASE_MEM); + InitPM4Header(pkt->header, IT_RELEASE_MEM); - pkt->bitfields2.event_type = 0x14; - pkt->bitfields2.event_index = event_index_mec_release_mem_EVENT_WRITE_EOP_5; + pkt->bitfields2.event_type = 0x14; + pkt->bitfields2.event_index = event_index_mec_release_mem_EVENT_WRITE_EOP_5; // Possible values: // 0101(5): EVENT_WRITE_EOP event types // 0110(6): Reserved for EVENT_WRITE_EOS packet. // 0111(7): Reserved (previously) for EVENT_WRITE packet. - pkt->bitfields2.l2_wb = 1; - pkt->bitfields2.l2_inv = 1; - pkt->bitfields2.cache_policy = cache_policy_mec_release_mem_BYPASS_2; - pkt->bitfields2.atc = is_dgpu() ? + pkt->bitfields2.l2_wb = 1; + pkt->bitfields2.l2_inv = 1; + pkt->bitfields2.cache_policy = cache_policy_mec_release_mem_BYPASS_2; + pkt->bitfields2.atc = is_dgpu() ? atc_mec_release_mem_ci_NOT_USE_ATC_0 : atc_mec_release_mem_ci_USE_ATC_1; // ATC setting for fences and timestamps to the MC or TCL2. - pkt->bitfields3.dst_sel = dst_sel_mec_release_mem_MEMORY_CONTROLLER_0; + pkt->bitfields3.dst_sel = dst_sel_mec_release_mem_MEMORY_CONTROLLER_0; // Possible values: // 0 - memory_controller. // 1 - tc_l2. - if (address) { - pkt->bitfields3.int_sel = (isPolling ? + if (address) { + pkt->bitfields3.int_sel = (isPolling ? int_sel_mec_release_mem_SEND_DATA_AFTER_WRITE_CONFIRM_3 : int_sel_mec_release_mem_SEND_INTERRUPT_AFTER_WRITE_CONFIRM_2); // Possible values: @@ -116,10 +121,10 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, // 2 - Send Interrupt when Write Confirm (WC) is received from the MC. // 3 - Wait for WC, but dont send interrupt (applicable to 7.3+) [g73_1] // 4 - Reserved for INTERRUPT packet - if (isTimeStamp && is64bit) - pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3; - else - pkt->bitfields3.data_sel = is64bit ? + if (isTimeStamp && is64bit) + pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3; + else + pkt->bitfields3.data_sel = is64bit ? data_sel_mec_release_mem_SEND_64_BIT_DATA_2 : data_sel_mec_release_mem_SEND_32_BIT_LOW_1; // Possible values: @@ -131,62 +136,108 @@ void PM4ReleaseMemoryPacket::InitPacket(bool isPolling, uint64_t address, // 5 - Store GDS Data to memory. // 6 - Reserved for use by the CP for Signal Semaphore. // 7 - Reserved for use by the CP for Wait Semaphore. - } else { - pkt->bitfields3.int_sel = (isPolling ? + } else { + pkt->bitfields3.int_sel = (isPolling ? int_sel_mec_release_mem_NONE_0 : int_sel_mec_release_mem_SEND_INTERRUPT_ONLY_1); - pkt->bitfields3.data_sel = data_sel_mec_release_mem_NONE_0; - } + pkt->bitfields3.data_sel = data_sel_mec_release_mem_NONE_0; + } - pkt->bitfields4a.address_lo_dword_aligned = static_cast((address&0xffffffff) >> 2); - pkt->addr_hi = static_cast(address>>32); + pkt->bitfields4a.address_lo_dword_aligned = static_cast((address&0xffffffff) >> 2); + pkt->addr_hi = static_cast(address>>32); - pkt->data_lo = static_cast(data); - pkt->data_hi = static_cast(data >> 32); - } else { - PM4MEC_RELEASE_MEM_AI *pkt; + pkt->data_lo = static_cast(data); + pkt->data_hi = static_cast(data >> 32); +} +void PM4ReleaseMemoryPacket::InitPacketAI(bool isPolling, uint64_t address, + uint64_t data, bool is64bit, bool isTimeStamp) { + PM4MEC_RELEASE_MEM_AI *pkt; - m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI); - pkt = reinterpret_cast(calloc(1, m_packetSize)); - m_pPacketData = pkt; - EXPECT_NOTNULL(m_pPacketData); + m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI); + pkt = reinterpret_cast(calloc(1, m_packetSize)); + m_pPacketData = pkt; + EXPECT_NOTNULL(m_pPacketData); - InitPM4Header(pkt->header, IT_RELEASE_MEM); + InitPM4Header(pkt->header, IT_RELEASE_MEM); - pkt->bitfields2.event_type = 0x14; - pkt->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; - pkt->bitfields2.tc_wb_action_ena = 1; - pkt->bitfields2.tc_action_ena = 1; - pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + pkt->bitfields2.event_type = 0x14; + pkt->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + pkt->bitfields2.tc_wb_action_ena = 1; + pkt->bitfields2.tc_action_ena = 1; + pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; - pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller; + pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller; - if (address) { - pkt->bitfields3.int_sel = (isPolling ? + if (address) { + pkt->bitfields3.int_sel = (isPolling ? int_sel__mec_release_mem__send_data_after_write_confirm: int_sel__mec_release_mem__send_interrupt_after_write_confirm); - if (isTimeStamp && is64bit) - pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter; - else - pkt->bitfields3.data_sel = is64bit ? + if (isTimeStamp && is64bit) + pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter; + else + pkt->bitfields3.data_sel = is64bit ? data_sel__mec_release_mem__send_64_bit_data : data_sel__mec_release_mem__send_32_bit_low; - } else { - pkt->bitfields3.int_sel = (isPolling ? + } else { + pkt->bitfields3.int_sel = (isPolling ? int_sel__mec_release_mem__none: int_sel__mec_release_mem__send_interrupt_only); - pkt->bitfields3.data_sel = data_sel__mec_release_mem__none; - } - - pkt->bitfields4a.address_lo_32b = static_cast((address&0xffffffff) >> 2); - pkt->address_hi = static_cast(address>>32); - - pkt->data_lo = static_cast(data); - pkt->data_hi = static_cast(data >> 32); - - pkt->int_ctxid = static_cast(data); + pkt->bitfields3.data_sel = data_sel__mec_release_mem__none; } + + pkt->bitfields4a.address_lo_32b = static_cast((address&0xffffffff) >> 2); + pkt->address_hi = static_cast(address>>32); + + pkt->data_lo = static_cast(data); + pkt->data_hi = static_cast(data >> 32); + + pkt->int_ctxid = static_cast(data); +} + +void PM4ReleaseMemoryPacket::InitPacketNV(bool isPolling, uint64_t address, + uint64_t data, bool is64bit, bool isTimeStamp) { + PM4MEC_RELEASE_MEM_NV *pkt; + + m_packetSize = sizeof(PM4_MEC_RELEASE_MEM_NV); + pkt = reinterpret_cast(calloc(1, m_packetSize)); + m_pPacketData = pkt; + EXPECT_NOTNULL(m_pPacketData); + + InitPM4Header(pkt->header, IT_RELEASE_MEM); + + pkt->bitfields2.event_type = 0x14; + pkt->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + pkt->bitfields2.gcr_cntl = (1<<10) | (1<<9) | (1<<8) | (1<<3) | (1<<2); + pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + + pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller; + + if (address) { + pkt->bitfields3.int_sel = (isPolling ? + int_sel__mec_release_mem__send_data_after_write_confirm: + int_sel__mec_release_mem__send_interrupt_after_write_confirm); + + if (isTimeStamp && is64bit) + pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter; + else + pkt->bitfields3.data_sel = is64bit ? + data_sel__mec_release_mem__send_64_bit_data : + data_sel__mec_release_mem__send_32_bit_low; + } else { + pkt->bitfields3.int_sel = (isPolling ? + int_sel__mec_release_mem__none: + int_sel__mec_release_mem__send_interrupt_only); + pkt->bitfields3.data_sel = data_sel__mec_release_mem__none; + } + + pkt->bitfields4a.address_lo_32b = static_cast((address&0xffffffff) >> 2); + pkt->address_hi = static_cast(address>>32); + + pkt->data_lo = static_cast(data); + pkt->data_hi = static_cast(data >> 32); + + pkt->int_ctxid = static_cast(data); } PM4IndirectBufPacket::PM4IndirectBufPacket(IndirectBuffer *pIb) { @@ -211,22 +262,48 @@ void PM4IndirectBufPacket::InitPacket(IndirectBuffer *pIb) { m_packetData.bitfields4.vmid = 0; // in iommutest: vmid = queueParams.VMID; m_packetData.bitfields4.cache_policy = cache_policy_indirect_buffer_BYPASS_2; } +PM4AcquireMemoryPacket::PM4AcquireMemoryPacket(unsigned int familyId):m_pPacketData(NULL) +{ -PM4AcquireMemoryPacket::PM4AcquireMemoryPacket(void) { - memset(&m_packetData, 0, SizeInBytes()); - InitPM4Header(m_packetData.header, IT_ACQUIRE_MEM); - - m_packetData.bitfields2.coher_cntl = 0x28c00000; // copied from the way the HSART does this. - m_packetData.bitfields2.engine = engine_acquire_mem_PFP_0; - m_packetData.coher_size = 0xFFFFFFFF; - m_packetData.bitfields3.coher_size_hi = 0; - m_packetData.coher_base_lo = 0; - m_packetData.bitfields4.coher_base_hi = 0; - m_packetData.bitfields5.poll_interval = 4; // copied from the way the HSART does this. + if (familyId < FAMILY_NV) + InitPacketAI(); + else + InitPacketNV(); } -unsigned int PM4AcquireMemoryPacket::SizeInBytes() const { - return sizeof(PM4ACQUIRE_MEM); +void PM4AcquireMemoryPacket::InitPacketAI(void) { + + PM4ACQUIRE_MEM *pkt; + m_packetSize = sizeof(PM4ACQUIRE_MEM); + pkt = reinterpret_cast(calloc(1, m_packetSize)); + m_pPacketData = pkt; + EXPECT_NOTNULL(m_pPacketData); + InitPM4Header(pkt->header, IT_ACQUIRE_MEM); + pkt->bitfields2.coher_cntl = 0x28c00000; // copied from the way the HSART does this. + pkt->bitfields2.engine = engine_acquire_mem_PFP_0; + pkt->coher_size = 0xFFFFFFFF; + pkt->bitfields3.coher_size_hi = 0; + pkt->coher_base_lo = 0; + pkt->bitfields4.coher_base_hi = 0; + pkt->bitfields5.poll_interval = 4; // copied from the way the HSART does this. +} +void PM4AcquireMemoryPacket::InitPacketNV(void) { + PM4ACQUIRE_MEM_NV *pkt; + m_packetSize = sizeof(PM4ACQUIRE_MEM_NV); + pkt = reinterpret_cast(calloc(1, m_packetSize)); + m_pPacketData = pkt; + EXPECT_NOTNULL(m_pPacketData); + InitPM4Header(pkt->header, IT_ACQUIRE_MEM); + pkt->coher_size = 0xFFFFFFFF; + pkt->bitfields3.coher_size_hi = 0; + pkt->coher_base_lo = 0; + pkt->bitfields4.coher_base_hi = 0; + pkt->bitfields5.poll_interval = 4; //copied from the way the HSART does this. + /* Invalidate gL2, gL1 with range base + * Invalidate GLV, GLK (L0$) + * Invalidate all Icache (GLI) + */ + pkt->bitfields6.gcr_cntl = (1<<14|1<<9|1<<8|1<<7|1); } PM4SetShaderRegPacket::PM4SetShaderRegPacket(void) diff --git a/tests/kfdtest/src/PM4Packet.hpp b/tests/kfdtest/src/PM4Packet.hpp index 8f2f5753b5..8624db0dd4 100644 --- a/tests/kfdtest/src/PM4Packet.hpp +++ b/tests/kfdtest/src/PM4Packet.hpp @@ -29,6 +29,7 @@ #include "pm4_pkt_struct_common.h" #include "pm4_pkt_struct_ci.h" #include "pm4_pkt_struct_ai.h" +#include "pm4_pkt_struct_nv.h" #include "IndirectBuffer.hpp" // @class PM4Packet: Marks a group of all PM4 packets @@ -87,21 +88,24 @@ class PM4ReleaseMemoryPacket : public PM4Packet { // Empty constructor, before using the packet call the init func PM4ReleaseMemoryPacket(void): m_pPacketData(NULL) {} // This contructor will also init the packet, no need for additional calls - PM4ReleaseMemoryPacket(bool isPolling, uint64_t address, uint64_t data, - bool is64bit = false, bool isTimeStamp = false): m_pPacketData(NULL) { - InitPacket(isPolling, address, data, is64bit, isTimeStamp); - } + PM4ReleaseMemoryPacket(unsigned int familyId, bool isPolling, uint64_t address, uint64_t data, + bool is64bit = false, bool isTimeStamp = false); - virtual ~PM4ReleaseMemoryPacket(void); + virtual ~PM4ReleaseMemoryPacket(void) {if (m_pPacketData)free(m_pPacketData);} // @returns Packet size in bytes virtual unsigned int SizeInBytes() const { return m_packetSize; } // @returns Pointer to the packet virtual const void *GetPacket() const { return m_pPacketData; } // @brief Initialise the packet - void InitPacket(bool isPolling, uint64_t address, uint64_t data, - bool is64bit = false, bool isTimeStamp = false); private: + void InitPacketCI(bool isPolling, uint64_t address, uint64_t data, + bool is64bit = false, bool isTimeStamp = false); + void InitPacketAI(bool isPolling, uint64_t address, uint64_t data, + bool is64bit = false, bool isTimeStamp = false); + void InitPacketNV(bool isPolling, uint64_t address, uint64_t data, + bool is64bit = false, bool isTimeStamp = false); + void *m_pPacketData; unsigned int m_packetSize; }; @@ -130,17 +134,19 @@ class PM4IndirectBufPacket : public PM4Packet { // @class PM4AcquireMemoryPacket class PM4AcquireMemoryPacket : public PM4Packet { public: - PM4AcquireMemoryPacket(void); - virtual ~PM4AcquireMemoryPacket(void) {} + PM4AcquireMemoryPacket(unsigned int familyId); + virtual ~PM4AcquireMemoryPacket(void) {if (m_pPacketData)free(m_pPacketData);} // @returns the packet size in bytes - virtual unsigned int SizeInBytes() const; - // @returns a pointer to the packet - virtual const void *GetPacket() const { return &m_packetData; } + virtual unsigned int SizeInBytes() const { return m_packetSize; } + // @returns Pointer to the packet + virtual const void *GetPacket() const { return m_pPacketData; } private: - // PM4ACQUIRE_MEM struct contains all the packet's data - PM4ACQUIRE_MEM m_packetData; + void InitPacketAI(void); + void InitPacketNV(void); + void *m_pPacketData; + unsigned int m_packetSize; }; // @class PM4SetShaderRegPacket Packet that writes to consecutive registers starting at baseOffset. diff --git a/tests/kfdtest/src/PM4Queue.cpp b/tests/kfdtest/src/PM4Queue.cpp index 548e35986c..6613ec8909 100644 --- a/tests/kfdtest/src/PM4Queue.cpp +++ b/tests/kfdtest/src/PM4Queue.cpp @@ -73,7 +73,7 @@ void PM4Queue::SubmitPacket() { void PM4Queue::Wait4PacketConsumption(HsaEvent *event, unsigned int timeOut) { if (event) { - PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(0, + PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(g_TestGPUFamilyId, 0, event->EventData.HWData2, event->EventId, true));