Files
rocm-systems/tests/kfdtest/src/PM4Packet.cpp
T
shaoyunl b4e834ab61 KFDTest: remove the usage global g_TestGPUFamilyId
Adjust the KFDTest for multi-GPU support

Change-Id: Ib3491e3f645d35fdba6ab702d65fcc86f48d3958
Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
2019-07-19 13:26:49 -04:00

390 rader
16 KiB
C++

/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "PM4Packet.hpp"
#include "hsakmttypes.h"
#include "KFDBaseComponentTest.hpp"
#include "asic_reg/gfx_7_2_enum.h"
/**
 * Computes the value stored in the 'count' field of a PM4 type-3 header:
 * the packet size in dwords, minus the dwords taken by the header itself,
 * minus one.
 */
unsigned int PM4Packet::CalcCountValue() const {
    const size_t headerDwords = sizeof(PM4_TYPE_3_HEADER) / sizeof(uint32_t);
    const size_t payloadDwords = SizeInDWords() - headerDwords;

    return payloadDwords - 1;
}
/**
 * Fills in a PM4 type-3 header for this packet.
 *
 * @param header  header structure to populate
 * @param opCode  opcode written into the header
 */
void PM4Packet::InitPM4Header(PM4_TYPE_3_HEADER &header, it_opcode_type opCode) {
    // Fields that get the same value for every packet.
    header.reserved1 = 0;
    header.predicate = 0;
    header.shaderType = 1;  // compute
    header.type = PM4_TYPE_3;

    // Fields that depend on the concrete packet.
    header.opcode = opCode;
    header.count = CalcCountValue();
}
/** Releases the packet buffer allocated by InitPacket(), if any. */
PM4WriteDataPacket::~PM4WriteDataPacket(void) {
    // free() is a no-op on a NULL pointer, so no explicit check is needed.
    free(m_pPacketData);
}
/**
 * Returns the packet size in bytes: everything in PM4WRITE_DATA_CI up to the
 * 'data' member, followed by m_ndw dwords of payload.
 */
unsigned int PM4WriteDataPacket::SizeInBytes() const {
    const size_t fixedBytes = offsetof(PM4WRITE_DATA_CI, data);
    const size_t payloadBytes = m_ndw * sizeof(uint32_t);

    return fixedBytes + payloadBytes;
}
/**
 * Allocates the WRITE_DATA packet buffer and fills it in.
 *
 * @param destBuf  destination address; split into dst_addr_lo / dst_address_hi
 * @param data     source of the m_ndw dwords copied into the packet payload
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
    m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(calloc(1, SizeInBytes()));

    // Verify that the memory is allocated successfully, cannot use assert here
    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (m_pPacketData == NULL)
        return;

    InitPM4Header(m_pPacketData->header, IT_WRITE_DATA);

    m_pPacketData->bitfields2.dst_sel = dst_sel_mec_write_data_MEMORY_5;  // memory-async
    m_pPacketData->bitfields2.addr_incr = addr_incr_mec_write_data_INCREMENT_ADDR_0;  // increment addr
    m_pPacketData->bitfields2.wr_confirm = wr_confirm_mec_write_data_WAIT_FOR_CONFIRMATION_1;
    // ATC is only used when not running on a dGPU.
    m_pPacketData->bitfields2.atc = is_dgpu() ?
        atc_write_data_NOT_USE_ATC_0 : atc_write_data_USE_ATC_1;
    m_pPacketData->bitfields2.cache_policy = cache_policy_mec_write_data_BYPASS_2;

    m_pPacketData->dst_addr_lo = static_cast<uint32_t>(
        reinterpret_cast<uint64_t>(destBuf));  // byte addr
    m_pPacketData->dst_address_hi = static_cast<uint32_t>(
        reinterpret_cast<uint64_t>(destBuf) >> 32);

    memcpy(m_pPacketData->data, data, m_ndw * sizeof(uint32_t));
}
/**
 * Builds a RELEASE_MEM packet, choosing the packet layout from the family id:
 * below FAMILY_AI -> CI layout, below FAMILY_NV -> AI layout, otherwise NV layout.
 * The remaining arguments are forwarded unchanged to the selected initializer.
 */
PM4ReleaseMemoryPacket::PM4ReleaseMemoryPacket(unsigned int familyId, bool isPolling,
        uint64_t address, uint64_t data, bool is64bit, bool isTimeStamp)
    : m_pPacketData(NULL) {
    m_FamilyId = familyId;

    if (familyId < FAMILY_AI) {
        InitPacketCI(isPolling, address, data, is64bit, isTimeStamp);
        return;
    }

    if (familyId < FAMILY_NV) {
        InitPacketAI(isPolling, address, data, is64bit, isTimeStamp);
        return;
    }

    InitPacketNV(isPolling, address, data, is64bit, isTimeStamp);
}
/**
 * Allocates and fills a RELEASE_MEM packet using the PM4_RELEASE_MEM_CI layout.
 *
 * @param isPolling    true: no interrupt is requested; false: an interrupt is requested
 * @param address      destination address; when 0 no data is written (data_sel NONE)
 * @param data         value stored in data_lo / data_hi
 * @param is64bit      selects a 64-bit data write instead of the low 32 bits only
 * @param isTimeStamp  together with is64bit, selects the GPU clock counter as data
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4ReleaseMemoryPacket::InitPacketCI(bool isPolling, uint64_t address,
        uint64_t data, bool is64bit, bool isTimeStamp) {
    PM4_RELEASE_MEM_CI *pkt;

    m_packetSize = sizeof(PM4_RELEASE_MEM_CI);
    pkt = reinterpret_cast<PM4_RELEASE_MEM_CI *>(calloc(1, m_packetSize));
    m_pPacketData = pkt;

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (pkt == NULL)
        return;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type = 0x14;
    pkt->bitfields2.event_index = event_index_mec_release_mem_EVENT_WRITE_EOP_5;
    // Possible values:
    // 0101(5): EVENT_WRITE_EOP event types
    // 0110(6): Reserved for EVENT_WRITE_EOS packet.
    // 0111(7): Reserved (previously) for EVENT_WRITE packet.
    pkt->bitfields2.l2_wb = 1;
    pkt->bitfields2.l2_inv = 1;
    pkt->bitfields2.cache_policy = cache_policy_mec_release_mem_BYPASS_2;
    pkt->bitfields2.atc = is_dgpu() ?
        atc_mec_release_mem_ci_NOT_USE_ATC_0 :
        atc_mec_release_mem_ci_USE_ATC_1;  // ATC setting for fences and timestamps to the MC or TCL2.

    pkt->bitfields3.dst_sel = dst_sel_mec_release_mem_MEMORY_CONTROLLER_0;
    // Possible values:
    // 0 - memory_controller.
    // 1 - tc_l2.

    if (address) {
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel_mec_release_mem_SEND_DATA_AFTER_WRITE_CONFIRM_3 :
            int_sel_mec_release_mem_SEND_INTERRUPT_AFTER_WRITE_CONFIRM_2);
        // Possible values:
        // 0 - None (Do not send an interrupt).
        // 1 - Send Interrupt Only. Program DATA_SEL 0.
        // 2 - Send Interrupt when Write Confirm (WC) is received from the MC.
        // 3 - Wait for WC, but don't send interrupt (applicable to 7.3+) [g73_1]
        // 4 - Reserved for INTERRUPT packet

        if (isTimeStamp && is64bit)
            pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3;
        else
            pkt->bitfields3.data_sel = is64bit ?
                data_sel_mec_release_mem_SEND_64_BIT_DATA_2 :
                data_sel_mec_release_mem_SEND_32_BIT_LOW_1;
        // Possible values:
        // 0 - None, i.e., Discard Data.
        // 1 - Send 32-bit Data Low (Discard Data High).
        // 2 - Send 64-bit Data.
        // 3 - Send current value of the 64 bit global GPU clock counter.
        // 4 - Send current value of the 64 bit system clock counter.
        // 5 - Store GDS Data to memory.
        // 6 - Reserved for use by the CP for Signal Semaphore.
        // 7 - Reserved for use by the CP for Wait Semaphore.
    } else {
        // No destination address: nothing is written, only the interrupt choice remains.
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel_mec_release_mem_NONE_0 :
            int_sel_mec_release_mem_SEND_INTERRUPT_ONLY_1);
        pkt->bitfields3.data_sel = data_sel_mec_release_mem_NONE_0;
    }

    // The low address is stored as a dword index, hence the shift by 2.
    pkt->bitfields4a.address_lo_dword_aligned = static_cast<uint32_t>((address&0xffffffff) >> 2);
    pkt->addr_hi = static_cast<uint32_t>(address>>32);

    pkt->data_lo = static_cast<uint32_t>(data);
    pkt->data_hi = static_cast<uint32_t>(data >> 32);
}
/**
 * Allocates and fills a RELEASE_MEM packet using the PM4MEC_RELEASE_MEM_AI layout.
 *
 * @param isPolling    true: no interrupt is requested; false: an interrupt is requested
 * @param address      destination address; when 0 no data is written (data_sel none)
 * @param data         value stored in data_lo / data_hi; its low 32 bits are also
 *                     written to int_ctxid
 * @param is64bit      selects a 64-bit data write instead of the low 32 bits only
 * @param isTimeStamp  together with is64bit, selects the GPU clock counter as data
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4ReleaseMemoryPacket::InitPacketAI(bool isPolling, uint64_t address,
        uint64_t data, bool is64bit, bool isTimeStamp) {
    PM4MEC_RELEASE_MEM_AI *pkt;

    m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI);
    pkt = reinterpret_cast<PM4MEC_RELEASE_MEM_AI *>(calloc(1, m_packetSize));
    m_pPacketData = pkt;

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (pkt == NULL)
        return;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type = 0x14;
    pkt->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
    pkt->bitfields2.tc_wb_action_ena = 1;
    pkt->bitfields2.tc_action_ena = 1;
    pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;

    pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller;

    if (address) {
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel__mec_release_mem__send_data_after_write_confirm:
            int_sel__mec_release_mem__send_interrupt_after_write_confirm);

        if (isTimeStamp && is64bit)
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter;
        else
            pkt->bitfields3.data_sel = is64bit ?
                data_sel__mec_release_mem__send_64_bit_data :
                data_sel__mec_release_mem__send_32_bit_low;
    } else {
        // No destination address: nothing is written, only the interrupt choice remains.
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel__mec_release_mem__none:
            int_sel__mec_release_mem__send_interrupt_only);
        pkt->bitfields3.data_sel = data_sel__mec_release_mem__none;
    }

    // The low 32 address bits are stored shifted right by 2.
    pkt->bitfields4a.address_lo_32b = static_cast<uint32_t>((address&0xffffffff) >> 2);
    pkt->address_hi = static_cast<uint32_t>(address>>32);

    pkt->data_lo = static_cast<uint32_t>(data);
    pkt->data_hi = static_cast<uint32_t>(data >> 32);

    pkt->int_ctxid = static_cast<uint32_t>(data);
}
/**
 * Allocates and fills a RELEASE_MEM packet using the PM4_MEC_RELEASE_MEM_NV layout.
 *
 * @param isPolling    true: no interrupt is requested; false: an interrupt is requested
 * @param address      destination address; when 0 no data is written (data_sel none)
 * @param data         value stored in data_lo / data_hi; its low 32 bits are also
 *                     written to int_ctxid
 * @param is64bit      selects a 64-bit data write instead of the low 32 bits only
 * @param isTimeStamp  together with is64bit, selects the GPU clock counter as data
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4ReleaseMemoryPacket::InitPacketNV(bool isPolling, uint64_t address,
        uint64_t data, bool is64bit, bool isTimeStamp) {
    // Same type name as used for the sizeof and the cast below.
    PM4_MEC_RELEASE_MEM_NV *pkt;

    m_packetSize = sizeof(PM4_MEC_RELEASE_MEM_NV);
    pkt = reinterpret_cast<PM4_MEC_RELEASE_MEM_NV *>(calloc(1, m_packetSize));
    m_pPacketData = pkt;

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (pkt == NULL)
        return;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type = 0x14;
    pkt->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
    // GCR control bits 2, 3, 8, 9 and 10 are set.
    pkt->bitfields2.gcr_cntl = (1<<10) | (1<<9) | (1<<8) | (1<<3) | (1<<2);
    pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;

    pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller;

    if (address) {
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel__mec_release_mem__send_data_after_write_confirm:
            int_sel__mec_release_mem__send_interrupt_after_write_confirm);

        if (isTimeStamp && is64bit)
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter;
        else
            pkt->bitfields3.data_sel = is64bit ?
                data_sel__mec_release_mem__send_64_bit_data :
                data_sel__mec_release_mem__send_32_bit_low;
    } else {
        // No destination address: nothing is written, only the interrupt choice remains.
        pkt->bitfields3.int_sel = (isPolling ?
            int_sel__mec_release_mem__none:
            int_sel__mec_release_mem__send_interrupt_only);
        pkt->bitfields3.data_sel = data_sel__mec_release_mem__none;
    }

    // The low 32 address bits are stored shifted right by 2.
    pkt->bitfields4a.address_lo_32b = static_cast<uint32_t>((address&0xffffffff) >> 2);
    pkt->address_hi = static_cast<uint32_t>(address>>32);

    pkt->data_lo = static_cast<uint32_t>(data);
    pkt->data_hi = static_cast<uint32_t>(data >> 32);

    pkt->int_ctxid = static_cast<uint32_t>(data);
}
/** Builds an INDIRECT_BUFFER packet describing the given indirect buffer. */
PM4IndirectBufPacket::PM4IndirectBufPacket(IndirectBuffer *pIb) {
    // All of the work is delegated to InitPacket().
    this->InitPacket(pIb);
}
/** Returns the size in bytes of the fixed-size PM4MEC_INDIRECT_BUFFER packet. */
unsigned int PM4IndirectBufPacket::SizeInBytes() const {
    return static_cast<unsigned int>(sizeof(PM4MEC_INDIRECT_BUFFER));
}
/**
 * Zeroes the packet and fills it with the address and size of the indirect
 * buffer.
 *
 * @param pIb  indirect buffer whose Addr() and SizeInDWord() are encoded
 */
void PM4IndirectBufPacket::InitPacket(IndirectBuffer *pIb) {
    memset(&m_packetData, 0, SizeInBytes());
    InitPM4Header(m_packetData.header, IT_INDIRECT_BUFFER);

    // Low part: the low 32 address bits, shifted right by 2.
    const HSAuint32 ibAddrLow32 =
        static_cast<HSAuint32>(reinterpret_cast<HSAuint64>(pIb->Addr()));
    m_packetData.bitfields2.ib_base_lo = ibAddrLow32 >> 2;

    // High part: address bits 32 and above.
    const HSAuint64 ibAddrFull = reinterpret_cast<HSAuint64>(pIb->Addr());
    m_packetData.bitfields3.ib_base_hi = ibAddrFull >> 32;

    m_packetData.bitfields4.ib_size = pIb->SizeInDWord();

    // Fixed control fields.
    m_packetData.bitfields4.cache_policy = cache_policy_indirect_buffer_BYPASS_2;
    m_packetData.bitfields4.vmid = 0;  // in iommutest: vmid = queueParams.VMID;
    m_packetData.bitfields4.valid = 1;
    m_packetData.bitfields4.volatile_setting = 0;
    m_packetData.bitfields4.offload_polling = 0;
    m_packetData.bitfields4.chain = 0;
}
/**
 * Builds an ACQUIRE_MEM packet, choosing the packet layout from the family id:
 * below FAMILY_NV -> InitPacketAI(), otherwise InitPacketNV().
 */
PM4AcquireMemoryPacket::PM4AcquireMemoryPacket(unsigned int familyId)
    : m_pPacketData(NULL) {
    m_FamilyId = familyId;

    if (familyId < FAMILY_NV) {
        InitPacketAI();
        return;
    }

    InitPacketNV();
}
/**
 * Allocates and fills an ACQUIRE_MEM packet using the PM4ACQUIRE_MEM layout.
 * The coherency range is programmed with base 0 and size 0xFFFFFFFF.
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4AcquireMemoryPacket::InitPacketAI(void) {
    PM4ACQUIRE_MEM *pkt;

    m_packetSize = sizeof(PM4ACQUIRE_MEM);
    pkt = reinterpret_cast<PM4ACQUIRE_MEM*>(calloc(1, m_packetSize));
    m_pPacketData = pkt;

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (pkt == NULL)
        return;

    InitPM4Header(pkt->header, IT_ACQUIRE_MEM);

    pkt->bitfields2.coher_cntl = 0x28c00000;  // copied from the way the HSART does this.
    pkt->bitfields2.engine = engine_acquire_mem_PFP_0;

    pkt->coher_size = 0xFFFFFFFF;
    pkt->bitfields3.coher_size_hi = 0;
    pkt->coher_base_lo = 0;
    pkt->bitfields4.coher_base_hi = 0;

    pkt->bitfields5.poll_interval = 4;  // copied from the way the HSART does this.
}
/**
 * Allocates and fills an ACQUIRE_MEM packet using the PM4ACQUIRE_MEM_NV layout.
 * The coherency range is programmed with base 0 and size 0xFFFFFFFF, and the
 * cache actions are requested through gcr_cntl.
 *
 * If the allocation fails the failure is reported through EXPECT_NOTNULL and
 * the function returns with m_pPacketData left NULL.
 */
void PM4AcquireMemoryPacket::InitPacketNV(void) {
    PM4ACQUIRE_MEM_NV *pkt;

    m_packetSize = sizeof(PM4ACQUIRE_MEM_NV);
    pkt = reinterpret_cast<PM4ACQUIRE_MEM_NV*>(calloc(1, m_packetSize));
    m_pPacketData = pkt;

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of dereferencing a NULL buffer below.
    if (pkt == NULL)
        return;

    InitPM4Header(pkt->header, IT_ACQUIRE_MEM);

    pkt->coher_size = 0xFFFFFFFF;
    pkt->bitfields3.coher_size_hi = 0;
    pkt->coher_base_lo = 0;
    pkt->bitfields4.coher_base_hi = 0;

    pkt->bitfields5.poll_interval = 4;  // copied from the way the HSART does this.

    /* Invalidate gL2, gL1 with range base
     * Invalidate GLV, GLK (L0$)
     * Invalidate all Icache (GLI)
     */
    pkt->bitfields6.gcr_cntl = (1<<14|1<<9|1<<8|1<<7|1);
}
/**
 * Creates an empty packet object. No buffer is allocated here; the
 * 'allocated' flag starts out false so the destructor has nothing to free.
 */
PM4SetShaderRegPacket::PM4SetShaderRegPacket()
    : m_packetDataAllocated(false) {}
/**
 * Creates a SET_SH_REG packet and initializes it immediately.
 *
 * @param baseOffset  register offset passed on to InitPacket()
 * @param regValues   register values passed on to InitPacket()
 * @param numRegs     number of entries in regValues
 */
PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset,
        const unsigned int regValues[], unsigned int numRegs)
    : m_packetDataAllocated(false) {
    this->InitPacket(baseOffset, regValues, numRegs);
}
/** Frees the packet buffer, but only if InitPacket() marked it as allocated. */
PM4SetShaderRegPacket::~PM4SetShaderRegPacket(void) {
    if (!m_packetDataAllocated) {
        return;
    }

    free(m_pPacketData);
}
/**
 * Allocates and fills a SET_SH_REG packet carrying numRegs register values.
 *
 * @param baseOffset  register offset; stored relative to PERSISTENT_SPACE_START
 * @param regValues   array of numRegs register values copied into the packet
 * @param numRegs     number of register values
 *
 * A buffer left over from an earlier InitPacket() call is released first, so
 * re-initializing the object does not leak. If the allocation fails the
 * failure is reported through EXPECT_NOTNULL and the function returns with no
 * buffer owned.
 */
void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[],
        unsigned int numRegs) {
    // Drop the buffer from a previous initialization, if there is one.
    if (m_packetDataAllocated) {
        free(m_pPacketData);
        m_pPacketData = NULL;
        m_packetDataAllocated = false;
    }

    // 1st register is a part of the packet struct.
    m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);

    /* Allocating the size of the packet, since the packet is assembled from a struct
     * followed by an additional dword data. calloc() hands back zeroed memory.
     */
    m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(calloc(1, m_packetSize));

    EXPECT_NOTNULL(m_pPacketData);
    // NOTE(review): EXPECT_* style checks are presumably non-fatal, so bail out
    // explicitly instead of writing through a NULL buffer below.
    if (m_pPacketData == NULL)
        return;

    m_packetDataAllocated = true;

    InitPM4Header(m_pPacketData->header, IT_SET_SH_REG);

    m_pPacketData->bitfields2.reg_offset = baseOffset - PERSISTENT_SPACE_START;

    memcpy(m_pPacketData->reg_data, regValues, numRegs*sizeof(uint32_t));
}
/**
 * Creates a DISPATCH_DIRECT packet and initializes it immediately.
 *
 * @param dimX, dimY, dimZ  values stored in the packet's dim_x / dim_y / dim_z
 * @param dispatchInit      value stored in the packet's dispatch_initiator
 */
PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY,
        unsigned int dimZ, unsigned int dispatchInit) {
    // All of the work is delegated to InitPacket().
    this->InitPacket(dimX, dimY, dimZ, dispatchInit);
}
/**
 * Zeroes the packet, writes the DISPATCH_DIRECT header and stores the
 * dispatch dimensions and initiator value.
 */
void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ,
        unsigned int dispatchInit) {
    // Start from an all-zero packet.
    memset(&m_packetData, 0, SizeInBytes());

    InitPM4Header(m_packetData.header, IT_DISPATCH_DIRECT);

    // Payload: initiator first, then the three dimensions.
    m_packetData.dispatch_initiator = dispatchInit;
    m_packetData.dim_z = dimZ;
    m_packetData.dim_y = dimY;
    m_packetData.dim_x = dimX;
}
/** Returns the size in bytes of the fixed-size PM4DISPATCH_DIRECT packet. */
unsigned int PM4DispatchDirectPacket::SizeInBytes() const {
    return static_cast<unsigned int>(sizeof(PM4DISPATCH_DIRECT));
}
/**
 * Builds an EVENT_WRITE packet carrying a CS_PARTIAL_FLUSH event.
 * Only SizeInBytes() bytes of the packet structure are zeroed and used.
 */
PM4PartialFlushPacket::PM4PartialFlushPacket() {
    memset(&m_packetData, 0, SizeInBytes());

    InitPM4Header(m_packetData.header, IT_EVENT_WRITE);

    // Event selection.
    m_packetData.bitfields2.event_type = CS_PARTIAL_FLUSH;
    m_packetData.bitfields2.event_index = event_index_event_write_CS_VS_PS_PARTIAL_FLUSH_4;
}
/** Returns the packet size in bytes: PM4EVENT_WRITE without its last two dwords. */
unsigned int PM4PartialFlushPacket::SizeInBytes() const {
    // For PARTIAL_FLUSH_CS packets, the last 2 dwords don't exist.
    const size_t absentBytes = 2 * sizeof(uint32_t);

    return sizeof(PM4EVENT_WRITE) - absentBytes;
}
/**
 * Builds a NOP packet. The packet data is passed directly to InitPM4Header(),
 * i.e. the packet consists of the header only.
 */
PM4NopPacket::PM4NopPacket() {
    this->InitPM4Header(m_packetData, IT_NOP);
}
/** Returns the size in bytes of the stored NOP packet data. */
unsigned int PM4NopPacket::SizeInBytes() const {
    return static_cast<unsigned int>(sizeof(m_packetData));
}