Fix queue interception in tools.
1. Correct amd::AqlQueue::ExecutePM4 to support interception.
2. Minor fixes to AqlPacket and SoftCP.
3. Minimal change to disable interception of runtime internal queues.

Change-Id: I103fece2ebf9a188d27f01e61221c737405d7253
This commit is contained in:
@@ -64,15 +64,13 @@ struct AqlPacket {
|
||||
hsa_agent_dispatch_packet_t agent;
|
||||
};
|
||||
|
||||
uint8_t type() {
|
||||
uint8_t type() const {
|
||||
return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
|
||||
((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
|
||||
}
|
||||
|
||||
bool IsValid() {
|
||||
const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE;
|
||||
return (packet_type > HSA_PACKET_TYPE_INVALID &&
|
||||
packet_type <= HSA_PACKET_TYPE_BARRIER_OR);
|
||||
bool IsValid() const {
|
||||
return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID);
|
||||
}
|
||||
|
||||
std::string string() const {
|
||||
|
||||
@@ -775,17 +775,20 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
|
||||
// pm4_ib_buf_ is a shared resource, so mutually exclude here.
|
||||
ScopedAcquire<KernelMutex> lock(&pm4_ib_mutex_);
|
||||
|
||||
// Obtain a queue slot for a single AQL packet.
|
||||
uint64_t write_idx = AddWriteIndexAcqRel(1);
|
||||
// Obtain reference to any container queue.
|
||||
core::Queue* queue = core::Queue::Convert(public_handle());
|
||||
|
||||
while ((write_idx - LoadReadIndexRelaxed()) > public_handle()->size) {
|
||||
// Obtain a queue slot for a single AQL packet.
|
||||
uint64_t write_idx = queue->AddWriteIndexAcqRel(1);
|
||||
|
||||
while ((write_idx - queue->LoadReadIndexRelaxed()) > queue->amd_queue_.hsa_queue.size) {
|
||||
os::YieldThread();
|
||||
}
|
||||
|
||||
uint32_t slot_idx = uint32_t(write_idx % public_handle()->size);
|
||||
uint32_t slot_idx = uint32_t(write_idx % queue->amd_queue_.hsa_queue.size);
|
||||
constexpr uint32_t slot_size_b = 0x40;
|
||||
uint32_t* queue_slot =
|
||||
(uint32_t*)(uintptr_t(public_handle()->base_address) + (slot_idx * slot_size_b));
|
||||
(uint32_t*)(uintptr_t(queue->amd_queue_.hsa_queue.base_address) + (slot_idx * slot_size_b));
|
||||
|
||||
// Copy client PM4 command into IB.
|
||||
assert(cmd_size_b < pm4_ib_size_b_ && "PM4 exceeds IB size");
|
||||
@@ -877,12 +880,13 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
|
||||
atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release);
|
||||
|
||||
// Submit the packet slot.
|
||||
core::Signal* doorbell =
|
||||
core::Signal::Convert(public_handle()->doorbell_signal);
|
||||
core::Signal* doorbell = core::Signal::Convert(queue->amd_queue_.hsa_queue.doorbell_signal);
|
||||
doorbell->StoreRelease(write_idx);
|
||||
|
||||
// Wait for the packet to be consumed.
|
||||
while (LoadReadIndexRelaxed() <= write_idx) {
|
||||
// Should be switched to a signal wait when aql_pm4_ib can be used on all
|
||||
// supported platforms.
|
||||
while (queue->LoadReadIndexRelaxed() <= write_idx) {
|
||||
os::YieldThread();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -512,18 +512,10 @@ hsa_status_t GpuAgent::VisitRegion(
|
||||
}
|
||||
|
||||
core::Queue* GpuAgent::CreateInterceptibleQueue() {
|
||||
// Until tools runtime is merged in we need to use HSA API
|
||||
// rather than GpuAgent::QueueCreate to allow interception.
|
||||
hsa_queue_t* queue_handle;
|
||||
hsa_status_t status =
|
||||
HSA::hsa_queue_create(public_handle(), minAqlSize_, HSA_QUEUE_TYPE_MULTI,
|
||||
NULL, NULL, 0, 0, &queue_handle);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return core::Queue::Convert(queue_handle);
|
||||
// Disabled intercept of internal queues pending tools updates.
|
||||
core::Queue* queue = nullptr;
|
||||
QueueCreate(minAqlSize_, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue);
|
||||
return queue;
|
||||
}
|
||||
|
||||
core::Blit* GpuAgent::CreateBlitSdma() {
|
||||
|
||||
Reference in New Issue
Block a user