Fix queue interception in tools.

1. Correct amd::AqlQueue::ExecutePM4 to support interception.
2. Minor fixes to AqlPacket and SoftCP.
3. Minimal change to disable interception of runtime internal queues.

Change-Id: I103fece2ebf9a188d27f01e61221c737405d7253
This commit is contained in:
Sean Keely
2017-07-11 00:28:35 -05:00
parent 29b5b5c029
commit bc0bd00746
3 changed files with 19 additions and 25 deletions
+3 -5
View File
@@ -64,15 +64,13 @@ struct AqlPacket {
hsa_agent_dispatch_packet_t agent;
};
// Returns the packet type field of the AQL packet header: dispatch.header is
// shifted right by HSA_PACKET_HEADER_TYPE and masked down to
// HSA_PACKET_HEADER_WIDTH_TYPE bits.
// NOTE(review): two signature lines follow (non-const, then const). This looks
// like the before/after pair of a rendered diff rather than compilable code;
// presumably only the const form is meant to remain - confirm against the
// committed file.
uint8_t type() {
uint8_t type() const {
return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
}
// Reports whether the header's packet type lies in the accepted range, whose
// upper bound is HSA_PACKET_TYPE_BARRIER_OR and which excludes
// HSA_PACKET_TYPE_INVALID.
// NOTE(review): two implementations are interleaved below (a non-const one and
// a const one), which looks like the before/after pair of a rendered diff;
// presumably only the const form is meant to remain - confirm against the
// committed file.
bool IsValid() {
// First variant: shifts the header but does not mask it to the type field
// width, so any header bits above the shift position that fit in the uint8_t
// also take part in the comparison. Requires a value strictly greater than
// HSA_PACKET_TYPE_INVALID.
const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE;
return (packet_type > HSA_PACKET_TYPE_INVALID &&
packet_type <= HSA_PACKET_TYPE_BARRIER_OR);
// Second variant: relies on type(), which masks the field. The two
// comparisons are joined with bitwise '&', so both are always evaluated. Unlike
// the first variant, a type value below HSA_PACKET_TYPE_INVALID is accepted
// here, since only equality with HSA_PACKET_TYPE_INVALID is rejected.
bool IsValid() const {
return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID);
}
std::string string() const {
@@ -775,17 +775,20 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
// pm4_ib_buf_ is a shared resource, so mutually exclude here.
ScopedAcquire<KernelMutex> lock(&pm4_ib_mutex_);
// Obtain a queue slot for a single AQL packet.
uint64_t write_idx = AddWriteIndexAcqRel(1);
// Obtain reference to any container queue.
core::Queue* queue = core::Queue::Convert(public_handle());
while ((write_idx - LoadReadIndexRelaxed()) > public_handle()->size) {
// Obtain a queue slot for a single AQL packet.
uint64_t write_idx = queue->AddWriteIndexAcqRel(1);
while ((write_idx - queue->LoadReadIndexRelaxed()) > queue->amd_queue_.hsa_queue.size) {
os::YieldThread();
}
uint32_t slot_idx = uint32_t(write_idx % public_handle()->size);
uint32_t slot_idx = uint32_t(write_idx % queue->amd_queue_.hsa_queue.size);
constexpr uint32_t slot_size_b = 0x40;
uint32_t* queue_slot =
(uint32_t*)(uintptr_t(public_handle()->base_address) + (slot_idx * slot_size_b));
(uint32_t*)(uintptr_t(queue->amd_queue_.hsa_queue.base_address) + (slot_idx * slot_size_b));
// Copy client PM4 command into IB.
assert(cmd_size_b < pm4_ib_size_b_ && "PM4 exceeds IB size");
@@ -877,12 +880,13 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) {
atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release);
// Submit the packet slot.
core::Signal* doorbell =
core::Signal::Convert(public_handle()->doorbell_signal);
core::Signal* doorbell = core::Signal::Convert(queue->amd_queue_.hsa_queue.doorbell_signal);
doorbell->StoreRelease(write_idx);
// Wait for the packet to be consumed.
while (LoadReadIndexRelaxed() <= write_idx) {
// Should be switched to a signal wait when aql_pm4_ib can be used on all
// supported platforms.
while (queue->LoadReadIndexRelaxed() <= write_idx) {
os::YieldThread();
}
}
@@ -512,18 +512,10 @@ hsa_status_t GpuAgent::VisitRegion(
}
// Creates a multi-producer queue of minAqlSize_ entries for this agent and
// returns it as a core::Queue*.
// NOTE(review): two bodies are concatenated below, which looks like the
// before/after pair of a rendered diff. The first goes through the public
// HSA::hsa_queue_create entry point; the second calls QueueCreate directly.
// Presumably only the second is meant to remain - confirm against the
// committed file.
core::Queue* GpuAgent::CreateInterceptibleQueue() {
// Until tools runtime is merged in we need to use HSA API
// rather than GpuAgent::QueueCreate to allow interception.
hsa_queue_t* queue_handle;
hsa_status_t status =
HSA::hsa_queue_create(public_handle(), minAqlSize_, HSA_QUEUE_TYPE_MULTI,
NULL, NULL, 0, 0, &queue_handle);
// Any non-success status is reported to the caller as a null queue.
if (status != HSA_STATUS_SUCCESS) {
return NULL;
}
return core::Queue::Convert(queue_handle);
// Disabled intercept of internal queues pending tools updates.
core::Queue* queue = nullptr;
// The result of QueueCreate, if it returns one, is not inspected here; the
// pre-initialised nullptr is presumably what callers see on failure - TODO
// confirm that QueueCreate leaves the out-parameter untouched when it fails.
QueueCreate(minAqlSize_, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue);
return queue;
}
core::Blit* GpuAgent::CreateBlitSdma() {