PC Sampling: Create dedicated CP queue

Create a dedicated CP queue with the highest priority for PC Sampling. Reduce
the highest priority that LRTs can set through the existing API so that the
PC Sampling queue always has a higher priority than any other CP
queue.

Change-Id: Ia70d74415edc83b4862a3e18dbdbd7cebe73ab47
This commit is contained in:
David Yat Sin
2023-09-05 16:10:41 +00:00
parent a842247482
commit a83f872a23
3 changed files with 22 additions and 7 deletions
+3 -2
View File
@@ -536,8 +536,9 @@ class GpuAgent : public GpuAgentInt {
// @brief AQL queues for cache management and blit compute usage.
enum QueueEnum {
QueueUtility, // Cache management and device to {host,device} blit compute
QueueBlitOnly, // Host to device blit
QueueUtility, // Cache management and device to {host,device} blit compute
QueueBlitOnly, // Host to device blit
QueuePCSampling, // Dedicated high priority queue for PC Sampling
QueueCount
};
@@ -741,18 +741,29 @@ core::Blit* GpuAgent::CreateBlitKernel(core::Queue* queue) {
void GpuAgent::InitDma() {
// Setup lazy init pointers on queues and blits.
auto queue_lambda = [this]() {
auto ret = CreateInterceptibleQueue();
if (ret == nullptr)
auto queue_lambda = [this](HSA_QUEUE_PRIORITY priority = HSA_QUEUE_PRIORITY_NORMAL) {
auto queue = CreateInterceptibleQueue();
if (queue == nullptr)
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
"Internal queue creation failed.");
return ret;
if (priority != HSA_QUEUE_PRIORITY_NORMAL)
if (queue->SetPriority(priority) != HSA_STATUS_SUCCESS)
throw AMD::hsa_exception(HSA_STATUS_ERROR,
"Failed to increase queue priority for PC Sampling");
return queue;
};
// Dedicated compute queue for host-to-device blits.
queues_[QueueBlitOnly].reset(queue_lambda);
// Share utility queue with device-to-host blits.
queues_[QueueUtility].reset(queue_lambda);
// Dedicated compute queue for PC Sampling CP-DMA commands. We need a dedicated queue that runs at
// highest priority because we do not want the CP-DMA commands to be delayed/blocked due to
// other dispatches/barriers that could be in the other AQL queues.
queues_[QueuePCSampling].reset(queue_lambda(HSA_QUEUE_PRIORITY_MAXIMUM));
// Decide which engine to use for blits.
auto blit_lambda = [this](bool use_xgmi, lazy_ptr<core::Queue>& queue, bool isHostToDev) {
Flag::SDMA_OVERRIDE sdma_override = core::Runtime::runtime_singleton_->flag().enable_sdma();
@@ -1072,10 +1072,13 @@ hsa_status_t hsa_amd_queue_set_priority(hsa_queue_t* queue,
core::Queue* cmd_queue = core::Queue::Convert(queue);
IS_VALID(cmd_queue);
// The highest queue priority allowed for an HSA user is HSA_QUEUE_PRIORITY_HIGH.
// HSA_QUEUE_PRIORITY_MAXIMUM is reserved for PC Sampling and can only be allocated internally
// in ROCR.
static std::map<hsa_amd_queue_priority_t, HSA_QUEUE_PRIORITY> ext_kmt_priomap = {
{HSA_AMD_QUEUE_PRIORITY_LOW, HSA_QUEUE_PRIORITY_MINIMUM},
{HSA_AMD_QUEUE_PRIORITY_NORMAL, HSA_QUEUE_PRIORITY_NORMAL},
{HSA_AMD_QUEUE_PRIORITY_HIGH, HSA_QUEUE_PRIORITY_MAXIMUM},
{HSA_AMD_QUEUE_PRIORITY_HIGH, HSA_QUEUE_PRIORITY_HIGH},
};
auto priority_it = ext_kmt_priomap.find(priority);