From ce61e3301bc823e95dbbd653e8f59d570833301f Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Tue, 18 Feb 2025 17:53:58 -0800 Subject: [PATCH] rocr: Add large_bar_enabled var to the GPU agent Adds a bool to the GPU agent and a public member method to check if the GPU supports large BAR. This is needed so we can check if large BAR is supported when a user tries to allocate an AQL queue in device memory on a given GPU agent. Also adds an exception to the AQL queue if device-side AQL queues are requested and the GPU owner of the AQL doesn't support large BAR. Otherwise, ROCr will currently allow device-side queues that can cause faults when the user tries to touch their ring buffers and the user will not know why the faults are occurring. This relies on the fact that the KFD does not expose any links from the CPU to the GPU if large BAR is not enabled (though links from the GPU to the CPU may still be exposed by the KFD). [ROCm/ROCR-Runtime commit: f2c482d9231ed351ab2c5508a7ed5aadb9ab1774] --- .../runtime/hsa-runtime/core/inc/amd_gpu_agent.h | 4 ++++ .../hsa-runtime/core/runtime/amd_aql_queue.cpp | 5 +++++ .../hsa-runtime/core/runtime/amd_gpu_agent.cpp | 11 +++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 6e071719c8..1653823823 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -437,6 +437,8 @@ class GpuAgent : public GpuAgentInt { /// @brief Override from AMD::GpuAgentInt. __forceinline bool is_xgmi_cpu_gpu() const { return xgmi_cpu_gpu_; } + /// @brief Is large BAR support enabled for this GPU. 
+ __forceinline bool LargeBarEnabled() const { return large_bar_enabled_; } const size_t MAX_SCRATCH_APERTURE_PER_XCC = (1ULL << 32); size_t MaxScratchDevice() const { return properties_.NumXcc * MAX_SCRATCH_APERTURE_PER_XCC; } @@ -831,6 +833,8 @@ class GpuAgent : public GpuAgentInt { /// @brief XGMI CPU<->GPU bool xgmi_cpu_gpu_ = false; + /// @brief Is PCIe large BAR enabled. + bool large_bar_enabled_ = false; }; } // namespace amd diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 1be77c6063..95e759ebad 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -738,6 +738,11 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) { assert(IsMultipleOf(ring_buf_alloc_bytes_, 4096) && "Ring buffer sizes must be 4KiB aligned."); if (IsDeviceMemRingBuf()) { + if (!agent_->LargeBarEnabled()) { + throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION, + "Trying to allocate an AQL ring buffer in device memory without " + "large BAR PCIe enabled."); + } ring_buf_ = agent_->coarsegrain_allocator()( ring_buf_alloc_bytes_, core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateUncached); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 63560a14a4..0e90f8c7d0 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -232,10 +232,13 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna #endif - auto& firstCpu = core::Runtime::runtime_singleton_->cpu_agents()[0]; - auto linkInfo = core::Runtime::runtime_singleton_->GetLinkInfo(firstCpu->node_id(), - node_id()); - 
xgmi_cpu_gpu_ = (linkInfo.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI); + auto& first_cpu = core::Runtime::runtime_singleton_->cpu_agents()[0]; + auto link_info = core::Runtime::runtime_singleton_->GetLinkInfo(first_cpu->node_id(), node_id()); + xgmi_cpu_gpu_ = (link_info.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI); + + if (link_info.num_hop >= 1) { + large_bar_enabled_ = true; + } // Populate region list. InitRegionList();