rocr: Add large_bar_enabled var to the GPU agent
Adds a bool to the GPU agent and a public member method to
check if the GPU supports large BAR. This is needed so we can
check if large BAR is supported when a user tries to allocate
an AQL queue in device memory on a given GPU agent.
Also adds an exception to the AQL queue if device-side AQL queues
are requested and the GPU owner of the AQL doesn't support large
BAR. Otherwise, ROCr will currently allow device-side queues
that can cause faults when the user tries to touch their ring
buffers and the user will not know why the faults are occurring.
This relies on the fact that the KFD does not expose any links
from the CPU to the GPU if large BAR is not enabled (though
links from the GPU to the CPU may still be exposed by the KFD).
[ROCm/ROCR-Runtime commit: f2c482d923]
Этот коммит содержится в:
коммит произвёл
Yat Sin, David
родитель
6f37386eb2
Коммит
ce61e3301b
@@ -437,6 +437,8 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
/// @brief Override from AMD::GpuAgentInt.
|
||||
/// @return The cached xgmi_cpu_gpu_ flag (false unless set elsewhere in the class).
__forceinline bool is_xgmi_cpu_gpu() const { return xgmi_cpu_gpu_; }
|
||||
/// @brief Is large BAR support enabled for this GPU.
|
||||
/// @return The cached large_bar_enabled_ flag; does not query the driver on each call.
__forceinline bool LargeBarEnabled() const { return large_bar_enabled_; }
|
||||
|
||||
const size_t MAX_SCRATCH_APERTURE_PER_XCC = (1ULL << 32);
|
||||
/// @brief Device-wide scratch limit: properties_.NumXcc multiplied by
/// MAX_SCRATCH_APERTURE_PER_XCC.
/// NOTE(review): the unit of the per-XCC limit is presumably bytes - confirm.
size_t MaxScratchDevice() const { return properties_.NumXcc * MAX_SCRATCH_APERTURE_PER_XCC; }
|
||||
@@ -831,6 +833,8 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
/// @brief XGMI CPU<->GPU
|
||||
bool xgmi_cpu_gpu_ = false;
|
||||
/// @brief Is PCIe large BAR enabled.
|
||||
bool large_bar_enabled_ = false;
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
@@ -738,6 +738,11 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
|
||||
assert(IsMultipleOf(ring_buf_alloc_bytes_, 4096) && "Ring buffer sizes must be 4KiB aligned.");
|
||||
|
||||
if (IsDeviceMemRingBuf()) {
|
||||
if (!agent_->LargeBarEnabled()) {
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
|
||||
"Trying to allocate an AQL ring buffer in device memory without "
|
||||
"large BAR PCIe enabled.");
|
||||
}
|
||||
ring_buf_ = agent_->coarsegrain_allocator()(
|
||||
ring_buf_alloc_bytes_,
|
||||
core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateUncached);
|
||||
|
||||
@@ -232,10 +232,13 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna
|
||||
|
||||
#endif
|
||||
|
||||
auto& firstCpu = core::Runtime::runtime_singleton_->cpu_agents()[0];
|
||||
auto linkInfo = core::Runtime::runtime_singleton_->GetLinkInfo(firstCpu->node_id(),
|
||||
node_id());
|
||||
xgmi_cpu_gpu_ = (linkInfo.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI);
|
||||
auto& first_cpu = core::Runtime::runtime_singleton_->cpu_agents()[0];
|
||||
auto link_info = core::Runtime::runtime_singleton_->GetLinkInfo(first_cpu->node_id(), node_id());
|
||||
xgmi_cpu_gpu_ = (link_info.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI);
|
||||
|
||||
if (link_info.num_hop >= 1) {
|
||||
large_bar_enabled_ = true;
|
||||
}
|
||||
|
||||
// Populate region list.
|
||||
InitRegionList();
|
||||
|
||||
Ссылка в новой задаче
Block a user