From 20806577ce633ca63f7647fc7f2225f6945631e4 Mon Sep 17 00:00:00 2001 From: Honglei Huang Date: Tue, 8 Jul 2025 11:01:05 +0800 Subject: [PATCH] rocr: support multiple driver types in agent initialization Modify agent initialization to support different driver types, enabling the KFD_VIRTIO driver for CPU and GPU agents. 1. Add driver_type parameter to CpuAgent and GpuAgent constructors 2. Update topology discovery to handle multiple driver types 3. Fix MakeMemoryResident return value check in VirtioDriver 4. Add helper function IsGPUDriver to check driver types 5. Update agent discovery to iterate through all available drivers This change makes the runtime more flexible by removing hardcoded KFD driver assumptions and properly handling different driver backends. Signed-off-by: Honglei Huang --- runtime/hsa-runtime/core/inc/amd_cpu_agent.h | 5 +- runtime/hsa-runtime/core/inc/amd_gpu_agent.h | 14 ++-- runtime/hsa-runtime/core/inc/driver.h | 9 ++- runtime/hsa-runtime/core/inc/runtime.h | 8 +++ .../core/runtime/amd_cpu_agent.cpp | 7 +- .../core/runtime/amd_gpu_agent.cpp | 4 +- .../hsa-runtime/core/runtime/amd_topology.cpp | 64 ++++++++++--------- 7 files changed, 67 insertions(+), 44 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h index 2c3f9afa4d..bfa080cf8c 100644 --- a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h @@ -51,6 +51,7 @@ #include "core/inc/agent.h" #include "core/inc/queue.h" #include "core/inc/cache.h" +#include "core/inc/driver.h" namespace rocr { namespace AMD { @@ -62,7 +63,9 @@ class CpuAgent : public core::Agent { // @param [in] node Node id. Each CPU in different socket will get distinct // id. // @param [in] node_props Node property. - CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props); + // @param [in] driver_type Driver type. Default is KFD. 
+ CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, + core::DriverType driver_type = core::DriverType::KFD); // @brief CpuAgent destructor. ~CpuAgent(); diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index f7dd5d26c6..e45a165e97 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -73,10 +73,11 @@ typedef ScratchCache::ScratchInfo ScratchInfo; class GpuAgentInt : public core::Agent { public: // @brief Constructor - GpuAgentInt(uint32_t node_id) - : core::Agent(core::Runtime::runtime_singleton_->AgentDriver( - core::DriverType::KFD), - node_id, core::Agent::DeviceType::kAmdGpuDevice) {} + // @param [in] node_id Node id. + // @param [in] driver_type Driver type used to look up the agent driver (no default). + GpuAgentInt(uint32_t node_id, core::DriverType driver_type) + : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node_id, + core::Agent::DeviceType::kAmdGpuDevice) {} // @brief Ensure blits are ready (performance hint). virtual void PreloadBlits() {} @@ -231,7 +232,10 @@ class GpuAgent : public GpuAgentInt { // id. // @param [in] node_props Node property. // @param [in] xnack_mode XNACK mode of device. - GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index); + // @param [in] index Index of the GPU device. + // @param [in] driver_type Driver type. Default is KFD. + GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index, + core::DriverType driver_type = core::DriverType::KFD); // @brief GPU agent destructor. 
~GpuAgent(); diff --git a/runtime/hsa-runtime/core/inc/driver.h b/runtime/hsa-runtime/core/inc/driver.h index c4cdc19f24..3a4082e24f 100644 --- a/runtime/hsa-runtime/core/inc/driver.h +++ b/runtime/hsa-runtime/core/inc/driver.h @@ -58,7 +58,14 @@ class Queue; enum class DriverQuery { GET_DRIVER_VERSION }; -enum class DriverType { XDNA = 0, KFD, KFD_VIRTIO, NUM_DRIVER_TYPES }; +enum class DriverType { + XDNA = 0, + KFD, +#ifdef HSAKMT_VIRTIO_ENABLED + KFD_VIRTIO, +#endif + NUM_DRIVER_TYPES +}; /// @brief Handle for exported / imported memory. struct ShareableHandle { diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index c47a5f2d25..9ba67f34d3 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -510,6 +510,14 @@ class Runtime { std::vector>& AgentDrivers() { return agent_drivers_; } + static bool IsGPUDriver(DriverType driver_type) { + return driver_type == core::DriverType::KFD +#ifdef HSAKMT_VIRTIO_ENABLED + || driver_type == core::DriverType::KFD_VIRTIO +#endif + ; + } + protected: static void AsyncEventsLoop(void*); static void AsyncIPCSockServerConnLoop(void*); diff --git a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp index 5cd3cb8a4f..37eda03d85 100644 --- a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp @@ -54,10 +54,9 @@ namespace rocr { namespace AMD { -CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties &node_props) - : core::Agent( - core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD), - node, kAmdCpuDevice), +CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, + core::DriverType driver_type) + : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node, kAmdCpuDevice), properties_(node_props) { InitRegionList(); diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp 
b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 7273bbd9b2..3b830af294 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -93,8 +93,8 @@ namespace AMD { const uint64_t CP_DMA_DATA_TRANSFER_CNT_MAX = (1 << 26); GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, - uint32_t index) - : GpuAgentInt(node), + uint32_t index, core::DriverType driver_type) + : GpuAgentInt(node, driver_type), properties_(node_props), current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT), scratch_used_large_(0), diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index 29956d9423..5a9a909662 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -78,17 +78,14 @@ namespace rocr { namespace AMD { // Anonymous namespace. namespace { -#if _WIN32 -constexpr size_t num_drivers = 0; -#elif __linux__ -constexpr size_t num_drivers = 2 -#ifdef HSAKMT_VIRTIO_ENABLED - + 1 -#endif - ; -#endif -const std::array&)>, num_drivers> +const std::array&)>, +#if _WIN32 + 0 +#elif __linux__ + static_cast(core::DriverType::NUM_DRIVER_TYPES) +#endif + > discover_driver_funcs = { #ifdef __linux__ KfdDriver::DiscoverDriver, @@ -121,14 +118,14 @@ bool InitializeDriver(std::unique_ptr& driver) { return true; } -void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { - CpuAgent* cpu = new CpuAgent(node_id, node_prop); +void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop, core::DriverType driver_type) { + CpuAgent* cpu = new CpuAgent(node_id, node_prop, driver_type); cpu->Enable(); core::Runtime::runtime_singleton_->RegisterAgent(cpu, true); } GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode, - bool enabled) { + bool enabled, core::DriverType driver_type) { GpuAgent* gpu = nullptr; if 
(node_prop.NumFComputeCores == 0) { // Ignore non GPUs. @@ -136,7 +133,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac } try { gpu = new GpuAgent(node_id, node_prop, xnack_mode, - core::Runtime::runtime_singleton_->gpu_agents().size()); + core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type); const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version; @@ -163,7 +160,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac node_prop.Capability.ui32.SRAM_EDCSupport = 1; delete gpu; gpu = new GpuAgent(node_id, node_prop, xnack_mode, - core::Runtime::runtime_singleton_->gpu_agents().size()); + core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type); } } } catch (const hsa_exception& e) { @@ -268,24 +265,29 @@ void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode, bool enable const int32_t invalidIdx = -1; int32_t list_sz = gpu_list.size(); HsaNodeProperties node_prop = {0}; - const auto& gpu_driver = core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD); - for (int32_t idx = 0; idx < list_sz; idx++) { - if (gpu_list[idx] == invalidIdx) { - break; + for (const auto& gpu_driver : core::Runtime::runtime_singleton_->AgentDrivers()) { + if (!core::Runtime::IsGPUDriver(gpu_driver->kernel_driver_type_)) { + continue; } - // Obtain properties of the node - hsa_status_t ret = gpu_driver.GetNodeProperties(node_prop, gpu_list[idx]); - assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties"); + for (int32_t idx = 0; idx < list_sz; idx++) { + if (gpu_list[idx] == invalidIdx) { + break; + } - // disable interrupt signal for DTIF platform - if (core::Runtime::runtime_singleton_->flag().enable_dtif()) - core::g_use_interrupt_wait = false; + // Obtain properties of the node + hsa_status_t ret = gpu_driver->GetNodeProperties(node_prop, gpu_list[idx]); + assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties"); 
- // Instantiate a Gpu device. The IO links - // of this node have already been registered - assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); - DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled); + // disable interrupt signal for DTIF platform + if (core::Runtime::runtime_singleton_->flag().enable_dtif()) + core::g_use_interrupt_wait = false; + + // Instantiate a Gpu device. The IO links + // of this node have already been registered + assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); + DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled, gpu_driver->kernel_driver_type_); + } } } @@ -346,7 +348,7 @@ bool BuildTopology() { /// @todo: Add support for AIEs. // Query if env ROCR_VISIBLE_DEVICES is defined. If defined // determine number and order of GPU devices to be surfaced. - if (filter && driver->kernel_driver_type_ == core::DriverType::KFD) { + if (filter && (core::Runtime::IsGPUDriver(driver->kernel_driver_type_))) { rvdFilter.BuildRvdTokenList(); rvdFilter.BuildDeviceUuidList(node_props_vec); visibleCnt = rvdFilter.BuildUsrDeviceList(); @@ -361,7 +363,7 @@ bool BuildTopology() { for (auto& node_props : node_props_vec) { if (node_props.NumCPUCores) { // Node has CPU cores so instantiate a CPU agent. - DiscoverCpu(node_id, node_props); + DiscoverCpu(node_id, node_props, driver->kernel_driver_type_); } if (node_props.NumNeuralCores) {