diff --git a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h index 2c3f9afa4d..bfa080cf8c 100644 --- a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h @@ -51,6 +51,7 @@ #include "core/inc/agent.h" #include "core/inc/queue.h" #include "core/inc/cache.h" +#include "core/inc/driver.h" namespace rocr { namespace AMD { @@ -62,7 +63,9 @@ class CpuAgent : public core::Agent { // @param [in] node Node id. Each CPU in different socket will get distinct // id. // @param [in] node_props Node property. - CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props); + // @param [in] driver_type Driver type. Default is KFD. + CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, + core::DriverType driver_type = core::DriverType::KFD); // @brief CpuAgent destructor. ~CpuAgent(); diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index f7dd5d26c6..e45a165e97 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -73,10 +73,11 @@ typedef ScratchCache::ScratchInfo ScratchInfo; class GpuAgentInt : public core::Agent { public: // @brief Constructor - GpuAgentInt(uint32_t node_id) - : core::Agent(core::Runtime::runtime_singleton_->AgentDriver( - core::DriverType::KFD), - node_id, core::Agent::DeviceType::kAmdGpuDevice) {} + // @param [in] node_id Node id. + // @param [in] driver_type Driver type (no default; the caller must supply it). + GpuAgentInt(uint32_t node_id, core::DriverType driver_type) + : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node_id, + core::Agent::DeviceType::kAmdGpuDevice) {} // @brief Ensure blits are ready (performance hint). virtual void PreloadBlits() {} @@ -231,7 +232,10 @@ class GpuAgent : public GpuAgentInt { // id. // @param [in] node_props Node property. // @param [in] xnack_mode XNACK mode of device. 
- GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index); + // @param [in] index Index of the GPU device. + // @param [in] driver_type Driver type. Default is KFD. + GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index, + core::DriverType driver_type = core::DriverType::KFD); // @brief GPU agent destructor. ~GpuAgent(); diff --git a/runtime/hsa-runtime/core/inc/driver.h b/runtime/hsa-runtime/core/inc/driver.h index c4cdc19f24..3a4082e24f 100644 --- a/runtime/hsa-runtime/core/inc/driver.h +++ b/runtime/hsa-runtime/core/inc/driver.h @@ -58,7 +58,14 @@ class Queue; enum class DriverQuery { GET_DRIVER_VERSION }; -enum class DriverType { XDNA = 0, KFD, KFD_VIRTIO, NUM_DRIVER_TYPES }; +enum class DriverType { + XDNA = 0, + KFD, +#ifdef HSAKMT_VIRTIO_ENABLED + KFD_VIRTIO, +#endif + NUM_DRIVER_TYPES +}; /// @brief Handle for exported / imported memory. struct ShareableHandle { diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index c47a5f2d25..9ba67f34d3 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -510,6 +510,14 @@ class Runtime { std::vector>& AgentDrivers() { return agent_drivers_; } + static bool IsGPUDriver(DriverType driver_type) { + return driver_type == core::DriverType::KFD +#ifdef HSAKMT_VIRTIO_ENABLED + || driver_type == core::DriverType::KFD_VIRTIO +#endif + ; + } + protected: static void AsyncEventsLoop(void*); static void AsyncIPCSockServerConnLoop(void*); diff --git a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp index 5cd3cb8a4f..37eda03d85 100644 --- a/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp @@ -54,10 +54,9 @@ namespace rocr { namespace AMD { -CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties &node_props) - : core::Agent( - 
core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD), - node, kAmdCpuDevice), +CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, + core::DriverType driver_type) + : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node, kAmdCpuDevice), properties_(node_props) { InitRegionList(); diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 7273bbd9b2..3b830af294 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -93,8 +93,8 @@ namespace AMD { const uint64_t CP_DMA_DATA_TRANSFER_CNT_MAX = (1 << 26); GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, - uint32_t index) - : GpuAgentInt(node), + uint32_t index, core::DriverType driver_type) + : GpuAgentInt(node, driver_type), properties_(node_props), current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT), scratch_used_large_(0), diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index 29956d9423..5a9a909662 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -78,17 +78,14 @@ namespace rocr { namespace AMD { // Anonymous namespace. 
namespace { -#if _WIN32 -constexpr size_t num_drivers = 0; -#elif __linux__ -constexpr size_t num_drivers = 2 -#ifdef HSAKMT_VIRTIO_ENABLED - + 1 -#endif - ; -#endif -const std::array&)>, num_drivers> +const std::array&)>, +#if _WIN32 + 0 +#elif __linux__ + static_cast(core::DriverType::NUM_DRIVER_TYPES) +#endif + > discover_driver_funcs = { #ifdef __linux__ KfdDriver::DiscoverDriver, @@ -121,14 +118,14 @@ bool InitializeDriver(std::unique_ptr& driver) { return true; } -void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { - CpuAgent* cpu = new CpuAgent(node_id, node_prop); +void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop, core::DriverType driver_type) { + CpuAgent* cpu = new CpuAgent(node_id, node_prop, driver_type); cpu->Enable(); core::Runtime::runtime_singleton_->RegisterAgent(cpu, true); } GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode, - bool enabled) { + bool enabled, core::DriverType driver_type) { GpuAgent* gpu = nullptr; if (node_prop.NumFComputeCores == 0) { // Ignore non GPUs. 
@@ -136,7 +133,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac } try { gpu = new GpuAgent(node_id, node_prop, xnack_mode, - core::Runtime::runtime_singleton_->gpu_agents().size()); + core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type); const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version; @@ -163,7 +160,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac node_prop.Capability.ui32.SRAM_EDCSupport = 1; delete gpu; gpu = new GpuAgent(node_id, node_prop, xnack_mode, - core::Runtime::runtime_singleton_->gpu_agents().size()); + core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type); } } } catch (const hsa_exception& e) { @@ -268,24 +265,29 @@ void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode, bool enable const int32_t invalidIdx = -1; int32_t list_sz = gpu_list.size(); HsaNodeProperties node_prop = {0}; - const auto& gpu_driver = core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD); - for (int32_t idx = 0; idx < list_sz; idx++) { - if (gpu_list[idx] == invalidIdx) { - break; + for (const auto& gpu_driver : core::Runtime::runtime_singleton_->AgentDrivers()) { + if (!core::Runtime::IsGPUDriver(gpu_driver->kernel_driver_type_)) { + continue; } - // Obtain properties of the node - hsa_status_t ret = gpu_driver.GetNodeProperties(node_prop, gpu_list[idx]); - assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties"); + for (int32_t idx = 0; idx < list_sz; idx++) { + if (gpu_list[idx] == invalidIdx) { + break; + } - // disable interrupt signal for DTIF platform - if (core::Runtime::runtime_singleton_->flag().enable_dtif()) - core::g_use_interrupt_wait = false; + // Obtain properties of the node + hsa_status_t ret = gpu_driver->GetNodeProperties(node_prop, gpu_list[idx]); + assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties"); - // Instantiate a Gpu device. 
The IO links - // of this node have already been registered - assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); - DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled); + // disable interrupt signal for DTIF platform + if (core::Runtime::runtime_singleton_->flag().enable_dtif()) + core::g_use_interrupt_wait = false; + + // Instantiate a Gpu device. The IO links + // of this node have already been registered + assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); + DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled, gpu_driver->kernel_driver_type_); + } } } @@ -346,7 +348,7 @@ bool BuildTopology() { /// @todo: Add support for AIEs. // Query if env ROCR_VISIBLE_DEVICES is defined. If defined // determine number and order of GPU devices to be surfaced. - if (filter && driver->kernel_driver_type_ == core::DriverType::KFD) { + if (filter && (core::Runtime::IsGPUDriver(driver->kernel_driver_type_))) { rvdFilter.BuildRvdTokenList(); rvdFilter.BuildDeviceUuidList(node_props_vec); visibleCnt = rvdFilter.BuildUsrDeviceList(); @@ -361,7 +363,7 @@ bool BuildTopology() { for (auto& node_props : node_props_vec) { if (node_props.NumCPUCores) { // Node has CPU cores so instantiate a CPU agent. - DiscoverCpu(node_id, node_props); + DiscoverCpu(node_id, node_props, driver->kernel_driver_type_); } if (node_props.NumNeuralCores) {