## Optionally build the virtio-backed KFD driver.
## The extra source is appended to SRCS, which the target_sources() call below
## attaches to ${CORE_RUNTIME_TARGET}. HSAKMT_VIRTIO_ENABLED gates the matching
## driver-discovery code in core/runtime/amd_topology.cpp.
if ( BUILD_THUNK_VIRTIO )
  list ( APPEND SRCS core/driver/virtio/amd_kfd_virtio_driver.cpp )
  ## Use the target variable instead of a hard-coded "hsa-runtime64" so the
  ## definition always lands on the same target that receives ${SRCS}.
  target_compile_definitions ( ${CORE_RUNTIME_TARGET} PRIVATE HSAKMT_VIRTIO_ENABLED=1 )
endif()
+// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "core/inc/amd_virtio_driver.h" +#include "hsakmt/hsakmt_virtio.h" + +#include +#include + +#include "core/inc/amd_gpu_agent.h" +#include "core/inc/amd_memory_region.h" +#include "core/inc/runtime.h" + +extern r_debug _amdgpu_r_debug; + +namespace rocr { +namespace AMD { + +KfdVirtioDriver::KfdVirtioDriver(std::string devnode_name) + : core::Driver(core::DriverType::KFD_VIRTIO, std::move(devnode_name)) {} + +hsa_status_t KfdVirtioDriver::DiscoverDriver(std::unique_ptr& driver) { + auto tmp_driver = std::unique_ptr(new KfdVirtioDriver("")); + + if (tmp_driver->Open() == HSA_STATUS_SUCCESS) { + driver = std::move(tmp_driver); + return HSA_STATUS_SUCCESS; + } + + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::Open() { + return vhsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::Close() { + return vhsaKmtCloseKFD() == HSAKMT_STATUS_SUCCESS ? 
HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::Init() { + HSAKMT_STATUS ret = + vhsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); + uint32_t caps_mask = 0; + + if (ret != HSAKMT_STATUS_SUCCESS && ret != HSAKMT_STATUS_NOT_SUPPORTED) return HSA_STATUS_ERROR; + + if (vhsaKmtGetRuntimeCapabilities(&caps_mask) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + core::Runtime::runtime_singleton_->KfdVersion( + ret != HSAKMT_STATUS_NOT_SUPPORTED, + !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK)); + + if (vhsaKmtGetVersion(&version_) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + core::Runtime::runtime_singleton_->KfdVersion(version_); + + if (version_.KernelInterfaceMajorVersion == 1 && version_.KernelInterfaceMinorVersion == 0) + core::g_use_interrupt_wait = false; + + /* Force disable interrupt wait in VIRTIO driver temporarily */ + core::g_use_interrupt_wait = false; + + /* Force disable XNACK in VIRTIO driver temporarily */ + core::Runtime::runtime_singleton_->XnackEnabled(false); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::ShutDown() { + HSAKMT_STATUS ret = vhsaKmtRuntimeDisable(); + if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + ret = vhsaKmtReleaseSystemProperties(); + + if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return Close(); +} + +hsa_status_t KfdVirtioDriver::QueryKernelModeDriver(core::DriverQuery query) { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetSystemProperties(HsaSystemProperties& sys_props) const { + if (vhsaKmtAcquireSystemProperties(&sys_props) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetNodeProperties(HsaNodeProperties& node_props, + uint32_t node_id) const { + if (vhsaKmtGetNodeProperties(node_id, &node_props) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} 
+ +hsa_status_t KfdVirtioDriver::GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const { + if (vhsaKmtGetNodeIoLinkProperties(node_id, io_link_props.size(), io_link_props.data()) != + HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetMemoryProperties( + uint32_t node_id, std::vector& mem_props) const { + if (mem_props.empty()) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + if (vhsaKmtGetNodeMemoryProperties(node_id, mem_props.size(), mem_props.data()) != + HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetCacheProperties( + uint32_t node_id, uint32_t processor_id, std::vector& cache_props) const { + if (cache_props.empty()) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + if (vhsaKmtGetNodeCacheProperties(node_id, 0, cache_props.size(), cache_props.data()) != + HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const { + assert(device_handle != nullptr); + + if (vhsaKmtGetAMDGPUDeviceHandle(node_id, device_handle) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::GetClockCounters(uint32_t node_id, + HsaClockCounters* clock_counter) const { + assert(clock_counter != nullptr); + + if (vhsaKmtGetClockCounters(node_id, clock_counter) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size, + const void* buffer_base, + uint64_t buffer_base_size) const { + if (vhsaKmtSetTrapHandler(node_id, const_cast(base), base_size, + const_cast(buffer_base), + buffer_base_size) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t 
KfdVirtioDriver::AllocateMemory(const core::MemoryRegion& mem_region, + core::MemoryRegion::AllocateFlags alloc_flags, + void** mem, size_t size, uint32_t agent_node_id) { + const MemoryRegion& m_region(static_cast(mem_region)); + HsaMemFlags kmt_alloc_flags(m_region.mem_flags()); + HSAKMT_STATUS ret; + + kmt_alloc_flags.ui32.ExecuteAccess = + (alloc_flags & core::MemoryRegion::AllocateExecutable ? 1 : 0); + kmt_alloc_flags.ui32.AQLQueueMemory = + (alloc_flags & core::MemoryRegion::AllocateDoubleMap ? 1 : 0); + + if (m_region.IsSystem() && (alloc_flags & core::MemoryRegion::AllocateNonPaged)) { + kmt_alloc_flags.ui32.NonPaged = 1; + } + + if (!m_region.IsLocalMemory() && (alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Allocating a memory handle for virtual memory + kmt_alloc_flags.ui32.NoAddress = !!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly); + + // Allocate pseudo fine grain memory + kmt_alloc_flags.ui32.CoarseGrain = + (alloc_flags & core::MemoryRegion::AllocatePCIeRW ? 0 : kmt_alloc_flags.ui32.CoarseGrain); + + kmt_alloc_flags.ui32.NoSubstitute = + (alloc_flags & core::MemoryRegion::AllocatePinned ? 1 : kmt_alloc_flags.ui32.NoSubstitute); + + kmt_alloc_flags.ui32.GTTAccess = + (alloc_flags & core::MemoryRegion::AllocateGTTAccess ? 1 : kmt_alloc_flags.ui32.GTTAccess); + + kmt_alloc_flags.ui32.Uncached = + (alloc_flags & core::MemoryRegion::AllocateUncached ? 1 : kmt_alloc_flags.ui32.Uncached); + + if (m_region.IsLocalMemory()) { + // Allocate physically contiguous memory. AllocateKfdMemory function call + // will fail if this flag is not supported in KFD. + kmt_alloc_flags.ui32.Contiguous = + (alloc_flags & core::MemoryRegion::AllocateContiguous ? 1 + : kmt_alloc_flags.ui32.Contiguous); + } + + //// Only allow using the suballocator for ordinary VRAM. 
+ if (m_region.IsLocalMemory() && !kmt_alloc_flags.ui32.NoAddress) { + bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc(); + // Avoid modifying executable or queue allocations. + bool useSubAlloc = subAllocEnabled; + useSubAlloc &= ((alloc_flags & (~core::MemoryRegion::AllocateRestrict)) == 0); + + if (useSubAlloc) { + *mem = m_region.fragment_alloc(size); + + if ((alloc_flags & core::MemoryRegion::AllocateAsan)) { + // TODO: Implement ASAN support for VIRTIO driver + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + return HSA_STATUS_SUCCESS; + } + } + + const uint32_t node_id = (alloc_flags & core::MemoryRegion::AllocateGTTAccess) + ? agent_node_id + : m_region.owner()->node_id(); + + //// Allocate memory. + //// If it fails attempt to release memory from the block allocator and retry. + ret = vhsaKmtAllocMemory(node_id, size, kmt_alloc_flags, mem); + if (ret != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + if (*mem == nullptr) { + m_region.owner()->Trim(); + ret = vhsaKmtAllocMemory(node_id, size, kmt_alloc_flags, mem); + if (ret != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + } + + if (*mem != nullptr) { + if (kmt_alloc_flags.ui32.NoAddress) return HSA_STATUS_SUCCESS; + + // Commit the memory. + // For system memory, on non-restricted allocation, map it to all GPUs. On + // restricted allocation, only CPU is allowed to access by default, so + // no need to map + // For local memory, only map it to the owning GPU. Mapping to other GPU, + // if the access is allowed, is performed on AllowAccess. + HsaMemMapFlags map_flag = m_region.map_flags(); + size_t map_node_count = 1; + const uint32_t owner_node_id = m_region.owner()->node_id(); + const uint32_t* map_node_id = &owner_node_id; + + if (m_region.IsSystem()) { + if ((alloc_flags & core::MemoryRegion::AllocateRestrict) == 0) { + // Map to all GPU agents. 
+ map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size(); + + if (map_node_count == 0) { + // No need to pin since no GPU in the platform. + return HSA_STATUS_SUCCESS; + } + + map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0]; + } else { + // No need to pin it for CPU exclusive access. + return HSA_STATUS_SUCCESS; + } + } + + uint64_t alternate_va = 0; + const bool is_resident = + (MakeMemoryResident(*mem, size, &alternate_va, &map_flag, map_node_count, map_node_id) == + HSA_STATUS_SUCCESS); + + const bool require_pinning = + (!m_region.full_profile() || m_region.IsLocalMemory() || m_region.IsScratch()); + + if (require_pinning && !is_resident) { + vhsaKmtFreeMemory(*mem, size); + *mem = nullptr; + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + if ((alloc_flags & core::MemoryRegion::AllocateAsan)) { + // TODO: Implement ASAN support for VIRTIO driver + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + return HSA_STATUS_SUCCESS; + } + + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; +} + +hsa_status_t KfdVirtioDriver::FreeMemory(void* mem, size_t size) { + MakeMemoryUnresident(mem); + return vhsaKmtFreeMemory(mem, size) == HSAKMT_STATUS_SUCCESS ? 
HSA_STATUS_SUCCESS + : HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::AllocateScratchMemory(uint32_t node_id, uint64_t size, + void** mem) const { + assert(mem != nullptr); + assert(size != 0); + + HsaMemFlags flags = {}; + flags.ui32.Scratch = 1; + flags.ui32.HostAccess = 1; + void* ptr = nullptr; + + HSAKMT_STATUS ret = vhsaKmtAllocMemory(node_id, size, flags, &ptr); + if (ret != HSAKMT_STATUS_SUCCESS || ptr == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + *mem = ptr; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::RegisterMemory(void* ptr, uint64_t size, + HsaMemFlags mem_flags) const { + assert(ptr != nullptr); + assert(size != 0); + + if (vhsaKmtRegisterMemoryWithFlags(ptr, size, mem_flags) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::DeregisterMemory(void* ptr) const { + if (vhsaKmtDeregisterMemory(ptr) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::AvailableMemory(uint32_t node_id, uint64_t* available_size) const { + assert(available_size != nullptr); + + if (vhsaKmtAvailableMemory(node_id, available_size) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::MakeMemoryResident(const void* mem, size_t size, + uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags, + uint32_t num_nodes, const uint32_t* nodes) const { + assert(mem != nullptr); + assert(size != 0); + + if (mem_flags == nullptr && nodes == nullptr) { + if (vhsaKmtMapMemoryToGPU(const_cast(mem), size, alternate_va) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + } else if (mem_flags != nullptr && nodes != nullptr) { + if (vhsaKmtMapMemoryToGPUNodes(const_cast(mem), size, alternate_va, *mem_flags, + num_nodes, + const_cast(nodes)) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + } else { + debug_print("Invalid memory flags ptr:%p nodes 
ptr:%p\n", mem_flags, nodes); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::MakeMemoryUnresident(const void* mem) const { + vhsaKmtUnmapMemoryToGPU(const_cast(mem)); + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct, + HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id, + void* queue_addr, uint64_t queue_size_bytes, + HsaEvent* event, HsaQueueResource& queue_resource) const { + if (vhsaKmtCreateQueueExt(node_id, type, queue_pct, priority, sdma_engine_id, queue_addr, + queue_size_bytes, event, &queue_resource) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::DestroyQueue(HSA_QUEUEID queue_id) const { + if (vhsaKmtDestroyQueue(queue_id) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_percentage, + HSA_QUEUE_PRIORITY priority, void* queue_mem, + uint64_t queue_size, HsaEvent* event) const { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t num_cu_mask, + uint32_t* cu_mask) const { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_GWS, + uint32_t* GWS) const { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::ExportDMABuf(void* mem, size_t size, int* dmabuf_fd, size_t* offset) { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::ImportDMABuf(int dmabuf_fd, core::Agent& agent, + core::ShareableHandle& handle) { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::Map(core::ShareableHandle handle, void* mem, size_t offset, + size_t size, hsa_access_permission_t perms) { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::Unmap(core::ShareableHandle 
handle, void* mem, size_t offset, + size_t size) { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::ReleaseShareableHandle(core::ShareableHandle& handle) { + return HSA_STATUS_ERROR; +} + +hsa_status_t KfdVirtioDriver::GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const { + if (vhsaKmtGetTileConfig(node_id, config) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::SPMAcquire(uint32_t node_id) const { return HSA_STATUS_ERROR; } + +hsa_status_t KfdVirtioDriver::SPMRelease(uint32_t node_id) const { return HSA_STATUS_ERROR; } + +hsa_status_t KfdVirtioDriver::SPMSetDestBuffer(uint32_t node_id, uint32_t size, uint32_t* timeout, + uint32_t* size_copied, void* dest, + bool* is_data_loss) const { + return HSA_STATUS_ERROR; +} + + +hsa_status_t KfdVirtioDriver::OpenSMI(uint32_t node_id, int* fd) const { return HSA_STATUS_ERROR; } + +hsa_status_t KfdVirtioDriver::GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const { + assert(frequency != nullptr); + + amdgpu_gpu_info info; + amdgpu_device_handle handle; + if (GetDeviceHandle(node_id, reinterpret_cast(&handle)) != HSA_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + if (vamdgpu_query_gpu_info(handle, &info) < 0) return HSA_STATUS_ERROR; + + // Reported by libdrm in KHz. 
+ *frequency = uint64_t(info.gpu_counter_freq) * 1000ull; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdVirtioDriver::IsModelEnabled(bool* enable) const { + *enable = false; + return HSA_STATUS_SUCCESS; +} + +} // namespace AMD +} // namespace rocr diff --git a/runtime/hsa-runtime/core/inc/amd_virtio_driver.h b/runtime/hsa-runtime/core/inc/amd_virtio_driver.h new file mode 100644 index 0000000000..fd229e94f6 --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_virtio_driver.h @@ -0,0 +1,124 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_ +#define HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_ + +#include +#include + +#include "hsakmt/hsakmt.h" + +#include "core/inc/driver.h" +#include "core/inc/memory_region.h" + +namespace rocr { +namespace AMD { + +class KfdVirtioDriver final : public core::Driver { + public: + KfdVirtioDriver(std::string devnode_name); + + static hsa_status_t DiscoverDriver(std::unique_ptr& driver); + + hsa_status_t Init() override; + hsa_status_t ShutDown() override; + hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override; + hsa_status_t Open() override; + hsa_status_t Close() override; + hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override; + hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override; + hsa_status_t GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const override; + hsa_status_t GetMemoryProperties(uint32_t node_id, + std::vector& mem_props) const override; + hsa_status_t GetCacheProperties(uint32_t node_id, uint32_t processor_id, + std::vector& cache_props) const override; + hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const; + hsa_status_t GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const; + hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size, + const void* 
buffer_base, uint64_t buffer_base_size) const; + hsa_status_t AllocateMemory(const core::MemoryRegion& mem_region, + core::MemoryRegion::AllocateFlags alloc_flags, void** mem, + size_t size, uint32_t agent_node_id) override; + hsa_status_t FreeMemory(void* mem, size_t size) override; + hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const; + hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override; + hsa_status_t DeregisterMemory(void* ptr) const override; + hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const; + hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags, uint32_t num_nodes, + const uint32_t* nodes) const override; + hsa_status_t MakeMemoryUnresident(const void* mem) const override; + hsa_status_t CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct, + HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id, void* queue_addr, + uint64_t queue_size_bytes, HsaEvent* event, + HsaQueueResource& queue_resource) const override; + hsa_status_t DestroyQueue(HSA_QUEUEID queue_id) const override; + hsa_status_t UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_percentage, + HSA_QUEUE_PRIORITY priority, void* queue_mem, uint64_t queue_size, + HsaEvent* event) const override; + hsa_status_t SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t num_cu_mask, + uint32_t* cu_mask) const override; + hsa_status_t AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_GWS, uint32_t* GWS) const override; + hsa_status_t ExportDMABuf(void* mem, size_t size, int* dmabuf_fd, size_t* offset) override; + hsa_status_t ImportDMABuf(int dmabuf_fd, core::Agent& agent, + core::ShareableHandle& handle) override; + hsa_status_t Map(core::ShareableHandle handle, void* mem, size_t offset, size_t size, + hsa_access_permission_t perms) override; + hsa_status_t Unmap(core::ShareableHandle handle, void* mem, size_t offset, size_t size) 
override; + hsa_status_t ReleaseShareableHandle(core::ShareableHandle& handle) override; + hsa_status_t GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const; + hsa_status_t SPMAcquire(uint32_t node_id) const override; + hsa_status_t SPMRelease(uint32_t node_id) const override; + hsa_status_t SPMSetDestBuffer(uint32_t node_id, uint32_t size, uint32_t* timeout, + uint32_t* size_copied, void* dest, + bool* is_data_loss) const override; + hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override; + hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const; + hsa_status_t IsModelEnabled(bool* enable) const override; +}; + +} // namespace AMD +} // namespace rocr + +#endif // HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_ diff --git a/runtime/hsa-runtime/core/inc/driver.h b/runtime/hsa-runtime/core/inc/driver.h index 84d07f4041..c4cdc19f24 100644 --- a/runtime/hsa-runtime/core/inc/driver.h +++ b/runtime/hsa-runtime/core/inc/driver.h @@ -58,7 +58,7 @@ class Queue; enum class DriverQuery { GET_DRIVER_VERSION }; -enum class DriverType { XDNA = 0, KFD, NUM_DRIVER_TYPES }; +enum class DriverType { XDNA = 0, KFD, KFD_VIRTIO, NUM_DRIVER_TYPES }; /// @brief Handle for exported / imported memory. 
struct ShareableHandle { diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index 55b75591e6..29956d9423 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -68,6 +68,9 @@ #include "core/inc/amd_memory_region.h" #include "core/inc/runtime.h" #include "core/util/utils.h" +#ifdef HSAKMT_VIRTIO_ENABLED +#include "core/inc/amd_virtio_driver.h" +#endif extern r_debug _amdgpu_r_debug; @@ -78,13 +81,21 @@ namespace { #if _WIN32 constexpr size_t num_drivers = 0; #elif __linux__ -constexpr size_t num_drivers = 2; +constexpr size_t num_drivers = 2 +#ifdef HSAKMT_VIRTIO_ENABLED + + 1 +#endif + ; #endif const std::array&)>, num_drivers> discover_driver_funcs = { #ifdef __linux__ - KfdDriver::DiscoverDriver, XdnaDriver::DiscoverDriver + KfdDriver::DiscoverDriver, + XdnaDriver::DiscoverDriver, +#ifdef HSAKMT_VIRTIO_ENABLED + KfdVirtioDriver::DiscoverDriver, +#endif #endif };