SWDEV-558848 - vmm api support for rocr on windows (#1761)

* SWDEV-558848 - vmm api support for rocr on windows

* Fixes to VMM handle Map/Unmap Set/Get Access

* Fix GetShareableHandle to use pointer for shareable handle

* Update os specific map/unmap memory calls

* clang format update

* Minor syntax fixes from code review

Co-authored-by: Yiannis Papadopoulos <102817138+ypapadop-amd@users.noreply.github.com>

---------

Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Co-authored-by: Yiannis Papadopoulos <102817138+ypapadop-amd@users.noreply.github.com>
Этот коммит содержится в:
Rahul Manocha
2025-12-10 08:39:51 -08:00
коммит произвёл GitHub
родитель 465633d707
Коммит 0c1f87a7f6
15 изменённых файлов: 121 добавлений и 52 удалений
+11
Просмотреть файл
@@ -547,6 +547,17 @@ hsaKmtExportDMABufHandle(
HSAuint64 *Offset //OUT
);
/**
Export GPU Memory handle

MemoryAddress      - IN:  address passed to the driver to identify the memory
SizeInBytes        - IN:  size in bytes associated with MemoryAddress
SharedMemoryHandle - OUT: location that receives the 64-bit memory handle

NOTE(review): whether MemoryAddress must be the base of an allocation is not
visible from this declaration - confirm against the implementation.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetMemoryHandle(
void* MemoryAddress, // IN
HSAuint64 SizeInBytes, // IN
uint64_t* SharedMemoryHandle // OUT
);
/**
Export a memory buffer for sharing with other processes
+1
Просмотреть файл
@@ -90,6 +90,7 @@ hsaKmtPcSamplingStart;
hsaKmtPcSamplingStop;
hsaKmtPcSamplingSupport;
hsaKmtAisReadWriteFile;
hsaKmtGetMemoryHandle;
local: *;
};
+8
Просмотреть файл
@@ -929,3 +929,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
return hsaKmtGetAMDGPUDeviceHandleCtx(&hsakmt_primary_kfd_ctx, NodeId, DeviceHandle);
}
/*
 * Memory-handle export entry point. This implementation runs the
 * CHECK_KFD_OPEN() check and then always reports
 * HSAKMT_STATUS_NOT_SUPPORTED; none of the arguments are read or written.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetMemoryHandle(void *MemoryAddress,
					      HSAuint64 SizeInBytes,
					      uint64_t *SharedMemoryHandle)
{
	CHECK_KFD_OPEN();

	/* Arguments are intentionally unused by this stub. */
	(void)MemoryAddress;
	(void)SizeInBytes;
	(void)SharedMemoryHandle;

	return HSAKMT_STATUS_NOT_SUPPORTED;
}
+15 -22
Просмотреть файл
@@ -71,6 +71,7 @@ static_assert(
(sizeof(core::ShareableHandle::handle) >= sizeof(amdgpu_bo_handle)) &&
(alignof(core::ShareableHandle::handle) >= alignof(amdgpu_bo_handle)),
"ShareableHandle cannot store a amdgpu_bo_handle");
#endif
namespace {
@@ -89,7 +90,6 @@ __forceinline uint64_t drm_perm(hsa_access_permission_t perm) {
}
} // namespace
#endif
KfdDriver::KfdDriver(std::string devnode_name)
: core::Driver(core::DriverType::KFD, std::move(devnode_name)) {}
@@ -415,6 +415,16 @@ hsa_status_t KfdDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
return HSA_STATUS_SUCCESS;
}
/// @brief Obtains a shareable handle for @p mem by forwarding to
/// hsaKmtGetMemoryHandle and storing the result in @p handle.
///
/// @param[in] mem address forwarded to the thunk call
/// @param[in] size size in bytes forwarded to the thunk call
/// @param[out] handle receives the 64-bit handle on success; must not be null
///
/// @return HSA_STATUS_SUCCESS when the thunk call succeeds, HSA_STATUS_ERROR
/// when @p handle is null or the thunk call reports any failure.
hsa_status_t KfdDriver::GetShareableHandle(void* mem, size_t size, core::ShareableHandle* handle) {
  // The result is written through `handle`; reject a null destination before
  // touching the driver instead of dereferencing it afterwards.
  if (handle == nullptr) {
    return HSA_STATUS_ERROR;
  }

  // Zero-initialized so an indeterminate value can never be observed even if
  // the callee returns without writing the output.
  uint64_t mem_handle = 0;
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtGetMemoryHandle(mem, size, &mem_handle));
  if (status != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  handle->handle = mem_handle;
  return HSA_STATUS_SUCCESS;
}
hsa_status_t KfdDriver::ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
size_t *offset) {
int dmabuf_fd_res = -1;
@@ -436,25 +446,19 @@ hsa_status_t KfdDriver::ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
hsa_status_t KfdDriver::ImportDMABuf(int dmabuf_fd, core::Agent &agent,
core::ShareableHandle &handle) {
#if defined(__linux__)
auto &gpu_agent = static_cast<GpuAgent &>(agent);
auto& gpu_agent = static_cast<GpuAgent&>(agent);
amdgpu_bo_import_result res;
auto ret = DRM_CALL(amdgpu_bo_import(
gpu_agent.libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res));
if (ret)
return HSA_STATUS_ERROR;
auto ret = DRM_CALL(
amdgpu_bo_import(gpu_agent.libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res));
if (ret) return HSA_STATUS_ERROR;
handle.handle = reinterpret_cast<uint64_t>(res.buf_handle);
#else
assert(!"Unimplemented!");
#endif
return HSA_STATUS_SUCCESS;
}
hsa_status_t KfdDriver::Map(core::ShareableHandle handle, void *mem,
size_t offset, size_t size,
hsa_access_permission_t perms) {
#if defined(__linux__)
const auto ldrm_bo = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
if (!ldrm_bo)
return HSA_STATUS_ERROR;
@@ -462,15 +466,11 @@ hsa_status_t KfdDriver::Map(core::ShareableHandle handle, void *mem,
if (DRM_CALL(amdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast<uint64_t>(mem),
drm_perm(perms), AMDGPU_VA_OP_MAP)) != 0)
return HSA_STATUS_ERROR;
#else
assert(!"Unimplemented!");
#endif
return HSA_STATUS_SUCCESS;
}
hsa_status_t KfdDriver::Unmap(core::ShareableHandle handle, void *mem,
size_t offset, size_t size) {
#if defined(__linux__)
const auto ldrm_bo = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
if (!ldrm_bo)
return HSA_STATUS_ERROR;
@@ -478,14 +478,10 @@ hsa_status_t KfdDriver::Unmap(core::ShareableHandle handle, void *mem,
if (DRM_CALL(amdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast<uint64_t>(mem), 0,
AMDGPU_VA_OP_UNMAP)) != 0)
return HSA_STATUS_ERROR;
#else
assert(!"Unimplemented!");
#endif
return HSA_STATUS_SUCCESS;
}
hsa_status_t KfdDriver::ReleaseShareableHandle(core::ShareableHandle &handle) {
#if defined(__linux__)
const auto ldrm_bo = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
if (!ldrm_bo)
return HSA_STATUS_ERROR;
@@ -495,9 +491,6 @@ hsa_status_t KfdDriver::ReleaseShareableHandle(core::ShareableHandle &handle) {
return HSA_STATUS_ERROR;
handle = {};
#else
assert(!"Unimplemented!");
#endif
return HSA_STATUS_SUCCESS;
}
+4
Просмотреть файл
@@ -1029,5 +1029,9 @@ hsa_status_t XdnaDriver::MakeMemoryResident(const void* mem, size_t size, uint64
hsa_status_t XdnaDriver::MakeMemoryUnresident(const void* mem) const { return HSA_STATUS_ERROR; }
/// @brief Shareable-handle query for the XDNA driver. This implementation
/// unconditionally returns HSA_STATUS_ERROR and leaves @p handle untouched.
hsa_status_t XdnaDriver::GetShareableHandle(void* mem, size_t size,
                                            core::ShareableHandle* handle) {
  // None of the arguments are consulted by this implementation.
  (void)mem;
  (void)size;
  (void)handle;
  return HSA_STATUS_ERROR;
}
} // namespace AMD
} // namespace rocr
+1 -1
Просмотреть файл
@@ -116,7 +116,7 @@ public:
hsa_status_t Unmap(core::ShareableHandle handle, void *mem, size_t offset,
size_t size) override;
hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override;
hsa_status_t GetShareableHandle(void* mem, size_t size, core::ShareableHandle* handle) override;
hsa_status_t SPMAcquire(uint32_t preferred_node_id) const override;
hsa_status_t SPMRelease(uint32_t preferred_node_id) const override;
hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, uint32_t* timeout,
+1
Просмотреть файл
@@ -225,6 +225,7 @@ public:
size_t size, hsa_access_permission_t perms) override;
hsa_status_t Unmap(core::ShareableHandle handle, void *mem, size_t offset,
size_t size) override;
hsa_status_t GetShareableHandle(void* mem, size_t size, core::ShareableHandle* handle) override;
hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override;
/// @brief Submits @p num_pkts packets in a command chain.
+8
Просмотреть файл
@@ -232,6 +232,14 @@ public:
virtual hsa_status_t Unmap(core::ShareableHandle handle, void *mem,
size_t offset, size_t size) = 0;
/// @brief Get Shareable Memory Handle for physical memory
///
/// @param[in] mem physical memory handle
/// @param[in] size size of memory allocated in bytes
/// @param[out] handle handle of the memory object
///
/// @return HSA_STATUS_SUCCESS if @p handle was populated; an error status
/// otherwise (the exact error codes are implementation-defined).
virtual hsa_status_t GetShareableHandle(void* mem, size_t size,
core::ShareableHandle* handle) = 0;
/// @brief Releases the object associated with the handle.
///
/// @param[in] handle handle of the object to release
+4 -1
Просмотреть файл
@@ -332,7 +332,9 @@ class ThunkLoader {
HsaAisFlags flags, \
HSAuint64 *SizeCopiedInBytes, \
HSAint32 *status);
typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetMemoryHandle))(void *MemoryAddress, \
HSAuint64 SizeInBytes, \
uint64_t *SharedMemoryHandle);
/* drm API */
typedef int (DRM_DEF(amdgpu_device_initialize))(int fd, \
uint32_t *major_version, \
@@ -480,6 +482,7 @@ class ThunkLoader {
HSAKMT_DEF(hsaKmtModelEnabled)* HSAKMT_PFN(hsaKmtModelEnabled);
HSAKMT_DEF(hsaKmtQueueRingDoorbell)* HSAKMT_PFN(hsaKmtQueueRingDoorbell);
HSAKMT_DEF(hsaKmtAisReadWriteFile)* HSAKMT_PFN(hsaKmtAisReadWriteFile);
HSAKMT_DEF(hsaKmtGetMemoryHandle)* HSAKMT_PFN(hsaKmtGetMemoryHandle);
DRM_DEF(amdgpu_device_initialize)* DRM_PFN(amdgpu_device_initialize);
DRM_DEF(amdgpu_device_deinitialize)* DRM_PFN(amdgpu_device_deinitialize);
+27 -26
Просмотреть файл
@@ -2628,7 +2628,7 @@ void Runtime::CheckVirtualMemApiSupport() {
virtual_mem_api_supported_ = true;
}
#else
virtual_mem_api_supported_ = false;
virtual_mem_api_supported_ = true;
#endif
}
}
@@ -3672,11 +3672,21 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset,
if (status != HSA_STATUS_SUCCESS)
return status;
close(dmabuf_fd);
if (dmabuf_fd != -1) {
close(dmabuf_fd);
}
// Get address that memory is mapped to
ret = GetAmdgpuDeviceArgs(agent, shareable_handle, &drm_fd, &drm_cpu_addr);
if (ret) return HSA_STATUS_ERROR;
if (shareable_handle.IsValid()) {
ret = GetAmdgpuDeviceArgs(agent, shareable_handle, &drm_fd, &drm_cpu_addr);
if (ret) return HSA_STATUS_ERROR;
} else {
hsa_status_t status = agent_driver.GetShareableHandle(memoryHandleIt->first, size, &shareable_handle);
if (status != HSA_STATUS_SUCCESS) {
return status;
}
drm_cpu_addr = reinterpret_cast<uint64_t>(va);
}
mapped_handle_map_.emplace(
std::piecewise_construct, std::forward_as_tuple(va),
@@ -3795,22 +3805,17 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() {
hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
#if defined(__linux__)
if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) {
void* mapped_ptr =
mmap(va, size, PermissionsToMmapFlags(perms), MAP_SHARED | MAP_FIXED, mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr));
if (mapped_ptr != va)
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
return HSA_STATUS_ERROR;
}
}
} else {
hsa_status_t status = targetAgent->driver().Map(
shareable_handle, va, mappedHandle->offset, size, perms);
if (status != HSA_STATUS_SUCCESS)
return status;
#else
assert(!"Unimplemented!");
#endif
}
permissions = perms;
return HSA_STATUS_SUCCESS;
@@ -3819,21 +3824,15 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
#if defined(__linux__)
if (munmap(va, size) != 0) return HSA_STATUS_ERROR;
/* We need to keep the CPU mapping. So change it to PROT_NONE */
void* mapped_ptr = mmap(va, mappedHandle->size, PROT_NONE, MAP_SHARED | MAP_FIXED,
mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr));
if (mapped_ptr != va)
hsa_access_permission_t perms = HSA_ACCESS_PERMISSION_NONE;
if (!rocr::os::UnmapMemory(va, size)) {
return HSA_STATUS_ERROR;
permissions = HSA_ACCESS_PERMISSION_NONE;
#else
assert(!"Unimplemented!");
#endif
}
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
return HSA_STATUS_ERROR;
}
permissions = perms;
}
} else {
return targetAgent->driver().Unmap(
@@ -3850,6 +3849,7 @@ Runtime::MappedHandle::MappedHandle(MemoryHandle *mem_handle, AddressHandle *add
shareable_handle(shareable_handle)
{
/* Create a CPU mapping with PROT_NONE */
#if defined(__linux__)
auto cpu_agent = static_cast<AMD::GpuAgent*>(agentOwner())->GetNearestCpuAgent();
auto agentPermsIt = allowed_agents.emplace(std::piecewise_construct,
std::forward_as_tuple(cpu_agent),
@@ -3860,6 +3860,7 @@ Runtime::MappedHandle::MappedHandle(MemoryHandle *mem_handle, AddressHandle *add
auto ret = agentPermsIt->second.EnableAccess(HSA_ACCESS_PERMISSION_NONE);
if (ret != HSA_STATUS_SUCCESS)
throw AMD::hsa_exception(ret, "Failed to create default CPU mapping");
#endif
}
// Note: VMemorySetAccessPerHandle should be called with &memory_lock_ held
+4
Просмотреть файл
@@ -390,6 +390,9 @@ namespace core {
HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)dlsym(thunk_handle, "hsaKmtAisReadWriteFile");
if (HSAKMT_PFN(hsaKmtAisReadWriteFile) == NULL) goto ERROR;
HSAKMT_PFN(hsaKmtGetMemoryHandle) = (HSAKMT_DEF(hsaKmtGetMemoryHandle)*)dlsym(thunk_handle, "hsaKmtGetMemoryHandle");
if (HSAKMT_PFN(hsaKmtGetMemoryHandle) == NULL) goto ERROR;
DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)dlsym(thunk_handle, "amdgpu_device_deinitialize");
if (DRM_PFN(amdgpu_device_deinitialize) == NULL) goto ERROR;
@@ -521,6 +524,7 @@ ERROR:
#endif
HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)(&hsaKmtModelEnabled);
HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)(&hsaKmtAisReadWriteFile);
HSAKMT_PFN(hsaKmtGetMemoryHandle) = (HSAKMT_DEF(hsaKmtGetMemoryHandle)*)(&hsaKmtGetMemoryHandle);
DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)(&amdgpu_device_initialize);
DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)(&amdgpu_device_deinitialize);
+10
Просмотреть файл
@@ -849,6 +849,16 @@ size_t PageSize() {
return g_page_size_;
}
/// Removes the mapping for [va, va + size) with munmap.
/// @return true when munmap reports success (returns 0), false otherwise.
bool UnmapMemory(void* va, size_t size) {
  const int rc = ::munmap(va, size);
  return rc == 0;
}
/// Maps @p fd at offset @p cpu_addr onto the fixed address @p va as a shared
/// mapping of @p size bytes, using the OS protection derived from @p perms.
/// @return true only when mmap placed the mapping exactly at @p va.
bool MapMemory(void* va, size_t size, MemProt perms, int fd, uint64_t cpu_addr) {
  const int os_prot = MemProtToOsProt(perms);
  const int map_flags = MAP_SHARED | MAP_FIXED;
  void* const result = mmap(va, size, os_prot, map_flags, fd, cpu_addr);
  // Any other return value (including MAP_FAILED) counts as failure.
  return result == va;
}
void* ReserveMemory(void* start, size_t size, size_t alignment, MemProt prot) {
size = AlignUp(size, PageSize());
// check for invalid input size
+16 -2
Просмотреть файл
@@ -50,9 +50,10 @@
#include <sys/mman.h>
#endif
#include "os.h"
namespace rocr {
#ifdef __linux__
#if defined(__linux__)
/// @brief Converts @ref hsa_access_permission_t to mmap memory protection
/// flags.
__forceinline int PermissionsToMmapFlags(hsa_access_permission_t perms) {
@@ -69,7 +70,20 @@ __forceinline int PermissionsToMmapFlags(hsa_access_permission_t perms) {
}
}
#endif
/// @brief Converts @ref hsa_access_permission_t to the os-layer memory
/// protection value (@ref rocr::os::MemProt).
__forceinline rocr::os::MemProt PermissionsToMemProt(hsa_access_permission_t perms) {
  if (perms == HSA_ACCESS_PERMISSION_RO) {
    return rocr::os::MEM_PROT_READ;
  }
  // Write-only requests are given the same read-write protection as RW.
  if (perms == HSA_ACCESS_PERMISSION_WO || perms == HSA_ACCESS_PERMISSION_RW) {
    return rocr::os::MEM_PROT_RW;
  }
  // HSA_ACCESS_PERMISSION_NONE and any unrecognized value map to no access.
  return rocr::os::MEM_PROT_NONE;
}
} // namespace rocr
#endif // HSA_RUNTIME_CORE_UTIL_MEMORY_H_
+3
Просмотреть файл
@@ -351,6 +351,9 @@ bool ReleaseMemory(void* addr, size_t size);
bool CommitMemory(void* addr, size_t size, MemProt prot = MEM_PROT_NONE);
/// Uncommit a chunk of memory previously committed with commitMemory.
bool UncommitMemory(void* addr, size_t size);
/// Removes the mapping of the region [addr, addr + size) from the virtual
/// address space. Returns true on success.
bool UnmapMemory(void* addr, size_t size);
/// Establishes the mapping/protection for the region at addr with the given
/// protection. fd and cpu_addr identify the backing object and offset; they
/// are used by the mmap-based implementation and ignored by the
/// VirtualProtect-based one. Returns true on success.
bool MapMemory(void* addr, size_t size, MemProt prot, int fd, uint64_t cpu_addr);
uint64_t HostTotalPhysicalMemory();
+8
Просмотреть файл
@@ -459,6 +459,14 @@ uint64_t HostTotalPhysicalMemory() {
return totalPhys;
}
/// Frees the region at addr via VirtualFree(MEM_RELEASE); returns true when
/// VirtualFree reports success.
/// NOTE(review): the Win32 documentation for VirtualFree says dwSize must be 0
/// when dwFreeType is MEM_RELEASE, so passing a non-zero size here looks like
/// it will always fail - confirm the intended free type / size with callers.
bool UnmapMemory(void* addr, size_t size) { return VirtualFree(addr, size, MEM_RELEASE) != 0; }
bool MapMemory(void* addr, size_t size, MemProt perms, int fd [[maybe_unused]],
uint64_t cpu_addr [[maybe_unused]]) {
DWORD OldProtect;
return VirtualProtect(addr, size, memProtToOsProt(perms), &OldProtect) != 0;
}
int Ffs(int i) {
int res = 0;
unsigned long index;