From 4bd1b90e629c909a89a50aad9099e105766ebbde Mon Sep 17 00:00:00 2001 From: hongkzha-amd Date: Fri, 5 Dec 2025 07:50:48 +0800 Subject: [PATCH] rocr: Add WSL support by conditionally handling DRM operations (#2081) This patch enhances compatibility for DXG environments by introducing conditional checks for DRM operations, particularly around buffer object metadata handling in IPC scenarios. These changes improve robustness in DXG IPC memory management without impacting existing functionality in standard Linux environments. Signed-off-by: Horatio Zhang --- .../hsa-runtime/core/inc/thunk_loader.h | 8 ++ .../hsa-runtime/core/runtime/runtime.cpp | 80 ++++++++++--------- .../hsa-runtime/core/runtime/thunk_loader.cpp | 8 ++ 3 files changed, 60 insertions(+), 36 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h index a7b344a2ce..a0dcdce09f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h @@ -365,6 +365,12 @@ class ThunkLoader { uint64_t flags, \ uint32_t op); + typedef int (DRM_DEF(amdgpu_bo_query_info))(amdgpu_bo_handle bo, \ + struct amdgpu_bo_info* info); + + typedef int (DRM_DEF(amdgpu_bo_set_metadata))(amdgpu_bo_handle bo, \ + struct amdgpu_bo_metadata* info); + typedef int (DRM_DEF(drmCommandWriteRead))(int fd, \ unsigned long drmCommandIndex, \ void *data, \ @@ -483,6 +489,8 @@ class ThunkLoader { DRM_DEF(amdgpu_bo_export)* DRM_PFN(amdgpu_bo_export); DRM_DEF(amdgpu_bo_import)* DRM_PFN(amdgpu_bo_import); DRM_DEF(amdgpu_bo_va_op)* DRM_PFN(amdgpu_bo_va_op); + DRM_DEF(amdgpu_bo_query_info)* DRM_PFN(amdgpu_bo_query_info); + DRM_DEF(amdgpu_bo_set_metadata)* DRM_PFN(amdgpu_bo_set_metadata); DRM_DEF(drmCommandWriteRead)* DRM_PFN(drmCommandWriteRead); private: diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 764dcc10a3..172e3d26b2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -378,13 +378,15 @@ hsa_status_t Runtime::FreeMemory(void* ptr) { //clear the set metadata here if possible if theres an existing ldrm_bo if (it->second.ldrm_bo) { #if defined(__linux__) - struct amdgpu_bo_info info = {0}; - auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info); + if (!thunkLoader()->IsDXG()) { + struct amdgpu_bo_info info = {0}; + auto err = DRM_CALL(amdgpu_bo_query_info(it->second.ldrm_bo, &info)); - //clear metadata - amdgpu_bo_metadata zero_metadata = {0}; - memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); - amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata); + //clear metadata + amdgpu_bo_metadata zero_metadata = {0}; + memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); + DRM_CALL(amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata)); + } #else assert(!"Unimplemented!"); #endif @@ -1404,29 +1406,31 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han if (agent->device_type() == Agent::kAmdGpuDevice) { #if defined(__linux__) - AMD::GpuAgent* agent_ = reinterpret_cast(agent); - amdgpu_bo_import_result res; + if (!thunkLoader()->IsDXG()) { + AMD::GpuAgent* agent_ = reinterpret_cast(agent); + amdgpu_bo_import_result res; - srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); - handle->handle[7] = rand(); + srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); + handle->handle[7] = rand(); - //libdrm import for buffer object handle - if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) { - fprintf(stderr, "Error in amdgpu_bo_import\n"); - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } + //libdrm import for buffer object handle + if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) { + fprintf(stderr, "Error in amdgpu_bo_import\n"); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } - //query buffer object for pre existing metadata - struct amdgpu_bo_info info = {0}; - if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) { - handle->handle[7] = info.metadata.umd_metadata[0]; - } else { - amdgpu_bo_metadata buf_info = {0}; - buf_info.size_metadata = sizeof(uint32_t); - buf_info.umd_metadata[0] = handle->handle[7]; + //query buffer object for pre existing metadata + struct amdgpu_bo_info info = {0}; + if (!DRM_CALL(amdgpu_bo_query_info(res.buf_handle, &info)) && !!info.metadata.size_metadata) { + handle->handle[7] = info.metadata.umd_metadata[0]; + } else { + amdgpu_bo_metadata buf_info = {0}; + buf_info.size_metadata = sizeof(uint32_t); + buf_info.umd_metadata[0] = handle->handle[7]; - amdgpu_bo_set_metadata(res.buf_handle, &buf_info); - allocation_map_[ptr].ldrm_bo = res.buf_handle; + DRM_CALL(amdgpu_bo_set_metadata(res.buf_handle, &buf_info)); + allocation_map_[ptr].ldrm_bo = res.buf_handle; + } } #else assert(!"Unimplemented!"); @@ -1593,15 +1597,17 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT; if (ipc_dmabuf_supported_ && !isSysMem) { #if defined(__linux__) - // use the bo from the allocation map - // Only check metadata for GPU memory - struct amdgpu_bo_info info = {0}; - int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info); + if (!thunkLoader()->IsDXG()) { + // use the bo from the allocation map + // Only check metadata for GPU memory + struct amdgpu_bo_info info = {0}; + int ret = DRM_CALL(amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info)); - // Validate metadata for IPC handle - if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { - fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); - return HSA_STATUS_ERROR_INVALID_ARGUMENT; + // Validate metadata for IPC handle + if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { + fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } } #else assert(!"Unimplemented!"); @@ -3790,11 +3796,13 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() { hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) { if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) { #if defined(__linux__) - void* mapped_ptr = + if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) { + void* mapped_ptr = mmap(va, size, PermissionsToMmapFlags(perms), MAP_SHARED | MAP_FIXED, mappedHandle->drm_fd, reinterpret_cast(mappedHandle->drm_cpu_addr)); - if (mapped_ptr != va) - return HSA_STATUS_ERROR; + if (mapped_ptr != va) + return HSA_STATUS_ERROR; + } } else { hsa_status_t status = targetAgent->driver().Map( shareable_handle, va, mappedHandle->offset, size, perms); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp index 5afdd14195..127f4f743c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp @@ -411,6 +411,12 @@ namespace core { DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)dlsym(thunk_handle, "amdgpu_bo_va_op"); if (DRM_PFN(amdgpu_bo_va_op) == NULL) goto ERROR; + DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)dlsym(thunk_handle, "amdgpu_bo_query_info"); + if (DRM_PFN(amdgpu_bo_query_info) == NULL) goto ERROR; + + DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)dlsym(thunk_handle, "amdgpu_bo_set_metadata"); + if (DRM_PFN(amdgpu_bo_set_metadata) == NULL) goto ERROR; + DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)dlsym(thunk_handle, "drmCommandWriteRead"); if (DRM_PFN(drmCommandWriteRead) == NULL) goto ERROR; debug_print("Load all DTIF APIs OK!\n"); @@ -524,6 +530,8 @@ ERROR: DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)(&amdgpu_bo_export); DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)(&amdgpu_bo_import); DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)(&amdgpu_bo_va_op); + DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)(&amdgpu_bo_query_info); + DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)(&amdgpu_bo_set_metadata); #if defined(__linux__) DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)(&drmCommandWriteRead); #endif