rocr: Add WSL support by conditionally handling DRM operations (#2081)

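This patch enhances compatibility for DXG (WSL) environments by guarding libdrm
buffer object operations with thunkLoader()->IsDXG() checks. On DXG, the buffer
object import and metadata query/set in IPCCreate, the metadata clearing in
FreeMemory, the metadata validation in IPCAttach, and the mmap in
MappedHandleAllowedAgent::EnableAccess are skipped. amdgpu_bo_query_info and
amdgpu_bo_set_metadata are now resolved through ThunkLoader and invoked via
DRM_CALL. These changes improve robustness in DXG IPC memory management without
impacting existing functionality in standard Linux environments.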

Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
此提交包含在:
hongkzha-amd
2025-12-05 07:50:48 +08:00
提交者 GitHub
父節點 092ca13f4f
當前提交 4bd1b90e62
共有 3 個檔案被更改,包括 60 行新增,36 行删除
+8
查看文件
@@ -365,6 +365,12 @@ class ThunkLoader {
uint64_t flags, \
uint32_t op);
typedef int (DRM_DEF(amdgpu_bo_query_info))(amdgpu_bo_handle bo, \
struct amdgpu_bo_info* info);
typedef int (DRM_DEF(amdgpu_bo_set_metadata))(amdgpu_bo_handle bo, \
struct amdgpu_bo_metadata* info);
typedef int (DRM_DEF(drmCommandWriteRead))(int fd, \
unsigned long drmCommandIndex, \
void *data, \
@@ -483,6 +489,8 @@ class ThunkLoader {
DRM_DEF(amdgpu_bo_export)* DRM_PFN(amdgpu_bo_export);
DRM_DEF(amdgpu_bo_import)* DRM_PFN(amdgpu_bo_import);
DRM_DEF(amdgpu_bo_va_op)* DRM_PFN(amdgpu_bo_va_op);
DRM_DEF(amdgpu_bo_query_info)* DRM_PFN(amdgpu_bo_query_info);
DRM_DEF(amdgpu_bo_set_metadata)* DRM_PFN(amdgpu_bo_set_metadata);
DRM_DEF(drmCommandWriteRead)* DRM_PFN(drmCommandWriteRead);
private:
+44 -36
查看文件
@@ -378,13 +378,15 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
//clear the set metadata here if possible if theres an existing ldrm_bo
if (it->second.ldrm_bo) {
#if defined(__linux__)
struct amdgpu_bo_info info = {0};
auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info);
if (!thunkLoader()->IsDXG()) {
struct amdgpu_bo_info info = {0};
auto err = DRM_CALL(amdgpu_bo_query_info(it->second.ldrm_bo, &info));
//clear metadata
amdgpu_bo_metadata zero_metadata = {0};
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
//clear metadata
amdgpu_bo_metadata zero_metadata = {0};
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
DRM_CALL(amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata));
}
#else
assert(!"Unimplemented!");
#endif
@@ -1404,29 +1406,31 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
if (agent->device_type() == Agent::kAmdGpuDevice) {
#if defined(__linux__)
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
amdgpu_bo_import_result res;
if (!thunkLoader()->IsDXG()) {
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
amdgpu_bo_import_result res;
srand(static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
handle->handle[7] = rand();
srand(static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
handle->handle[7] = rand();
//libdrm import for buffer object handle
if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
fprintf(stderr, "Error in amdgpu_bo_import\n");
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
//libdrm import for buffer object handle
if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
fprintf(stderr, "Error in amdgpu_bo_import\n");
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
//query buffer object for pre existing metadata
struct amdgpu_bo_info info = {0};
if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) {
handle->handle[7] = info.metadata.umd_metadata[0];
} else {
amdgpu_bo_metadata buf_info = {0};
buf_info.size_metadata = sizeof(uint32_t);
buf_info.umd_metadata[0] = handle->handle[7];
//query buffer object for pre existing metadata
struct amdgpu_bo_info info = {0};
if (!DRM_CALL(amdgpu_bo_query_info(res.buf_handle, &info)) && !!info.metadata.size_metadata) {
handle->handle[7] = info.metadata.umd_metadata[0];
} else {
amdgpu_bo_metadata buf_info = {0};
buf_info.size_metadata = sizeof(uint32_t);
buf_info.umd_metadata[0] = handle->handle[7];
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
allocation_map_[ptr].ldrm_bo = res.buf_handle;
DRM_CALL(amdgpu_bo_set_metadata(res.buf_handle, &buf_info));
allocation_map_[ptr].ldrm_bo = res.buf_handle;
}
}
#else
assert(!"Unimplemented!");
@@ -1593,15 +1597,17 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if (ipc_dmabuf_supported_ && !isSysMem) {
#if defined(__linux__)
// use the bo from the allocation map
// Only check metadata for GPU memory
struct amdgpu_bo_info info = {0};
int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info);
if (!thunkLoader()->IsDXG()) {
// use the bo from the allocation map
// Only check metadata for GPU memory
struct amdgpu_bo_info info = {0};
int ret = DRM_CALL(amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info));
// Validate metadata for IPC handle
if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) {
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
// Validate metadata for IPC handle
if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) {
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}
#else
assert(!"Unimplemented!");
@@ -3790,11 +3796,13 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() {
hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
#if defined(__linux__)
void* mapped_ptr =
if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) {
void* mapped_ptr =
mmap(va, size, PermissionsToMmapFlags(perms), MAP_SHARED | MAP_FIXED, mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr));
if (mapped_ptr != va)
return HSA_STATUS_ERROR;
if (mapped_ptr != va)
return HSA_STATUS_ERROR;
}
} else {
hsa_status_t status = targetAgent->driver().Map(
shareable_handle, va, mappedHandle->offset, size, perms);
+8
查看文件
@@ -411,6 +411,12 @@ namespace core {
DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)dlsym(thunk_handle, "amdgpu_bo_va_op");
if (DRM_PFN(amdgpu_bo_va_op) == NULL) goto ERROR;
DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)dlsym(thunk_handle, "amdgpu_bo_query_info");
if (DRM_PFN(amdgpu_bo_query_info) == NULL) goto ERROR;
DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)dlsym(thunk_handle, "amdgpu_bo_set_metadata");
if (DRM_PFN(amdgpu_bo_set_metadata) == NULL) goto ERROR;
DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)dlsym(thunk_handle, "drmCommandWriteRead");
if (DRM_PFN(drmCommandWriteRead) == NULL) goto ERROR;
debug_print("Load all DTIF APIs OK!\n");
@@ -524,6 +530,8 @@ ERROR:
DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)(&amdgpu_bo_export);
DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)(&amdgpu_bo_import);
DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)(&amdgpu_bo_va_op);
DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)(&amdgpu_bo_query_info);
DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)(&amdgpu_bo_set_metadata);
#if defined(__linux__)
DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)(&drmCommandWriteRead);
#endif