diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h index a7b344a2ce..a0dcdce09f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h @@ -365,6 +365,12 @@ class ThunkLoader { uint64_t flags, \ uint32_t op); + typedef int (DRM_DEF(amdgpu_bo_query_info))(amdgpu_bo_handle bo, \ + struct amdgpu_bo_info* info); + + typedef int (DRM_DEF(amdgpu_bo_set_metadata))(amdgpu_bo_handle bo, \ + struct amdgpu_bo_metadata* info); + typedef int (DRM_DEF(drmCommandWriteRead))(int fd, \ unsigned long drmCommandIndex, \ void *data, \ @@ -483,6 +489,8 @@ class ThunkLoader { DRM_DEF(amdgpu_bo_export)* DRM_PFN(amdgpu_bo_export); DRM_DEF(amdgpu_bo_import)* DRM_PFN(amdgpu_bo_import); DRM_DEF(amdgpu_bo_va_op)* DRM_PFN(amdgpu_bo_va_op); + DRM_DEF(amdgpu_bo_query_info)* DRM_PFN(amdgpu_bo_query_info); + DRM_DEF(amdgpu_bo_set_metadata)* DRM_PFN(amdgpu_bo_set_metadata); DRM_DEF(drmCommandWriteRead)* DRM_PFN(drmCommandWriteRead); private: diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 764dcc10a3..172e3d26b2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -378,13 +378,15 @@ hsa_status_t Runtime::FreeMemory(void* ptr) { //clear the set metadata here if possible if theres an existing ldrm_bo if (it->second.ldrm_bo) { #if defined(__linux__) - struct amdgpu_bo_info info = {0}; - auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info); + if (!thunkLoader()->IsDXG()) { + struct amdgpu_bo_info info = {0}; + auto err = DRM_CALL(amdgpu_bo_query_info(it->second.ldrm_bo, &info)); - //clear metadata - amdgpu_bo_metadata zero_metadata = {0}; - memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); - amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata); + //clear metadata + amdgpu_bo_metadata zero_metadata = {0}; + memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); + DRM_CALL(amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata)); + } #else assert(!"Unimplemented!"); #endif @@ -1404,29 +1406,31 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han if (agent->device_type() == Agent::kAmdGpuDevice) { #if defined(__linux__) - AMD::GpuAgent* agent_ = reinterpret_cast(agent); - amdgpu_bo_import_result res; + if (!thunkLoader()->IsDXG()) { + AMD::GpuAgent* agent_ = reinterpret_cast(agent); + amdgpu_bo_import_result res; - srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); - handle->handle[7] = rand(); + srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); + handle->handle[7] = rand(); - //libdrm import for buffer object handle - if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) { - fprintf(stderr, "Error in amdgpu_bo_import\n"); - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } + //libdrm import for buffer object handle + if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) { + fprintf(stderr, "Error in amdgpu_bo_import\n"); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } - //query buffer object for pre existing metadata - struct amdgpu_bo_info info = {0}; - if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) { - handle->handle[7] = info.metadata.umd_metadata[0]; - } else { - amdgpu_bo_metadata buf_info = {0}; - buf_info.size_metadata = sizeof(uint32_t); - buf_info.umd_metadata[0] = handle->handle[7]; + //query buffer object for pre existing metadata + struct amdgpu_bo_info info = {0}; + if (!DRM_CALL(amdgpu_bo_query_info(res.buf_handle, &info)) && !!info.metadata.size_metadata) { + handle->handle[7] = info.metadata.umd_metadata[0]; + } else { + amdgpu_bo_metadata buf_info = {0}; + buf_info.size_metadata = sizeof(uint32_t); + buf_info.umd_metadata[0] = handle->handle[7]; - amdgpu_bo_set_metadata(res.buf_handle, &buf_info); - allocation_map_[ptr].ldrm_bo = res.buf_handle; + DRM_CALL(amdgpu_bo_set_metadata(res.buf_handle, &buf_info)); + allocation_map_[ptr].ldrm_bo = res.buf_handle; + } } #else assert(!"Unimplemented!"); @@ -1593,15 +1597,17 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT; if (ipc_dmabuf_supported_ && !isSysMem) { #if defined(__linux__) - // use the bo from the allocation map - // Only check metadata for GPU memory - struct amdgpu_bo_info info = {0}; - int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info); + if (!thunkLoader()->IsDXG()) { + // use the bo from the allocation map + // Only check metadata for GPU memory + struct amdgpu_bo_info info = {0}; + int ret = DRM_CALL(amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info)); - // Validate metadata for IPC handle - if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { - fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); - return HSA_STATUS_ERROR_INVALID_ARGUMENT; + // Validate metadata for IPC handle + if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { + fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } } #else assert(!"Unimplemented!"); @@ -3790,11 +3796,13 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() { hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) { if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) { #if defined(__linux__) - void* mapped_ptr = + if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) { + void* mapped_ptr = mmap(va, size, PermissionsToMmapFlags(perms), MAP_SHARED | MAP_FIXED, mappedHandle->drm_fd, reinterpret_cast(mappedHandle->drm_cpu_addr)); - if (mapped_ptr != va) - return HSA_STATUS_ERROR; + if (mapped_ptr != va) + return HSA_STATUS_ERROR; + } } else { hsa_status_t status = targetAgent->driver().Map( shareable_handle, va, mappedHandle->offset, size, perms); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp index 5afdd14195..127f4f743c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp @@ -411,6 +411,12 @@ namespace core { DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)dlsym(thunk_handle, "amdgpu_bo_va_op"); if (DRM_PFN(amdgpu_bo_va_op) == NULL) goto ERROR; + DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)dlsym(thunk_handle, "amdgpu_bo_query_info"); + if (DRM_PFN(amdgpu_bo_query_info) == NULL) goto ERROR; + + DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)dlsym(thunk_handle, "amdgpu_bo_set_metadata"); + if (DRM_PFN(amdgpu_bo_set_metadata) == NULL) goto ERROR; + DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)dlsym(thunk_handle, "drmCommandWriteRead"); if (DRM_PFN(drmCommandWriteRead) == NULL) goto ERROR; debug_print("Load all DTIF APIs OK!\n"); @@ -524,6 +530,8 @@ ERROR: DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)(&amdgpu_bo_export); DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)(&amdgpu_bo_import); DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)(&amdgpu_bo_va_op); + DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)(&amdgpu_bo_query_info); + DRM_PFN(amdgpu_bo_set_metadata) = (DRM_DEF(amdgpu_bo_set_metadata)*)(&amdgpu_bo_set_metadata); #if defined(__linux__) DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)(&drmCommandWriteRead); #endif