diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index 44c1952730..24715d4c24 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -915,9 +915,8 @@ class Runtime { void InitIPCDmaBufSupport(); bool ipc_dmabuf_supported_; int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle, - amdgpu_bo_import_result *res, unsigned int numNodes, HSAuint32 *nodes, - void **importAddress, HSAuint64 *importSize); + void **importAddress, HSAuint64 *importSize, bool isdmabufSysmem); }; } // namespace core diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index e4b923ac21..da7be89994 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -365,6 +365,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) { notifiers = std::move(it->second.notifiers); + //track the exporter BO to clear meta data via set_metadata + //clear the set metadata here if possible if theres an existing ldrm_bo + if (it->second.ldrm_bo) { + struct amdgpu_bo_info info = {0}; + auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info); + + //clear metadata + amdgpu_bo_metadata zero_metadata = {0}; + memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); + amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata); + } + allocation_map_.erase(it); } @@ -1380,6 +1392,34 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han hsa_status_t err = agent->driver().ExportDMABuf(ptr, len, &dmabuf_fd, &dmabufOffset); assert(dmabufOffset/pageSize == fragOffset && "DMA Buf inconsistent with pointer offset."); if (err != HSA_STATUS_SUCCESS) return err; + + if (agent->device_type() == Agent::kAmdGpuDevice) { + AMD::GpuAgent* agent_ = reinterpret_cast(agent); + amdgpu_bo_import_result res; + + srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); + handle->handle[7] = rand(); + + //libdrm import for buffer object handle + if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) { + fprintf(stderr, "Error in amdgpu_bo_import\n"); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + //query buffer object for pre existing metadata + struct amdgpu_bo_info info = {0}; + if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) { + handle->handle[7] = info.metadata.umd_metadata[0]; + } else { + amdgpu_bo_metadata buf_info = {0}; + buf_info.size_metadata = sizeof(uint32_t); + buf_info.umd_metadata[0] = handle->handle[7]; + + amdgpu_bo_set_metadata(res.buf_handle, &buf_info); + allocation_map_[ptr].ldrm_bo = res.buf_handle; + } + } + close(dmabuf_fd); ScopedAcquire lock(&ipc_sock_server_lock_); @@ -1426,9 +1466,8 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han } int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle, - amdgpu_bo_import_result *res, unsigned int numNodes, HSAuint32 *nodes, - void **importAddress, HSAuint64 *importSize) { + void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem) { int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0); assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!"); if (socket_fd == -1) return -1; @@ -1475,20 +1514,28 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle, dmabuf_fd = ReceiveDmaBufFd(socket_fd); if (dmabuf_fd == -1) return -1; + amdgpu_bo_import_result res = {0}; HsaGraphicsResourceInfo info; HSA_REGISTER_MEM_FLAGS regFlags; - regFlags.ui32.requiresVAddr = !!res ? 0 : 1; + regFlags.ui32.requiresVAddr = !isDmabufSysmem; int err = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodesExt(dmabuf_fd, &info, numNodes, nodes, regFlags)); if (err == HSAKMT_STATUS_SUCCESS) { *importAddress = info.MemoryAddress; *importSize = info.SizeInBytes; - if (res) { + + if (isDmabufSysmem) HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress)); - // Manually libDRM import and GPU map system memory - AMD::GpuAgent* agent = reinterpret_cast(agents_by_node_[info.NodeId][0]); - err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, - dmabuf_fd, res)); + AMD::GpuAgent* agent = reinterpret_cast(agents_by_node_[info.NodeId][0]); + err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, + dmabuf_fd, &res)); + + // Store the buffer object handle in allocation map for later use + if (err == HSAKMT_STATUS_SUCCESS) { + ScopedAcquire lock(&memory_lock_); + allocation_map_[*importAddress] = + AllocationRegion(nullptr, *importSize, *importSize, core::MemoryRegion::AllocateNoFlags); + allocation_map_[*importAddress].ldrm_bo = res.buf_handle; } close(dmabuf_fd); } @@ -1521,17 +1568,30 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, allocation_map_[importAddress].ldrm_bo = ldrm_bo; }; - auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, - amdgpu_bo_import_result *res) { - int ret = ipc_dmabuf_supported_ ? - IPCClientImport(importHandle.handle[2], dmaBufFDHandle, res, - numNodes, nodes, &importAddress, &importSize) : - HSAKMT_CALL(hsaKmtRegisterSharedHandle(reinterpret_cast(&importHandle), - &importAddress, &importSize)); - if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT; + auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, bool isSysMem) { - return HSA_STATUS_SUCCESS; - }; + int ret = ipc_dmabuf_supported_ ? IPCClientImport(importHandle.handle[2], dmaBufFDHandle, numNodes, + nodes, &importAddress, &importSize, isSysMem) : + HSAKMT_CALL(hsaKmtRegisterSharedHandle( + reinterpret_cast(&importHandle), + &importAddress, &importSize + )); + + if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT; + if (ipc_dmabuf_supported_ && !isSysMem) { + // use the bo from the allocation map + // Only check metadata for GPU memory + struct amdgpu_bo_info info = {0}; + int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info); + + // Validate metadata for IPC handle + if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { + fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + } + return HSA_STATUS_SUCCESS; + }; auto mapMemoryToNodes = [&](unsigned int numNodes, HSAuint32 *nodes) { HSAuint64 altAddress; @@ -1573,10 +1633,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, #if defined(__linux__) if (num_agents == 0) { - amdgpu_bo_import_result res; bool isDmabufSysMem = ipc_dmabuf_supported_ && importHandle.handle[3]; - hsa_status_t err = importMemory(0, NULL, isDmabufSysMem ? &res : NULL); + hsa_status_t err = importMemory(0, NULL, isDmabufSysMem); if (err != HSA_STATUS_SUCCESS) return err; if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL); @@ -1589,7 +1648,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, // Create a shared cpu access pointer for user void *cpuPtr; - amdgpu_bo_handle bo = res.buf_handle; + amdgpu_bo_handle bo = allocation_map_[importAddress].ldrm_bo; int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr)); if (ret) return errCleanup(bo); @@ -1620,8 +1679,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, for (uint32_t i = 0; i < num_agents; i++) agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]); - hsa_status_t err = importMemory(num_agents, nodes, NULL); + hsa_status_t err = importMemory(num_agents, nodes, false); if (err != HSA_STATUS_SUCCESS) return err; + return mapMemoryToNodes(num_agents, nodes); } @@ -2217,7 +2277,7 @@ void Runtime::Unload() { // Close IPC socket server if (ipc_sock_server_conns_.size()) IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE, - NULL, 0, NULL, NULL, NULL); + 0, NULL, NULL, NULL, false); svm_profile_.reset(nullptr);