Query IPC handles on shared memory export/import for any metadata as a
means to uniquely identify handles that happen to be backed by buffers
that point to the same memory.
Этот коммит содержится в:
Sunday Clement
2025-10-16 05:07:02 -04:00
коммит произвёл GitHub
родитель f2ccc96cfd
Коммит c23c320b4d
2 изменённых файлов: 84 добавлений и 25 удалений
+1 -2
Просмотреть файл
@@ -915,9 +915,8 @@ class Runtime {
void InitIPCDmaBufSupport();
bool ipc_dmabuf_supported_;
int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
amdgpu_bo_import_result *res,
unsigned int numNodes, HSAuint32 *nodes,
void **importAddress, HSAuint64 *importSize);
void **importAddress, HSAuint64 *importSize, bool isdmabufSysmem);
};
} // namespace core
+83 -23
Просмотреть файл
@@ -365,6 +365,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
notifiers = std::move(it->second.notifiers);
//track the exporter BO to clear meta data via set_metadata
//clear the set metadata here if possible if theres an existing ldrm_bo
if (it->second.ldrm_bo) {
struct amdgpu_bo_info info = {0};
auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info);
//clear metadata
amdgpu_bo_metadata zero_metadata = {0};
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
}
allocation_map_.erase(it);
}
@@ -1380,6 +1392,34 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
hsa_status_t err = agent->driver().ExportDMABuf(ptr, len, &dmabuf_fd, &dmabufOffset);
assert(dmabufOffset/pageSize == fragOffset && "DMA Buf inconsistent with pointer offset.");
if (err != HSA_STATUS_SUCCESS) return err;
if (agent->device_type() == Agent::kAmdGpuDevice) {
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
amdgpu_bo_import_result res;
srand(static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
handle->handle[7] = rand();
//libdrm import for buffer object handle
if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
fprintf(stderr, "Error in amdgpu_bo_import\n");
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
//query buffer object for pre existing metadata
struct amdgpu_bo_info info = {0};
if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) {
handle->handle[7] = info.metadata.umd_metadata[0];
} else {
amdgpu_bo_metadata buf_info = {0};
buf_info.size_metadata = sizeof(uint32_t);
buf_info.umd_metadata[0] = handle->handle[7];
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
allocation_map_[ptr].ldrm_bo = res.buf_handle;
}
}
close(dmabuf_fd);
ScopedAcquire<KernelMutex> lock(&ipc_sock_server_lock_);
@@ -1426,9 +1466,8 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
}
int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
amdgpu_bo_import_result *res,
unsigned int numNodes, HSAuint32 *nodes,
void **importAddress, HSAuint64 *importSize) {
void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem) {
int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!");
if (socket_fd == -1) return -1;
@@ -1475,20 +1514,28 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
dmabuf_fd = ReceiveDmaBufFd(socket_fd);
if (dmabuf_fd == -1) return -1;
amdgpu_bo_import_result res = {0};
HsaGraphicsResourceInfo info;
HSA_REGISTER_MEM_FLAGS regFlags;
regFlags.ui32.requiresVAddr = !!res ? 0 : 1;
regFlags.ui32.requiresVAddr = !isDmabufSysmem;
int err = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodesExt(dmabuf_fd, &info, numNodes, nodes, regFlags));
if (err == HSAKMT_STATUS_SUCCESS) {
*importAddress = info.MemoryAddress;
*importSize = info.SizeInBytes;
if (res) {
if (isDmabufSysmem)
HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress));
// Manually libDRM import and GPU map system memory
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
dmabuf_fd, res));
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
dmabuf_fd, &res));
// Store the buffer object handle in allocation map for later use
if (err == HSAKMT_STATUS_SUCCESS) {
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
allocation_map_[*importAddress] =
AllocationRegion(nullptr, *importSize, *importSize, core::MemoryRegion::AllocateNoFlags);
allocation_map_[*importAddress].ldrm_bo = res.buf_handle;
}
close(dmabuf_fd);
}
@@ -1521,17 +1568,30 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
allocation_map_[importAddress].ldrm_bo = ldrm_bo;
};
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes,
amdgpu_bo_import_result *res) {
int ret = ipc_dmabuf_supported_ ?
IPCClientImport(importHandle.handle[2], dmaBufFDHandle, res,
numNodes, nodes, &importAddress, &importSize) :
HSAKMT_CALL(hsaKmtRegisterSharedHandle(reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
&importAddress, &importSize));
if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, bool isSysMem) {
return HSA_STATUS_SUCCESS;
};
int ret = ipc_dmabuf_supported_ ? IPCClientImport(importHandle.handle[2], dmaBufFDHandle, numNodes,
nodes, &importAddress, &importSize, isSysMem) :
HSAKMT_CALL(hsaKmtRegisterSharedHandle(
reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
&importAddress, &importSize
));
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if (ipc_dmabuf_supported_ && !isSysMem) {
// use the bo from the allocation map
// Only check metadata for GPU memory
struct amdgpu_bo_info info = {0};
int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info);
// Validate metadata for IPC handle
if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) {
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}
return HSA_STATUS_SUCCESS;
};
auto mapMemoryToNodes = [&](unsigned int numNodes, HSAuint32 *nodes) {
HSAuint64 altAddress;
@@ -1573,10 +1633,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
#if defined(__linux__)
if (num_agents == 0) {
amdgpu_bo_import_result res;
bool isDmabufSysMem = ipc_dmabuf_supported_ && importHandle.handle[3];
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem ? &res : NULL);
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem);
if (err != HSA_STATUS_SUCCESS) return err;
if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL);
@@ -1589,7 +1648,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
// Create a shared cpu access pointer for user
void *cpuPtr;
amdgpu_bo_handle bo = res.buf_handle;
amdgpu_bo_handle bo = allocation_map_[importAddress].ldrm_bo;
int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr));
if (ret) return errCleanup(bo);
@@ -1620,8 +1679,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
for (uint32_t i = 0; i < num_agents; i++)
agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]);
hsa_status_t err = importMemory(num_agents, nodes, NULL);
hsa_status_t err = importMemory(num_agents, nodes, false);
if (err != HSA_STATUS_SUCCESS) return err;
return mapMemoryToNodes(num_agents, nodes);
}
@@ -2217,7 +2277,7 @@ void Runtime::Unload() {
// Close IPC socket server
if (ipc_sock_server_conns_.size())
IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE,
NULL, 0, NULL, NULL, NULL);
0, NULL, NULL, NULL, false);
svm_profile_.reset(nullptr);