rocr: Make IPC Handles Unique (#795)
Query IPC handles on shared memory export/import for any metadata as a means to uniquely identify handles that happen to be backed by buffers that point to the same memory.
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
f2ccc96cfd
Коммит
c23c320b4d
@@ -915,9 +915,8 @@ class Runtime {
|
||||
void InitIPCDmaBufSupport();
|
||||
bool ipc_dmabuf_supported_;
|
||||
int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
|
||||
amdgpu_bo_import_result *res,
|
||||
unsigned int numNodes, HSAuint32 *nodes,
|
||||
void **importAddress, HSAuint64 *importSize);
|
||||
void **importAddress, HSAuint64 *importSize, bool isdmabufSysmem);
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
|
||||
@@ -365,6 +365,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
|
||||
|
||||
notifiers = std::move(it->second.notifiers);
|
||||
|
||||
//track the exporter BO to clear meta data via set_metadata
|
||||
//clear the set metadata here if possible if theres an existing ldrm_bo
|
||||
if (it->second.ldrm_bo) {
|
||||
struct amdgpu_bo_info info = {0};
|
||||
auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info);
|
||||
|
||||
//clear metadata
|
||||
amdgpu_bo_metadata zero_metadata = {0};
|
||||
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
|
||||
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
|
||||
}
|
||||
|
||||
allocation_map_.erase(it);
|
||||
}
|
||||
|
||||
@@ -1380,6 +1392,34 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
|
||||
hsa_status_t err = agent->driver().ExportDMABuf(ptr, len, &dmabuf_fd, &dmabufOffset);
|
||||
assert(dmabufOffset/pageSize == fragOffset && "DMA Buf inconsistent with pointer offset.");
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
if (agent->device_type() == Agent::kAmdGpuDevice) {
|
||||
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
|
||||
amdgpu_bo_import_result res;
|
||||
|
||||
srand(static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
|
||||
handle->handle[7] = rand();
|
||||
|
||||
//libdrm import for buffer object handle
|
||||
if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
|
||||
fprintf(stderr, "Error in amdgpu_bo_import\n");
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
//query buffer object for pre existing metadata
|
||||
struct amdgpu_bo_info info = {0};
|
||||
if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) {
|
||||
handle->handle[7] = info.metadata.umd_metadata[0];
|
||||
} else {
|
||||
amdgpu_bo_metadata buf_info = {0};
|
||||
buf_info.size_metadata = sizeof(uint32_t);
|
||||
buf_info.umd_metadata[0] = handle->handle[7];
|
||||
|
||||
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
|
||||
allocation_map_[ptr].ldrm_bo = res.buf_handle;
|
||||
}
|
||||
}
|
||||
|
||||
close(dmabuf_fd);
|
||||
|
||||
ScopedAcquire<KernelMutex> lock(&ipc_sock_server_lock_);
|
||||
@@ -1426,9 +1466,8 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
|
||||
}
|
||||
|
||||
int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
|
||||
amdgpu_bo_import_result *res,
|
||||
unsigned int numNodes, HSAuint32 *nodes,
|
||||
void **importAddress, HSAuint64 *importSize) {
|
||||
void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem) {
|
||||
int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!");
|
||||
if (socket_fd == -1) return -1;
|
||||
@@ -1475,20 +1514,28 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
|
||||
dmabuf_fd = ReceiveDmaBufFd(socket_fd);
|
||||
if (dmabuf_fd == -1) return -1;
|
||||
|
||||
amdgpu_bo_import_result res = {0};
|
||||
HsaGraphicsResourceInfo info;
|
||||
HSA_REGISTER_MEM_FLAGS regFlags;
|
||||
regFlags.ui32.requiresVAddr = !!res ? 0 : 1;
|
||||
regFlags.ui32.requiresVAddr = !isDmabufSysmem;
|
||||
int err = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodesExt(dmabuf_fd, &info, numNodes, nodes, regFlags));
|
||||
if (err == HSAKMT_STATUS_SUCCESS) {
|
||||
*importAddress = info.MemoryAddress;
|
||||
*importSize = info.SizeInBytes;
|
||||
if (res) {
|
||||
|
||||
if (isDmabufSysmem)
|
||||
HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress));
|
||||
|
||||
// Manually libDRM import and GPU map system memory
|
||||
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
|
||||
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
|
||||
dmabuf_fd, res));
|
||||
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
|
||||
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
|
||||
dmabuf_fd, &res));
|
||||
|
||||
// Store the buffer object handle in allocation map for later use
|
||||
if (err == HSAKMT_STATUS_SUCCESS) {
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
allocation_map_[*importAddress] =
|
||||
AllocationRegion(nullptr, *importSize, *importSize, core::MemoryRegion::AllocateNoFlags);
|
||||
allocation_map_[*importAddress].ldrm_bo = res.buf_handle;
|
||||
}
|
||||
close(dmabuf_fd);
|
||||
}
|
||||
@@ -1521,17 +1568,30 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
allocation_map_[importAddress].ldrm_bo = ldrm_bo;
|
||||
};
|
||||
|
||||
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes,
|
||||
amdgpu_bo_import_result *res) {
|
||||
int ret = ipc_dmabuf_supported_ ?
|
||||
IPCClientImport(importHandle.handle[2], dmaBufFDHandle, res,
|
||||
numNodes, nodes, &importAddress, &importSize) :
|
||||
HSAKMT_CALL(hsaKmtRegisterSharedHandle(reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
|
||||
&importAddress, &importSize));
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, bool isSysMem) {
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
int ret = ipc_dmabuf_supported_ ? IPCClientImport(importHandle.handle[2], dmaBufFDHandle, numNodes,
|
||||
nodes, &importAddress, &importSize, isSysMem) :
|
||||
HSAKMT_CALL(hsaKmtRegisterSharedHandle(
|
||||
reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
|
||||
&importAddress, &importSize
|
||||
));
|
||||
|
||||
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
if (ipc_dmabuf_supported_ && !isSysMem) {
|
||||
// use the bo from the allocation map
|
||||
// Only check metadata for GPU memory
|
||||
struct amdgpu_bo_info info = {0};
|
||||
int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info);
|
||||
|
||||
// Validate metadata for IPC handle
|
||||
if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) {
|
||||
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
auto mapMemoryToNodes = [&](unsigned int numNodes, HSAuint32 *nodes) {
|
||||
HSAuint64 altAddress;
|
||||
@@ -1573,10 +1633,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
|
||||
#if defined(__linux__)
|
||||
if (num_agents == 0) {
|
||||
amdgpu_bo_import_result res;
|
||||
bool isDmabufSysMem = ipc_dmabuf_supported_ && importHandle.handle[3];
|
||||
|
||||
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem ? &res : NULL);
|
||||
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL);
|
||||
|
||||
@@ -1589,7 +1648,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
|
||||
// Create a shared cpu access pointer for user
|
||||
void *cpuPtr;
|
||||
amdgpu_bo_handle bo = res.buf_handle;
|
||||
amdgpu_bo_handle bo = allocation_map_[importAddress].ldrm_bo;
|
||||
int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr));
|
||||
if (ret) return errCleanup(bo);
|
||||
|
||||
@@ -1620,8 +1679,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
for (uint32_t i = 0; i < num_agents; i++)
|
||||
agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]);
|
||||
|
||||
hsa_status_t err = importMemory(num_agents, nodes, NULL);
|
||||
hsa_status_t err = importMemory(num_agents, nodes, false);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
return mapMemoryToNodes(num_agents, nodes);
|
||||
}
|
||||
|
||||
@@ -2217,7 +2277,7 @@ void Runtime::Unload() {
|
||||
// Close IPC socket server
|
||||
if (ipc_sock_server_conns_.size())
|
||||
IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
0, NULL, NULL, NULL, false);
|
||||
|
||||
svm_profile_.reset(nullptr);
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user