From 03463ed2c079aae96cee92632b19c7defc180a7d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 27 Sep 2024 15:15:33 -0400 Subject: [PATCH] hsakmt: Enable graphics handle registration with a virtual address Currently, registering graphics memory without specifying a target node returns a memory handle that is not a virtual address. As a result, ROCr is forced to register with a target node for IPC usage. Mapping memory afterwards without specifying a target node will result in mapping to the target node used for the import, because the previous import call flags that node as the target for future mappings. For ROCr IPC usage, ROCr wants to map to all GPU nodes if the target node is not specified. Allow the caller to register graphics handles and get back a virtual address without having to specify the target node, so that the caller can make a subsequent map call to all GPUs. Change-Id: I5a935092b885cc3568e4f3a5dd951c7ec6c84fca --- libhsakmt/include/hsakmt/hsakmt.h | 15 +++++++++++++++ libhsakmt/include/hsakmt/hsakmttypes.h | 9 +++++++++ libhsakmt/src/fmm.c | 5 +++-- libhsakmt/src/fmm.h | 3 ++- libhsakmt/src/memory.c | 19 ++++++++++++++++++- 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/libhsakmt/include/hsakmt/hsakmt.h b/libhsakmt/include/hsakmt/hsakmt.h index b4ffe3fb3d..8361a3cd94 100644 --- a/libhsakmt/include/hsakmt/hsakmt.h +++ b/libhsakmt/include/hsakmt/hsakmt.h @@ -512,6 +512,21 @@ hsaKmtRegisterGraphicsHandleToNodes( HSAuint32* NodeArray //IN ); +/** + Similar to hsaKmtRegisterGraphicsHandleToNodes but provides registration + options via RegisterFlags. 
+*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtRegisterGraphicsHandleToNodesExt( + HSAuint64 GraphicsResourceHandle, //IN + HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT + HSAuint64 NumberOfNodes, //IN + HSAuint32* NodeArray, //IN + HSA_REGISTER_MEM_FLAGS RegisterFlags //IN + ); + /** * Export a dmabuf handle and offset for a given memory address * diff --git a/libhsakmt/include/hsakmt/hsakmttypes.h b/libhsakmt/include/hsakmt/hsakmttypes.h index 15a6c801f1..1d419cc975 100644 --- a/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/libhsakmt/include/hsakmt/hsakmttypes.h @@ -1488,6 +1488,15 @@ typedef struct _HsaPcSamplingInfo } HsaPcSamplingInfo; +typedef union +{ + HSAuint32 Value; + struct + { + unsigned int requiresVAddr : 1; // Requires virtual address + } ui32; +} HSA_REGISTER_MEM_FLAGS; + #pragma pack(pop, hsakmttypes_h) diff --git a/libhsakmt/src/fmm.c b/libhsakmt/src/fmm.c index 305054ca18..a606c82d4d 100644 --- a/libhsakmt/src/fmm.c +++ b/libhsakmt/src/fmm.c @@ -3586,7 +3586,8 @@ HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes, HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, uint32_t *gpu_id_array, - uint32_t gpu_id_array_size) + uint32_t gpu_id_array_size, + HSA_REGISTER_MEM_FLAGS RegisterFlags) { struct kfd_ioctl_get_dmabuf_info_args infoArgs = {0}; struct kfd_ioctl_import_dmabuf_args importArgs = {0}; @@ -3630,7 +3631,7 @@ HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHand goto error_free_metadata; /* import DMA buffer without VA assigned */ - if (!gpu_id_array && gpu_id_array_size == 0) { + if (!gpu_id_array && gpu_id_array_size == 0 && !RegisterFlags.ui32.requiresVAddr) { aperture = &mem_handle_aperture; } else if (hsakmt_topology_is_svm_needed(gpu_mem[gpu_mem_id].EngineId)) { aperture = svm.dgpu_aperture; diff --git a/libhsakmt/src/fmm.h b/libhsakmt/src/fmm.h index b56581dccf..29e81ef714 100644 --- 
a/libhsakmt/src/fmm.h +++ b/libhsakmt/src/fmm.h @@ -80,7 +80,8 @@ HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes, HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, uint32_t *gpu_id_array, - uint32_t gpu_id_array_size); + uint32_t gpu_id_array_size, + HSA_REGISTER_MEM_FLAGS RegisterFlags); HSAKMT_STATUS hsakmt_fmm_deregister_memory(void *address); HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(void *MemoryAddress, HSAuint64 MemorySizeInBytes, diff --git a/libhsakmt/src/memory.c b/libhsakmt/src/memory.c index 4e4a267a3b..7e072bd165 100644 --- a/libhsakmt/src/memory.c +++ b/libhsakmt/src/memory.c @@ -336,6 +336,23 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsRe HsaGraphicsResourceInfo *GraphicsResourceInfo, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray) +{ + HSA_REGISTER_MEM_FLAGS regFlags; + regFlags.Value = 0; + + return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle, + GraphicsResourceInfo, + NumberOfNodes, + NodeArray, + regFlags); + +} + +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle, + HsaGraphicsResourceInfo *GraphicsResourceInfo, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray, + HSA_REGISTER_MEM_FLAGS RegisterFlags) { CHECK_KFD_OPEN(); uint32_t *gpu_id_array = NULL; @@ -351,7 +368,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsRe if (ret == HSAKMT_STATUS_SUCCESS) { ret = hsakmt_fmm_register_graphics_handle( GraphicsResourceHandle, GraphicsResourceInfo, - gpu_id_array, NumberOfNodes * sizeof(uint32_t)); + gpu_id_array, NumberOfNodes * sizeof(uint32_t), RegisterFlags); if (ret != HSAKMT_STATUS_SUCCESS) free(gpu_id_array); }