/*
 * Review summary for this patch. These notes sit ahead of the first
 * "diff --git" header, so they are not part of any hunk; everything from
 * that header onward is unchanged.
 *
 * Purpose: add hsaKmtRegisterMemoryWithFlags(), a user-memory registration
 * entry point that takes an HsaMemFlags argument, and thread a
 * "coarse_grain" switch through the fmm registration helpers.
 *
 * Per-file changes:
 *   include/hsakmt.h
 *     - Declares hsaKmtRegisterMemoryWithFlags(MemoryAddress,
 *       MemorySizeInBytes, MemFlags).
 *     - A second hunk replaces the MemMapFlags parameter line of
 *       hsaKmtMapMemoryToGPUNodes; the removed and added lines show the
 *       same tokens in this view.
 *       NOTE(review): looks like a whitespace-only change - confirm it is
 *       meant to be part of this patch.
 *   src/fmm.c, src/fmm.h
 *     - fmm_register_memory() and the static fmm_register_user_memory()
 *       gain a trailing "bool coarse_grain" parameter;
 *       fmm_register_memory() forwards it to fmm_register_user_memory().
 *     - In fmm_register_user_memory(), the flags passed to
 *       __fmm_allocate_device() additionally include
 *       KFD_IOC_ALLOC_MEM_FLAGS_COHERENT when coarse_grain is false. When
 *       coarse_grain is true the flag set is the same as before the patch
 *       (USERPTR | WRITABLE | EXECUTABLE).
 *   src/libhsakmt.ver
 *     - Adds hsaKmtRegisterMemoryWithFlags to the symbol list, after
 *       hsaKmtRegisterMemoryToNodes.
 *   src/memory.c
 *     - hsaKmtRegisterMemory() and hsaKmtRegisterMemoryToNodes() now pass
 *       coarse_grain = true to fmm_register_memory().
 *     - Defines hsaKmtRegisterMemoryWithFlags().
 *
 * hsaKmtRegisterMemoryWithFlags() as added, in order:
 *   1. Invokes CHECK_KFD_OPEN().
 *   2. Logs the function name and MemoryAddress via pr_debug().
 *   3. Returns HSAKMT_STATUS_NOT_SUPPORTED unless
 *      MemFlags.ui32.HostAccess is 1 and MemFlags.ui32.NonPaged is not 1.
 *   4. Returns HSAKMT_STATUS_NOT_SUPPORTED when is_dgpu is false.
 *   5. Otherwise returns the result of
 *      fmm_register_memory(MemoryAddress, MemorySizeInBytes, NULL, 0,
 *                          MemFlags.ui32.CoarseGrain).
 *
 * NOTE(review): "ret" in the new function is initialised to
 * HSAKMT_STATUS_SUCCESS but is overwritten by the fmm_register_memory()
 * result before it is ever read; the initialiser (or the variable) could
 * be dropped.
 * NOTE(review): the "if (!is_dgpu)" statement is followed by a comment and
 * then an unbraced return; braces would make the pairing explicit.
 * NOTE(review): hsaKmtRegisterMemory() has an early
 * "return HSAKMT_STATUS_SUCCESS" just above its fmm_register_memory() call
 * (its condition is outside the hunk), while the new function returns
 * NOT_SUPPORTED when is_dgpu is false - confirm the two entry points are
 * meant to differ.
 */
diff --git a/projects/rocr-runtime/include/hsakmt.h b/projects/rocr-runtime/include/hsakmt.h index 2d987d91a9..031ab1dc57 100644 --- a/projects/rocr-runtime/include/hsakmt.h +++ b/projects/rocr-runtime/include/hsakmt.h @@ -400,6 +400,18 @@ hsaKmtRegisterMemoryToNodes( ); +/** + Registers with KFD a memory buffer with memory attributes +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtRegisterMemoryWithFlags( + void *MemoryAddress, // IN (cache-aligned) + HSAuint64 MemorySizeInBytes, // IN (cache-aligned) + HsaMemFlags MemFlags // IN + ); + /** Registers with KFD a graphics buffer and returns graphics metadata */ @@ -515,7 +527,7 @@ hsaKmtMapMemoryToGPUNodes( void* MemoryAddress, //IN (page-aligned) HSAuint64 MemorySizeInBytes, //IN (page-aligned) HSAuint64* AlternateVAGPU, //OUT (page-aligned) - HsaMemMapFlags MemMapFlags, //IN + HsaMemMapFlags MemMapFlags, //IN HSAuint64 NumberOfNodes, //IN HSAuint32* NodeArray //IN ); diff --git a/projects/rocr-runtime/src/fmm.c b/projects/rocr-runtime/src/fmm.c index 8c2b530649..079ee55664 100644 --- a/projects/rocr-runtime/src/fmm.c +++ b/projects/rocr-runtime/src/fmm.c @@ -2794,7 +2794,8 @@ bool fmm_get_handle(void *address, uint64_t *handle) return found; } -static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_object_t **obj_ret) +static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, + vm_object_t **obj_ret, bool coarse_grain) { manageable_aperture_t *aperture = svm.dgpu_aperture; HSAuint32 page_offset = (HSAuint64)addr & (PAGE_SIZE-1); @@ -2818,7 +2819,8 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj svm_addr = __fmm_allocate_device(gpu_id, NULL, aligned_size, aperture, &aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | - KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE, &obj); + KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE | + (coarse_grain ? 
0 : KFD_IOC_ALLOC_MEM_FLAGS_COHERENT), &obj); if (!svm_addr) return HSAKMT_STATUS_ERROR; @@ -2841,7 +2843,8 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, uint32_t *gpu_id_array, - uint32_t gpu_id_array_size) + uint32_t gpu_id_array_size, + bool coarse_grain) { manageable_aperture_t *aperture = NULL; vm_object_t *object = NULL; @@ -2857,7 +2860,7 @@ HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, return HSAKMT_STATUS_SUCCESS; /* Register a new user ptr */ - ret = fmm_register_user_memory(address, size_in_bytes, &object); + ret = fmm_register_user_memory(address, size_in_bytes, &object, coarse_grain); if (ret != HSAKMT_STATUS_SUCCESS) return ret; if (gpu_id_array_size == 0) diff --git a/projects/rocr-runtime/src/fmm.h b/projects/rocr-runtime/src/fmm.h index 85ef0ef54d..9709b6e4ce 100644 --- a/projects/rocr-runtime/src/fmm.h +++ b/projects/rocr-runtime/src/fmm.h @@ -69,7 +69,8 @@ HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSA HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes, uint32_t *gpu_id_array, - uint32_t gpu_id_array_size); + uint32_t gpu_id_array_size, + bool coarse_grain); HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, uint32_t *gpu_id_array, diff --git a/projects/rocr-runtime/src/libhsakmt.ver b/projects/rocr-runtime/src/libhsakmt.ver index 10a66a1dca..12bb825f7f 100644 --- a/projects/rocr-runtime/src/libhsakmt.ver +++ b/projects/rocr-runtime/src/libhsakmt.ver @@ -26,6 +26,7 @@ hsaKmtAllocMemory; hsaKmtFreeMemory; hsaKmtRegisterMemory; hsaKmtRegisterMemoryToNodes; +hsaKmtRegisterMemoryWithFlags; hsaKmtRegisterGraphicsHandleToNodes; hsaKmtShareMemory; hsaKmtRegisterSharedHandle; diff --git a/projects/rocr-runtime/src/memory.c b/projects/rocr-runtime/src/memory.c index 2bd67323d8..63b88e4d5d 
100644 --- a/projects/rocr-runtime/src/memory.c +++ b/projects/rocr-runtime/src/memory.c @@ -210,7 +210,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress, return HSAKMT_STATUS_SUCCESS; return fmm_register_memory(MemoryAddress, MemorySizeInBytes, - NULL, 0); + NULL, 0, true); } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress, @@ -235,7 +235,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress, if (ret == HSAKMT_STATUS_SUCCESS) { ret = fmm_register_memory(MemoryAddress, MemorySizeInBytes, gpu_id_array, - NumberOfNodes*sizeof(uint32_t)); + NumberOfNodes*sizeof(uint32_t), + true); if (ret != HSAKMT_STATUS_SUCCESS) free(gpu_id_array); } @@ -243,6 +244,30 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress, return ret; } +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HsaMemFlags MemFlags) +{ + CHECK_KFD_OPEN(); + HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; + + pr_debug("[%s] address %p\n", + __func__, MemoryAddress); + + // Registered memory should be ordinary paged host memory. + if ((MemFlags.ui32.HostAccess != 1) || (MemFlags.ui32.NonPaged == 1)) + return HSAKMT_STATUS_NOT_SUPPORTED; + + if (!is_dgpu) + /* TODO: support mixed APU and dGPU configurations */ + return HSAKMT_STATUS_NOT_SUPPORTED; + + ret = fmm_register_memory(MemoryAddress, MemorySizeInBytes, + NULL, 0, MemFlags.ui32.CoarseGrain); + + return ret; +} + HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, HSAuint64 NumberOfNodes,