diff --git a/projects/rocr-runtime/include/hsakmt.h b/projects/rocr-runtime/include/hsakmt.h index 63e6312818..582e907328 100644 --- a/projects/rocr-runtime/include/hsakmt.h +++ b/projects/rocr-runtime/include/hsakmt.h @@ -602,6 +602,16 @@ hsaKmtUnmapGraphicHandle( HSAuint64 SizeInBytes //IN ); +/** + * Get an AMDGPU device handle for a GPU node + */ +HSAKMT_STATUS +HSAKMTAPI +hsaKmtGetAMDGPUDeviceHandle( + HSAuint32 NodeId, //IN + HsaAMDGPUDeviceHandle *DeviceHandle //OUT + ); + /** Allocate GWS resource for a queue */ diff --git a/projects/rocr-runtime/include/hsakmttypes.h b/projects/rocr-runtime/include/hsakmttypes.h index f84f634141..16e156d039 100644 --- a/projects/rocr-runtime/include/hsakmttypes.h +++ b/projects/rocr-runtime/include/hsakmttypes.h @@ -1409,6 +1409,8 @@ typedef enum _HSA_SVM_UNMAP_TRIGGERS { #define HSA_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) #define HSA_SMI_EVENT_MSG_SIZE 96 +typedef void *HsaAMDGPUDeviceHandle; + #pragma pack(pop, hsakmttypes_h) diff --git a/projects/rocr-runtime/src/fmm.c b/projects/rocr-runtime/src/fmm.c index 023ba9e81a..da970e0ba8 100644 --- a/projects/rocr-runtime/src/fmm.c +++ b/projects/rocr-runtime/src/fmm.c @@ -41,6 +41,8 @@ #include #include #include "rbtree.h" +#include + #ifndef MPOL_F_STATIC_NODES /* Bug in numaif.h, this should be defined in there. Definition copied * from linux/mempolicy.h. 
@@ -195,6 +197,7 @@ typedef struct { int drm_render_fd; uint32_t usable_peer_id_num; uint32_t *usable_peer_id_array; + int drm_render_minor; } gpu_mem_t; enum svm_aperture_type { @@ -2026,10 +2029,15 @@ static HSAKMT_STATUS get_process_apertures( #define DRM_LAST_RENDER_NODE 255 static int drm_render_fds[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE]; +/* amdgpu device handle for each gpu that libdrm uses */ +static struct amdgpu_device *amdgpu_handle[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE]; + int open_drm_render_device(int minor) { char path[128]; int index, fd; + uint32_t major_drm, minor_drm; + struct amdgpu_device **device_handle; if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) { pr_err("DRM render minor %d out of range [%d, %d]\n", minor, @@ -2054,6 +2062,23 @@ int open_drm_render_device(int minor) } drm_render_fds[index] = fd; + /* if amdgpu_device_get_fd is available, query the render fd that libdrm uses, + * then close drm_render_fds above and replace it with the fd libdrm uses. 
+ */ + device_handle = &amdgpu_handle[index]; + if (fn_amdgpu_device_get_fd && + !amdgpu_device_initialize(fd, &major_drm, &minor_drm, device_handle)) { + fd = fn_amdgpu_device_get_fd(*device_handle); + if (fd > 0) { + close(drm_render_fds[index]); + drm_render_fds[index] = fd; + } else { + pr_err("amdgpu_device_get_fd failed: %d\n", fd); + amdgpu_device_deinitialize(*device_handle); + *device_handle = 0; + } + } + return fd; } @@ -2366,6 +2391,23 @@ static void release_mmio(void) } } +HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id, + HsaAMDGPUDeviceHandle *DeviceHandle) +{ + int32_t i = gpu_mem_find_by_node_id(node_id); + int index; + + if (i < 0) + return HSAKMT_STATUS_INVALID_NODE_UNIT; + + index = gpu_mem[i].drm_render_minor - DRM_FIRST_RENDER_NODE; + if (!amdgpu_handle[index]) + return HSAKMT_STATUS_INVALID_HANDLE; + + *DeviceHandle = amdgpu_handle[index]; + return HSAKMT_STATUS_SUCCESS; +} + static bool two_apertures_overlap(void *start_1, void *limit_1, void *start_2, void *limit_2) { return (start_1 >= start_2 && start_1 <= limit_2) || (start_2 >= start_1 && start_2 <= limit_1); @@ -2376,7 +2418,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages) bool found; uint32_t i; - /* init mem_handle_aperture for buffer handler management*/ + /* init mem_handle_aperture for buffer handler management */ mem_handle_aperture.align = align; mem_handle_aperture.guard_pages = guard_pages; mem_handle_aperture.is_cpu_accessible = false; @@ -2387,7 +2429,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages) found = true; for (i = 0; i < gpu_mem_count; i++) { - if (gpu_mem[i/*gpu_mem_id*/].lds_aperture.base && + if (gpu_mem[i].lds_aperture.base && two_apertures_overlap(gpu_mem[i].lds_aperture.base, gpu_mem[i].lds_aperture.limit, mem_handle_aperture.base, mem_handle_aperture.limit)) { found = false; @@ -2410,16 +2452,17 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages) } if 
(found) { - pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n", mem_handle_aperture.base, mem_handle_aperture.limit); + pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n", + mem_handle_aperture.base, mem_handle_aperture.limit); return true; } else { - /*increase base by 1UL<<47 to check next hole*/ + /* increase base by 1UL<<47 to check next hole */ mem_handle_aperture.base = VOID_PTR_ADD(mem_handle_aperture.base, (1UL << 47)); mem_handle_aperture.limit = VOID_PTR_ADD(mem_handle_aperture.base, (1ULL << 47)); } } - /* set invalid aperture if fail locating a hole for it*/ + /* set invalid aperture if fail locating a hole for it */ mem_handle_aperture.base = 0; mem_handle_aperture.limit = 0; @@ -2511,6 +2554,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) goto gpu_mem_init_failed; } + gpu_mem[gpu_mem_count].drm_render_minor = props.DrmRenderMinor; gpu_mem[gpu_mem_count].usable_peer_id_array = calloc(NumNodes, sizeof(uint32_t)); if (!gpu_mem[gpu_mem_count].usable_peer_id_array) { @@ -4146,11 +4190,16 @@ void fmm_clear_all_mem(void) void *map_addr; /* Close render node FDs. 
The child process needs to open new ones */ - for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++) - if (drm_render_fds[i]) { + for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++) { + + if (amdgpu_handle[i]) { + amdgpu_device_deinitialize(amdgpu_handle[i]); + amdgpu_handle[i] = NULL; + } else if (drm_render_fds[i]) { close(drm_render_fds[i]); - drm_render_fds[i] = 0; } + drm_render_fds[i] = 0; + } fmm_clear_aperture(&mem_handle_aperture); fmm_clear_aperture(&cpuvm_aperture); diff --git a/projects/rocr-runtime/src/fmm.h b/projects/rocr-runtime/src/fmm.h index ce4bf55ca7..cd6a14ad02 100644 --- a/projects/rocr-runtime/src/fmm.h +++ b/projects/rocr-runtime/src/fmm.h @@ -45,6 +45,7 @@ typedef struct { void *start_address; } aperture_properties_t; +HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle); HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes); void fmm_destroy_process_apertures(void); @@ -101,4 +102,5 @@ int open_drm_render_device(int minor); void *mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align, uint64_t guard_size, void *aper_base, void *aper_limit); +extern int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle); #endif /* FMM_H_ */ diff --git a/projects/rocr-runtime/src/memory.c b/projects/rocr-runtime/src/memory.c index 647bc44f3c..c5ebfe99e8 100644 --- a/projects/rocr-runtime/src/memory.c +++ b/projects/rocr-runtime/src/memory.c @@ -618,4 +618,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr) #else return HSAKMT_STATUS_NOT_SUPPORTED; #endif -} \ No newline at end of file +} + +HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle( HSAuint32 NodeId, + HsaAMDGPUDeviceHandle *DeviceHandle) +{ + CHECK_KFD_OPEN(); + + return fmm_get_amdgpu_device_handle(NodeId, DeviceHandle); +} diff --git a/projects/rocr-runtime/src/openclose.c b/projects/rocr-runtime/src/openclose.c index df98e46219..4913349256 100644 --- 
a/projects/rocr-runtime/src/openclose.c +++ b/projects/rocr-runtime/src/openclose.c @@ -23,6 +23,11 @@ * DEALINGS IN THE SOFTWARE. */ +/* glibc macro that enables access to some nonstandard GNU/Linux extensions + * such as RTLD_DEFAULT used by dlsym + */ +#define _GNU_SOURCE + #include "libhsakmt.h" #include @@ -34,6 +39,9 @@ #include #include #include "fmm.h" +#include + +int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle); static const char kfd_device_name[] = "/dev/kfd"; static pid_t parent_pid = -1; @@ -143,6 +151,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) HSAKMT_STATUS result; int fd = -1; HsaSystemProperties sys_props; + char *error; pthread_mutex_lock(&hsakmt_mutex); @@ -156,6 +165,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) if (kfd_open_count == 0) { static bool atfork_installed = false; + fn_amdgpu_device_get_fd = dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd"); + if ((error = dlerror()) != NULL) + pr_err("amdgpu_device_get_fd is not available: %s\n", error); + else + pr_info("amdgpu_device_get_fd is available %p\n", fn_amdgpu_device_get_fd); + result = init_vars_from_env(); if (result != HSAKMT_STATUS_SUCCESS) goto open_failed;