libhsakmt: query/use render node fds that libdrm uses.

Query the render node fds that libdrm uses for the current process and
use them in the Thunk when they are available.

v2: avoid naming conflict with amdgpu_device_get_fd from amdgpu.h

Signed-off-by: Xiaogang Chen<Xiaogang.Chen@amd.com>
Change-Id: Id7288c03730f4a4c9c3644e37ca4725fec71a471


[ROCm/ROCR-Runtime commit: ac1db60fc2]
Cette révision appartient à :
Xiaogang Chen
2022-11-15 12:18:27 -06:00
révisé par Xiaogang Chen
Parent 6800fbec43
révision dd8954e83e
6 fichiers modifiés avec 95 ajouts et 9 suppressions
+10
Voir le fichier
@@ -602,6 +602,16 @@ hsaKmtUnmapGraphicHandle(
HSAuint64 SizeInBytes //IN
);
/**
* Get an AMDGPU device handle for a GPU node.
*
* NodeId selects the node to look up. On success the handle is stored
* through DeviceHandle and HSAKMT_STATUS_SUCCESS is returned; any other
* status means no handle was stored.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetAMDGPUDeviceHandle(
HSAuint32 NodeId, //IN
HsaAMDGPUDeviceHandle *DeviceHandle //OUT
);
/**
Allocate GWS resource for a queue
*/
+2
Voir le fichier
@@ -1409,6 +1409,8 @@ typedef enum _HSA_SVM_UNMAP_TRIGGERS {
#define HSA_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
#define HSA_SMI_EVENT_MSG_SIZE 96
/* Opaque AMDGPU device handle, as returned by hsaKmtGetAMDGPUDeviceHandle() */
typedef void *HsaAMDGPUDeviceHandle;
#pragma pack(pop, hsakmttypes_h)
+57 -8
Voir le fichier
@@ -41,6 +41,8 @@
#include <numa.h>
#include <numaif.h>
#include "rbtree.h"
#include <amdgpu.h>
#ifndef MPOL_F_STATIC_NODES
/* Bug in numaif.h, this should be defined in there. Definition copied
* from linux/mempolicy.h.
@@ -195,6 +197,7 @@ typedef struct {
int drm_render_fd;
uint32_t usable_peer_id_num;
uint32_t *usable_peer_id_array;
int drm_render_minor;
} gpu_mem_t;
enum svm_aperture_type {
@@ -2026,10 +2029,15 @@ static HSAKMT_STATUS get_process_apertures(
#define DRM_LAST_RENDER_NODE 255
static int drm_render_fds[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];
/* amdgpu device handle for each gpu that libdrm uses */
static struct amdgpu_device *amdgpu_handle[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];
int open_drm_render_device(int minor)
{
char path[128];
int index, fd;
uint32_t major_drm, minor_drm;
struct amdgpu_device **device_handle;
if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
pr_err("DRM render minor %d out of range [%d, %d]\n", minor,
@@ -2054,6 +2062,23 @@ int open_drm_render_device(int minor)
}
drm_render_fds[index] = fd;
/* If amdgpu_device_get_fd is available, query the render fd that libdrm uses,
 * then close the fd stored in drm_render_fds above and replace it with the fd libdrm uses.
 */
device_handle = &amdgpu_handle[index];
if (fn_amdgpu_device_get_fd &&
!amdgpu_device_initialize(fd, &major_drm, &minor_drm, device_handle)) {
fd = fn_amdgpu_device_get_fd(*device_handle);
if (fd > 0) {
close(drm_render_fds[index]);
drm_render_fds[index] = fd;
} else {
pr_err("amdgpu_device_get_fd failed: %d\n", fd);
amdgpu_device_deinitialize(*device_handle);
*device_handle = 0;
}
}
return fd;
}
@@ -2366,6 +2391,23 @@ static void release_mmio(void)
}
}
/* Look up the amdgpu device handle recorded for a GPU node.
 *
 * node_id:      node to look up (resolved with gpu_mem_find_by_node_id()).
 * DeviceHandle: output; written only when HSAKMT_STATUS_SUCCESS is returned.
 *
 * Returns:
 *   HSAKMT_STATUS_SUCCESS           - *DeviceHandle holds the stored handle
 *   HSAKMT_STATUS_INVALID_PARAMETER - DeviceHandle is NULL
 *   HSAKMT_STATUS_INVALID_NODE_UNIT - no gpu_mem entry matches node_id
 *   HSAKMT_STATUS_INVALID_HANDLE    - the node's render minor is outside
 *                                     [DRM_FIRST_RENDER_NODE, DRM_LAST_RENDER_NODE]
 *                                     or no handle is stored for it
 */
HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id,
					   HsaAMDGPUDeviceHandle *DeviceHandle)
{
	int32_t i;
	int minor;
	int index;

	/* The result is written through this pointer, so it must be valid. */
	if (!DeviceHandle)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	i = gpu_mem_find_by_node_id(node_id);
	if (i < 0)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* amdgpu_handle[] only has slots for render minors in
	 * [DRM_FIRST_RENDER_NODE, DRM_LAST_RENDER_NODE]; apply the same range
	 * check as open_drm_render_device() so an unexpected minor can never
	 * index outside the array.
	 */
	minor = gpu_mem[i].drm_render_minor;
	if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE)
		return HSAKMT_STATUS_INVALID_HANDLE;

	index = minor - DRM_FIRST_RENDER_NODE;
	if (!amdgpu_handle[index])
		return HSAKMT_STATUS_INVALID_HANDLE;

	*DeviceHandle = amdgpu_handle[index];
	return HSAKMT_STATUS_SUCCESS;
}
static bool two_apertures_overlap(void *start_1, void *limit_1, void *start_2, void *limit_2)
{
return (start_1 >= start_2 && start_1 <= limit_2) || (start_2 >= start_1 && start_2 <= limit_1);
@@ -2376,7 +2418,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
bool found;
uint32_t i;
/* init mem_handle_aperture for buffer handler management*/
/* init mem_handle_aperture for buffer handler management */
mem_handle_aperture.align = align;
mem_handle_aperture.guard_pages = guard_pages;
mem_handle_aperture.is_cpu_accessible = false;
@@ -2387,7 +2429,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
found = true;
for (i = 0; i < gpu_mem_count; i++) {
if (gpu_mem[i/*gpu_mem_id*/].lds_aperture.base &&
if (gpu_mem[i].lds_aperture.base &&
two_apertures_overlap(gpu_mem[i].lds_aperture.base, gpu_mem[i].lds_aperture.limit,
mem_handle_aperture.base, mem_handle_aperture.limit)) {
found = false;
@@ -2410,16 +2452,17 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
}
if (found) {
pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n", mem_handle_aperture.base, mem_handle_aperture.limit);
pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n",
mem_handle_aperture.base, mem_handle_aperture.limit);
return true;
} else {
/*increase base by 1UL<<47 to check next hole*/
/* increase base by 1UL<<47 to check next hole */
mem_handle_aperture.base = VOID_PTR_ADD(mem_handle_aperture.base, (1UL << 47));
mem_handle_aperture.limit = VOID_PTR_ADD(mem_handle_aperture.base, (1ULL << 47));
}
}
/* set invalid aperture if fail locating a hole for it*/
/* set invalid aperture if fail locating a hole for it */
mem_handle_aperture.base = 0;
mem_handle_aperture.limit = 0;
@@ -2511,6 +2554,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
goto gpu_mem_init_failed;
}
gpu_mem[gpu_mem_count].drm_render_minor = props.DrmRenderMinor;
gpu_mem[gpu_mem_count].usable_peer_id_array =
calloc(NumNodes, sizeof(uint32_t));
if (!gpu_mem[gpu_mem_count].usable_peer_id_array) {
@@ -4146,11 +4190,16 @@ void fmm_clear_all_mem(void)
void *map_addr;
/* Close render node FDs. The child process needs to open new ones */
for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++)
if (drm_render_fds[i]) {
for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++) {
if (amdgpu_handle[i]) {
amdgpu_device_deinitialize(amdgpu_handle[i]);
amdgpu_handle[i] = NULL;
} else if (drm_render_fds[i]) {
close(drm_render_fds[i]);
drm_render_fds[i] = 0;
}
drm_render_fds[i] = 0;
}
fmm_clear_aperture(&mem_handle_aperture);
fmm_clear_aperture(&cpuvm_aperture);
+2
Voir le fichier
@@ -45,6 +45,7 @@ typedef struct {
void *start_address;
} aperture_properties_t;
HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes);
void fmm_destroy_process_apertures(void);
@@ -101,4 +102,5 @@ int open_drm_render_device(int minor);
void *mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
uint64_t guard_size, void *aper_base, void *aper_limit);
extern int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
#endif /* FMM_H_ */
+9 -1
Voir le fichier
@@ -618,4 +618,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
#else
return HSAKMT_STATUS_NOT_SUPPORTED;
#endif
}
}
/* Public entry point for fetching the AMDGPU device handle of a GPU node.
 *
 * Runs CHECK_KFD_OPEN() first (presumably returns early when KFD has not
 * been opened - confirm against the macro definition), then forwards both
 * arguments unchanged to fmm_get_amdgpu_device_handle() and returns its
 * status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
						    HsaAMDGPUDeviceHandle *DeviceHandle)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	status = fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);
	return status;
}
+15
Voir le fichier
@@ -23,6 +23,11 @@
* DEALINGS IN THE SOFTWARE.
*/
/* glibc feature-test macro that enables access to some nonstandard GNU/Linux
 * extensions, such as RTLD_DEFAULT used by dlsym
 */
#define _GNU_SOURCE
#include "libhsakmt.h"
#include <stdlib.h>
@@ -34,6 +39,9 @@
#include <stdio.h>
#include <strings.h>
#include "fmm.h"
#include <dlfcn.h>
int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
static const char kfd_device_name[] = "/dev/kfd";
static pid_t parent_pid = -1;
@@ -143,6 +151,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
HSAKMT_STATUS result;
int fd = -1;
HsaSystemProperties sys_props;
char *error;
pthread_mutex_lock(&hsakmt_mutex);
@@ -156,6 +165,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
if (kfd_open_count == 0) {
static bool atfork_installed = false;
fn_amdgpu_device_get_fd = dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
if ((error = dlerror()) != NULL)
pr_err("amdgpu_device_get_fd is not available: %s\n", error);
else
pr_info("amdgpu_device_get_fd is available %p\n", fn_amdgpu_device_get_fd);
result = init_vars_from_env();
if (result != HSAKMT_STATUS_SUCCESS)
goto open_failed;