libhsakmt: query/use render node fds that libdrm uses.
Query the render node fds that libdrm uses for the current process and
use them in the Thunk if they are available.
v2: avoid naming conflict with amdgpu_device_get_fd from amdgpu.h
Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Change-Id: Id7288c03730f4a4c9c3644e37ca4725fec71a471
[ROCm/ROCR-Runtime commit: ac1db60fc2]
Cette révision appartient à :
révisé par
Xiaogang Chen
Parent
6800fbec43
révision
dd8954e83e
@@ -602,6 +602,16 @@ hsaKmtUnmapGraphicHandle(
|
||||
HSAuint64 SizeInBytes //IN
|
||||
);
|
||||
|
||||
/**
|
||||
* Get an AMDGPU device handle for a GPU node
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetAMDGPUDeviceHandle(
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaAMDGPUDeviceHandle *DeviceHandle //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Allocate GWS resource for a queue
|
||||
*/
|
||||
|
||||
@@ -1409,6 +1409,8 @@ typedef enum _HSA_SVM_UNMAP_TRIGGERS {
|
||||
#define HSA_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
|
||||
#define HSA_SMI_EVENT_MSG_SIZE 96
|
||||
|
||||
typedef void *HsaAMDGPUDeviceHandle;
|
||||
|
||||
#pragma pack(pop, hsakmttypes_h)
|
||||
|
||||
|
||||
|
||||
@@ -41,6 +41,8 @@
|
||||
#include <numa.h>
|
||||
#include <numaif.h>
|
||||
#include "rbtree.h"
|
||||
#include <amdgpu.h>
|
||||
|
||||
#ifndef MPOL_F_STATIC_NODES
|
||||
/* Bug in numaif.h, this should be defined in there. Definition copied
|
||||
* from linux/mempolicy.h.
|
||||
@@ -195,6 +197,7 @@ typedef struct {
|
||||
int drm_render_fd;
|
||||
uint32_t usable_peer_id_num;
|
||||
uint32_t *usable_peer_id_array;
|
||||
int drm_render_minor;
|
||||
} gpu_mem_t;
|
||||
|
||||
enum svm_aperture_type {
|
||||
@@ -2026,10 +2029,15 @@ static HSAKMT_STATUS get_process_apertures(
|
||||
#define DRM_LAST_RENDER_NODE 255
|
||||
static int drm_render_fds[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];
|
||||
|
||||
/* amdgpu device handle for each gpu that libdrm uses */
|
||||
static struct amdgpu_device *amdgpu_handle[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];
|
||||
|
||||
int open_drm_render_device(int minor)
|
||||
{
|
||||
char path[128];
|
||||
int index, fd;
|
||||
uint32_t major_drm, minor_drm;
|
||||
struct amdgpu_device **device_handle;
|
||||
|
||||
if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
|
||||
pr_err("DRM render minor %d out of range [%d, %d]\n", minor,
|
||||
@@ -2054,6 +2062,23 @@ int open_drm_render_device(int minor)
|
||||
}
|
||||
drm_render_fds[index] = fd;
|
||||
|
||||
/* if amdgpu_device_get_fd is available, query the render fd that libdrm uses,
|
||||
* then close drm_render_fds above, replace it by fd libdrm uses.
|
||||
*/
|
||||
device_handle = &amdgpu_handle[index];
|
||||
if (fn_amdgpu_device_get_fd &&
|
||||
!amdgpu_device_initialize(fd, &major_drm, &minor_drm, device_handle)) {
|
||||
fd = fn_amdgpu_device_get_fd(*device_handle);
|
||||
if (fd > 0) {
|
||||
close(drm_render_fds[index]);
|
||||
drm_render_fds[index] = fd;
|
||||
} else {
|
||||
pr_err("amdgpu_device_get_fd failed: %d\n", fd);
|
||||
amdgpu_device_deinitialize(*device_handle);
|
||||
*device_handle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
@@ -2366,6 +2391,23 @@ static void release_mmio(void)
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id,
|
||||
HsaAMDGPUDeviceHandle *DeviceHandle)
|
||||
{
|
||||
int32_t i = gpu_mem_find_by_node_id(node_id);
|
||||
int index;
|
||||
|
||||
if (i < 0)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
index = gpu_mem[i].drm_render_minor - DRM_FIRST_RENDER_NODE;
|
||||
if (!amdgpu_handle[index])
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
*DeviceHandle = amdgpu_handle[index];
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static bool two_apertures_overlap(void *start_1, void *limit_1, void *start_2, void *limit_2)
|
||||
{
|
||||
return (start_1 >= start_2 && start_1 <= limit_2) || (start_2 >= start_1 && start_2 <= limit_1);
|
||||
@@ -2376,7 +2418,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
|
||||
bool found;
|
||||
uint32_t i;
|
||||
|
||||
/* init mem_handle_aperture for buffer handler management*/
|
||||
/* init mem_handle_aperture for buffer handler management */
|
||||
mem_handle_aperture.align = align;
|
||||
mem_handle_aperture.guard_pages = guard_pages;
|
||||
mem_handle_aperture.is_cpu_accessible = false;
|
||||
@@ -2387,7 +2429,7 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
|
||||
found = true;
|
||||
for (i = 0; i < gpu_mem_count; i++) {
|
||||
|
||||
if (gpu_mem[i/*gpu_mem_id*/].lds_aperture.base &&
|
||||
if (gpu_mem[i].lds_aperture.base &&
|
||||
two_apertures_overlap(gpu_mem[i].lds_aperture.base, gpu_mem[i].lds_aperture.limit,
|
||||
mem_handle_aperture.base, mem_handle_aperture.limit)) {
|
||||
found = false;
|
||||
@@ -2410,16 +2452,17 @@ static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
|
||||
}
|
||||
|
||||
if (found) {
|
||||
pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n", mem_handle_aperture.base, mem_handle_aperture.limit);
|
||||
pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n",
|
||||
mem_handle_aperture.base, mem_handle_aperture.limit);
|
||||
return true;
|
||||
} else {
|
||||
/*increase base by 1UL<<47 to check next hole*/
|
||||
/* increase base by 1UL<<47 to check next hole */
|
||||
mem_handle_aperture.base = VOID_PTR_ADD(mem_handle_aperture.base, (1UL << 47));
|
||||
mem_handle_aperture.limit = VOID_PTR_ADD(mem_handle_aperture.base, (1ULL << 47));
|
||||
}
|
||||
}
|
||||
|
||||
/* set invalid aperture if fail locating a hole for it*/
|
||||
/* set invalid aperture if fail locating a hole for it */
|
||||
mem_handle_aperture.base = 0;
|
||||
mem_handle_aperture.limit = 0;
|
||||
|
||||
@@ -2511,6 +2554,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
|
||||
goto gpu_mem_init_failed;
|
||||
}
|
||||
|
||||
gpu_mem[gpu_mem_count].drm_render_minor = props.DrmRenderMinor;
|
||||
gpu_mem[gpu_mem_count].usable_peer_id_array =
|
||||
calloc(NumNodes, sizeof(uint32_t));
|
||||
if (!gpu_mem[gpu_mem_count].usable_peer_id_array) {
|
||||
@@ -4146,11 +4190,16 @@ void fmm_clear_all_mem(void)
|
||||
void *map_addr;
|
||||
|
||||
/* Close render node FDs. The child process needs to open new ones */
|
||||
for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++)
|
||||
if (drm_render_fds[i]) {
|
||||
for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++) {
|
||||
|
||||
if (amdgpu_handle[i]) {
|
||||
amdgpu_device_deinitialize(amdgpu_handle[i]);
|
||||
amdgpu_handle[i] = NULL;
|
||||
} else if (drm_render_fds[i]) {
|
||||
close(drm_render_fds[i]);
|
||||
drm_render_fds[i] = 0;
|
||||
}
|
||||
drm_render_fds[i] = 0;
|
||||
}
|
||||
|
||||
fmm_clear_aperture(&mem_handle_aperture);
|
||||
fmm_clear_aperture(&cpuvm_aperture);
|
||||
|
||||
@@ -45,6 +45,7 @@ typedef struct {
|
||||
void *start_address;
|
||||
} aperture_properties_t;
|
||||
|
||||
HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
|
||||
HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes);
|
||||
void fmm_destroy_process_apertures(void);
|
||||
|
||||
@@ -101,4 +102,5 @@ int open_drm_render_device(int minor);
|
||||
void *mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
|
||||
uint64_t guard_size, void *aper_base, void *aper_limit);
|
||||
|
||||
extern int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
|
||||
#endif /* FMM_H_ */
|
||||
|
||||
@@ -618,4 +618,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
|
||||
#else
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get the AMDGPU device handle for a GPU node.
 *
 * @param NodeId        IN:  node whose handle is requested
 * @param DeviceHandle  OUT: receives the handle on success
 *
 * @return HSAKMT_STATUS_INVALID_PARAMETER when DeviceHandle is NULL;
 *         otherwise the status reported by fmm_get_amdgpu_device_handle().
 *         CHECK_KFD_OPEN() runs first and presumably returns early when
 *         KFD has not been opened -- confirm against the macro definition.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
					HsaAMDGPUDeviceHandle *DeviceHandle)
{
	CHECK_KFD_OPEN();

	/* Reject a missing output pointer before it is forwarded */
	if (!DeviceHandle)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	return fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);
}
|
||||
|
||||
@@ -23,6 +23,11 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* glibc macro that enables access to some nonstandard GNU/Linux extensions
|
||||
* such as RTLD_DEFAULT used by dlsym
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include "libhsakmt.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
@@ -34,6 +39,9 @@
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include "fmm.h"
|
||||
#include <dlfcn.h>
|
||||
|
||||
int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
|
||||
|
||||
static const char kfd_device_name[] = "/dev/kfd";
|
||||
static pid_t parent_pid = -1;
|
||||
@@ -143,6 +151,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
HSAKMT_STATUS result;
|
||||
int fd = -1;
|
||||
HsaSystemProperties sys_props;
|
||||
char *error;
|
||||
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
@@ -156,6 +165,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
if (kfd_open_count == 0) {
|
||||
static bool atfork_installed = false;
|
||||
|
||||
fn_amdgpu_device_get_fd = dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
|
||||
if ((error = dlerror()) != NULL)
|
||||
pr_err("amdgpu_device_get_fd is not available: %s\n", error);
|
||||
else
|
||||
pr_info("amdgpu_device_get_fd is available %p\n", fn_amdgpu_device_get_fd);
|
||||
|
||||
result = init_vars_from_env();
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
goto open_failed;
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur