diff --git a/projects/rocr-runtime/include/hsakmttypes.h b/projects/rocr-runtime/include/hsakmttypes.h
index eb640d7014..6fc65df068 100644
--- a/projects/rocr-runtime/include/hsakmttypes.h
+++ b/projects/rocr-runtime/include/hsakmttypes.h
@@ -247,7 +247,7 @@ typedef struct _HsaNodeProperties
     HSAuint64       LocalMemSize;       // Local memory size
     HSAuint32       MaxEngineClockMhzFCompute;  // maximum engine clocks for CPU and
     HSAuint32       MaxEngineClockMhzCCompute;  // GPU function, including any boost caopabilities,
-
+    HSAint32        DrmRenderMinor;             // DRM render device minor device number
     HSAuint16       MarketingName[HSA_PUBLIC_NAME_SIZE];   // Public name of the "device" on the node (board or APU name).
                                        // Unicode string
     HSAuint8        AMDName[HSA_PUBLIC_NAME_SIZE];   //CAL Name of the "device", ASCII
diff --git a/projects/rocr-runtime/src/fmm.c b/projects/rocr-runtime/src/fmm.c
index add26aa4b1..67cea1c834 100644
--- a/projects/rocr-runtime/src/fmm.c
+++ b/projects/rocr-runtime/src/fmm.c
@@ -1056,9 +1056,12 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla
 	}
 
 	if (mem && flags.ui32.HostAccess) {
+		int map_fd = mmap_offset >= (1ULL<<40) ? kfd_fd :
+					get_drm_render_fd_by_gpu_id(gpu_id);
 		void *ret = mmap(mem, MemorySizeInBytes,
 				 PROT_READ | PROT_WRITE,
-				 MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset);
+				 MAP_SHARED | MAP_FIXED,
+				 map_fd, mmap_offset);
 		if (ret == MAP_FAILED) {
 			__fmm_release(mem, aperture);
 			return NULL;
@@ -1245,9 +1248,11 @@ static void *fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes,
 				ioc_flags, &vm_obj);
 
 		if (mem && flags.ui32.HostAccess) {
+			int map_fd = mmap_offset >= (1ULL<<40) ? kfd_fd :
+						get_drm_render_fd_by_gpu_id(gpu_id);
 			void *ret = mmap(mem, MemorySizeInBytes,
 					 PROT_READ | PROT_WRITE,
-					 MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset);
+					 MAP_SHARED | MAP_FIXED, map_fd, mmap_offset);
 			if (ret == MAP_FAILED) {
 				__fmm_release(mem, aperture);
 				return NULL;
@@ -1259,7 +1264,7 @@ static void *fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes,
 				memset(ret, 0, MemorySizeInBytes);
 				mmap(VOID_PTR_ADD(mem, my_buf_size), MemorySizeInBytes,
 				     PROT_READ | PROT_WRITE,
-				     MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset);
+				     MAP_SHARED | MAP_FIXED, map_fd, mmap_offset);
 			}
 		}
 	}
@@ -1827,6 +1832,8 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *apert
 		if (!obj)
 			return -1;
 	} else {
+		int map_fd;
+
 		fmm_allocate_memory_in_device(gpu_id,
 					address,
 					size,
@@ -1835,8 +1842,10 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *apert
 					KFD_IOC_ALLOC_MEM_FLAGS_GTT);
+		/* Choose the fd after the allocation call, which is expected to set mmap_offset */
+		map_fd = mmap_offset >= (1ULL<<40) ? kfd_fd :
+					get_drm_render_fd_by_gpu_id(gpu_id);
 		mmap_ret = mmap(address, size,
 				PROT_READ | PROT_WRITE,
-				MAP_SHARED | MAP_FIXED,
-				kfd_fd, mmap_offset);
+				MAP_SHARED | MAP_FIXED, map_fd, mmap_offset);
 		if (mmap_ret == MAP_FAILED) {
 			__fmm_release(mem, aperture);
 			return -1;
@@ -2753,10 +2762,11 @@ HSAKMT_STATUS fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemo
 	pthread_mutex_unlock(&aperture->fmm_mutex);
 
 	if (importArgs.mmap_offset) {
+		int map_fd = importArgs.mmap_offset >= (1ULL<<40) ? kfd_fd :
+					get_drm_render_fd_by_gpu_id(importArgs.gpu_id);
 		void *ret = mmap(reservedMem, (SharedMemoryStruct->SizeInPages << PAGE_SHIFT),
 				 PROT_READ | PROT_WRITE,
-				 MAP_SHARED | MAP_FIXED, kfd_fd,
-				 importArgs.mmap_offset);
+				 MAP_SHARED | MAP_FIXED, map_fd, importArgs.mmap_offset);
 		if (ret == MAP_FAILED) {
 			err = HSAKMT_STATUS_ERROR;
 			goto err_free_obj;
diff --git a/projects/rocr-runtime/src/libhsakmt.h b/projects/rocr-runtime/src/libhsakmt.h
index 142379010e..07402ffef5 100644
--- a/projects/rocr-runtime/src/libhsakmt.h
+++ b/projects/rocr-runtime/src/libhsakmt.h
@@ -107,6 +107,7 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
 HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
 uint16_t get_device_id_by_node(HSAuint32 node_id);
 uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id);
+int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
 HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
 		uint32_t NumberOfNodes, uint32_t *NodeArray);
 
diff --git a/projects/rocr-runtime/src/topology.c b/projects/rocr-runtime/src/topology.c
index 44b6fe4e64..2cc2ebdba6 100644
--- a/projects/rocr-runtime/src/topology.c
+++ b/projects/rocr-runtime/src/topology.c
@@ -54,6 +54,7 @@ typedef struct {
 	HsaMemoryProperties *mem;     /* node->NumBanks elements */
 	HsaCacheProperties *cache;
 	HsaIoLinkProperties *link;
+	int drm_render_fd;
 } node_t;
 
 static HsaSystemProperties *_system = NULL;
@@ -239,6 +240,8 @@ free_node(node_t *n)
 		free((n)->cache);
 	if ((n)->link)
 		free((n)->link);
+	if ((n)->drm_render_fd > 0)
+		close((n)->drm_render_fd);
 }
 
 static void free_nodes(node_t *temp_nodes, int size)
@@ -825,6 +828,8 @@ HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
 			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
 		else if (strcmp(prop_name, "local_mem_size") == 0)
 			props->LocalMemSize = prop_val;
+		else if (strcmp(prop_name, "drm_render_minor") == 0)
+			props->DrmRenderMinor = (int32_t)prop_val;
 
 	}
 
@@ -1512,6 +1517,19 @@ static void topology_create_indirect_gpu_links(const HsaSystemProperties *sys_pr
 	}
 }
 
+
+/* Open the DRM render node named by the node's DrmRenderMinor property and
+ * store the result of open() in n->drm_render_fd (-1 if the open failed).
+ */
+static void open_drm_render_device(node_t *n)
+{
+	int minor = n->node.DrmRenderMinor;
+	char path[128];
+
+	snprintf(path, sizeof(path), "/dev/dri/renderD%d", minor);
+	n->drm_render_fd = open(path, O_RDWR | O_CLOEXEC);
+}
+
 HSAKMT_STATUS topology_take_snapshot(void)
 {
 	uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id;
@@ -1609,7 +1627,7 @@ retry:
 					}
 				}
 			}
-
+			open_drm_render_device(&temp_nodes[i]);
 		}
 		pci_cleanup(pacc);
 	}
@@ -1970,6 +1988,25 @@ uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id)
 	return 0;
 }
 
+/* Return the drm_render_fd stored for the node whose gpu_id matches.
+ * Returns -1 when the topology tables are not set up or no node matches;
+ * 0 is not used as the "no descriptor" value because it is a valid fd.
+ */
+int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id)
+{
+	unsigned int i;
+
+	if (!node || !_system)
+		return -1;
+
+	for (i = 0; i < _system->NumNodes; i++) {
+		if (node[i].gpu_id == gpu_id)
+			return node[i].drm_render_fd;
+	}
+
+	return -1;
+}
+
 HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
 		uint32_t NumberOfNodes, uint32_t *NodeArray)
 {