diff --git a/src/fmm.c b/src/fmm.c index 9bcf9f9115..02e044bb88 100644 --- a/src/fmm.c +++ b/src/fmm.c @@ -1318,7 +1318,8 @@ static void *__fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t Memo return mem; } -void *fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes, HsaMemFlags mflags) +void *fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address, + uint64_t MemorySizeInBytes, HsaMemFlags mflags) { manageable_aperture_t *aperture; int32_t gpu_mem_id; @@ -1339,7 +1340,7 @@ void *fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t MemorySizeInB ioc_flags |= fmm_translate_hsa_to_ioc_flags(mflags); - if (topology_is_svm_needed(get_device_id_by_gpu_id(gpu_id))) { + if (topology_is_svm_needed(node_id)) { aperture = svm.dgpu_aperture; if (mflags.ui32.AQLQueueMemory) size = MemorySizeInBytes * 2; @@ -2386,7 +2387,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) fmm_init_rbtree(); for (gpu_mem_id = 0; (uint32_t)gpu_mem_id < gpu_mem_count; gpu_mem_id++) { - if (!topology_is_svm_needed(gpu_mem[gpu_mem_id].device_id)) + if (!topology_is_svm_needed(gpu_mem[gpu_mem_id].node_id)) continue; gpu_mem[gpu_mem_id].mmio_aperture.base = map_mmio( gpu_mem[gpu_mem_id].node_id, @@ -3170,7 +3171,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle, gpu_mem_id = gpu_mem_find_by_gpu_id(infoArgs.gpu_id); if (gpu_mem_id < 0) goto error_free_metadata; - if (topology_is_svm_needed(gpu_mem[gpu_mem_id].device_id)) { + if (topology_is_svm_needed(gpu_mem[gpu_mem_id].node_id)) { aperture = svm.dgpu_aperture; aperture_base = NULL; } else { diff --git a/src/fmm.h b/src/fmm.h index e06acfde24..b49e73f7bf 100644 --- a/src/fmm.h +++ b/src/fmm.h @@ -50,7 +50,8 @@ void fmm_destroy_process_apertures(void); /* Memory interface */ void *fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes); -void *fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes, HsaMemFlags flags); +void *fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address, + uint64_t MemorySizeInBytes, HsaMemFlags flags); void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset); void *fmm_allocate_host(uint32_t node_id, void *address, uint64_t MemorySizeInBytes, HsaMemFlags flags); diff --git a/src/libhsakmt.h b/src/libhsakmt.h index d847003392..3059eebf6a 100644 --- a/src/libhsakmt.h +++ b/src/libhsakmt.h @@ -214,7 +214,7 @@ HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties bool *p2p_links, uint32_t *num_p2pLinks); HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props); void topology_setup_is_dgpu_param(HsaNodeProperties *props); -bool topology_is_svm_needed(uint16_t device_id); +bool topology_is_svm_needed(uint32_t node_id); HSAKMT_STATUS topology_get_asic_family(uint16_t device_id, enum asic_family_type *asic); diff --git a/src/memory.c b/src/memory.c index 145aa0472a..6d2a4f4450 100644 --- a/src/memory.c +++ b/src/memory.c @@ -171,7 +171,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode, } /* GPU allocated VRAM */ - *MemoryAddress = fmm_allocate_device(gpu_id, *MemoryAddress, SizeInBytes, MemFlags); + *MemoryAddress = fmm_allocate_device(gpu_id, PreferredNode, *MemoryAddress, + SizeInBytes, MemFlags); if (!(*MemoryAddress)) { pr_err("[%s] failed to allocate %lu bytes from device\n", diff --git a/src/topology.c b/src/topology.c index 38ef39742b..12337fcff7 100644 --- a/src/topology.c +++ b/src/topology.c @@ -798,16 +798,12 @@ void topology_setup_is_dgpu_param(HsaNodeProperties *props) is_dgpu = true; } -bool topology_is_svm_needed(uint16_t device_id) +bool topology_is_svm_needed(uint32_t node_id) { - const struct hsa_gfxip_table *hsa_gfxip; - if (is_dgpu) return true; - hsa_gfxip = find_hsa_gfxip_device(device_id); - - if (hsa_gfxip && hsa_gfxip->asic_family >= CHIP_VEGA10) + if (get_gfxv_by_node_id(node_id) >= GFX_VERSION_VEGA10) return true; return false;