diff --git a/src/libhsakmt.h b/src/libhsakmt.h index 6177b1b3a8..17dce385fc 100644 --- a/src/libhsakmt.h +++ b/src/libhsakmt.h @@ -107,6 +107,7 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id); HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id); bool prefer_ats(HSAuint32 node_id); uint16_t get_device_id_by_node_id(HSAuint32 node_id); +bool is_kaveri(HSAuint32 node_id); uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id); int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id); HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array, diff --git a/src/memory.c b/src/memory.c index 8b1b07599b..6c1996365e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -49,7 +49,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node, pr_debug("[%s] node %d; default %d; alternate %d\n", __func__, Node, DefaultPolicy, AlternatePolicy); - if (is_dgpu) + if (!is_kaveri(Node)) /* This is a legacy API useful on Kaveri only. On dGPU * the alternate aperture is setup and used * automatically for coherent allocations. Don't let diff --git a/src/topology.c b/src/topology.c index eb2367e41f..53ea9b5b0b 100644 --- a/src/topology.c +++ b/src/topology.c @@ -1780,7 +1780,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS; uint32_t i, gpu_id; HSAuint64 aperture_limit; - bool nodeIsDGPU; if (!MemoryProperties) return HSAKMT_STATUS_INVALID_PARAMETER; @@ -1816,8 +1815,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, if (gpu_id == 0) goto out; - nodeIsDGPU = topology_is_dgpu(get_device_id_by_gpu_id(gpu_id)); - /*Add LDS*/ if (i < NumBanks && fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id, @@ -1831,7 +1828,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, * For dGPU the topology node contains Local Memory and it is added by * the for loop above */ - if (!nodeIsDGPU && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 && + if (is_kaveri(NodeId) && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 && fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE; @@ -1848,8 +1845,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, i++; } - /* On dGPUs add SVM aperture */ - if (nodeIsDGPU && i < NumBanks && + /* Add SVM aperture */ + if (topology_is_svm_needed(get_device_id_by_gpu_id(gpu_id)) && i < NumBanks && fmm_get_aperture_base_and_limit( FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { @@ -1954,6 +1951,12 @@ bool prefer_ats(HSAuint32 node_id) && g_props[node_id].node.NumFComputeCores; } +bool is_kaveri(HSAuint32 node_id) +{ + return g_props[node_id].node.EngineId.ui32.Major == 7 + && g_props[node_id].node.EngineId.ui32.Minor == 0; +} + uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id) { unsigned int i;