From 110e754f64437fb0c8a9042a7410586df3f45a3d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 7 Aug 2018 16:03:24 -0400 Subject: [PATCH] Differentiate gfx700 and improve the logic by introducing is_kaveri() Because gfx700 (Kaveri) has local memory but other APUs don't, we should reflect that in the code. Also fix a bug where the SVM aperture is not added on gfx902 when calling hsaKmtGetNodeMemoryProperties(). Change-Id: Id840f2db0b14fda9ee713b219a9474c15f8a9771 Signed-off-by: Yong Zhao --- src/libhsakmt.h | 1 + src/memory.c | 2 +- src/topology.c | 15 +++++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/libhsakmt.h b/src/libhsakmt.h index 6177b1b3a8..17dce385fc 100644 --- a/src/libhsakmt.h +++ b/src/libhsakmt.h @@ -107,6 +107,7 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id); HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id); bool prefer_ats(HSAuint32 node_id); uint16_t get_device_id_by_node_id(HSAuint32 node_id); +bool is_kaveri(HSAuint32 node_id); uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id); int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id); HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array, diff --git a/src/memory.c b/src/memory.c index 8b1b07599b..6c1996365e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -49,7 +49,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node, pr_debug("[%s] node %d; default %d; alternate %d\n", __func__, Node, DefaultPolicy, AlternatePolicy); - if (is_dgpu) + if (!is_kaveri(Node)) /* This is a legacy API useful on Kaveri only. On dGPU * the alternate aperture is setup and used * automatically for coherent allocations. 
Don't let diff --git a/src/topology.c b/src/topology.c index eb2367e41f..53ea9b5b0b 100644 --- a/src/topology.c +++ b/src/topology.c @@ -1780,7 +1780,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS; uint32_t i, gpu_id; HSAuint64 aperture_limit; - bool nodeIsDGPU; if (!MemoryProperties) return HSAKMT_STATUS_INVALID_PARAMETER; @@ -1816,8 +1815,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, if (gpu_id == 0) goto out; - nodeIsDGPU = topology_is_dgpu(get_device_id_by_gpu_id(gpu_id)); - /*Add LDS*/ if (i < NumBanks && fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id, @@ -1831,7 +1828,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, * For dGPU the topology node contains Local Memory and it is added by * the for loop above */ - if (!nodeIsDGPU && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 && + if (is_kaveri(NodeId) && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 && fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE; @@ -1848,8 +1845,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, i++; } - /* On dGPUs add SVM aperture */ - if (nodeIsDGPU && i < NumBanks && + /* Add SVM aperture */ + if (topology_is_svm_needed(get_device_id_by_gpu_id(gpu_id)) && i < NumBanks && fmm_get_aperture_base_and_limit( FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) { @@ -1954,6 +1951,12 @@ bool prefer_ats(HSAuint32 node_id) && g_props[node_id].node.NumFComputeCores; } +bool is_kaveri(HSAuint32 node_id) +{ + return g_props[node_id].node.EngineId.ui32.Major == 7 + && g_props[node_id].node.EngineId.ui32.Minor == 0; +} + uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id) { unsigned int i;