Differentiate gfx700 and improve the logic by introducing is_gfx700()
Because gfx700 has local memory but other APUs don't, we should reflect that in the code. Also fix a bug where the SVM aperture was not added on gfx902 when calling hsaKmtGetNodeMemoryProperties(). Change-Id: Id840f2db0b14fda9ee713b219a9474c15f8a9771 Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Этот коммит содержится в:
коммит произвёл
Yong Zhao
родитель
8fbf4a26ec
Коммит
110e754f64
@@ -107,6 +107,7 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
|
||||
HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
|
||||
bool prefer_ats(HSAuint32 node_id);
|
||||
uint16_t get_device_id_by_node_id(HSAuint32 node_id);
|
||||
bool is_kaveri(HSAuint32 node_id);
|
||||
uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id);
|
||||
int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
|
||||
HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
|
||||
+1
-1
@@ -49,7 +49,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
|
||||
pr_debug("[%s] node %d; default %d; alternate %d\n",
|
||||
__func__, Node, DefaultPolicy, AlternatePolicy);
|
||||
|
||||
if (is_dgpu)
|
||||
if (!is_kaveri(Node))
|
||||
/* This is a legacy API useful on Kaveri only. On dGPU
|
||||
* the alternate aperture is setup and used
|
||||
* automatically for coherent allocations. Don't let
|
||||
|
||||
@@ -1780,7 +1780,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t i, gpu_id;
|
||||
HSAuint64 aperture_limit;
|
||||
bool nodeIsDGPU;
|
||||
|
||||
if (!MemoryProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -1816,8 +1815,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
if (gpu_id == 0)
|
||||
goto out;
|
||||
|
||||
nodeIsDGPU = topology_is_dgpu(get_device_id_by_gpu_id(gpu_id));
|
||||
|
||||
/*Add LDS*/
|
||||
if (i < NumBanks &&
|
||||
fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
|
||||
@@ -1831,7 +1828,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
* For dGPU the topology node contains Local Memory and it is added by
|
||||
* the for loop above
|
||||
*/
|
||||
if (!nodeIsDGPU && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 &&
|
||||
if (is_kaveri(NodeId) && i < NumBanks && g_props[NodeId].node.LocalMemSize > 0 &&
|
||||
fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
|
||||
@@ -1848,8 +1845,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
i++;
|
||||
}
|
||||
|
||||
/* On dGPUs add SVM aperture */
|
||||
if (nodeIsDGPU && i < NumBanks &&
|
||||
/* Add SVM aperture */
|
||||
if (topology_is_svm_needed(get_device_id_by_gpu_id(gpu_id)) && i < NumBanks &&
|
||||
fmm_get_aperture_base_and_limit(
|
||||
FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
|
||||
&aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -1954,6 +1951,12 @@ bool prefer_ats(HSAuint32 node_id)
|
||||
&& g_props[node_id].node.NumFComputeCores;
|
||||
}
|
||||
|
||||
/*
 * is_kaveri - check a node's engine version against 7.0.
 * @node_id: index into g_props; it is used as-is, with no range check
 *           performed in this function.
 *
 * Returns true only when the node's EngineId reports Major == 7 and
 * Minor == 0, false otherwise.
 *
 * NOTE(review): presumably engine version 7.0 corresponds to the
 * gfx700 (Kaveri) family - confirm against the device tables.
 */
bool is_kaveri(HSAuint32 node_id)
{
	/* A major version other than 7 rules the node out immediately. */
	if (g_props[node_id].node.EngineId.ui32.Major != 7)
		return false;

	return g_props[node_id].node.EngineId.ui32.Minor == 0;
}
|
||||
|
||||
uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
Ссылка в новой задаче
Block a user