From dbf94b7dd366d95ec35f844e344d3e89ae67763b Mon Sep 17 00:00:00 2001 From: David Belanger Date: Tue, 24 Jan 2023 09:55:04 -0500 Subject: [PATCH] libhsakmt: Fixed VGPR memory size for GFX11.0 and GFX11.1. Fixed VGPR memory size; the size was too small for some GPUs, causing a memory overflow. Refactored macro code into a function. Thanks to Jay Cornwall for locating the problem and proposing the fix. Change-Id: Iffedea1c4f341967f02c56d810ff048225b02c16 Signed-off-by: David Belanger [ROCm/ROCR-Runtime commit: a847a7b80edda2103c7094c17b26b547c04957c2] --- projects/rocr-runtime/src/libhsakmt.h | 6 +++--- projects/rocr-runtime/src/queues.c | 24 +++++++++++++++++++++++- projects/rocr-runtime/src/topology.c | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/projects/rocr-runtime/src/libhsakmt.h b/projects/rocr-runtime/src/libhsakmt.h index 7f62c512de..0f80e5eb3b 100644 --- a/projects/rocr-runtime/src/libhsakmt.h +++ b/projects/rocr-runtime/src/libhsakmt.h @@ -149,6 +149,8 @@ enum full_gfx_versions { GFX_VERSION_VANGOGH = 0x0A0303, GFX_VERSION_BEIGE_GOBY = 0x0A0304, GFX_VERSION_YELLOW_CARP = 0x0A0305, + GFX_VERSION_PLUM_BONITO = 0x0B0000, + GFX_VERSION_WHEAT_NAS = 0x0B0001, }; struct hsa_gfxip_table { @@ -222,8 +224,6 @@ uint32_t get_num_sysfs_nodes(void); bool is_forked_child(void); /* Calculate VGPR and SGPR register file size per CU */ -#define VGPR_SIZE_PER_CU(gfxv) \ - (((gfxv) == GFX_VERSION_ARCTURUS || \ - (gfxv) == GFX_VERSION_ALDEBARAN) ? 0x80000 : 0x40000) +uint32_t get_vgpr_size_per_cu(uint32_t gfxv); #define SGPR_SIZE_PER_CU 0x4000 #endif diff --git a/projects/rocr-runtime/src/queues.c b/projects/rocr-runtime/src/queues.c index 7e28ba751a..c53d219c75 100644 --- a/projects/rocr-runtime/src/queues.c +++ b/projects/rocr-runtime/src/queues.c @@ -45,7 +45,7 @@ (((gfxv) >= 0x80000) ? 
4096 : 0)) #define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ - (VGPR_SIZE_PER_CU(gfxv) + SGPR_SIZE_PER_CU + \ + (get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU + \ LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ @@ -87,6 +87,28 @@ struct process_doorbells { static unsigned int num_doorbells; static struct process_doorbells *doorbells; +uint32_t get_vgpr_size_per_cu(uint32_t gfxv) +{ + uint32_t vgpr_size = 0; + + switch (gfxv) + { + case GFX_VERSION_ARCTURUS: + case GFX_VERSION_ALDEBARAN: + vgpr_size = 0x80000; + break; + case GFX_VERSION_PLUM_BONITO: + case GFX_VERSION_WHEAT_NAS: + vgpr_size = 0x60000; + break; + default: + vgpr_size = 0x40000; + break; + } + + return vgpr_size; +} + HSAKMT_STATUS init_process_doorbells(unsigned int NumNodes) { unsigned int i; diff --git a/projects/rocr-runtime/src/topology.c b/projects/rocr-runtime/src/topology.c index 2887c661bc..7e99ec10d5 100644 --- a/projects/rocr-runtime/src/topology.c +++ b/projects/rocr-runtime/src/topology.c @@ -1239,7 +1239,7 @@ static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, /* Get VGPR/SGPR size in byte per CU */ props->SGPRSizePerCU = SGPR_SIZE_PER_CU; - props->VGPRSizePerCU = VGPR_SIZE_PER_CU(HSA_GET_GFX_VERSION_FULL(props->EngineId.ui32)); + props->VGPRSizePerCU = get_vgpr_size_per_cu(HSA_GET_GFX_VERSION_FULL(props->EngineId.ui32)); } else if (props->DeviceId) /* still return success */