From 783e3467779b86f228222c86f82b4bb6e6c4bcbb Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Fri, 16 Oct 2020 00:12:38 -0700 Subject: [PATCH] libhsakmt: Fix the ctrl stack size calculation On gfx9, the maximum number of wavefronts per queue is the minimum of 40 waves per compute unit and 512 waves per shader engine. On gfx10, there can only be 32 waves per compute unit. Signed-off-by: Laurent Morichetti Change-Id: I148d1a4fe6c07cdbfaa1f77939eb29311c81c008 --- src/queues.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/queues.c b/src/queues.c index 33b81fc09c..29949c9092 100644 --- a/src/queues.c +++ b/src/queues.c @@ -47,10 +47,9 @@ #define LDS_SIZE_PER_CU 0x10000 #define HWREG_SIZE_PER_CU 0x1000 #define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) (VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) -#define WAVES_PER_CU 32 -#define CNTL_STACK_BYTES_PER_CU(asic_family) (WAVES_PER_CU * (asic_family >= CHIP_NAVI10 ? 12 : 8)) +#define CNTL_STACK_BYTES_PER_WAVE(asic_family) (asic_family >= CHIP_NAVI10 ? 12 : 8) #define DEBUGGER_BYTES_ALIGN 64 -#define DEBUGGER_BYTES_PER_CU(asic_family) (WAVES_PER_CU * 32) +#define DEBUGGER_BYTES_PER_WAVE(asic_family) 32 struct device_info { enum asic_family_type asic_family; @@ -434,8 +433,11 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) if (node.NumFComputeCores && node.NumSIMDPerCU) { uint32_t ctl_stack_size, wg_data_size; uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU; + uint32_t wave_num = (q->dev_info->asic_family < CHIP_NAVI10) + ? 
MIN(cu_num * 40, node.NumShaderBanks / node.NumArrays * 512) + : cu_num * 32; - ctl_stack_size = cu_num * CNTL_STACK_BYTES_PER_CU(q->dev_info->asic_family) + 8; + ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(q->dev_info->asic_family) + 8; wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family); q->ctl_stack_size = PAGE_ALIGN_UP(sizeof(HsaUserContextSaveAreaHeader) + ctl_stack_size); @@ -449,7 +451,7 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) } q->debug_memory_size = - ALIGN_UP(cu_num * DEBUGGER_BYTES_PER_CU(q->dev_info->asic_family), DEBUGGER_BYTES_ALIGN); + ALIGN_UP(wave_num * DEBUGGER_BYTES_PER_WAVE(q->dev_info->asic_family), DEBUGGER_BYTES_ALIGN); q->ctx_save_restore_size = q->ctl_stack_size + PAGE_ALIGN_UP(wg_data_size + q->debug_memory_size);