From 94420e06245087f0066eaaeb9db87362de845a8b Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Tue, 27 Nov 2018 15:08:17 -0600 Subject: [PATCH] Reserve 128 SGPRs per wave in context save area The context save area originally reserved 100 SGPRs per wave. Pre-gfx10 needs 102 SGPRs and gfx10 needs 128 SGPRs. Reserve 128 SGPRs per wave for all ASICs to simplify the calculation. Also double the per-CU VGPR save size for the gfx908 family (CHIP_ARCTURUS). Change-Id: I98b741cbfa051f49ed37ff25d99f851f124be7b6 Signed-off-by: Jay Cornwall Signed-off-by: Oak Zeng [ROCm/ROCR-Runtime commit: 814e0f0bdcc7a5733a00ebe300730093bccd3e08] --- projects/rocr-runtime/src/queues.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/projects/rocr-runtime/src/queues.c b/projects/rocr-runtime/src/queues.c index f006d5ec2c..5f7384ff4a 100644 --- a/projects/rocr-runtime/src/queues.c +++ b/projects/rocr-runtime/src/queues.c @@ -42,8 +42,13 @@ #define DOORBELL_SIZE_GFX9 8 #define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds)) -#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576 -#define WAVES_PER_CU_VI 32 +#define VGPR_SIZE_PER_CU(asic_family) (asic_family == CHIP_ARCTURUS ? 
0x80000 : 0x40000) +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) (VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) +#define WAVES_PER_CU 32 +#define CNTL_STACK_BYTES_PER_WAVE 8 struct device_info { enum asic_family_type asic_family; @@ -385,9 +390,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) uint32_t ctl_stack_size, wg_data_size; uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU; - ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8; - wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI; - + ctl_stack_size = cu_num * WAVES_PER_CU * CNTL_STACK_BYTES_PER_WAVE + 8; + wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family); q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size + sizeof(HsaUserContextSaveAreaHeader));