diff --git a/projects/rocr-runtime/src/queues.c b/projects/rocr-runtime/src/queues.c index f006d5ec2c..5f7384ff4a 100644 --- a/projects/rocr-runtime/src/queues.c +++ b/projects/rocr-runtime/src/queues.c @@ -42,8 +42,13 @@ #define DOORBELL_SIZE_GFX9 8 #define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds)) -#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576 -#define WAVES_PER_CU_VI 32 +#define VGPR_SIZE_PER_CU(asic_family) ((asic_family) == CHIP_ARCTURUS ? 0x80000 : 0x40000) +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) (VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) +#define WAVES_PER_CU 32 +#define CNTL_STACK_BYTES_PER_WAVE 8 struct device_info { enum asic_family_type asic_family; @@ -385,9 +390,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) uint32_t ctl_stack_size, wg_data_size; uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU; - ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8; - wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI; - + ctl_stack_size = cu_num * WAVES_PER_CU * CNTL_STACK_BYTES_PER_WAVE + 8; + wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family); q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size + sizeof(HsaUserContextSaveAreaHeader));