Reserve 128 SGPRs per wave in context save area

Originally reserved 100 SGPRs per wave. Pre-gfx10 needs 102 SGPRs
and gfx10 needs 128 SGPRs. Reserve 128 SGPRs per wave for all ASICs
to simplify calculation.
Also double VGPR register size for gfx908 family

Change-Id: I98b741cbfa051f49ed37ff25d99f851f124be7b6
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>


[ROCm/ROCR-Runtime commit: 814e0f0bdc]
Этот коммит содержится в:
Jay Cornwall
2018-11-27 15:08:17 -06:00
коммит произвёл Yong Zhao
родитель cb921ff375
Коммит 94420e0624
+9 -5
Просмотреть файл
@@ -42,8 +42,13 @@
#define DOORBELL_SIZE_GFX9 8
#define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds))
#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576
#define WAVES_PER_CU_VI 32
#define VGPR_SIZE_PER_CU(asic_family) (asic_family == CHIP_ARCTURUS ? 0x80000 : 0x40000)
#define SGPR_SIZE_PER_CU 0x4000
#define LDS_SIZE_PER_CU 0x10000
#define HWREG_SIZE_PER_CU 0x1000
#define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) (VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
#define WAVES_PER_CU 32
#define CNTL_STACK_BYTES_PER_WAVE 8
struct device_info {
enum asic_family_type asic_family;
@@ -385,9 +390,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
uint32_t ctl_stack_size, wg_data_size;
uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU;
ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8;
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI;
ctl_stack_size = cu_num * WAVES_PER_CU * CNTL_STACK_BYTES_PER_WAVE + 8;
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family);
q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size
+ sizeof(HsaUserContextSaveAreaHeader));