Reserve 128 SGPRs per wave in context save area
The context save area previously reserved 100 SGPRs per wave. Pre-gfx10
ASICs need 102 SGPRs per wave and gfx10 needs 128, so reserve 128 SGPRs
per wave on all ASICs to keep the size calculation simple.
Also double the per-CU VGPR save size for the gfx908 family (CHIP_ARCTURUS), from 0x40000 to 0x80000.
Change-Id: I98b741cbfa051f49ed37ff25d99f851f124be7b6
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
[ROCm/ROCR-Runtime commit: 814e0f0bdc]
Этот коммит содержится в:
коммит произвёл
Yong Zhao
родитель
cb921ff375
Коммит
94420e0624
@@ -42,8 +42,13 @@
|
||||
/* NOTE(review): presumably the size in bytes of one GFX9 doorbell — confirm. */
#define DOORBELL_SIZE_GFX9 8
|
||||
/*
 * Expands to 1024 * ds.
 * NOTE(review): presumably the doorbell page size for doorbells of size ds
 * (i.e. 1024 doorbells per page) — confirm units with the callers.
 */
#define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds))
|
||||
|
||||
#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576
|
||||
#define WAVES_PER_CU_VI 32
|
||||
/*
 * Per-CU sizes of the pieces that make up the workgroup context data.
 * NOTE(review): units are presumably bytes — confirm against the save-area
 * layout.
 */

/*
 * VGPR portion: 0x80000 for CHIP_ARCTURUS, 0x40000 for every other family.
 * The argument is parenthesized so that expressions such as `x & mask` or
 * `c ? a : b` are compared as a whole against CHIP_ARCTURUS.
 */
#define VGPR_SIZE_PER_CU(asic_family) \
	(((asic_family) == CHIP_ARCTURUS) ? 0x80000 : 0x40000)

/*
 * SGPR portion. With WAVES_PER_CU == 32 this is 0x200 per wave, i.e. room
 * for 128 SGPRs per wave if each SGPR takes 4 bytes (TODO confirm).
 */
#define SGPR_SIZE_PER_CU 0x4000

/* LDS portion. */
#define LDS_SIZE_PER_CU 0x10000

/* Hardware-register portion. */
#define HWREG_SIZE_PER_CU 0x1000

/* Total workgroup context data per CU: sum of the four portions above. */
#define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) \
	(VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + \
	 LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)

/* Number of waves accounted for per CU when sizing the control stack. */
#define WAVES_PER_CU 32

/* Control-stack space accounted for per wave. */
#define CNTL_STACK_BYTES_PER_WAVE 8
|
||||
|
||||
struct device_info {
|
||||
enum asic_family_type asic_family;
|
||||
@@ -385,9 +390,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
uint32_t ctl_stack_size, wg_data_size;
|
||||
uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU;
|
||||
|
||||
ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8;
|
||||
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI;
|
||||
|
||||
ctl_stack_size = cu_num * WAVES_PER_CU * CNTL_STACK_BYTES_PER_WAVE + 8;
|
||||
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family);
|
||||
q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size
|
||||
+ sizeof(HsaUserContextSaveAreaHeader));
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user