libhsakmt: Fix the ctrl stack size calculation
On gfx9, the maximum number of wavefronts per queue is the minimum of 40 waves per compute units, or 512 waves per shader engine. On gfx10, there can only be 32 waves per compute units. Signed-off-by: Laurent Morichetti <laurent.morichetti@amd.com> Change-Id: I148d1a4fe6c07cdbfaa1f77939eb29311c81c008
Этот коммит содержится в:
+7
-5
@@ -47,10 +47,9 @@
|
||||
#define LDS_SIZE_PER_CU 0x10000
|
||||
#define HWREG_SIZE_PER_CU 0x1000
|
||||
#define WG_CONTEXT_DATA_SIZE_PER_CU(asic_family) (VGPR_SIZE_PER_CU(asic_family) + SGPR_SIZE_PER_CU + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
|
||||
#define WAVES_PER_CU 32
|
||||
#define CNTL_STACK_BYTES_PER_CU(asic_family) (WAVES_PER_CU * (asic_family >= CHIP_NAVI10 ? 12 : 8))
|
||||
#define CNTL_STACK_BYTES_PER_WAVE(asic_family) (asic_family >= CHIP_NAVI10 ? 12 : 8)
|
||||
#define DEBUGGER_BYTES_ALIGN 64
|
||||
#define DEBUGGER_BYTES_PER_CU(asic_family) (WAVES_PER_CU * 32)
|
||||
#define DEBUGGER_BYTES_PER_WAVE(asic_family) 32
|
||||
|
||||
struct device_info {
|
||||
enum asic_family_type asic_family;
|
||||
@@ -434,8 +433,11 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
if (node.NumFComputeCores && node.NumSIMDPerCU) {
|
||||
uint32_t ctl_stack_size, wg_data_size;
|
||||
uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU;
|
||||
uint32_t wave_num = (q->dev_info->asic_family < CHIP_NAVI10)
|
||||
? MIN(cu_num * 40, node.NumShaderBanks / node.NumArrays * 512)
|
||||
: cu_num * 32;
|
||||
|
||||
ctl_stack_size = cu_num * CNTL_STACK_BYTES_PER_CU(q->dev_info->asic_family) + 8;
|
||||
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(q->dev_info->asic_family) + 8;
|
||||
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(q->dev_info->asic_family);
|
||||
q->ctl_stack_size = PAGE_ALIGN_UP(sizeof(HsaUserContextSaveAreaHeader)
|
||||
+ ctl_stack_size);
|
||||
@@ -449,7 +451,7 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
}
|
||||
|
||||
q->debug_memory_size =
|
||||
ALIGN_UP(cu_num * DEBUGGER_BYTES_PER_CU(q->dev_info->asic_family), DEBUGGER_BYTES_ALIGN);
|
||||
ALIGN_UP(wave_num * DEBUGGER_BYTES_PER_WAVE(q->dev_info->asic_family), DEBUGGER_BYTES_ALIGN);
|
||||
|
||||
q->ctx_save_restore_size = q->ctl_stack_size
|
||||
+ PAGE_ALIGN_UP(wg_data_size + q->debug_memory_size);
|
||||
|
||||
Ссылка в новой задаче
Block a user