diff --git a/src/queues.c b/src/queues.c index 9c1acdd01d..3118d2a12c 100644 --- a/src/queues.c +++ b/src/queues.c @@ -49,47 +49,37 @@ enum asic_family_type { #define IS_DGPU(chip) (((chip) >= CHIP_TONGA && (chip) <= CHIP_FIJI) || \ (chip) == CHIP_HAWAII) +#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576 +#define WAVES_PER_CU_VI 32 + struct device_info { enum asic_family_type asic_family; - uint32_t ctx_save_restore_size; - uint32_t ctl_stack_size; uint32_t eop_buffer_size; }; struct device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, - .ctx_save_restore_size = 0, - .ctl_stack_size = 0, .eop_buffer_size = 0, }; struct device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, - .ctx_save_restore_size = 0, - .ctl_stack_size = 0, .eop_buffer_size = 0, }; struct device_info carrizo_device_info = { .asic_family = CHIP_CARRIZO, - .ctx_save_restore_size = PAGE_ALIGN_UP(2756608 + 4096), - .ctl_stack_size = 4096, .eop_buffer_size = 4096, }; struct device_info tonga_device_info = { .asic_family = CHIP_TONGA, - .ctx_save_restore_size = PAGE_ALIGN_UP(8269824 + PAGE_ALIGN_UP(6152)), - .ctl_stack_size = PAGE_ALIGN_UP(6152), .eop_buffer_size = TONGA_PAGE_SIZE, }; struct device_info fiji_device_info = { .asic_family = CHIP_FIJI, - /*SR size = work group ctx data + ctl stack size*/ - .ctx_save_restore_size = PAGE_ALIGN_UP(20674560 + PAGE_ALIGN_UP(15368)), - .ctl_stack_size = PAGE_ALIGN_UP(15368), .eop_buffer_size = TONGA_PAGE_SIZE, }; @@ -161,6 +151,8 @@ struct queue uint32_t rptr; void *eop_buffer; void *ctx_save_restore; + uint32_t ctx_save_restore_size; + uint32_t ctl_stack_size; const struct device_info *dev_info; }; @@ -240,6 +232,29 @@ static void* allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align) return ptr; } +/* The bool return indicate whether the queue needs a context-save-restore area*/ +static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q) +{ + HsaNodeProperties node; + + if (q->dev_info->asic_family < CHIP_CARRIZO) + return false; + if (hsaKmtGetNodeProperties(nodeid, &node)) + return false; + if (node.NumFComputeCores && node.NumSIMDPerCU) { + uint32_t ctl_stack_size, wg_data_size; + uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU; + + ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8; + wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI; + q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size); + q->ctx_save_restore_size = + q->ctl_stack_size + PAGE_ALIGN_UP(wg_data_size); + return true; + } + return false; +} + void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t NodeId) { @@ -313,7 +328,7 @@ static void free_queue_gpu(struct queue *q) } if (q->ctx_save_restore) { hsaKmtUnmapMemoryToGPU(q->ctx_save_restore); - hsaKmtFreeMemory(q->ctx_save_restore, q->dev_info->ctx_save_restore_size); + hsaKmtFreeMemory(q->ctx_save_restore, q->ctx_save_restore_size); } free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, q->dev_info->asic_family); } @@ -344,14 +359,14 @@ static int handle_concrete_asic(struct queue *q, args->eop_buffer_size = dev_info->eop_buffer_size; } if (args->queue_type != KFD_IOC_QUEUE_TYPE_SDMA && - dev_info->ctx_save_restore_size > 0) { - args->ctx_save_restore_size = dev_info->ctx_save_restore_size; - args->ctl_stack_size = dev_info->ctl_stack_size; + update_ctx_save_restore_size(NodeId, q) == true) { + args->ctx_save_restore_size = q->ctx_save_restore_size; + args->ctl_stack_size = q->ctl_stack_size; if (IS_DGPU(dev_info->asic_family)) { void *mem; HsaMemFlags flags; HSAKMT_STATUS ret; - HSAuint64 size = dev_info->ctx_save_restore_size; + HSAuint64 size = q->ctx_save_restore_size; flags.Value = 0; flags.ui32.NonPaged = 1; /* device memory*/ @@ -367,7 +382,7 @@ static int handle_concrete_asic(struct queue *q, } else q->ctx_save_restore = - allocate_exec_aligned_memory(dev_info->ctx_save_restore_size, + allocate_exec_aligned_memory(q->ctx_save_restore_size, PAGE_SIZE, dev_info->asic_family, NodeId);