libhsakmt: Compute context save area size depends on CU num
Change-Id: Iaf35ddeee9fe5a6367097483f67c4adaa0796d7d Signed-off-by: shaoyunl <Shaoyun.Liu@amd.com>
Tá an tiomantas seo le fáil i:
tiomanta ag
Gerrit Code Review
tuismitheoir
6d21c4e753
tiomantas
bf16caa75f
+34
-19
@@ -49,47 +49,37 @@ enum asic_family_type {
|
||||
#define IS_DGPU(chip) (((chip) >= CHIP_TONGA && (chip) <= CHIP_FIJI) || \
|
||||
(chip) == CHIP_HAWAII)
|
||||
|
||||
#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576
|
||||
#define WAVES_PER_CU_VI 32
|
||||
|
||||
struct device_info
|
||||
{
|
||||
enum asic_family_type asic_family;
|
||||
uint32_t ctx_save_restore_size;
|
||||
uint32_t ctl_stack_size;
|
||||
uint32_t eop_buffer_size;
|
||||
};
|
||||
|
||||
struct device_info kaveri_device_info = {
|
||||
.asic_family = CHIP_KAVERI,
|
||||
.ctx_save_restore_size = 0,
|
||||
.ctl_stack_size = 0,
|
||||
.eop_buffer_size = 0,
|
||||
};
|
||||
|
||||
struct device_info hawaii_device_info = {
|
||||
.asic_family = CHIP_HAWAII,
|
||||
.ctx_save_restore_size = 0,
|
||||
.ctl_stack_size = 0,
|
||||
.eop_buffer_size = 0,
|
||||
};
|
||||
|
||||
struct device_info carrizo_device_info = {
|
||||
.asic_family = CHIP_CARRIZO,
|
||||
.ctx_save_restore_size = PAGE_ALIGN_UP(2756608 + 4096),
|
||||
.ctl_stack_size = 4096,
|
||||
.eop_buffer_size = 4096,
|
||||
};
|
||||
|
||||
struct device_info tonga_device_info = {
|
||||
.asic_family = CHIP_TONGA,
|
||||
.ctx_save_restore_size = PAGE_ALIGN_UP(8269824 + PAGE_ALIGN_UP(6152)),
|
||||
.ctl_stack_size = PAGE_ALIGN_UP(6152),
|
||||
.eop_buffer_size = TONGA_PAGE_SIZE,
|
||||
};
|
||||
|
||||
struct device_info fiji_device_info = {
|
||||
.asic_family = CHIP_FIJI,
|
||||
/*SR size = work group ctx data + ctl stack size*/
|
||||
.ctx_save_restore_size = PAGE_ALIGN_UP(20674560 + PAGE_ALIGN_UP(15368)),
|
||||
.ctl_stack_size = PAGE_ALIGN_UP(15368),
|
||||
.eop_buffer_size = TONGA_PAGE_SIZE,
|
||||
};
|
||||
|
||||
@@ -161,6 +151,8 @@ struct queue
|
||||
uint32_t rptr;
|
||||
void *eop_buffer;
|
||||
void *ctx_save_restore;
|
||||
uint32_t ctx_save_restore_size;
|
||||
uint32_t ctl_stack_size;
|
||||
const struct device_info *dev_info;
|
||||
};
|
||||
|
||||
@@ -240,6 +232,29 @@ static void* allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* The bool return indicate whether the queue needs a context-save-restore area*/
|
||||
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
{
|
||||
HsaNodeProperties node;
|
||||
|
||||
if (q->dev_info->asic_family < CHIP_CARRIZO)
|
||||
return false;
|
||||
if (hsaKmtGetNodeProperties(nodeid, &node))
|
||||
return false;
|
||||
if (node.NumFComputeCores && node.NumSIMDPerCU) {
|
||||
uint32_t ctl_stack_size, wg_data_size;
|
||||
uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU;
|
||||
|
||||
ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8;
|
||||
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI;
|
||||
q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size);
|
||||
q->ctx_save_restore_size =
|
||||
q->ctl_stack_size + PAGE_ALIGN_UP(wg_data_size);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
|
||||
uint32_t NodeId)
|
||||
{
|
||||
@@ -313,7 +328,7 @@ static void free_queue_gpu(struct queue *q)
|
||||
}
|
||||
if (q->ctx_save_restore) {
|
||||
hsaKmtUnmapMemoryToGPU(q->ctx_save_restore);
|
||||
hsaKmtFreeMemory(q->ctx_save_restore, q->dev_info->ctx_save_restore_size);
|
||||
hsaKmtFreeMemory(q->ctx_save_restore, q->ctx_save_restore_size);
|
||||
}
|
||||
free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, q->dev_info->asic_family);
|
||||
}
|
||||
@@ -344,14 +359,14 @@ static int handle_concrete_asic(struct queue *q,
|
||||
args->eop_buffer_size = dev_info->eop_buffer_size;
|
||||
}
|
||||
if (args->queue_type != KFD_IOC_QUEUE_TYPE_SDMA &&
|
||||
dev_info->ctx_save_restore_size > 0) {
|
||||
args->ctx_save_restore_size = dev_info->ctx_save_restore_size;
|
||||
args->ctl_stack_size = dev_info->ctl_stack_size;
|
||||
update_ctx_save_restore_size(NodeId, q) == true) {
|
||||
args->ctx_save_restore_size = q->ctx_save_restore_size;
|
||||
args->ctl_stack_size = q->ctl_stack_size;
|
||||
if (IS_DGPU(dev_info->asic_family)) {
|
||||
void *mem;
|
||||
HsaMemFlags flags;
|
||||
HSAKMT_STATUS ret;
|
||||
HSAuint64 size = dev_info->ctx_save_restore_size;
|
||||
HSAuint64 size = q->ctx_save_restore_size;
|
||||
flags.Value = 0;
|
||||
flags.ui32.NonPaged = 1; /* device memory*/
|
||||
|
||||
@@ -367,7 +382,7 @@ static int handle_concrete_asic(struct queue *q,
|
||||
|
||||
} else
|
||||
q->ctx_save_restore =
|
||||
allocate_exec_aligned_memory(dev_info->ctx_save_restore_size,
|
||||
allocate_exec_aligned_memory(q->ctx_save_restore_size,
|
||||
PAGE_SIZE,
|
||||
dev_info->asic_family,
|
||||
NodeId);
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir