libhsakmt: Compute context save area size depends on CU num

Change-Id: Iaf35ddeee9fe5a6367097483f67c4adaa0796d7d
Signed-off-by: shaoyunl <Shaoyun.Liu@amd.com>
Tá an tiomantas seo le fáil i:
shaoyunl
2016-06-08 17:23:51 -04:00
tiomanta ag Gerrit Code Review
tuismitheoir 6d21c4e753
tiomantas bf16caa75f
+34 -19
Féach ar an gComhad
@@ -49,47 +49,37 @@ enum asic_family_type {
#define IS_DGPU(chip) (((chip) >= CHIP_TONGA && (chip) <= CHIP_FIJI) || \
(chip) == CHIP_HAWAII)
#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576
#define WAVES_PER_CU_VI 32
struct device_info
{
enum asic_family_type asic_family;
uint32_t ctx_save_restore_size;
uint32_t ctl_stack_size;
uint32_t eop_buffer_size;
};
struct device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.ctx_save_restore_size = 0,
.ctl_stack_size = 0,
.eop_buffer_size = 0,
};
struct device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
.ctx_save_restore_size = 0,
.ctl_stack_size = 0,
.eop_buffer_size = 0,
};
struct device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
.ctx_save_restore_size = PAGE_ALIGN_UP(2756608 + 4096),
.ctl_stack_size = 4096,
.eop_buffer_size = 4096,
};
struct device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.ctx_save_restore_size = PAGE_ALIGN_UP(8269824 + PAGE_ALIGN_UP(6152)),
.ctl_stack_size = PAGE_ALIGN_UP(6152),
.eop_buffer_size = TONGA_PAGE_SIZE,
};
struct device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
/*SR size = work group ctx data + ctl stack size*/
.ctx_save_restore_size = PAGE_ALIGN_UP(20674560 + PAGE_ALIGN_UP(15368)),
.ctl_stack_size = PAGE_ALIGN_UP(15368),
.eop_buffer_size = TONGA_PAGE_SIZE,
};
@@ -161,6 +151,8 @@ struct queue
uint32_t rptr;
void *eop_buffer;
void *ctx_save_restore;
uint32_t ctx_save_restore_size;
uint32_t ctl_stack_size;
const struct device_info *dev_info;
};
@@ -240,6 +232,29 @@ static void* allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align)
return ptr;
}
/* The bool return indicate whether the queue needs a context-save-restore area*/
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
{
HsaNodeProperties node;
if (q->dev_info->asic_family < CHIP_CARRIZO)
return false;
if (hsaKmtGetNodeProperties(nodeid, &node))
return false;
if (node.NumFComputeCores && node.NumSIMDPerCU) {
uint32_t ctl_stack_size, wg_data_size;
uint32_t cu_num = node.NumFComputeCores / node.NumSIMDPerCU;
ctl_stack_size = cu_num * WAVES_PER_CU_VI * 8 + 8;
wg_data_size = cu_num * WG_CONTEXT_DATA_SIZE_PER_CU_VI;
q->ctl_stack_size = PAGE_ALIGN_UP(ctl_stack_size);
q->ctx_save_restore_size =
q->ctl_stack_size + PAGE_ALIGN_UP(wg_data_size);
return true;
}
return false;
}
void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
uint32_t NodeId)
{
@@ -313,7 +328,7 @@ static void free_queue_gpu(struct queue *q)
}
if (q->ctx_save_restore) {
hsaKmtUnmapMemoryToGPU(q->ctx_save_restore);
hsaKmtFreeMemory(q->ctx_save_restore, q->dev_info->ctx_save_restore_size);
hsaKmtFreeMemory(q->ctx_save_restore, q->ctx_save_restore_size);
}
free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, q->dev_info->asic_family);
}
@@ -344,14 +359,14 @@ static int handle_concrete_asic(struct queue *q,
args->eop_buffer_size = dev_info->eop_buffer_size;
}
if (args->queue_type != KFD_IOC_QUEUE_TYPE_SDMA &&
dev_info->ctx_save_restore_size > 0) {
args->ctx_save_restore_size = dev_info->ctx_save_restore_size;
args->ctl_stack_size = dev_info->ctl_stack_size;
update_ctx_save_restore_size(NodeId, q) == true) {
args->ctx_save_restore_size = q->ctx_save_restore_size;
args->ctl_stack_size = q->ctl_stack_size;
if (IS_DGPU(dev_info->asic_family)) {
void *mem;
HsaMemFlags flags;
HSAKMT_STATUS ret;
HSAuint64 size = dev_info->ctx_save_restore_size;
HSAuint64 size = q->ctx_save_restore_size;
flags.Value = 0;
flags.ui32.NonPaged = 1; /* device memory*/
@@ -367,7 +382,7 @@ static int handle_concrete_asic(struct queue *q,
} else
q->ctx_save_restore =
allocate_exec_aligned_memory(dev_info->ctx_save_restore_size,
allocate_exec_aligned_memory(q->ctx_save_restore_size,
PAGE_SIZE,
dev_info->asic_family,
NodeId);