dynamic kernel lds size
Change-Id: I051b85550d12ca8662127615be7adfb12c5f9585
Этот коммит содержится в:
@@ -403,7 +403,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
|
||||
ptrn_id = re.compile(r'^([^:]+):(\d+)$')
|
||||
ptrn_mcopy = re.compile(r'(Memcpy|Copy|Fill)')
|
||||
ptrn_barrier = re.compile(r'Marker')
|
||||
- is_barrier = 0
|
||||
|
||||
if not os.path.isfile(file_name): return {}
|
||||
|
||||
@@ -439,7 +438,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
|
||||
|
||||
if ptrn_barrier.search(name):
|
||||
name = '"<barrier packet>"'
|
||||
- is_barrier = 1
|
||||
|
||||
# insert DB record
|
||||
rec_vals[4] = name # Name
|
||||
@@ -452,11 +450,10 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
|
||||
filtr[corr_id] = 1
|
||||
|
||||
# filling a dependency
|
||||
- if is_barrier == 0:
|
||||
- if not pid in dep_dict: dep_dict[pid] = {}
|
||||
- if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {}
|
||||
- dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000
|
||||
- dep_dict[pid]['bsp'] = OPS_PID
|
||||
+ if not pid in dep_dict: dep_dict[pid] = {}
|
||||
+ if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {}
|
||||
+ dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000
|
||||
+ dep_dict[pid]['bsp'] = OPS_PID
|
||||
|
||||
else:
|
||||
fatal("hcc ops bad record: '" + record + "'")
|
||||
|
||||
@@ -483,7 +483,7 @@ bool dump_context_entry(context_entry_t* entry) {
|
||||
entry->data.thread_id,
|
||||
entry->kernel_properties.grid_size,
|
||||
entry->kernel_properties.workgroup_size,
|
||||
- entry->kernel_properties.lds_size * AgentInfo::lds_block_size,
|
||||
+ (entry->kernel_properties.lds_size + (AgentInfo::lds_block_size - 1)) & ~(AgentInfo::lds_block_size - 1),
|
||||
entry->kernel_properties.scratch_size,
|
||||
(entry->kernel_properties.vgpr_count + 1) * agent_info->vgpr_block_size,
|
||||
(entry->kernel_properties.sgpr_count + agent_info->sgpr_block_dflt) * agent_info->sgpr_block_size,
|
||||
@@ -659,7 +659,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
uint64_t workgroup_size = packet->workgroup_size_x * packet->workgroup_size_y * packet->workgroup_size_z;
|
||||
if (workgroup_size > UINT32_MAX) abort();
|
||||
kernel_properties_ptr->workgroup_size = (uint32_t)workgroup_size;
|
||||
- kernel_properties_ptr->lds_size = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE); // packet->group_segment_size;
|
||||
+ kernel_properties_ptr->lds_size = packet->group_segment_size;
|
||||
kernel_properties_ptr->scratch_size = packet->private_segment_size;
|
||||
kernel_properties_ptr->vgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT);
|
||||
kernel_properties_ptr->sgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);
|
||||
|
||||
Ссылка в новой задаче
Block a user