Change-Id: I051b85550d12ca8662127615be7adfb12c5f9585
Этот коммит содержится в:
Evgeny
2020-02-27 16:52:15 -06:00
родитель a5f52b40f5
Коммит 7ecf9ef1e3
2 изменённых файла: 6 добавлений и 9 удалений
+4 -7
Просмотреть файл
@@ -403,7 +403,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
ptrn_id = re.compile(r'^([^:]+):(\d+)$')
ptrn_mcopy = re.compile(r'(Memcpy|Copy|Fill)')
ptrn_barrier = re.compile(r'Marker')
is_barrier = 0
if not os.path.isfile(file_name): return {}
@@ -439,7 +438,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
if ptrn_barrier.search(name):
name = '"<barrier packet>"'
is_barrier = 1
# insert DB record
rec_vals[4] = name # Name
@@ -452,11 +450,10 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
filtr[corr_id] = 1
# filling a dependency
if is_barrier == 0:
if not pid in dep_dict: dep_dict[pid] = {}
if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {}
dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000
dep_dict[pid]['bsp'] = OPS_PID
if not pid in dep_dict: dep_dict[pid] = {}
if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {}
dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000
dep_dict[pid]['bsp'] = OPS_PID
else:
fatal("hcc ops bad record: '" + record + "'")
+2 -2
Просмотреть файл
@@ -483,7 +483,7 @@ bool dump_context_entry(context_entry_t* entry) {
entry->data.thread_id,
entry->kernel_properties.grid_size,
entry->kernel_properties.workgroup_size,
entry->kernel_properties.lds_size * AgentInfo::lds_block_size,
(entry->kernel_properties.lds_size + (AgentInfo::lds_block_size - 1)) & ~(AgentInfo::lds_block_size - 1),
entry->kernel_properties.scratch_size,
(entry->kernel_properties.vgpr_count + 1) * agent_info->vgpr_block_size,
(entry->kernel_properties.sgpr_count + agent_info->sgpr_block_dflt) * agent_info->sgpr_block_size,
@@ -659,7 +659,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
uint64_t workgroup_size = packet->workgroup_size_x * packet->workgroup_size_y * packet->workgroup_size_z;
if (workgroup_size > UINT32_MAX) abort();
kernel_properties_ptr->workgroup_size = (uint32_t)workgroup_size;
kernel_properties_ptr->lds_size = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE); // packet->group_segment_size;
kernel_properties_ptr->lds_size = packet->group_segment_size;
kernel_properties_ptr->scratch_size = packet->private_segment_size;
kernel_properties_ptr->vgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT);
kernel_properties_ptr->sgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);