diff --git a/projects/rocprofiler/bin/tblextr.py b/projects/rocprofiler/bin/tblextr.py index 4d7863e311..60b2f8e36a 100755 --- a/projects/rocprofiler/bin/tblextr.py +++ b/projects/rocprofiler/bin/tblextr.py @@ -403,7 +403,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): ptrn_id = re.compile(r'^([^:]+):(\d+)$') ptrn_mcopy = re.compile(r'(Memcpy|Copy|Fill)') ptrn_barrier = re.compile(r'Marker') - is_barrier = 0 if not os.path.isfile(file_name): return {} @@ -439,7 +438,6 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): if ptrn_barrier.search(name): name = '""' - is_barrier = 1 # insert DB record rec_vals[4] = name # Name @@ -452,11 +450,10 @@ def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir): filtr[corr_id] = 1 # filling a dependency - if is_barrier == 0: - if not pid in dep_dict: dep_dict[pid] = {} - if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {} - dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000 - dep_dict[pid]['bsp'] = OPS_PID + if not pid in dep_dict: dep_dict[pid] = {} + if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {} + dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000 + dep_dict[pid]['bsp'] = OPS_PID else: fatal("hcc ops bad record: '" + record + "'") diff --git a/projects/rocprofiler/test/tool/tool.cpp b/projects/rocprofiler/test/tool/tool.cpp index a429c493e6..363b4095a8 100644 --- a/projects/rocprofiler/test/tool/tool.cpp +++ b/projects/rocprofiler/test/tool/tool.cpp @@ -483,7 +483,7 @@ bool dump_context_entry(context_entry_t* entry) { entry->data.thread_id, entry->kernel_properties.grid_size, entry->kernel_properties.workgroup_size, - entry->kernel_properties.lds_size * AgentInfo::lds_block_size, + (entry->kernel_properties.lds_size + (AgentInfo::lds_block_size - 1)) & ~(AgentInfo::lds_block_size - 1), entry->kernel_properties.scratch_size, (entry->kernel_properties.vgpr_count + 1) * agent_info->vgpr_block_size, (entry->kernel_properties.sgpr_count + agent_info->sgpr_block_dflt) * agent_info->sgpr_block_size, @@ -659,7 +659,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, uint64_t workgroup_size = packet->workgroup_size_x * packet->workgroup_size_y * packet->workgroup_size_z; if (workgroup_size > UINT32_MAX) abort(); kernel_properties_ptr->workgroup_size = (uint32_t)workgroup_size; - kernel_properties_ptr->lds_size = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE); // packet->group_segment_size; + kernel_properties_ptr->lds_size = packet->group_segment_size; kernel_properties_ptr->scratch_size = packet->private_segment_size; kernel_properties_ptr->vgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT); kernel_properties_ptr->sgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);