Fixing SE numbering and cache invalidade parsing

Change-Id: Ie63c68a0b2b07427586faaf69b68a19c315387e3


[ROCm/rocprofiler commit: 363abb238b]
Bu işleme şunda yer alıyor:
Giovanni LB
2024-01-31 13:11:19 -03:00
ebeveyn 3cca2ccb9f
işleme 175f4e024e
2 değiştirilmiş dosya ile 28 ekleme ve 26 silme
+11 -10
Dosyayı Görüntüle
@@ -436,16 +436,17 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
if "flat_" in as_line[0]:
inc_ordering = True
if not bGFX9 and "store" in as_line[0]:
VSMEM_INST.append([reverse_map[line], num_inflight])
NUM_VSMEM += 1
if inc_ordering:
vsmem_ordering = 1
else:
VLMEM_INST.append([reverse_map[line], num_inflight])
NUM_VLMEM += 1
if inc_ordering:
vlmem_ordering = 1
if not "_inv" in as_line[0] and not "_wb" in as_line[0]:
if not bGFX9 and "store" in as_line[0]:
VSMEM_INST.append([reverse_map[line], num_inflight])
NUM_VSMEM += 1
if inc_ordering:
vsmem_ordering = 1
else:
VLMEM_INST.append([reverse_map[line], num_inflight])
NUM_VLMEM += 1
if inc_ordering:
vlmem_ordering = 1
elif inst.type == FLAT:
smem_ordering = 1
vlmem_ordering = 1
+17 -16
Dosyayı Görüntüle
@@ -273,36 +273,37 @@ void AttTracer::AddAttRecord(
rocprofiler::warning("Warning: ATT received a UTC memory error!\n");
if (status == HSA_STATUS_ERROR) fatal("Thread Trace Error!");
size_t max_sample_id = 0;
for (auto& trace_data_it : data)
max_sample_id = std::max<size_t>(max_sample_id, trace_data_it.sample_id+1);
// Allocate memory for shader_engine_data
record->shader_engine_data = static_cast<rocprofiler_record_se_att_data_t*>(
calloc(data.size(), sizeof(rocprofiler_record_se_att_data_t)));
record->shader_engine_data_count = max_sample_id;
record->shader_engine_data = static_cast<rocprofiler_record_se_att_data_t*>(calloc(
max_sample_id,
sizeof(rocprofiler_record_se_att_data_t)
));
std::vector<hsa_ven_amd_aqlprofile_info_data_t>::iterator trace_data_it;
uint32_t se_index = 0;
// iterate over the trace data collected from each shader engine
for (trace_data_it = data.begin(); trace_data_it != data.end(); trace_data_it++) {
const void* data_ptr = trace_data_it->trace_data.ptr;
const uint32_t data_size = trace_data_it->trace_data.size;
for (auto& trace_data_it : data)
{
auto& trace = trace_data_it.trace_data;
void* buffer = NULL;
if (data_size != 0) {
if (trace.ptr && trace.size) {
// Allocate buffer on CPU to copy out trace data
buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool_);
buffer = Packet::AllocateSysMemory(gpu_agent, trace.size, &agent_info.cpu_pool_);
if (buffer == NULL) fatal("Trace data buffer allocation failed");
auto status =
hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size);
hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, trace.ptr, trace.size);
if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed");
record->shader_engine_data[se_index].buffer_ptr = buffer;
record->shader_engine_data[se_index].buffer_size = data_size;
++se_index;
record->shader_engine_data[trace_data_it.sample_id].buffer_ptr = buffer;
record->shader_engine_data[trace_data_it.sample_id].buffer_size = trace.size;
// TODO: clear output buffers after copying
}
}
record->shader_engine_data_count = data.size();
}
hsa_status_t AttTracer::attTraceDataCallback(