From 175f4e024ec6ccb54b14f7212fa6ffe962f337b0 Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Wed, 31 Jan 2024 13:11:19 -0300 Subject: [PATCH] Fixing SE numbering and cache invalidate parsing Change-Id: Ie63c68a0b2b07427586faaf69b68a19c315387e3 [ROCm/rocprofiler commit: 363abb238b72cc7c12de2ac59adb51e5b1d83d23] --- projects/rocprofiler/plugin/att/stitch.py | 21 ++++++------ .../rocprofiler/src/core/session/att/att.cpp | 33 ++++++++++--------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/projects/rocprofiler/plugin/att/stitch.py b/projects/rocprofiler/plugin/att/stitch.py index 7891ea116e..4fd2128d78 100644 --- a/projects/rocprofiler/plugin/att/stitch.py +++ b/projects/rocprofiler/plugin/att/stitch.py @@ -436,16 +436,17 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice): if "flat_" in as_line[0]: inc_ordering = True - if not bGFX9 and "store" in as_line[0]: - VSMEM_INST.append([reverse_map[line], num_inflight]) - NUM_VSMEM += 1 - if inc_ordering: - vsmem_ordering = 1 - else: - VLMEM_INST.append([reverse_map[line], num_inflight]) - NUM_VLMEM += 1 - if inc_ordering: - vlmem_ordering = 1 + if not "_inv" in as_line[0] and not "_wb" in as_line[0]: + if not bGFX9 and "store" in as_line[0]: + VSMEM_INST.append([reverse_map[line], num_inflight]) + NUM_VSMEM += 1 + if inc_ordering: + vsmem_ordering = 1 + else: + VLMEM_INST.append([reverse_map[line], num_inflight]) + NUM_VLMEM += 1 + if inc_ordering: + vlmem_ordering = 1 elif inst.type == FLAT: smem_ordering = 1 vlmem_ordering = 1 diff --git a/projects/rocprofiler/src/core/session/att/att.cpp b/projects/rocprofiler/src/core/session/att/att.cpp index 2c63c6c299..9b68986fbd 100644 --- a/projects/rocprofiler/src/core/session/att/att.cpp +++ b/projects/rocprofiler/src/core/session/att/att.cpp @@ -273,36 +273,37 @@ void AttTracer::AddAttRecord( rocprofiler::warning("Warning: ATT received a UTC memory error!\n"); if (status == HSA_STATUS_ERROR) fatal("Thread Trace Error!"); + size_t max_sample_id 
= 0; + for (auto& trace_data_it : data) + max_sample_id = std::max(max_sample_id, trace_data_it.sample_id+1); + // Allocate memory for shader_engine_data - record->shader_engine_data = static_cast( - calloc(data.size(), sizeof(rocprofiler_record_se_att_data_t))); + record->shader_engine_data_count = max_sample_id; + record->shader_engine_data = static_cast(calloc( + max_sample_id, + sizeof(rocprofiler_record_se_att_data_t) + )); - std::vector::iterator trace_data_it; - - uint32_t se_index = 0; // iterate over the trace data collected from each shader engine - for (trace_data_it = data.begin(); trace_data_it != data.end(); trace_data_it++) { - const void* data_ptr = trace_data_it->trace_data.ptr; - const uint32_t data_size = trace_data_it->trace_data.size; + for (auto& trace_data_it : data) + { + auto& trace = trace_data_it.trace_data; void* buffer = NULL; - if (data_size != 0) { + if (trace.ptr && trace.size) { // Allocate buffer on CPU to copy out trace data - buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool_); + buffer = Packet::AllocateSysMemory(gpu_agent, trace.size, &agent_info.cpu_pool_); if (buffer == NULL) fatal("Trace data buffer allocation failed"); auto status = - hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size); + hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, trace.ptr, trace.size); if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed"); - record->shader_engine_data[se_index].buffer_ptr = buffer; - record->shader_engine_data[se_index].buffer_size = data_size; - ++se_index; - + record->shader_engine_data[trace_data_it.sample_id].buffer_ptr = buffer; + record->shader_engine_data[trace_data_it.sample_id].buffer_size = trace.size; // TODO: clear output buffers after copying } } - record->shader_engine_data_count = data.size(); } hsa_status_t AttTracer::attTraceDataCallback(