SWDEV-439757: Remove codeobj marker on last kernel. Fix codeobj size limit. Bump ATT version.

Change-Id: Ie0b9c4de6c37acaaad4ae7d8d774d234a3847b7c


[ROCm/rocprofiler commit: 067071f669]
This commit is contained in:
Giovanni LB
2024-01-10 13:29:26 -03:00
parent 207815802b
commit 68cfdd66ba
5 changed files with 41 additions and 32 deletions
@@ -19,7 +19,7 @@ import gc
from collections import defaultdict
from service import CodeobjService
ATT_VERSION = 3
ATT_VERSION = 4
class TraceData(ctypes.Structure):
_fields_ = [
@@ -299,7 +299,7 @@ def persist(trace_file, SIMD, traces):
br_stalls.append(wave.num_branch_stalls)
timeline.append(wave.timeline)
cc = 1
cc = 0
insts = []
skips = traces[wave.traceid].instructions[-1]
try:
@@ -10,9 +10,9 @@ import os
HEADER_OFFSET = 62
HEADER_MASK = 0x3
ID_OFFSET = 30
ID_MASK = (1<<32)-1
OFFSET_MASK = (1<<30)-1
ID_OFFSET = 34
ID_MASK = (1<<28)-1
OFFSET_MASK = (1<<ID_OFFSET)-1
pluginpath = '../../../lib/rocprofiler/libatt_plugin.so'
filedir = os.path.dirname(os.path.realpath(__file__))
@@ -92,7 +92,7 @@ class CodeobjService:
instance.release()
def GetInstruction(self, addr):
if addr >> HEADER_OFFSET != 0:
if not IsRawPC(addr):
return self.GetInstructionFromID(getID(addr), getOffset(addr))
else:
return self.GetInstructionFromAddr(addr)
@@ -26,6 +26,7 @@ SETPC = 12
SWAPPC = 13
LANEIO = 14
PCINFO = 15
WAVE_ENDED = 16
DONT_KNOW = 100
WaveInstCategory = {
@@ -48,6 +49,7 @@ WaveInstCategory = {
SWAPPC: "SWAPPC",
LANEIO: "LANEIO",
PCINFO: "PCINFO",
WAVE_ENDED: "WAVE_ENDED",
DONT_KNOW: "DONT_KNOW",
}
@@ -361,8 +363,8 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
while i < N and line >= 0 and loops < MAX_STITCHED_TOKENS:
if insts[i].type == PCINFO:
i += 1
pcskip.append(i)
i += 1
continue
loops += 1
@@ -395,32 +397,30 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
elif as_line[1] == SETPC:
next = watchlist.setpc(as_line[0], i)
matched = inst.type in [SALU, JUMP]
if bIsAuto:
i += 1
pcskip.append(i)
while bIsAuto and next < 0 and i+1 < len(insts):
i += 1
pcskip.append(i+1)
while next < 0 and i+1 < len(insts):
i += 1
if insts[i].type == PCINFO:
next = watchlist.setpc(as_line[0], i-1)
pcskip.append(i+1)
else:
inst.cycles += insts[i].cycles
if insts[i].type == PCINFO:
pcskip.append(i)
next = watchlist.setpc(as_line[0], i-1)
else:
inst.cycles += insts[i].cycles
if next < 0:
print('Jump to unknown location in line', as_line[0])
break
elif as_line[1] == SWAPPC:
matched = inst.type in [SALU, JUMP]
next = watchlist.swappc(as_line[0], line, i)
if bIsAuto:
i += 1
pcskip.append(i)
while bIsAuto and next < 0 and i+1 < len(insts):
i += 1
pcskip.append(i+1)
while next < 0 and i+1 < len(insts):
i += 1
if insts[i].type == PCINFO:
next = watchlist.swappc(as_line[0], line, i-1)
pcskip.append(i+1)
else:
inst.cycles += insts[i].cycles
if insts[i].type == PCINFO:
next = watchlist.swappc(as_line[0], line, i-1)
pcskip.append(i)
else:
inst.cycles += insts[i].cycles
if next < 0:
print('Jump to unknown location in line', as_line[0])
break
@@ -560,7 +560,9 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
line = next
N = max(N, 1)
if i != N:
if i != N and insts[i].type == WAVE_ENDED:
print('Warning - Wave ended.')
elif i < N:
print('Warning - Stitching rate: '+str(i * 100 / N)+'% matched', i, ' of ', N)
print('Leftovers:', [WaveInstCategory[insts[i+k].type] for k in range(20) if i+k < len(insts)])
try:
@@ -120,7 +120,8 @@ public:
void InsertLoadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
rocprofiler_intercepted_codeobj_t codeobj
rocprofiler_intercepted_codeobj_t codeobj,
bool bFromStart
);
void SetParameters(const std::vector<rocprofiler_att_parameter_t>& params) {
@@ -41,7 +41,8 @@ union att_header_marker_t
{
uint32_t raw;
struct {
uint32_t type : 2;
uint32_t type : 1;
uint32_t bFromStart : 1;
uint32_t id : 30;
};
};
@@ -66,9 +67,15 @@ void AttTracer::InsertUnloadMarker(
void AttTracer::InsertLoadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
rocprofiler_intercepted_codeobj_t codeobj
rocprofiler_intercepted_codeobj_t codeobj,
bool bFromStart
) {
this->InsertMarker(transformed_packets, agent, codeobj.mem_size, ATT_MARKER_SIZE_CHANNEL);
// TODO: Add this channel
auto sizehi = static_cast<hsa_ven_amd_aqlprofile_att_marker_channel_t>(4);
// Need to send mem_hi
this->InsertMarker(transformed_packets, agent, codeobj.mem_size, sizehi);
uint64_t addr = codeobj.base_address;
this->InsertMarker(transformed_packets, agent, addr & ((1ul << 32)-1), ATT_MARKER_LO_CHANNEL);
@@ -76,6 +83,7 @@ void AttTracer::InsertLoadMarker(
att_header_marker_t header{.raw = 0};
header.type = ROCPROFILER_ATT_MARKER_LOAD;
header.bFromStart = bFromStart;
header.id = codeobj.att_marker_id;
this->InsertMarker(transformed_packets, agent, header.raw, ATT_MARKER_HEADER_CHANNEL);
}
@@ -269,7 +277,7 @@ bool AttTracer::ATTContiguousWriteInterceptor(
{
auto& symbol = symbols.symbols[s];
if (active_capture_event_ids.find(symbol.att_marker_id) == active_capture_event_ids.end())
InsertLoadMarker(transformed, queue_info.GetGPUAgent(), symbol);
InsertLoadMarker(transformed, queue_info.GetGPUAgent(), symbol, bool(insertStart));
}
active_capture_event_ids = std::move(current_ids);
@@ -286,8 +294,6 @@ bool AttTracer::ATTContiguousWriteInterceptor(
if (agent_pending_packets.last_kernel_exec <= writer_end_id)
{
for (uint32_t id : active_capture_event_ids)
InsertUnloadMarker(transformed, queue_info.GetGPUAgent(), id);
InsertPacketStop(transformed, agent_pending_packets, queue_info, agent_handle);
active_capture_event_ids = {};
}