From 11a15ef9b0802bfac11937b262bbaec0ee416294 Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Wed, 4 Oct 2023 14:27:55 -0300 Subject: [PATCH] SWDEV-423947: Moving to new aqllib Change-Id: I2f99d88f9bda752fa0533f9c4416c805b68724d7 --- plugin/att/att.py | 322 ++++++++++++++++++++--------------- plugin/att/att_to_csv.py | 7 +- plugin/att/code_printing.hpp | 4 +- plugin/att/disassembly.cpp | 26 ++- plugin/att/stitch.py | 63 +++---- plugin/att/trace_view.py | 16 +- 6 files changed, 246 insertions(+), 192 deletions(-) diff --git a/plugin/att/att.py b/plugin/att/att.py index 20ad9413c0..62019940fb 100755 --- a/plugin/att/att.py +++ b/plugin/att/att.py @@ -18,6 +18,20 @@ from stitch import stitch import gc from collections import defaultdict +ATT_VERSION = 2 + +class TraceData(ctypes.Structure): + _fields_ = [ + ("num_waves", c_uint64), + ("type", c_uint64), + ("cycles", c_uint64) + ] + +class Trace: + def __init__(self, traceid, tracesize, instructions_array): + self.instructions = [deepcopy(instructions_array[k]) for k in range(tracesize)] + self.traceid = traceid + class PerfEvent(ctypes.Structure): _fields_ = [ ("time", c_uint64), @@ -72,6 +86,7 @@ class Wave(ctypes.Structure): ("wave_id", ctypes.c_uint64), ("begin_time", ctypes.c_uint64), # Begin and end cycle ("end_time", ctypes.c_uint64), + ("traceid", ctypes.c_int64), # total VMEM/FLAT/LDS/SMEM instructions issued # total issued memory instructions ("num_mem_instrs", ctypes.c_uint64), @@ -97,34 +112,53 @@ class Wave(ctypes.Structure): ("num_branch_instrs", ctypes.c_uint64), ("num_branch_taken_instrs", ctypes.c_uint64), ("num_branch_stalls", ctypes.c_uint64), - ("timeline_array", POINTER(ctypes.c_int64)), - ("instructions_array", POINTER(ctypes.c_int64)), + ("timeline_size", ctypes.c_uint64), ("instructions_size", ctypes.c_uint64), + ("timeline_array", POINTER(ctypes.c_int32)), + ("instructions_array", POINTER(ctypes.c_uint64)), ] class PythonWave: - def __init__(self, source_wave): + def __init__(self, sourcew): for property, value in Wave._fields_: - setattr(self, property, getattr(source_wave, property)) + try: + setattr(self, deepcopy(property), deepcopy(getattr(sourcew, property))) + except: + pass + + self.timeline = [ + (int(sourcew.timeline_array[2 * k]), int(sourcew.timeline_array[2 * k + 1])) + for k in range(self.timeline_size) + ] self.timeline_array = None + + self.instructions = [ + (int(sourcew.instructions_array[2*k+0]), int(sourcew.instructions_array[2*k+1])) + for k in range(self.instructions_size) + ] self.instructions_array = None -# Flags : -# IS_NAVI = 0x1 class ReturnInfo(ctypes.Structure): _fields_ = [ - ("num_waves", ctypes.c_uint64), - ("wavedata", POINTER(Wave)), + ("flags", ctypes.c_uint64), + ("binaryID", ctypes.c_uint64), + ("num_traces", ctypes.c_uint64), + ("tracesizes", POINTER(ctypes.c_uint64)), + ("traceIDs", POINTER(ctypes.c_int64)), + ("tracedata", POINTER(POINTER(TraceData))), + ("num_events", ctypes.c_uint64), ("perfevents", POINTER(PerfEvent)), ("occupancy", POINTER(ctypes.c_uint64)), ("num_occupancy", ctypes.c_uint64), - ("flags", ctypes.c_uint64), ("kernel_id_addr", POINTER(ctypes.c_uint64)), ("num_kernel_ids", ctypes.c_uint64), + + ("wavedata", POINTER(Wave)), + ("num_waves", ctypes.c_uint64), ] @@ -141,16 +175,14 @@ SO.AnalyseBinary.restype = ReturnInfo SO.AnalyseBinary.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_bool] SO.wrapped_parse_binary.argtypes = [ctypes.c_char_p, ctypes.c_char_p] SO.wrapped_parse_binary.restype = ReturnAssemblyInfo - +SO.FreeBinary.argtypes = [ctypes.c_uint64] def parse_binary(filename, kernel=None): if kernel is None or kernel == "": kernel = ctypes.c_char_p(0) - print("Parsing all kernels") else: with open(glob.glob(kernel)[0], "r") as file: kernel = file.readlines() - print("Parsing kernel:", kernel[0].split(": ")[0]) kernel = kernel[0].split(": ")[1].split(".kd")[0] kernel = str(kernel).encode("utf-8") filename = os.path.abspath(str(filename)) @@ -185,59 +217,63 @@ def parse_binary(filename, kernel=None): return code, jumps, kernel_addr -def getWaves_binary(name, shader_engine_data_dict, target_cu): +def getWaves_binary(name, target_cu): filename = os.path.abspath(str(name)) info = SO.AnalyseBinary(filename.encode("utf-8"), target_cu, False) + isValid = info.flags & 0x1 + if isValid == 0: + print('Invalid trace ', name) + return ([], [], [], [], None, []) + flags = "navi" if (info.flags & 0x2) else "vega" kernel_addr = [int(info.kernel_id_addr[k]) for k in range(info.num_kernel_ids)] - - waves = [info.wavedata[k] for k in range(info.num_waves)] events = [deepcopy(info.perfevents[k]) for k in range(info.num_events)] occupancy = [int(info.occupancy[k]) for k in range(int(info.num_occupancy))] - flags = "navi" if (info.flags & 0x1) else "vega" + + assert(((info.flags >> 3) & 0x1FFF == ATT_VERSION)) # Check ATT parser version + + traces_python = {} + for T in range(info.num_traces): + if info.tracesizes[T] > 2: + id = info.traceIDs[T] + traces_python[id] = Trace(id, int(info.tracesizes[T]), info.tracedata[T]) waves_python = [] - for wave in waves: - if wave.instructions_size < 2: - continue - pwave = PythonWave(wave) - pwave.timeline = [ - (wave.timeline_array[2 * k], wave.timeline_array[2 * k + 1]) - for k in range(wave.timeline_size) - ] - pwave.instructions = [ - tuple([wave.instructions_array[4 * k + m] for m in range(4)]) - for k in range(wave.instructions_size) - ] - waves_python.append(pwave) - shader_engine_data_dict[name] = (waves_python, events, occupancy, flags, kernel_addr) + for k in range(info.num_waves): + if info.wavedata[k].instructions_size > 2: + waves_python.append(PythonWave(info.wavedata[k])) + + SO.FreeBinary(info.binaryID) + + return (traces_python, waves_python, events, occupancy, flags, kernel_addr) -def getWaves_stitch(SIMD, code, jumps, flags, latency_map, hitcount_map, bIsAuto): - for pwave in SIMD: - pwave.instructions = stitch(pwave.instructions, code, jumps, flags, bIsAuto) - if pwave.instructions is not None: - for inst in pwave.instructions[0]: - hitcount_map[inst[-1]] += 1 - latency_map[inst[-1]] += inst[3] +def getWaves_stitch(traces, code, jumps, flags, latency_map, hitcount_map, bIsAuto): + for id in traces.keys(): + traces[id].instructions = stitch(traces[id].instructions, code, jumps, flags, bIsAuto) + if traces[id].instructions is not None: + for inst in traces[id].instructions[0]: + hitcount_map[inst.asmline] += inst.num_waves + latency_map[inst.asmline] += inst.cycles -def persist(trace_file, SIMD): +def persist(trace_file, SIMD, traces): trace = Path(trace_file).name simds, waves = [], [] - begin_time, end_time, timeline, instructions = [], [], [], [] + begin_time, end_time, timeline, instructions, trace_ids = [], [], [], [], [] mem_ins, issued_ins, valu_ins, valu_stalls = [], [], [], [] vmem_ins, vmem_stalls, flat_ins, flat_stalls = [], [], [], [] lds_ins, lds_stalls, salu_ins, salu_stalls = [], [], [], [] smem_ins, smem_stalls, br_ins, br_taken_ins, br_stalls = [], [], [], [], [] for wave in SIMD: - if wave.instructions is None: + if wave.instructions is None or traces[wave.traceid].instructions is None: continue simds.append(wave.simd) waves.append(wave.wave_id) begin_time.append(wave.begin_time) end_time.append(wave.end_time) + trace_ids.append(wave.traceid) mem_ins.append(wave.num_mem_instrs) issued_ins.append(wave.num_issued_instrs) valu_ins.append(wave.num_valu_instrs) @@ -256,7 +292,20 @@ def persist(trace_file, SIMD): br_taken_ins.append(wave.num_branch_taken_instrs) br_stalls.append(wave.num_branch_stalls) timeline.append(wave.timeline) - instructions.append(wave.instructions) + + cc = 1 + insts = [] + skips = traces[wave.traceid].instructions[-1] + try: + for v in traces[wave.traceid].instructions[0]: + if cc in skips: + cc += 1 + t = wave.instructions[cc] + insts.append((t[0], v.type, 0, t[1], v.asmline)) + cc += 1 + except: + pass # Incomplete waves + instructions.append((insts,) + traces[wave.traceid].instructions[1:-1]) df = { "name": [trace for _ in range(len(begin_time))], @@ -284,6 +333,7 @@ def persist(trace_file, SIMD): "br_stalls": br_stalls, "timeline": timeline, "instructions": instructions, + "traceids": trace_ids, } return df @@ -422,108 +472,100 @@ if __name__ == "__main__": if args.target_cu is None: args.target_cu = 1 - att_kernel = glob.glob(args.att_kernel) + att_kernel_list = glob.glob(args.att_kernel) - if len(att_kernel) == 0: + if len(att_kernel_list) == 0: print("Could not find att output kernel:", args.att_kernel) - exit(1) - elif len(att_kernel) > 1: - print("Found multiple kernel matching given filters:") - for n, k in enumerate(att_kernel): - print("\t", n, "->", k) - - bValid = False - while bValid == False: - try: - args.att_kernel = att_kernel[int(input("Please select number: "))] - bValid = True - except KeyboardInterrupt: - exit(0) - except: - print("Invalid option.") - else: - args.att_kernel = att_kernel[0] - - # Assembly parsing - bIsAuto = False - if args.assembly_code.lower().strip() == 'auto': - args.assembly_code = args.att_kernel.split('_kernel.txt')[0]+'_isa.s' - bIsAuto = True - path = Path(args.assembly_code) - if not path.is_file(): - print("Invalid assembly_code('{0}')!".format(args.assembly_code)) - sys.exit(1) - - # Trace Parsing - if args.trace_file is None: - filenames = glob.glob(args.att_kernel.split("_kernel.txt")[0] + "_*.att") - else: - filenames = glob.glob(args.trace_file) - assert len(filenames) > 0 - - print('Att kernel:', args.att_kernel) - code, jumps, kern_addr = parse_binary(args.assembly_code, None if bIsAuto else args.att_kernel) - - DBFILES = [] - EVENTS = [] - OCCUPANCY = [] - GFXV = [] - analysed_filenames = [] - occupancy_filenames = [] - dispatch_kernel_names = {} - shader_engine_data_dict = {} - for name in filenames: - getWaves_binary(name, shader_engine_data_dict, args.target_cu) - - gc.collect() - latency_map = np.zeros((len(code)), dtype=np.int64) - hitcount_map = np.zeros((len(code)), dtype=np.int32) - for name in filenames: - SIMD, perfevents, occupancy, gfxv, addrs = shader_engine_data_dict[name] - - for id, addr in enumerate(addrs): - dispatch_kernel_names[id] = kern_addr[addr] - if len(occupancy) > 16: - OCCUPANCY.append( occupancy ) - occupancy_filenames.append(name) - if np.sum([0]+[len(s.instructions) for s in SIMD]) == 0: - print("No waves from", name) - continue - getWaves_stitch(SIMD, code, jumps, gfxv, latency_map, hitcount_map, bIsAuto) - - analysed_filenames.append(name) - EVENTS.append(perfevents) - DBFILES.append( persist(name, SIMD) ) - GFXV.append(gfxv) - - gc.collect() - for k in range(len(code)): - code[k][-2] = int(hitcount_map[k]) - code[k][-1] = int(latency_map[k]) - - if CSV_MODE: - from att_to_csv import dump_csv - dump_csv(code) quit() + for att_kernel in att_kernel_list: + print('Parsing:', att_kernel) + assembly_code = deepcopy(args.assembly_code) - gc.collect() + # Assembly parsing + bIsAuto = False + if assembly_code.lower().strip() == 'auto': + assembly_code = att_kernel.split('_kernel.txt')[0]+'_isa.s' + bIsAuto = True + path = Path(assembly_code) + if not path.is_file(): + print("Invalid assembly_code('{0}')!".format(assembly_code)) + sys.exit(1) - drawinfo = { - "TIMELINES": gen_timelines(DBFILES), - "EVENTS": EVENTS, - "EVENT_NAMES": EVENT_NAMES, - "OCCUPANCY": OCCUPANCY, - "ShaderNames": occupancy_filenames, - "DispatchNames": dispatch_kernel_names, - } - view_trace( - args, - code, - DBFILES, - analysed_filenames, - args.dumpfiles, - 0, - gfxv, - drawinfo - ) + # Trace Parsing + trace_instance_name = att_kernel.split("_kernel.txt")[0] + if args.trace_file is None: + filenames = glob.glob(trace_instance_name + "_*.att") + else: + filenames = glob.glob(args.trace_file) + + if len(filenames) == 0: + print("Could not find trace files for", att_kernel) + continue + + print('Att kernel:', att_kernel) + code, jumps, kern_addr = parse_binary(assembly_code, None if bIsAuto else att_kernel) + + DBFILES = [] + EVENTS = [] + OCCUPANCY = [] + GFXV = [] + analysed_filenames = [] + occupancy_filenames = [] + dispatch_kernel_names = {} + + latency_map = np.zeros((len(code)), dtype=np.int64) + hitcount_map = np.zeros((len(code)), dtype=np.int32) + + gc.collect() + + for name in filenames: + traces, waves, perfevents, occupancy, gfxv, addrs = getWaves_binary(name, args.target_cu) + if gfxv is None: + continue + + for id, addr in enumerate(addrs): + dispatch_kernel_names[id] = kern_addr[addr] + if len(occupancy) > 16: + OCCUPANCY.append( occupancy ) + occupancy_filenames.append(name) + + if np.sum([0]+[len(s.instructions) for id, s in traces.items()]) == 0: + print("No traces from", name) + continue + + getWaves_stitch(traces, code, jumps, gfxv, latency_map, hitcount_map, bIsAuto) + + analysed_filenames.append(name) + EVENTS.append(perfevents) + DBFILES.append( persist(name, waves, traces) ) + GFXV.append(gfxv) + + gc.collect() + for k in range(len(code)): + code[k][-2] = int(hitcount_map[k]) + code[k][-1] = int(latency_map[k]) + + if CSV_MODE: + from att_to_csv import dump_csv + dump_csv(code, trace_instance_name) + else: + drawinfo = { + "TIMELINES": gen_timelines(DBFILES), + "EVENTS": EVENTS, + "EVENT_NAMES": EVENT_NAMES, + "OCCUPANCY": OCCUPANCY, + "ShaderNames": occupancy_filenames, + "DispatchNames": dispatch_kernel_names, + } + view_trace( + args, + code, + DBFILES, + analysed_filenames, + args.dumpfiles, + 0, + gfxv, + drawinfo, + trace_instance_name + ) diff --git a/plugin/att/att_to_csv.py b/plugin/att/att_to_csv.py index acc0b3f966..3be2718573 100755 --- a/plugin/att/att_to_csv.py +++ b/plugin/att/att_to_csv.py @@ -4,12 +4,11 @@ import numpy as np import csv import os -def dump_csv(code): +def dump_csv(code, trace_instance_name): outpath = os.getenv("OUT_FILE_NAME") if outpath is None: - outpath = "att_output.csv" - if ".csv" not in outpath: - outpath += ".csv" + outpath = "att_output" + outpath += '_' + trace_instance_name.split('/')[-1] + '.csv' with open(outpath, 'w') as f: writer = csv.writer(f) diff --git a/plugin/att/code_printing.hpp b/plugin/att/code_printing.hpp index ce21f6e30a..a4193694e9 100644 --- a/plugin/att/code_printing.hpp +++ b/plugin/att/code_printing.hpp @@ -20,12 +20,14 @@ #pragma once +#include "rocprofiler.h" + #include #include #include #include -#include "rocprofiler.h" #include + #include "disassembly.hpp" class code_object_decoder_t { diff --git a/plugin/att/disassembly.cpp b/plugin/att/disassembly.cpp index 985a64bbd2..0bb358821e 100644 --- a/plugin/att/disassembly.cpp +++ b/plugin/att/disassembly.cpp @@ -22,8 +22,6 @@ #define _XOPEN_SOURCE 700 #endif -#include "code_printing.hpp" - #include #include #include @@ -49,9 +47,10 @@ #include #include -#include -#include "../utils.h" #include +#include "../utils.h" +#include "code_printing.hpp" +#include #define CHECK_COMGR(call) \ if (amd_comgr_status_s status = call) { \ @@ -170,21 +169,30 @@ DisassemblyInstance::DisassemblyInstance(code_object_decoder_t& decoder) [](uint64_t address, void* user_data) {}, &info)); } +static bool IsKernelType(amd_comgr_symbol_type_t type) +{ + if (type == AMD_COMGR_SYMBOL_TYPE_FUNC) + return true; +#ifdef AMD_COMGR_SYMBOL_TYPE_AMDGPU_HSA_KERNEL // To be deprecated + if (type == AMD_COMGR_SYMBOL_TYPE_AMDGPU_HSA_KERNEL) + return true; +#endif + return false; +} + amd_comgr_status_t DisassemblyInstance::symbol_callback(amd_comgr_symbol_t symbol, void* user_data) { amd_comgr_symbol_type_t type; CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_TYPE, &type)); - if (type != AMD_COMGR_SYMBOL_TYPE_FUNC && type != AMD_COMGR_SYMBOL_TYPE_AMDGPU_HSA_KERNEL) + if (!IsKernelType(type)) return AMD_COMGR_STATUS_SUCCESS; uint64_t vaddr; - CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_VALUE, &vaddr)); - uint64_t mem_size; - CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_SIZE, &mem_size)); - uint64_t name_size; + CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_VALUE, &vaddr)); + CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_SIZE, &mem_size)); CHECK_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME_LENGTH, &name_size)); std::string name; diff --git a/plugin/att/stitch.py b/plugin/att/stitch.py index 6e7f8f911f..fc418fe0d5 100644 --- a/plugin/att/stitch.py +++ b/plugin/att/stitch.py @@ -169,17 +169,17 @@ class PCTranslator: pass def swappc(self, line, line_num, inst_index): try: - loc = self.addrmap[self.insts[inst_index+1][2]] + loc = self.addrmap[self.insts[inst_index+1].cycles] return loc except: - print('SWAPPC: Could not find addr', self.insts[inst_index+1][2], 'for', line) + print('SWAPPC: Could not find addr', self.insts[inst_index+1].cycles, 'for', line) return -1 def setpc(self, line, inst_index): try: - loc = self.addrmap[self.insts[inst_index+1][2]] + loc = self.addrmap[self.insts[inst_index+1].cycles] return loc except: - print('SETPC: Could not find addr', self.insts[inst_index+1][2], 'for', line) + print('SETPC: Could not find addr', self.insts[inst_index+1].cycles, 'for', line) return -1 def scratch(self, line): pass @@ -190,7 +190,7 @@ class PCTranslator: # Matches tokens in reverse order def try_match_swapped(insts, code, i, line): - return insts[i + 1][1] == code[line][1] and insts[i][1] == code[line + 1][1] + return insts[i + 1].type == code[line][1] and insts[i].type == code[line + 1][1] def stitch(insts, raw_code, jumps, gfxv, bIsAuto): @@ -242,26 +242,27 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): loops = 0 maxline = 0 - if bIsAuto and len(insts) and insts[0][1] == PCINFO: + if bIsAuto and len(insts) and insts[0].type == PCINFO: try: watchlist = PCTranslator(code, insts) - line = watchlist.addrmap[insts[0][2]] + line = watchlist.addrmap[insts[0].cycles] except: return None + insts = insts[1:] else: watchlist = RegisterWatchList(labels=labels) - if len(insts) and insts[0][1] == PCINFO: - insts = insts[1:] N = len(insts) - pcsequence = [] + pcskip = [] while i < N: - if insts[i][1] == PCINFO: + if insts[i].type == PCINFO: i += 1 N -= 1 continue + #print(line, i, WaveInstCategory[insts[i].type], insts[i].num_waves, insts[i].cycles) + loops += 1 if line >= len(code) or loops > MAX_STITCHED_TOKENS \ or num_failed_stitches > MAX_FAILED_STITCHES: @@ -283,38 +284,38 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): if as_line[1] == GETPC: try: watchlist.getpc(as_line[0], code[line+1][0]) - matched = inst[1] in [SALU, JUMP] + matched = inst.type in [SALU, JUMP] except: matched = False elif as_line[1] == LANEIO: watchlist.updatelane(as_line[0]) - matched = inst[1] == VALU + matched = inst.type == VALU elif as_line[1] == SETPC: next = watchlist.setpc(as_line[0], i) - matched = inst[1] in [SALU, JUMP] + matched = inst.type in [SALU, JUMP] if bIsAuto: + pcskip.append(i) matched = next >= 0 i += 1 N -= 1 - pcsequence.append(insts[i][2]) elif as_line[1] == SWAPPC: next = watchlist.swappc(as_line[0], line, i) - matched = inst[1] in [SALU, JUMP] + matched = inst.type in [SALU, JUMP] if bIsAuto: + pcskip.append(i) matched = next >= 0 i += 1 N -= 1 - pcsequence.append(insts[i][2]) - elif inst[1] == as_line[1]: + elif inst.type == as_line[1]: if line in jumps: loopCount[jumps[line] - 1] += 1 num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - if inst[1] == SMEM or inst[1] == LDS: - smem_ordering = 1 if inst[1] == SMEM else smem_ordering + if inst.type == SMEM or inst.type == LDS: + smem_ordering = 1 if inst.type == SMEM else smem_ordering SMEM_INST.append([reverse_map[line], num_inflight]) NUM_SMEM += 1 - elif inst[1] == VMEM or (inst[1] == FLAT and "global_" in as_line[0]): + elif inst.type == VMEM or (inst.type == FLAT and "global_" in as_line[0]): inc_ordering = False if "flat_" in as_line[0]: inc_ordering = True @@ -329,13 +330,13 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): NUM_VLMEM += 1 if inc_ordering: vlmem_ordering = 1 - elif inst[1] == FLAT: + elif inst.type == FLAT: smem_ordering = 1 vlmem_ordering = 1 vsmem_ordering = 1 FLAT_INST.append([reverse_map[line], num_inflight]) NUM_FLAT += 1 - elif inst[1] == IMMED and "s_waitcnt" in as_line[0]: + elif inst.type == IMMED and "s_waitcnt" in as_line[0]: if "lgkmcnt" in as_line[0]: wait_N = int(as_line[0].split("lgkmcnt(")[1].split(")")[0]) flight_count.append([as_line[5], num_inflight, wait_N]) @@ -399,12 +400,12 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): NUM_FLAT = min(max(wait_N - NUM_VSMEM, 0), NUM_FLAT) num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - elif inst[1] == JUMP and as_line[1] == BRANCH: + elif inst.type == JUMP and as_line[1] == BRANCH: next = jump_map[as_line[2]] if next is None or next == 0: print("Jump to unknown location!", as_line) break - elif inst[1] == NEXT and as_line[1] == BRANCH: + elif inst.type == NEXT and as_line[1] == BRANCH: next = line + 1 else: matched = False @@ -424,12 +425,14 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): break if matched: - result.append(inst + (reverse_map[line],)) + inst.asmline = reverse_map[line] + result.append(inst) i += 1 num_failed_stitches = 0 - elif not bGFX9 and inst[1] == IMMED and line != next: + elif not bGFX9 and inst.type == IMMED and line != next: skipped_immed += 1 - result.append(inst + (reverse_map[line],)) + inst.asmline = reverse_map[line] + result.append(inst) next = line i += 1 else: @@ -439,7 +442,7 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): N = max(N, 1) if i != N: print('Warning - Stitching rate: '+str(i * 100 / N)+'% matched') - print('Leftovers:', [WaveInstCategory[insts[i+k][1]] for k in range(20) if i+k < len(insts)]) + print('Leftovers:', [WaveInstCategory[insts[i+k].type] for k in range(20) if i+k < len(insts)]) try: print(line, code[line]) except: @@ -453,4 +456,4 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): break line += 1 - return result, loopCount, mem_unroll, flight_count, maxline, len(result) + return result, loopCount, mem_unroll, flight_count, maxline, len(result), pcskip diff --git a/plugin/att/trace_view.py b/plugin/att/trace_view.py index 134cc7a4e0..1124628f64 100755 --- a/plugin/att/trace_view.py +++ b/plugin/att/trace_view.py @@ -210,11 +210,10 @@ class RocTCPServer(socketserver.TCPServer): self.socket.bind(self.server_address) -def run_server(drawinfo): +def run_server(drawinfo, trace_instance_name): Handler = NoCacheHTTPRequestHandler Handler.drawinfo = drawinfo - os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui/")) - # os.chdir('ui/') + os.chdir(trace_instance_name+"_ui/") try: with RocTCPServer((IPAddr, PORT), Handler) as httpd: httpd.serve_forever() @@ -299,7 +298,8 @@ def view_trace( bDumpOnly, se_time_begin, gfxv, - drawinfo + drawinfo, + trace_instance_name ): global JSON_GLOBAL_DICTIONARY pic_thread = None @@ -387,7 +387,7 @@ def view_trace( print("serving at ports: {0},{1}".format(PORT, WebSocketPort)) try: PROCS = [ - Process(target=run_server, args=[drawinfo]), + Process(target=run_server, args=[drawinfo, trace_instance_name]), Process(target=run_websocket), ] for p in PROCS: @@ -397,13 +397,13 @@ def view_trace( except KeyboardInterrupt: print("Exitting.") else: - os.makedirs("ui/", exist_ok=True) + os.makedirs(trace_instance_name + "_ui/", exist_ok=True) JSON_GLOBAL_DICTIONARY["live.json"] = Readable({"live": 0}) os.system( "cp " + os.path.join(os.path.abspath(os.path.dirname(__file__)), "ui") - + "/* ui/" + + "/* " + trace_instance_name + "_ui/" ) for k, v in JSON_GLOBAL_DICTIONARY.items(): - with open(os.path.join("ui", k), "w" if ".json" in k else "wb") as f: + with open(os.path.join(trace_instance_name+"_ui", k), "w" if ".json" in k else "wb") as f: f.write(v.read())