diff --git a/bin/rocprofv2 b/bin/rocprofv2 index 0698062aa0..b808262435 100755 --- a/bin/rocprofv2 +++ b/bin/rocprofv2 @@ -238,12 +238,17 @@ while [ 1 ] ; do shift ATT_OPTIONS="Not done" + ATT_PYTHON3_ARG="python3 " while [ "$ATT_OPTIONS" = "Not done" ]; do if [[ "$3" = "--trace_file" ]]; then ATT_ARGV="$ATT_ARGV $3 \"$4\"" shift shift - elif [[ "$3" = "--mode" || "$3" = "--ports" || "$3" = "--genasm" || "$3" == "--att_kernel" ]]; then + elif [[ "$3" = "--mpi" ]]; then + ATT_PYTHON3_ARG="mpirun -np $4 python3 " + shift + shift + elif [[ "$3" = "--mode" || "$3" = "--ports" || "$3" = "--genasm" || "$3" == "--att_kernel" || "$3" == "--depth" ]]; then ATT_ARGV="$ATT_ARGV $3 $4" shift shift @@ -327,7 +332,7 @@ get_pmc_results_txt_path(){ if [ -n "$ATT_PATH" ]; then if [ -n "$ATT_ARGV" ]; then - eval "python3 $ATT_PATH $ATT_ARGV" + eval "$ATT_PYTHON3_ARG $ATT_PATH $ATT_ARGV" elif [ ! -n "$PMC_LINES" ]; then echo "ATT File is required!" fi diff --git a/plugin/att/CMakeLists.txt b/plugin/att/CMakeLists.txt index d35ef63dc0..05c0ce8774 100644 --- a/plugin/att/CMakeLists.txt +++ b/plugin/att/CMakeLists.txt @@ -57,6 +57,8 @@ install(TARGETS att_plugin configure_file(att.py att/att.py COPYONLY) configure_file(trace_view.py att/trace_view.py COPYONLY) +configure_file(stitch.py att/stitch.py COPYONLY) +configure_file(drawing.py att/drawing.py COPYONLY) configure_file(ui/index.html att/ui/index.html COPYONLY) configure_file(ui/logo.svg att/ui/logo.svg COPYONLY) configure_file(ui/styles.css att/ui/styles.css COPYONLY) diff --git a/plugin/att/att.py b/plugin/att/att.py index f5e7a0a36e..087c944252 100755 --- a/plugin/att/att.py +++ b/plugin/att/att.py @@ -6,38 +6,20 @@ if sys.version_info[0] < 3: import os import argparse from pathlib import Path -from struct import * from ctypes import * import ctypes from copy import deepcopy -from trace_view import view_trace, Readable +from trace_view import view_trace import sys import glob import numpy as np -import matplotlib.pyplot as plt 
class PythonWave:
    """Plain-Python copy of a ctypes ``Wave`` record.

    Every field named in ``Wave._fields_`` is copied from ``source_wave`` onto
    the new object as an ordinary attribute.  The two pointer-typed fields,
    ``timeline_array`` and ``instructions_array``, are then reset to ``None``
    so the copy does not keep the pointers read from the source record.
    """

    def __init__(self, source_wave):
        # Wave._fields_ is a list of (name, ctype) pairs; only the name matters.
        for field_name, _ctype in Wave._fields_:
            setattr(self, field_name, getattr(source_wave, field_name))

        # Overwrite the pointer attributes copied above.
        self.timeline_array = None
        self.instructions_array = None
def getWaves_binary(name, shader_engine_data_dict, target_cu, depth):
    """Parse one trace file with the native library and store Python copies.

    The result is written to ``shader_engine_data_dict[name]`` as the tuple
    ``(waves, events, occupancy, flags)``; nothing is returned.

    Args:
        name: Path of the trace file; also used as the dictionary key.
        shader_engine_data_dict: Output mapping, updated in place.
        target_cu: Forwarded unchanged to ``SO.AnalyseBinary``.
        depth: Maximum number of waves kept per (simd, wave_id) slot; further
            waves seen for the same slot are skipped.
    """
    filename = os.path.abspath(str(name))
    info = SO.AnalyseBinary(filename.encode('utf-8'), target_cu, False)

    waves = [info.wavedata[k] for k in range(info.num_waves)]
    events = [deepcopy(info.perfevents[k]) for k in range(info.num_events)]
    occupancy = [int(info.occupancy[k]) for k in range(int(info.num_occupancy))]
    flags = 'navi' if (info.flags & 0x1) else 'vega'

    # Waves kept so far per (simd, wave_id).  A dict is used instead of a
    # fixed 4x20 table so an unexpected SIMD or slot number cannot raise
    # IndexError.
    wave_slot_count = {}
    waves_python = []
    for wave in waves:
        slot = (wave.simd, wave.wave_id)
        kept = wave_slot_count.get(slot, 0)
        if kept >= depth:
            continue
        wave_slot_count[slot] = kept + 1

        pwave = PythonWave(wave)
        # Read each pointer field once instead of once per element.
        timeline_array = wave.timeline_array
        instructions_array = wave.instructions_array
        # The timeline is read as consecutive pairs, instructions as 4-tuples.
        pwave.timeline = [(timeline_array[2*k], timeline_array[2*k+1])
                          for k in range(wave.timeline_size)]
        pwave.instructions = [tuple([instructions_array[4*k+m] for m in range(4)])
                              for k in range(wave.instructions_size)]
        waves_python.append(pwave)

    shader_engine_data_dict[name] = (waves_python, events, occupancy, flags)


def getWaves_stitch(SIMD, code, jumps, flags, latency_map, hitcount_map):
    """Stitch every wave against the assembly and accumulate per-line totals.

    Each wave's ``instructions`` attribute is replaced by the return value of
    ``stitch``.  For every entry of the first element of that return value,
    the entry's last item is used as an index into both maps: the hit count
    at that index is incremented and the entry's item 3 is added to the
    latency at that index.

    Args:
        SIMD: Iterable of wave objects exposing an ``instructions`` attribute.
        code, jumps, flags: Forwarded unchanged to ``stitch``.
        latency_map: Indexable accumulator, updated in place.
        hitcount_map: Indexable accumulator, updated in place.
    """
    for pwave in SIMD:
        pwave.instructions = stitch(pwave.instructions, code, jumps, flags)

        for inst in pwave.instructions[0]:
            hitcount_map[inst[-1]] += 1
            latency_map[inst[-1]] += inst[3]
def _grown_timeline(timeline, needed):
    """Return ``timeline`` zero-padded (by repeated doubling) to >= ``needed``."""
    size = max(timeline.size, 1)
    while size < needed:
        size *= 2
    if size == timeline.size:
        return timeline
    padding = np.zeros(size - timeline.size, dtype=timeline.dtype)
    return np.hstack([timeline, padding])


def apply_min_event(min_event_time, OCCUPANCY, EVENTS, DBFILES, TIMELINES):
    """Rebase all timestamps to ``min_event_time`` and build state histograms.

    All arguments except ``min_event_time`` are modified in place.

    Args:
        min_event_time: Time origin subtracted from every timestamp.
        OCCUPANCY: List of lists of packed integers.  The timestamp is taken
            from bit 16 upwards; the low 16 bits are carried over unchanged.
        EVENTS: List of lists of objects with a mutable ``time`` attribute.
        DBFILES: List of dicts with a ``'timeline'`` entry (per wave, a
            sequence of ``(state, duration)`` pairs) and a ``'begin_time'``
            entry (per wave start time).
        TIMELINES: List of 1-D numpy arrays indexed by state.  Each array
            counts, per cycle, how many waves were in that state; arrays are
            replaced by larger ones when a wave runs past their end.
    """
    for n, occ in enumerate(OCCUPANCY):
        rebased = []
        for u in occ:
            shifted = int((u >> 16) - min_event_time) << 16
            clamped = max(min(shifted, 2**42), 0)
            # Mask only the 16 bits below the timestamp.  A wider mask would
            # OR the old low time bits back into the rebased time.
            rebased.append(clamped | (u & 0xFFFF))
        OCCUPANCY[n] = rebased

    for perf in EVENTS:
        for p in perf:
            p.time -= min_event_time

    for df in DBFILES:
        for T, timeline in enumerate(df['timeline']):
            time_acc = 0
            # Each wave starts with state 0 lasting from the origin to its begin time.
            states = [(0, df['begin_time'][T] - min_event_time)]
            states += [(int(t[0]), int(t[1])) for t in timeline]

            for state, duration in states:
                if duration > 1E8:
                    print('Warning: Time limit reached for ', state, duration)
                    break
                # Grow until the whole interval fits.  Doubling just once
                # would let numpy clip the slice below and drop the samples.
                TIMELINES[state] = _grown_timeline(TIMELINES[state], time_acc + duration)
                TIMELINES[state][time_acc:time_acc + duration] += 1
                time_acc += duration
parser = argparse.ArgumentParser() parser.add_argument("assembly_code", help="Path of the assembly code") + parser.add_argument("--depth", help="Maximum number of parsed waves per slot", default=100, type=int) parser.add_argument("--trace_file", help="Filter for trace files", default=None, type=str) parser.add_argument("--att_kernel", help="Kernel file", type=str, default=pathenv+'/*_kernel.txt') @@ -441,7 +366,6 @@ if __name__ == "__main__": print('Skipping analysis.') quit() - global EVENT_NAMES with open(os.getenv("COUNTERS_PATH"), 'r') as f: lines = [l.split('//')[0] for l in f.readlines()] @@ -452,7 +376,6 @@ if __name__ == "__main__": EVENT_NAMES += ['id: '+clean(line)] elif 'att: TARGET_CU' in line: args.target_cu = int(clean(line)) - print('Target CU set to:', args.target_cu) for line in lines: if 'PERFCOUNTER=' in line: EVENT_NAMES += [clean(line).split('SQ_')[1].lower()] @@ -471,45 +394,61 @@ if __name__ == "__main__": print('Could not find att output kernel:', args.att_kernel) exit(1) elif len(att_kernel) > 1: - print('Found multiple kernel matching given filters:') - for n, k in enumerate(att_kernel): - print('\t', n, '->', k) + if mpi_root: + print('Found multiple kernel matching given filters:') + for n, k in enumerate(att_kernel): + print('\t', n, '->', k) - bValid = False - while bValid == False: - try: - args.att_kernel = att_kernel[int(input("Please select number: "))] - bValid = True - except KeyboardInterrupt: - exit(0) - except: - print('Invalid option.') + bValid = False + while bValid == False: + try: + args.att_kernel = att_kernel[int(input("Please select number: "))] + bValid = True + except KeyboardInterrupt: + exit(0) + except: + print('Invalid option.') + if comm is not None: + args.att_kernel = comm.bcast(args.att_kernel, root=0) else: args.att_kernel = att_kernel[0] - print('Att kernel:', args.att_kernel) - code, jumps = parse_binary(args.assembly_code, args.att_kernel) - # Trace Parsing if args.trace_file is None: filenames = 
glob.glob(args.att_kernel.split('_kernel.txt')[0]+'_*.att') - assert(len(filenames) > 0) else: filenames = glob.glob(args.trace_file) + assert(len(filenames) > 0) - print('Trace filenames:', filenames) + if comm is not None: + filenames = filenames[comm.Get_rank()::comm.Get_size()] + + code = jumps = None + if mpi_root: + print('Att kernel:', args.att_kernel) + code, jumps = parse_binary(args.assembly_code, args.att_kernel) DBFILES = [] - global TIMELINES - global EVENTS TIMELINES = [np.zeros(int(1E4),dtype=np.int16) for k in range(5)] EVENTS = [] OCCUPANCY = [] - + GFXV = [] analysed_filenames = [] - SIMD_list = [] + + shader_engine_data_dict = {} for name in filenames: - SIMD, perfevents, occupancy, gfxv = getWaves(name, args.target_cu, False) + getWaves_binary(name, shader_engine_data_dict, args.target_cu, args.depth) + + if comm is not None: + code = comm.bcast(code, root=0) + jumps = comm.bcast(jumps, root=0) + + gc.collect() + latency_map = np.zeros((len(code)), dtype=np.int64) + hitcount_map = np.zeros((len(code)), dtype=np.int32) + for name in filenames: + SIMD, perfevents, occupancy, gfxv = shader_engine_data_dict[name] + getWaves_stitch(SIMD, code, jumps, gfxv, latency_map, hitcount_map) if len(SIMD) == 0: print("Error parsing ", name) continue @@ -517,8 +456,9 @@ if __name__ == "__main__": EVENTS.append(perfevents) DBFILES.append( persist(name, SIMD) ) OCCUPANCY.append( occupancy ) - SIMD_list.append( SIMD ) + GFXV.append(gfxv) + gc.collect() min_event_time = 2**62 for df in DBFILES: if len(df['begin_time']) > 0: @@ -528,36 +468,59 @@ if __name__ == "__main__": min_event_time = min(min_event_time, p.time) for occ in OCCUPANCY: min_event_time = min(min_event_time, np.min(np.array(occ)>>16)) + + gc.collect() + min_event_time = max(0, min_event_time-32) + if comm is not None: + min_event_time = comm.reduce(min_event_time, op=MPI.MIN) + min_event_time = comm.bcast(min_event_time, root=0) + + apply_min_event(min_event_time, OCCUPANCY, EVENTS, DBFILES, 
TIMELINES) + + GFXV = comm.gather(GFXV, root=0) + EVENTS = comm.gather(EVENTS, root=0) + OCCUPANCY = comm.gather(OCCUPANCY, root=0) + TIMELINES = comm.gather(TIMELINES, root=0) + gather_latency_map = comm.gather(latency_map, root=0) + gather_hitcount_map = comm.gather(hitcount_map, root=0) + gathered_filenames = comm.gather(analysed_filenames, root=0) + + if mpi_root: + latency_map *= 0 + hitcount_map *= 0 + for hit, lat in zip(gather_hitcount_map, gather_latency_map): + hitcount_map += hit + latency_map += lat + EVENTS = [e for elem in EVENTS for e in elem] + OCCUPANCY = [e for elem in OCCUPANCY for e in elem] + gathered_filenames = [e for elem in gathered_filenames for e in elem] + gfxv = [e for elem in GFXV for e in elem][0] + + TIMELINES_GATHER = TIMELINES + TIMELINES = [np.zeros((np.max([len(tm[k]) for tm in TIMELINES])), np.int16) for k in range(5)] + for gather in TIMELINES_GATHER: + for t, m in zip(TIMELINES, gather): + t[:len(m)] += m + del(TIMELINES_GATHER) + else: # free up memory + TIMELINES = [] + OCCUPANCY = [] + EVENTS = [] + else: + apply_min_event(min_event_time, OCCUPANCY, EVENTS, DBFILES, TIMELINES) + gathered_filenames = analysed_filenames + + if mpi_root: + for k in range(len(code)): + code[k][-2] = int(hitcount_map[k]) + code[k][-1] = int(latency_map[k]) + + gc.collect() print("Min time:", min_event_time) - for perf in EVENTS: - for p in perf: - p.time -= min_event_time - - OCCUPANCY = [[max(min(int((u>>16)-min_event_time)<<16,2**42),0) | (u&0xFFFFF) for u in occ] for occ in OCCUPANCY] - - for df in DBFILES: - for T in range(len(df['timeline'])): - timeline = df['timeline'][T] - time_acc = 0 - tuples1 = timeline.split('(') - tuples2 = [t.split(')')[0].split(',') for t in tuples1 if t != ''] - tuples3 = [(0,df['begin_time'][T]-min_event_time)]+[(int(t[0]),int(t[1])) for t in tuples2] - - for state in tuples3: - if state[1] > 1E8: - print('Warning: Time limit reached for ',state[0], state[1]) - break - if time_acc+state[1] > 
TIMELINES[state[0]].size: - TIMELINES[state[0]] = np.hstack([ - TIMELINES[state[0]], - np.zeros_like(TIMELINES[state[0]]) - ]) - TIMELINES[state[0]][time_acc:time_acc+state[1]] += 1 - time_acc += state[1] + drawinfo = {'TIMELINES':TIMELINES, 'EVENTS':EVENTS, 'EVENT_NAMES':EVENT_NAMES, 'OCCUPANCY': OCCUPANCY, 'ShaderNames': gathered_filenames} if args.genasm and len(args.genasm) > 0: - flight_count = view_trace(args, code, jumps, DBFILES, analysed_filenames, True, None, OCCUPANCY, args.dumpfiles, min_event_time, gfxv) - + flight_count = view_trace(args, code, DBFILES, analysed_filenames, True, OCCUPANCY, args.dumpfiles, min_event_time, gfxv, drawinfo, comm, mpi_root) with open(args.assembly_code, 'r') as file: lines = file.readlines() assembly_code = {l+1.0: lines[l][:-1] for l in range(len(lines))} @@ -568,4 +531,4 @@ if __name__ == "__main__": for k in keys: file.write(assembly_code[k]+'\n') else: - view_trace(args, code, jumps, DBFILES, analysed_filenames, False, GeneratePIC, OCCUPANCY, args.dumpfiles, min_event_time, gfxv) + view_trace(args, code, DBFILES, analysed_filenames, False, OCCUPANCY, args.dumpfiles, min_event_time, gfxv, drawinfo, comm, mpi_root) diff --git a/plugin/att/drawing.py b/plugin/att/drawing.py new file mode 100644 index 0000000000..cd44fc3219 --- /dev/null +++ b/plugin/att/drawing.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +import sys +if sys.version_info[0] < 3: + raise Exception("Must be using Python 3") + +import numpy as np +from io import BytesIO +import matplotlib.pyplot as plt +from copy import deepcopy +import json + +COUNTERS_MAX_CAPTURES = 1<<12 + +class Readable: + def __init__(self, jsonstring): + self.jsonstr = json.dumps(jsonstring) + self.seek = 0 + + def read(self, length=0): + if length<=0: + return self.jsonstr + else: + if self.seek >= len(self): + self.seek = 0 + return None + response = self.jsonstr[self.seek:self.seek+length] + self.seek += length + return bytes(response, 'utf-8') + + def __len__(self): + return 
class FileBytesIO:
    """Chunked reader over a private copy of a ``BytesIO`` buffer."""

    def __init__(self, iobytes):
        # Deep copy, so the reader is independent of the caller's buffer.
        self.iobytes = deepcopy(iobytes)
        self.seek = 0

    def __len__(self):
        return self.iobytes.getbuffer().nbytes

    def read(self, length=0):
        """Return the whole buffer, or the next ``length`` bytes.

        With ``length <= 0`` the full content is returned and the read
        position is not moved.  Otherwise the next chunk is returned; once
        the position has reached the end, ``None`` is returned and the
        position is reset to the start.
        """
        if length <= 0:
            return bytes(self.iobytes.getbuffer())
        if self.seek >= self.iobytes.getbuffer().nbytes:
            self.seek = 0
            return None
        response = self.iobytes.getbuffer()[self.seek:self.seek+length]
        self.seek += length
        return bytes(response)


def get_delta_time(events):
    """Return the smallest gap between consecutive bank-0 events of any CU.

    Only CUs 0..15 with more than two bank-0 events are considered.  If no
    CU qualifies, or the events cannot be processed, 1 is returned.
    """
    try:
        CUS = [[e.time for e in events if e.cu==k and e.bank==0] for k in range(16)]
        CUS = [np.asarray(c).astype(np.int64) for c in CUS if len(c) > 2]
        return np.min([np.min(abs(c[1:]-c[:-1])) for c in CUS])
    except Exception:
        # Best effort: np.min raises on an empty list, and malformed events
        # fall back to a delta of 1 as well.
        return 1


def draw_wave_metrics(selections, normalize, TIMELINES, EVENTS, EVENT_NAMES):
    """Plot the performance counters over time and return the PNG.

    Args:
        selections: Per-counter booleans; only selected counters are drawn.
        normalize: If true, each counter is scaled to percent of its maximum.
        TIMELINES: Unused; kept for interface compatibility.
        EVENTS: List of event lists; each event exposes ``time``, ``bank``
            and ``toTuple()``.
        EVENT_NAMES: Counter labels, returned unchanged.

    Returns:
        ``(EVENT_NAMES, FileBytesIO)`` where the second item holds the figure.
    """
    fig = plt.figure(figsize=(15,4))

    delta_step = 8
    quad_delta_time = max(delta_step,int(0.5+np.min([get_delta_time(events) for events in EVENTS])))
    maxtime = np.max([np.max([e.time for e in events]) for events in EVENTS])/quad_delta_time+1

    # Reduce the resolution until the capture fits in COUNTERS_MAX_CAPTURES bins.
    if maxtime*delta_step >= COUNTERS_MAX_CAPTURES:
        delta_step = 1
    while maxtime >= COUNTERS_MAX_CAPTURES:
        quad_delta_time *= 2
        maxtime /= 2

    maxtime = int(min(maxtime*delta_step, COUNTERS_MAX_CAPTURES))
    event_timeline = np.zeros((16, maxtime), dtype=np.int32)
    print('Delta:', quad_delta_time)
    print('Max_cycles:', maxtime*quad_delta_time*4//delta_step)

    cycles = 4*quad_delta_time//delta_step*np.arange(maxtime)
    kernel = len(EVENTS)*quad_delta_time

    # Every event, including the last of each list, is binned with its own
    # bank and the same time scaling.  The last event used to reuse the
    # previous event's bank and an unscaled start column.
    bin_width = quad_delta_time//delta_step
    for events in EVENTS:
        for event in events:
            bk = event.bank*4
            start = event.time // bin_width
            event_timeline[bk:bk+4, start:start+delta_step] += \
                np.asarray(event.toTuple()[1:5])[:, None]

    event_timeline = [np.convolve(e, [kernel for k in range(3)])[1:-1] for e in event_timeline]

    if normalize:
        event_timeline = [100*e/max(e.max(), 1E-5) for e in event_timeline]

    colors = ['blue', 'green', 'gray', 'red', 'orange', 'cyan', 'black', 'darkviolet',
              'yellow', 'darkred', 'pink', 'lime', 'gold', 'tan', 'aqua', 'olive']
    for e, n, c, sel in zip(event_timeline, EVENT_NAMES, colors, selections):
        if sel:
            plt.plot(cycles, e, '-', label=n, color=c)

    plt.legend()
    if normalize:
        plt.ylabel('As % of maximum')
    else:
        plt.ylabel('Value')
    plt.xlabel('Cycle')
    plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)

    figure_bytes = BytesIO()
    plt.savefig(figure_bytes, dpi=150)
    plt.close(fig)  # the PNG is already in figure_bytes; do not leak the figure
    return EVENT_NAMES, FileBytesIO(figure_bytes)


def draw_wave_states(selections, normalize, TIMELINES):
    """Plot how many waves are in each state per cycle and return the PNG.

    Args:
        selections: Per-state booleans; only selected states are drawn.
        normalize: If true, values are shown as percent of the per-cycle total.
        TIMELINES: Per-state numpy arrays; indices 1..4 are plotted.

    Returns:
        ``(STATES, FileBytesIO)`` with the plotted state names and the figure.
    """
    plot_indices = [1, 2, 3, 4]
    STATES = [['Empty', 'Idle', 'Exec', 'Wait', 'Stall'][k] for k in plot_indices]
    colors = [['gray', 'orange', 'green', 'red', 'blue'][k] for k in plot_indices]

    fig = plt.figure(figsize=(15,4))

    # Index of the last non-zero sample across the plotted states.
    maxtime = max([np.max((TIMELINES[k]!=0)*np.arange(0,TIMELINES[k].size)) for k in plot_indices])
    timelines = [deepcopy(TIMELINES[k][:maxtime]) for k in plot_indices]
    timelines = [np.pad(t, [0, maxtime-t.size]) for t in timelines]

    if normalize:
        timelines = np.array(timelines) / np.maximum(np.sum(timelines,0)*1E-2,1E-7)

    # Average groups of `trim` cycles so at most about 5000 points are drawn.
    trim = max(maxtime//5000,1)
    cycles = np.arange(0, timelines[0].size//trim, 1)*trim
    timelines = [time[:trim*(time.size//trim)].reshape((-1, trim)).mean(-1) if len(time) > 0 else cycles*0 for time in timelines]
    # Smooth with a normalized exponential kernel.
    kernsize = 21
    kernel = np.asarray([np.exp(-abs(10*k/kernsize)) for k in range(-kernsize//2,kernsize//2+1)])
    kernel /= np.sum(kernel)

    timelines = [np.convolve(time, kernel)[kernsize//2:-kernsize//2] for time in timelines]

    for t, s, c, sel in zip(timelines, STATES, colors, selections):
        if sel:
            plt.plot(cycles, t, label='State '+s, linewidth=1.1, color=c)

    plt.legend()
    if normalize:
        plt.ylabel('Waves state %')
    else:
        plt.ylabel('Waves state total')
    plt.xlabel('Cycle')
    plt.ylim(-1)
    plt.xlim(-maxtime//200, maxtime+maxtime//200+1)
    plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)
    figure_bytes = BytesIO()
    plt.savefig(figure_bytes, dpi=150)
    plt.close(fig)  # the PNG is already in figure_bytes; do not leak the figure
    return STATES, FileBytesIO(figure_bytes)


def draw_occupancy(selections, normalize, OCCUPANCY, shadernames):
    """Plot the total occupancy over time for each trace and return the PNG.

    Args:
        selections: Unused; kept for interface compatibility.
        normalize: If true, each curve is divided by its own maximum.
        OCCUPANCY: One list per trace of packed integers, decoded here as
            time = ``u >> 16``, value = ``(u >> 8) & 0xFF``, cu = ``u & 0xFF``.
        shadernames: One trace file name per entry of ``OCCUPANCY``.

    Returns:
        ``(names, FileBytesIO)`` with the curve labels and the figure.
    """
    fig = plt.figure(figsize=(15,4))
    names = []
    # Extent of the longest curve, used for the x-limits.  The defaults keep
    # the limits defined when there is nothing to plot.
    widest_time = 0
    widest_delta = 1

    for name, occ in zip(shadernames, OCCUPANCY):
        occ_values = [0]
        occ_times = [0]
        occ = [(int(u>>16), (u>>8)&0xFF, u&0xFF) for u in occ]
        current_occ = [0 for k in range(16)]

        # Running total: each sample replaces the previous value of its CU.
        for time, value, cu in occ:
            occ_times.append(time)
            occ_values.append(occ_values[-1] + value - current_occ[cu])
            current_occ[cu] = value
        try:
            name = 'SE'+name.split('.att')[0].split('_se')[-1]
        except Exception:
            # Best effort: keep the original label if it is not a string.
            pass
        names.append(name)

        NUM_DOTS = 1500
        maxtime = np.max(occ_times)
        delta = max(1, maxtime//NUM_DOTS)
        chart = np.zeros((maxtime//delta+1), dtype=np.float32)
        norm_fact = np.zeros_like(chart)

        for i, t in enumerate(occ_times[:-1]):
            b = t//delta
            e = max(b+1,occ_times[i+1]//delta)
            chart[b:e] += occ_values[i]
            norm_fact[b:e] += 1

        chart /= np.maximum(norm_fact,1)
        if normalize:
            chart /= max(chart.max(),1E-6)

        plt.plot(np.arange(chart.size)*delta, chart, label=name, linewidth=1.1)

        if maxtime >= widest_time:
            widest_time = maxtime
            widest_delta = delta

    plt.legend()
    if normalize:
        plt.ylabel('Occupancy %')
    else:
        plt.ylabel('Occupancy total')
    plt.xlabel('Cycle')
    plt.ylim(-1)
    plt.xlim(-widest_time//200, widest_time+widest_time//200+widest_delta+1)
    plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)
    figure_bytes = BytesIO()
    plt.savefig(figure_bytes, dpi=150)
    plt.close(fig)  # the PNG is already in figure_bytes; do not leak the figure
    return names, FileBytesIO(figure_bytes)
figures['occupancy.png'] = figure + + states, figure = draw_wave_states(selections, normalize, drawinfo['TIMELINES']) + response['timeline.png'] = states + figures['timeline.png'] = figure + + if len(EVENTS) > 0 and np.sum([len(e) for e in EVENTS]) > 32: + EVENT_NAMES, figure = draw_wave_metrics(selections, normalize, drawinfo['TIMELINES'], EVENTS, drawinfo['EVENT_NAMES']) + response['counters.png'] = EVENT_NAMES + figures['counters.png'] = figure + + return Readable(response), figures diff --git a/plugin/att/stitch.py b/plugin/att/stitch.py new file mode 100644 index 0000000000..1893ba27ab --- /dev/null +++ b/plugin/att/stitch.py @@ -0,0 +1,441 @@ +#!/usr/bin/env python3 +import sys +if sys.version_info[0] < 3: + raise Exception("Must be using Python 3") + +from collections import defaultdict +from copy import deepcopy + +MAX_STITCHED_TOKENS = 10000000 +MAX_FAILED_STITCHES = 256 +STACK_SIZE_LIMIT = 64 + +UNKNOWN = 0 +SMEM = 1 +SALU = 2 +VMEM = 3 +FLAT = 4 +LDS = 5 +VALU = 6 +JUMP = 7 +NEXT = 8 +IMMED = 9 +BRANCH = 10 +GETPC = 11 +SETPC = 12 +SWAPPC = 13 +LANEIO = 14 +DONT_KNOW = 100 + +WaveInstCategory = { + UNKNOWN: "UNKNOWN", + SMEM: "SMEM", + SALU: "SALU", + VMEM: "VMEM", + FLAT: "FLAT", + LDS: "LDS", + VALU: "VALU", + JUMP: "JUMP", + NEXT: "NEXT", + IMMED: "IMMED", + JUMP: "JUMP", + NEXT: "NEXT", + IMMED: "IMMED", + BRANCH: "BRANCH", + GETPC: "GETPC", + SETPC: "SETPC", + SWAPPC: "SWAPPC", + LANEIO: "LANEIO", + DONT_KNOW: "DONT_KNOW", +} + + +class RegisterWatchList: + def __init__(self, labels): + self.registers = {'v'+str(k): [[] for m in range(64)] for k in range(64)} + for k in range(64): + self.registers['s'+str(k)] = [] + self.labels = labels + + def try_translate(self, tok): + if tok[0] in ['s']: + return self.registers[self.range(tok)[0]] + elif '@' in tok: + return self.labels[tok.split('@')[0]]+1 + + def range(self, r): + reg = r.split(':') + if len(reg) == 1: + return reg + else: + r0 = reg[0].split('[') + return [r0[0]+str(k) for k in 
range(int(r0[1]), int(reg[1][:-1])+1)] + + def tokenize(self, line): + return [u for u in [t.split(',')[0].strip() for t in line.split(' ')] if len(u) > 0] + + def getpc(self, line, next_line): + #print('Get pc:', line) + try: + dst = line.split(' ')[1].strip() + label_dest = next_line.split(', ')[-1].split('@')[0] + for reg in self.range(dst): + self.registers[reg].append(deepcopy(self.labels[label_dest])) + except: + pass + + def swappc(self, line, line_num): + try: + tokens = self.tokenize(line) + dst = tokens[1] + src = tokens[2] + + popped = self.registers[self.range(src)[0]][-1] + self.registers[self.range(src)[0]] = self.registers[self.range(src)[0]][:-1] + self.registers[self.range(dst)[0]].append(line_num+1) + return popped + except: + return 0 + + def setpc(self, line): + try: + src = line.split(' ')[1].strip() + #print('Going to:', self.registers[self.range(src)[0]], src) + popped = self.registers[self.range(src)[0]][-1] + self.registers[self.range(src)[0]] = self.registers[self.range(src)[0]][:-1] + return popped + except: + return 0 + + def scratch(self, line): + try: + tokens = self.tokenize(line) + if '_load' in tokens[0]: + dst = tokens[1] + src = tokens[3]+tokens[4] + else: + src = tokens[2] + dst = tokens[3]+tokens[4] + self.registers[dst] = self.registers[src] + except: + pass + + def move(self, line): + try: + tokens = self.tokenize(line) + if tokens[2][0] in ['s', 'd'] and tokens[1][0] in ['s', 'd']: + self.registers[self.range(tokens[1])[0]] = deepcopy(self.registers[self.range(tokens[2])[0]]) + except: + pass + + def updatelane(self, line): + tokens = self.tokenize(line) + try: + if 'v_readlane' in tokens[0]: + self.registers[tokens[1]].append(self.registers[tokens[2]][int(tokens[3])][-1]) + self.registers[tokens[2]][int(tokens[3])] = self.registers[tokens[2]][int(tokens[3])][:-1] + elif 'v_writelane' in tokens[0]: + self.registers[tokens[1]][int(tokens[3])].append(self.registers[tokens[2]][-1]) + self.registers[tokens[2]] = 
self.registers[tokens[2]][-STACK_SIZE_LIMIT:] + except Exception as e: + pass + +def try_match_swapped(insts, code, i, line): + return insts[i+1][1] == code[line][1] and insts[i][1] == code[line+1][1] + +FORK_NAMES = 1 +class CachedInst: + def __init__(self, inst, as_line): + self.inst_type = inst + self.as_line = as_line + self.forks = None + +class Fork: + def __init__(self): + global FORK_NAMES + self.insts = [] + self.data = None + self.name = FORK_NAMES + FORK_NAMES += 1 + #print('Created new fork: ', self.name) + +def move_down_fork(fork, insts, i): #def move_down_fork(fork : Fork, insts : list, i : int): + N = min(len(insts), len(fork.insts)) + + while i < N: + if insts[i][1] == fork.insts[i].inst_type: + i += 1 + elif i= len(cur_fork.insts): + return False, cur_fork + + last_inst = cur_fork.insts[i] + if last_inst.forks is None: + last_inst.forks = [] + + bMatchFork = False + for fork in last_inst.forks: + if fork.insts[0].inst_type == insts[0][1]: + #print('Found match fork', fork.name) + cur_fork = fork + bMatchFork = True + break + if not bMatchFork: + cur_fork = Fork() + last_inst.forks.append(cur_fork) + return False, cur_fork + + print('Warning: Reached end of loop!') + return False, cur_fork + + +def stitch(insts, raw_code, jumps, gfxv): + bGFX9 = gfxv == 'vega' + result, i, line, loopCount, N = [], 0, 0, defaultdict(int), len(insts) + + SMEM_INST = [] # scalar memory + VLMEM_INST = [] # vector memory load + VSMEM_INST = [] # vector memory store + FLAT_INST = [] + NUM_SMEM = 0 + NUM_VLMEM = 0 + NUM_VSMEM = 0 + NUM_FLAT = 0 + skipped_immed = 0 + + mem_unroll = [] + flight_count = [] + + labels = {} + jump_map = [0] + code = [raw_code[0]] + for c in raw_code[1:]: + c = list(c) + c[0] = c[0].split(';')[0].split('//')[0].strip() + + if c[1] != 100: + code.append(c) + elif ':' in c[0]: + labels[c[0].split(':')[0]] = len(code) + jump_map.append(len(code)-1) + + reverse_map = [] + for k, v in enumerate(jump_map): + if v >= len(reverse_map): + 
reverse_map.append(k) + + jumps = {jump_map[j]+1: j for j in jumps} + + smem_ordering = 0 + vlmem_ordering = 0 + vsmem_ordering = 0 + + watchlist = RegisterWatchList(labels=labels) + + num_failed_stitches = 0 + loops = 0 + maxline = 0 + + dict_sucess, current_fork = fromDict(insts) + if dict_sucess: + result, loopCount, mem_unroll, flight_count, maxline = current_fork.data + result = [r+(asm[-1],) for r, asm in zip(insts, result)] + return result, loopCount, mem_unroll, flight_count, maxline, len(insts) + + while i < N: + loops += 1 + if line >= len(code) or loops > MAX_STITCHED_TOKENS or num_failed_stitches > MAX_FAILED_STITCHES: + break + + maxline = max(reverse_map[line], maxline) + inst = insts[i] + as_line = code[line] + + matched = True + next = line+1 + + if '_mov_' in as_line[0]: + watchlist.move(as_line[0]) + elif 'scratch_' in as_line[0]: + watchlist.scratch(as_line[0]) + + if as_line[1] == GETPC: + watchlist.getpc(as_line[0], code[line+1][0]) + matched = inst[1] in [SALU, JUMP] + elif as_line[1] == LANEIO: + watchlist.updatelane(as_line[0]) + matched = inst[1] == VALU + elif as_line[1] == SETPC: + next = watchlist.setpc(as_line[0]) + matched = inst[1] in [SALU, JUMP] + elif as_line[1] == SWAPPC: + next = watchlist.swappc(as_line[0], line) + matched = inst[1] in [SALU, JUMP] + elif inst[1] == as_line[1]: + if line in jumps: + loopCount[jumps[line]-1] += 1 + num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM + + if inst[1] == SMEM or inst[1] == LDS: + smem_ordering = 1 if inst[1] == SMEM else smem_ordering + SMEM_INST.append([reverse_map[line], num_inflight]) + NUM_SMEM += 1 + elif inst[1] == VMEM or (inst[1] == FLAT and 'global_' in as_line[0]): + inc_ordering = False + if 'buffer_' in as_line[0] or 'flat_' in as_line[0]: + inc_ordering = True + + if bGFX9 or 'load' in as_line[0]: + VLMEM_INST.append([reverse_map[line], num_inflight]) + NUM_VLMEM += 1 + if inc_ordering: + vlmem_ordering = 1 + else: + VSMEM_INST.append([reverse_map[line], 
num_inflight]) + NUM_VSMEM += 1 + if inc_ordering: + vsmem_ordering = 1 + elif inst[1] == FLAT: + smem_ordering = 1 + vlmem_ordering = 1 + vsmem_ordering = 1 + FLAT_INST.append([reverse_map[line], num_inflight]) + NUM_FLAT += 1 + elif inst[1] == IMMED and 's_waitcnt ' in as_line[0]: + if 'lgkmcnt' in as_line[0]: + wait_N = int(as_line[0].split('lgkmcnt(')[1].split(')')[0]) + flight_count.append([as_line[-1], num_inflight, wait_N]) + if wait_N == 0: + smem_ordering = 0 + if smem_ordering == 0: + offset = len(SMEM_INST)-wait_N + mem_unroll.append( [reverse_map[line], SMEM_INST[:offset]+FLAT_INST] ) + SMEM_INST = SMEM_INST[offset:] + NUM_SMEM = len(SMEM_INST) + FLAT_INST = [] + NUM_FLAT = 0 + else: + NUM_SMEM = min(max(wait_N-NUM_FLAT, 0), NUM_SMEM) + NUM_FLAT = min(max(wait_N-NUM_SMEM, 0), NUM_FLAT) + num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM + + if 'vmcnt' in as_line[0]: + wait_N = int(as_line[0].split('vmcnt(')[1].split(')')[0]) + flight_count.append([as_line[-1], num_inflight, wait_N]) + if wait_N == 0: + vlmem_ordering = 0 + if vlmem_ordering == 0: + offset = len(VLMEM_INST)-wait_N + mem_unroll.append( [reverse_map[line], VLMEM_INST[:offset]+FLAT_INST] ) + VLMEM_INST = VLMEM_INST[offset:] + NUM_VLMEM = len(VLMEM_INST) + FLAT_INST = [] + NUM_FLAT = 0 + else: + NUM_VLMEM = min(max(wait_N-NUM_FLAT, 0), NUM_VLMEM) + NUM_FLAT = min(max(wait_N-NUM_VLMEM, 0), NUM_FLAT) + num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM + + if 'vscnt' in as_line[0] or (bGFX9 and 'vmcnt' in as_line[0]): + try: + wait_N = int(as_line[0].split('vscnt(')[1].split(')')[0]) + except: + wait_N = int(as_line[0].split('vmcnt(')[1].split(')')[0]) + flight_count.append([as_line[-1], num_inflight, wait_N]) + if wait_N == 0: + vsmem_ordering = 0 + if vsmem_ordering == 0: + offset = len(VSMEM_INST)-wait_N + mem_unroll.append( [reverse_map[line], VSMEM_INST[:offset]+FLAT_INST] ) + VSMEM_INST = VSMEM_INST[offset:] + NUM_VSMEM = len(VSMEM_INST) + FLAT_INST = [] + NUM_FLAT = 0 + 
else: + NUM_VSMEM = min(max(wait_N-NUM_FLAT, 0), NUM_VSMEM) + NUM_FLAT = min(max(wait_N-NUM_VSMEM, 0), NUM_FLAT) + num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM + + elif inst[1] == JUMP and as_line[1] == BRANCH: + next = jump_map[as_line[2]] + if next is None or next == 0: + print('Jump to unknown location!', as_line) + break + elif inst[1] == NEXT and as_line[1] == BRANCH: + next = line + 1 + else: + matched = False + next = line + 1 + if i+1 < N and line+1 < len(code): + if try_match_swapped(insts, code, i, line): + temp = insts[i] + insts[i] = insts[i+1] + insts[i+1] = temp + next = line + elif 's_waitcnt ' in as_line[0] or '_load_' in as_line[0]: + if skipped_immed > 0 and 's_waitcnt ' in as_line[0]: + matched = True + skipped_immed -= 1 + else: + print('Parsing terminated at:', as_line) + break + + if matched: + result.append(inst + (reverse_map[line],)) + i += 1 + num_failed_stitches = 0 + elif not bGFX9 and inst[1] == IMMED and line != next: + skipped_immed += 1 + result.append(inst + (reverse_map[line],)) + next = line + i += 1 + else: + num_failed_stitches += 1 + line = next + + N = max(N, 1) + if len(result) != N: + print('Warning - Stitching rate: '+str(len(result) * 100 / N)+'% matched') + print('Leftovers:', [WaveInstCategory[insts[i+k][1]] for k in range(20) if i+k < len(insts)]) + try: + print(line, code[line]) + except: + pass + else: + while line < len(code): + if 's_endpgm' in code[line]: + mem_unroll.append( [reverse_map[line], SMEM_INST+VLMEM_INST+VSMEM_INST+FLAT_INST] ) + break + line += 1 + + current_fork.insts = [CachedInst(inst[1], inst[-1]) for inst in result] + current_fork.data = result, loopCount, mem_unroll, flight_count, maxline + return result, loopCount, mem_unroll, flight_count, maxline, len(insts) diff --git a/plugin/att/trace_view.py b/plugin/att/trace_view.py index 91277f930c..ae0f5ce474 100755 --- a/plugin/att/trace_view.py +++ b/plugin/att/trace_view.py @@ -3,16 +3,12 @@ import sys if sys.version_info[0] < 3: raise 
Exception("Must be using Python 3") - import os import sys import time import socket from pathlib import Path -from struct import * from collections import defaultdict -import json -import time import http.server import socketserver import socket @@ -20,427 +16,13 @@ import asyncio import websockets from multiprocessing import Process, Manager import numpy as np -from copy import deepcopy from http import HTTPStatus from io import BytesIO - - -class Readable: - def __init__(self, jsonstring): - self.jsonstr = json.dumps(jsonstring) - self.seek = 0 - - def read(self, length=0): - if length<=0: - return self.jsonstr - else: - if self.seek >= len(self): - self.seek = 0 - return None - response = self.jsonstr[self.seek:self.seek+length] - self.seek += length - return bytes(response, 'utf-8') - - def __len__(self): - return len(self.jsonstr) - - -MAX_STITCHED_TOKENS = 10000000 -MAX_FAILED_STITCHES = 256 -STACK_SIZE_LIMIT = 64 - -UNKNOWN = 0 -SMEM = 1 -SALU = 2 -VMEM = 3 -FLAT = 4 -LDS = 5 -VALU = 6 -JUMP = 7 -NEXT = 8 -IMMED = 9 -BRANCH = 10 -GETPC = 11 -SETPC = 12 -SWAPPC = 13 -LANEIO = 14 -DONT_KNOW = 100 - -WaveInstCategory = { - UNKNOWN: "UNKNOWN", - SMEM: "SMEM", - SALU: "SALU", - VMEM: "VMEM", - FLAT: "FLAT", - LDS: "LDS", - VALU: "VALU", - JUMP: "JUMP", - NEXT: "NEXT", - IMMED: "IMMED", - JUMP: "JUMP", - NEXT: "NEXT", - IMMED: "IMMED", - BRANCH: "BRANCH", - GETPC: "GETPC", - SETPC: "SETPC", - SWAPPC: "SWAPPC", - LANEIO: "LANEIO", - DONT_KNOW: "DONT_KNOW", -} +from drawing import Readable, GeneratePIC +from copy import deepcopy JSON_GLOBAL_DICTIONARY = {} - -class RegisterWatchList: - def __init__(self, labels): - self.registers = {'v'+str(k): [[] for m in range(64)] for k in range(64)} - for k in range(64): - self.registers['s'+str(k)] = [] - self.labels = labels - - def try_translate(self, tok): - if tok[0] in ['s']: - return self.registers[self.range(tok)[0]] - elif '@' in tok: - return self.labels[tok.split('@')[0]]+1 - - def range(self, r): - reg = 
r.split(':') - if len(reg) == 1: - return reg - else: - r0 = reg[0].split('[') - return [r0[0]+str(k) for k in range(int(r0[1]), int(reg[1][:-1])+1)] - - def tokenize(self, line): - return [u for u in [t.split(',')[0].strip() for t in line.split(' ')] if len(u) > 0] - - def getpc(self, line, next_line): - #print('Get pc:', line) - try: - dst = line.split(' ')[1].strip() - label_dest = next_line.split(', ')[-1].split('@')[0] - for reg in self.range(dst): - self.registers[reg].append(deepcopy(self.labels[label_dest])) - except: - pass - - def swappc(self, line, line_num): - try: - tokens = self.tokenize(line) - dst = tokens[1] - src = tokens[2] - - popped = self.registers[self.range(src)[0]][-1] - self.registers[self.range(src)[0]] = self.registers[self.range(src)[0]][:-1] - self.registers[self.range(dst)[0]].append(line_num+1) - return popped - except: - return 0 - - def setpc(self, line): - try: - src = line.split(' ')[1].strip() - #print('Going to:', self.registers[self.range(src)[0]], src) - popped = self.registers[self.range(src)[0]][-1] - self.registers[self.range(src)[0]] = self.registers[self.range(src)[0]][:-1] - return popped - except: - return 0 - - def scratch(self, line): - try: - tokens = self.tokenize(line) - if '_load' in tokens[0]: - dst = tokens[1] - src = tokens[3]+tokens[4] - else: - src = tokens[2] - dst = tokens[3]+tokens[4] - self.registers[dst] = self.registers[src] - except: - pass - - def move(self, line): - try: - tokens = self.tokenize(line) - if tokens[2][0] in ['s', 'd'] and tokens[1][0] in ['s', 'd']: - self.registers[self.range(tokens[1])[0]] = deepcopy(self.registers[self.range(tokens[2])[0]]) - except: - pass - - def updatelane(self, line): - tokens = self.tokenize(line) - try: - if 'v_readlane' in tokens[0]: - self.registers[tokens[1]].append(self.registers[tokens[2]][int(tokens[3])][-1]) - self.registers[tokens[2]][int(tokens[3])] = self.registers[tokens[2]][int(tokens[3])][:-1] - elif 'v_writelane' in tokens[0]: - 
self.registers[tokens[1]][int(tokens[3])].append(self.registers[tokens[2]][-1]) - self.registers[tokens[2]] = self.registers[tokens[2]][-STACK_SIZE_LIMIT:] - except Exception as e: - pass - - -def try_match_swapped(insts, code, i, line): - return insts[i+1][1] == code[line][1] and insts[i][1] == code[line+1][1] - - -def Match(inst_value, code_value): - if code_value == inst_value: - return True - if code_value in [GETPC, SWAPPC, SETPC] and inst_value in [SALU, JUMP]: - return True - if code_value == BRANCH and inst_value in [JUMP, NEXT]: # TODO: Maybe lets not reorder branches? - return True - return False - - -def get_match_lookahead(insts, code, i, line): - if try_match_swapped(insts, code, i, line): - return [i+1, i] - new_inst_order = [] - - allowed_insts = list(range(i, min(i+4, len(insts)))) - for l in range(line, min(line+10, len(code))): - bMatch = False - for j in allowed_insts: - if Match(insts[j][1], code[l][1]): - new_inst_order.append(j) - allowed_insts.remove(j) - bMatch = True - break - if bMatch == False: - break - if len(new_inst_order): - new_inst_order += [j for j in list(range(i, max(new_inst_order)+1)) if j not in new_inst_order] - return new_inst_order - - -def stitch(insts, raw_code, jumps, gfxv): - bGFX9 = gfxv == 'vega' - result, i, line, loopCount, N = [], 0, 0, defaultdict(int), len(insts) - - SMEM_INST = [] # scalar memory - VLMEM_INST = [] # vector memory load - VSMEM_INST = [] # vector memory store - FLAT_INST = [] - NUM_SMEM = 0 - NUM_VLMEM = 0 - NUM_VSMEM = 0 - NUM_FLAT = 0 - skipped_immed = 0 - - mem_unroll = [] - flight_count = [] - - labels = {} - jump_map = [0] - code = [raw_code[0]] - for c in raw_code[1:]: - c = list(c) - c[0] = c[0].split(';')[0].split('//')[0].strip() - - if c[1] != 100: - code.append(c) - elif ':' in c[0]: - labels[c[0].split(':')[0]] = len(code) - jump_map.append(len(code)-1) - - reverse_map = [] - for k, v in enumerate(jump_map): - if v >= len(reverse_map): - reverse_map.append(k) - - jumps = 
{jump_map[j]+1: j for j in jumps} - - smem_ordering = 0 - vlmem_ordering = 0 - vsmem_ordering = 0 - max_line = 0 - - watchlist = RegisterWatchList(labels=labels) - - num_failed_stitches = 0 - loops = 0 - maxline = 0 - - while i < N: - #print('L', line) - loops += 1 - if line >= len(code) or loops > MAX_STITCHED_TOKENS or num_failed_stitches > MAX_FAILED_STITCHES: - break - - maxline = max(reverse_map[line], maxline) - inst = insts[i] - - as_line = code[line] - max_line = max(max_line, reverse_map[line]) - - matched = True - next = line+1 - - if '_mov_' in as_line[0]: - watchlist.move(as_line[0]) - elif 'scratch_' in as_line[0]: - watchlist.scratch(as_line[0]) - - if as_line[1] == GETPC: # TODO: @ can put you ahead of label! - watchlist.getpc(as_line[0], code[line+1][0]) - matched = inst[1] in [SALU, JUMP] - elif as_line[1] == LANEIO: - watchlist.updatelane(as_line[0]) - matched = inst[1] == VALU - elif as_line[1] == SETPC: - next = watchlist.setpc(as_line[0]) - matched = inst[1] in [SALU, JUMP] - elif as_line[1] == SWAPPC: - next = watchlist.swappc(as_line[0], line) - #print('Next:', next, code[next]) - matched = inst[1] in [SALU, JUMP] - elif inst[1] == as_line[1]: - if line in jumps: - loopCount[jumps[line]-1] += 1 # label is the previous line - num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - - if inst[1] == SMEM or inst[1] == LDS: - smem_ordering = 1 if inst[1] == SMEM else smem_ordering - SMEM_INST.append([reverse_map[line], num_inflight]) - NUM_SMEM += 1 - elif inst[1] == VMEM or (inst[1] == FLAT and 'global_' in as_line[0]): - inc_ordering = False - if 'buffer_' in as_line[0] or 'flat_' in as_line[0]: - inc_ordering = True - - if bGFX9 or 'load' in as_line[0]: - VLMEM_INST.append([reverse_map[line], num_inflight]) - NUM_VLMEM += 1 - if inc_ordering: - vlmem_ordering = 1 - else: - VSMEM_INST.append([reverse_map[line], num_inflight]) - NUM_VSMEM += 1 - if inc_ordering: - vsmem_ordering = 1 - elif inst[1] == FLAT: - smem_ordering = 1 - 
vlmem_ordering = 1 - vsmem_ordering = 1 - FLAT_INST.append([reverse_map[line], num_inflight]) - NUM_FLAT += 1 - elif inst[1] == IMMED and 's_waitcnt ' in as_line[0]: - if 'lgkmcnt' in as_line[0]: - wait_N = int(as_line[0].split('lgkmcnt(')[1].split(')')[0]) - flight_count.append([as_line[-1], num_inflight, wait_N]) - if wait_N == 0: - smem_ordering = 0 - if smem_ordering == 0: - offset = len(SMEM_INST)-wait_N - mem_unroll.append( [reverse_map[line], SMEM_INST[:offset]+FLAT_INST] ) - SMEM_INST = SMEM_INST[offset:] - NUM_SMEM = len(SMEM_INST) - FLAT_INST = [] - NUM_FLAT = 0 - else: - NUM_SMEM = min(max(wait_N-NUM_FLAT, 0), NUM_SMEM) - NUM_FLAT = min(max(wait_N-NUM_SMEM, 0), NUM_FLAT) - num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - - if 'vmcnt' in as_line[0]: - wait_N = int(as_line[0].split('vmcnt(')[1].split(')')[0]) - flight_count.append([as_line[-1], num_inflight, wait_N]) - if wait_N == 0: - vlmem_ordering = 0 - if vlmem_ordering == 0: - offset = len(VLMEM_INST)-wait_N - mem_unroll.append( [reverse_map[line], VLMEM_INST[:offset]+FLAT_INST] ) - VLMEM_INST = VLMEM_INST[offset:] - NUM_VLMEM = len(VLMEM_INST) - FLAT_INST = [] - NUM_FLAT = 0 - else: - NUM_VLMEM = min(max(wait_N-NUM_FLAT, 0), NUM_VLMEM) - NUM_FLAT = min(max(wait_N-NUM_VLMEM, 0), NUM_FLAT) - num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - - if 'vscnt' in as_line[0] or (bGFX9 and 'vmcnt' in as_line[0]): - try: - wait_N = int(as_line[0].split('vscnt(')[1].split(')')[0]) - except: - wait_N = int(as_line[0].split('vmcnt(')[1].split(')')[0]) - flight_count.append([as_line[-1], num_inflight, wait_N]) - if wait_N == 0: - vsmem_ordering = 0 - if vsmem_ordering == 0: - offset = len(VSMEM_INST)-wait_N - mem_unroll.append( [reverse_map[line], VSMEM_INST[:offset]+FLAT_INST] ) - VSMEM_INST = VSMEM_INST[offset:] - NUM_VSMEM = len(VSMEM_INST) - FLAT_INST = [] - NUM_FLAT = 0 - else: - NUM_VSMEM = min(max(wait_N-NUM_FLAT, 0), NUM_VSMEM) - NUM_FLAT = min(max(wait_N-NUM_VSMEM, 0), NUM_FLAT) - 
num_inflight = NUM_FLAT + NUM_SMEM + NUM_VLMEM + NUM_VSMEM - - elif inst[1] == JUMP and as_line[1] == BRANCH: - next = jump_map[as_line[2]] - if next is None or next == 0: - print('Jump to unknown location!', as_line) - break - elif inst[1] == NEXT and as_line[1] == BRANCH: - next = line + 1 - else: - matched = False - next = line + 1 - if i+1 < N and line+1 < len(code): - #print('Swap:', try_match_swapped(insts, code, i, line)) - if try_match_swapped(insts, code, i, line): - temp = insts[i] - insts[i] = insts[i+1] - insts[i+1] = temp - next = line - elif 's_waitcnt ' in as_line[0] or '_load_' in as_line[0]: - if skipped_immed > 0 and 's_waitcnt ' in as_line[0]: - matched = True - skipped_immed -= 1 - else: - print('Parsing terminated at:', as_line) - break - - #print(matched, WaveInstCategory[inst[1]], WaveInstCategory[as_line[1]], as_line, inst) - #print([WaveInstCategory[insts[i+k][1]] for k in range(20) if i+k < len(insts)]) - if matched: - result.append(inst + (reverse_map[line],)) - i += 1 - num_failed_stitches = 0 - elif not bGFX9 and inst[1] == IMMED and line != next: - skipped_immed += 1 - result.append(inst + (reverse_map[line],)) - next = line - i += 1 - else: - num_failed_stitches += 1 - line = next - - N = max(N, 1) - if len(result) != N: - print('Warning - Stitching rate: '+str(len(result) * 100 / N)+'% matched') - print('Leftovers:', [WaveInstCategory[insts[i+k][1]] for k in range(20) if i+k < len(insts)]) - try: - print(line, code[line]) - except: - pass - else: - while line < len(code): - if 's_endpgm' in code[line]: - mem_unroll.append( [reverse_map[line], SMEM_INST+VLMEM_INST+VSMEM_INST+FLAT_INST] ) - break - line += 1 - - return result, loopCount, mem_unroll, flight_count, maxline - - def get_ip(): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.settimeout(0) @@ -459,28 +41,10 @@ PORT, WebSocketPort = 8000, 18000 SP = '\u00A0' -def extract_tuple(content, num): - vals = content.split(',') - assert (len(vals) == num) - last_val = 
vals[-1][:-1] if vals[-1].endswith(')') else vals[-1] - vals = [vals[0][1:]] + vals[1:-1] + [last_val] - return tuple(int(val) for val in vals) - - -def get_top_n(stitched): +def get_top_n(code): TOP_N = 10 - by_line_num = defaultdict(lambda: [0, 0, 0]) - for (_, _, s2i, run_time, line_num) in stitched: - entry = by_line_num[line_num] - entry[0] += 1 - entry[1] += s2i - entry[2] += run_time - top_n = sorted( - [(line_num, v[0], v[1], v[2]) - for (line_num, v) in by_line_num.items()], - key=lambda x: x[2] + x[3], - reverse=True) - return top_n[:TOP_N] + top_n = sorted(deepcopy(code), key=lambda x: x[-1], reverse=True)[:TOP_N] + return [(line_num, hitc, 0, run_time) for _, _, _, _, line_num, _, hitc, run_time in top_n] def wave_info(df, id): @@ -498,74 +62,28 @@ def wave_info(df, id): return dic -def extract_waves(waves): - result, slot2seq = [], {} - for id in waves['id']: - row = {key: waves[key][id] for key in waves.keys()} - - insts, timeline = [], [] - for x in row['instructions'].split('),'): - if len(x) > 0: - insts.append(extract_tuple(x, 4)) - for x in row['timeline'].split('),'): - if len(x) > 0: - timeline.append(extract_tuple(x, 2)) - - # aggregate per wave slot - if (row['simd'], row['wave_slot']) in slot2seq: - slot = result[slot2seq[(row['simd'], row['wave_slot'])]] - last_end_time = slot[2][-1][-1] - slot[2] += (row['id'], row['begin_time'], row['end_time']), - slot[3] += insts - # filler between waves - slot[4] += (0, row['begin_time'] - last_end_time), - slot[4] += timeline - else: - slot2seq[row['simd'], row['wave_slot']] = len(result) - result.append([row['simd'], row['wave_slot'], - [(row['id'], row['begin_time'], row['end_time'])], - insts, - timeline]) - - return result - - -def extract_data(df, se_number, code, jumps, gfxv): +def extract_data(df, se_number): if len(df['id']) == 0 or len(df['instructions']) == 0 or len(df['timeline']) == 0: return None - cu_waves = extract_waves(df) wave_filenames = [] flight_count = [] - maxgrade = 
[{df['wave_slot'][wave_id]: -1 for wave_id in df['id']} for k in range(4)] - non_stitched = [{df['wave_slot'][wave_id]: -1 for wave_id in df['id']} for k in range(4)] - + wave_slot_count = [{df['wave_slot'][wave_id]: 0 for wave_id in df['id']} for k in range(4)] + print('Number of waves:', len(df['id'])) allwaves_maxline = 0 for wave_id in df['id']: - if non_stitched[df['simd'][wave_id]][df['wave_slot'][wave_id]] == 0: - continue - insts, timeline = [], [] - if len(df['instructions'][wave_id]) == 0 or len(df['timeline'][wave_id]) == 0: - continue + stitched, loopCount, mem_unroll, count, maxline, num_insts = df['instructions'][wave_id] + timeline = df['timeline'][wave_id] - for x in df['instructions'][wave_id].split('),'): - insts.append(extract_tuple(x, 4)) - for x in df['timeline'][wave_id].split('),'): - timeline.append(extract_tuple(x, 2)) - - stitched, loopCount, mem_unroll, count, maxline = stitch(insts, code, jumps, gfxv) - srate = len(stitched)**2 / max(len(insts), 1) - if srate <= maxgrade[df['simd'][wave_id]][df['wave_slot'][wave_id]]: + if len(stitched) == 0 or len(timeline) == 0 or len(stitched) != num_insts: continue allwaves_maxline = max(allwaves_maxline, maxline) - maxgrade[df['simd'][wave_id]][df['wave_slot'][wave_id]] = srate - non_stitched[df['simd'][wave_id]][df['wave_slot'][wave_id]] = len(insts) - len(stitched) flight_count.append(count) - - wave_entry = { + + wave_entry = { "id": int(df['id'][wave_id]), "simd": int(df['simd'][wave_id]), "slot": int(df['wave_slot'][wave_id]), @@ -578,33 +96,36 @@ def extract_data(df, se_number, code, jumps, gfxv): } data_obj = { "name": 'SE'.format(se_number), - "kernel": code[0][0], "duration": sum(dur for (_, dur) in timeline), "wave": wave_entry, "loop_count": loopCount, - "top_n": get_top_n(stitched), + "top_n": [], + "num_stitched": len(stitched), + "num_insts": num_insts, "websocket_port": WebSocketPort, "generation_time": time.ctime() } - OUT = 
'se'+str(se_number)+'_sm'+str(df['simd'][wave_id])+'_wv'+str(df['wave_slot'][wave_id])+'.json' + simd_id = df['simd'][wave_id] + slot_id = df['wave_slot'][wave_id] + slot_count = wave_slot_count[simd_id][slot_id] + wave_slot_count[simd_id][slot_id] += 1 + + OUT = 'se'+str(se_number)+'_sm'+str(simd_id)+'_sl'+str(slot_id)+'_wv'+str(slot_count)+'.json' JSON_GLOBAL_DICTIONARY[OUT] = Readable(data_obj) - wave_filenames.append(OUT) + wave_filenames.append((OUT, df['begin_time'][wave_id], df['end_time'][wave_id])) data_obj = { "name": 'SE'.format(se_number), - "kernel": code[0][0], - "simd_waves": [], - "cu_waves": cu_waves, - "code": code[:allwaves_maxline+16], "websocket_port": WebSocketPort, "generation_time": time.ctime() } - se_filename = 'se'+str(se_number)+'_code.json' + se_filename = None if len(wave_filenames) > 0: + se_filename = 'se'+str(se_number)+'_info.json' JSON_GLOBAL_DICTIONARY[se_filename] = Readable(data_obj) - return flight_count, wave_filenames, se_filename + return flight_count, wave_filenames, se_filename, allwaves_maxline class NoCacheHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): @@ -618,19 +139,18 @@ class NoCacheHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): self.send_header("Expires", "0") def do_GET(self): - global PICTURE_CALLBACK - if 'timeline.png?' in self.path: - selections = [int(s)!=0 for s in self.path.split('timeline.png?')[1]] - counters_json, imagebytes, _, _ = PICTURE_CALLBACK(selections[1:], selections[0]) - JSON_GLOBAL_DICTIONARY['counters.json'] = counters_json - JSON_GLOBAL_DICTIONARY[self.path.split('/')[-1]] = imagebytes + if '.png?' 
in self.path and self.path.split('/')[-1] not in JSON_GLOBAL_DICTIONARY.keys(): + selections = [int(s)!=0 for s in self.path.split('.png?')[-1]] + counters_json, imagebytes = GeneratePIC(self.drawinfo, selections[1:], selections[0]) + JSON_GLOBAL_DICTIONARY['graph_options.json'] = counters_json + JSON_GLOBAL_DICTIONARY[self.path.split('/')[-1]] = imagebytes[self.path.split('/')[-1].split('?')[0]] - if '.json' in self.path or 'timeline.png' in self.path or 'wstates' in self.path: + if '.json' in self.path or '.png' in self.path: try: response_file = JSON_GLOBAL_DICTIONARY[self.path.split('/')[-1]] - #print(response_file) except: print('Invalid json request:', self.path) + print(JSON_GLOBAL_DICTIONARY.keys()) self.send_error(HTTPStatus.NOT_FOUND, "File not found") return self.send_response(HTTPStatus.OK) @@ -658,9 +178,11 @@ class RocTCPServer(socketserver.TCPServer): self.socket.bind(self.server_address) -def run_server(): +def run_server(drawinfo): Handler = NoCacheHTTPRequestHandler - os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)),'ui')) + Handler.drawinfo = drawinfo + os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)),'ui/')) + #os.chdir('ui/') try: with RocTCPServer((IPAddr, PORT), Handler) as httpd: httpd.serve_forever() @@ -676,7 +198,6 @@ def fix_space(line): def WebSocketserver(websocket, path): data = websocket.recv() - print(354, data) cpp, ln, _ = data.split(':') ln = int(ln) HL, EMP = 'highlight', '' @@ -713,68 +234,87 @@ def assign_ports(ports): PORT, WebSocketPort = ps[0], ps[1] -def call_picture_callback(return_dict): - global PICTURE_CALLBACK - response, imagebytes, wstates, counter_events = PICTURE_CALLBACK() - return_dict['counters.json'] = response - return_dict['timeline.png'] = imagebytes - for n, m in enumerate(wstates): +def call_picture_callback(return_dict, drawinfo): + response, imagebytes = GeneratePIC(drawinfo) + return_dict['graph_options.json'] = response + for k, v in imagebytes.items(): + return_dict[k] 
= v + + for n, m in enumerate(drawinfo['TIMELINES']): return_dict['wstates'+str(n)+'.json'] = Readable({"data": [int(n) for n in list(np.asarray(m))]}) - for n, e in enumerate(counter_events): + for n, e in enumerate(drawinfo['EVENTS']): return_dict['se'+str(n)+'_perfcounter.json'] = Readable({"data": [v.toTuple() for v in e]}) -def view_trace(args, code, jumps, dbnames, att_filenames, bReturnLoc, pic_callback, OCCUPANCY, bDumpOnly, se_time_begin, gfxv): - global PICTURE_CALLBACK - PICTURE_CALLBACK = pic_callback - manager = Manager() - return_dict = manager.dict() - JSON_GLOBAL_DICTIONARY['occupancy.json'] = Readable({str(k): OCCUPANCY[k] for k in range(len(OCCUPANCY))}) +def view_trace(args, code, dbnames, att_filenames, bReturnLoc, OCCUPANCY, bDumpOnly, se_time_begin, gfxv, drawinfo, MPI_COMM, mpi_root): + global JSON_GLOBAL_DICTIONARY + pic_thread = None + if mpi_root: + manager = Manager() + return_dict = manager.dict() + JSON_GLOBAL_DICTIONARY['occupancy.json'] = Readable({str(k): OCCUPANCY[k] for k in range(len(OCCUPANCY))}) + pic_thread = Process(target=call_picture_callback, args=(return_dict, drawinfo)) + pic_thread.start() - pic_thread = Process(target=call_picture_callback, args=(return_dict,)) - pic_thread.start() - - assert(len(dbnames) > 0) att_filenames = [Path(f).name for f in att_filenames] se_numbers = [int(a.split('_se')[1].split('.att')[0]) for a in att_filenames] flight_count = [] simd_wave_filenames = {} se_filenames = [] + allse_maxline = 0 for se_number, dbname in zip(se_numbers, dbnames): if len(dbname['id']) == 0: continue - count, wv_filenames, se_filename = extract_data(dbname, se_number, code, jumps, gfxv) + count, wv_filenames, se_filename, maxline = extract_data(dbname, se_number) + if se_filename is None: + continue + allse_maxline = max(allse_maxline, maxline) se_filenames.append(se_filename) if count is not None: flight_count.append(count) simd_wave_filenames[se_number] = wv_filenames + if mpi_root: + 
JSON_GLOBAL_DICTIONARY['code.json'] = Readable({"code": code[:allse_maxline+16], "top_n": get_top_n(code[:allse_maxline+16])}) + if bReturnLoc: return flight_count for key in simd_wave_filenames.keys(): wv_array = [[ - int(s.split('_sm')[1].split('_wv')[0]), - int(s.split('_wv')[1].split('.')[0]), + int(s[0].split('_sm')[1].split('_sl')[0]), + int(s[0].split('_sl')[1].split('_wv')[0]), + int(s[0].split('_wv')[1].split('.')[0]), s ] for s in simd_wave_filenames[key]] wv_dict = {} for wv in wv_array: try: - wv_dict[wv[0]][wv[1]] = wv[2] + wv_dict[wv[0]][wv[1]][wv[2]] = wv[3] except: try: - wv_dict[wv[0]] = {wv[1]: wv[2]} + wv_dict[wv[0]][wv[1]] = {wv[2]: wv[3]} except: - exit(-1) + try: + wv_dict[wv[0]] = {wv[1]: {wv[2]: wv[3]}} + except: + pass simd_wave_filenames[key] = wv_dict - JSON_GLOBAL_DICTIONARY['filenames.json'] = Readable({"wave_filenames": simd_wave_filenames, + if MPI_COMM is not None: + se_filenames = MPI_COMM.gather(se_filenames, root=0) + simd_wave_filenames = MPI_COMM.gather(simd_wave_filenames, root=0) + if mpi_root: + se_filenames = [e for elem in se_filenames for e in elem] + simd_wave_filenames = {k:v for smf in simd_wave_filenames for k,v in smf.items()} + + if mpi_root: + JSON_GLOBAL_DICTIONARY['filenames.json'] = Readable({"wave_filenames": simd_wave_filenames, "se_filenames": se_filenames, "global_begin_time": int(se_time_begin), "gfxv": gfxv}) @@ -785,11 +325,18 @@ def view_trace(args, code, jumps, dbnames, att_filenames, bReturnLoc, pic_callba JSON_GLOBAL_DICTIONARY[k] = v if bDumpOnly == False: + if MPI_COMM is not None: + JSON_GLOBAL_DICTIONARY = MPI_COMM.gather(JSON_GLOBAL_DICTIONARY, root=0) + if not mpi_root: + quit() + JSON_GLOBAL_DICTIONARY = {k:v for smf in JSON_GLOBAL_DICTIONARY for k,v in smf.items()} + + JSON_GLOBAL_DICTIONARY['live.json'] = Readable({'live': 1}) if args.ports: assign_ports(args.ports) print('serving at ports: {0},{1}'.format(PORT, WebSocketPort)) try: - PROCS = [Process(target=run_server), 
Process(target=run_websocket)] + PROCS = [Process(target=run_server, args=[drawinfo]), Process(target=run_websocket)] for p in PROCS: p.start() for p in PROCS: @@ -797,8 +344,10 @@ def view_trace(args, code, jumps, dbnames, att_filenames, bReturnLoc, pic_callba except KeyboardInterrupt: print("Exitting.") else: - os.makedirs('ui', exist_ok=True) - os.system('cp ' + os.path.join(os.path.abspath(os.path.dirname(__file__)),'ui') + '/* ui/' ) + os.makedirs('ui/', exist_ok=True) + if mpi_root: + JSON_GLOBAL_DICTIONARY['live.json'] = Readable({'live': 0}) + os.system('cp ' + os.path.join(os.path.abspath(os.path.dirname(__file__)),'ui') + '/* ui/' ) for k, v in JSON_GLOBAL_DICTIONARY.items(): with open(os.path.join('ui',k), 'w' if '.json' in k else 'wb') as f: f.write(v.read()) diff --git a/plugin/att/ui/httpserver.py b/plugin/att/ui/httpserver.py index b8a821b4cc..8e75b7be9f 100644 --- a/plugin/att/ui/httpserver.py +++ b/plugin/att/ui/httpserver.py @@ -20,8 +20,8 @@ class NoCacheHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): self.send_header("Expires", "0") def do_GET(self): - if 'timeline.png?' in self.path: - self.path = 'timeline.png' + if '.png?' in self.path: + self.path = self.path.split('.png?')[0]+'.png' http.server.SimpleHTTPRequestHandler.do_GET(self) diff --git a/plugin/att/ui/index.html b/plugin/att/ui/index.html index 7dc12e15f8..c6418511ef 100644 --- a/plugin/att/ui/index.html +++ b/plugin/att/ui/index.html @@ -3,16 +3,19 @@ - MI Trace Viewer + ATT Analysis View -
+
-
-
+
+ + + +
@@ -26,13 +29,14 @@
-
+
-
+
+
-
+
-
    +
      - + diff --git a/plugin/att/ui/styles.css b/plugin/att/ui/styles.css index 1949a608d9..396913b77d 100644 --- a/plugin/att/ui/styles.css +++ b/plugin/att/ui/styles.css @@ -103,4 +103,35 @@ li:hover .tooltip { .btn:hover { color: blue; -} \ No newline at end of file +} + +.dropbtn { + border: 2px solid black; + background-color: #D7D7D7; + color: black; + padding: 3px 4px; + font-size: 15px; + cursor: pointer; + border-style: ridge; + border-radius: 4px; +} + +.dropbtn:hover, .dropbtn:focus { + color: blue; +} + +.dropdown { + position: relative; +} + +.dropdown-content { + display: none; + position: absolute; + background-color: #e0e0f0; + min-width: 10px; + box-shadow: 0px 8px 16px 0px rgba(0,0,0.1,0.1); + z-index: 1; +} + +.dropdown-content a:hover {background-color: #ddd;} +.show {display:inline-flex;}