Files
rocm-systems/plugin/att/drawing.py
T
Giovanni LB 60197d0f20 SWDEV-427013: Fixing ATT parser for tensile kernels and network mode
Change-Id: I07cd34f5606c0aaf718d9e3ffabd5a407523a943
2023-10-13 20:41:46 -04:00

352 строки
11 KiB
Python

#!/usr/bin/env python3
import sys
if sys.version_info[0] < 3:
raise Exception("Must be using Python 3")
import numpy as np
from io import BytesIO
import matplotlib.pyplot as plt
from copy import deepcopy
import json
# Upper bound (1 << 12 == 4096) on the number of time bins that
# draw_wave_metrics allocates for its counter timeline; when a trace would
# need more bins, that function coarsens its time step until it fits.
COUNTERS_MAX_CAPTURES = 1 << 12
class Readable:
    """Minimal file-like view over an object serialized as JSON text.

    The object handed to the constructor is serialized once with
    ``json.dumps``. ``read()`` with a non-positive length returns the whole
    document as ``str``; ``read(n)`` with a positive ``n`` returns successive
    ``n``-character chunks encoded as UTF-8 ``bytes``.
    """

    def __init__(self, jsonstring):
        """Serialize *jsonstring* and place the read cursor at the start."""
        self.jsonstr = json.dumps(jsonstring)
        # Cursor (in characters) advanced by chunked reads.
        self.seek = 0

    def __len__(self):
        """Return the number of characters in the serialized document."""
        return len(self.jsonstr)

    def read(self, length=0):
        """Return the whole document, or the next chunk of it.

        Args:
            length: Chunk size in characters. Zero or negative means
                "return everything" (as ``str``, cursor untouched).

        Returns:
            The full JSON text as ``str`` when ``length <= 0``; otherwise the
            next chunk as ``bytes``, or ``None`` once the cursor has reached
            the end (the cursor is then rewound so the data can be re-read).
        """
        if length <= 0:
            return self.jsonstr

        if self.seek >= len(self):
            # Exhausted: rewind for the next consumer and signal end of data.
            self.seek = 0
            return None

        begin = self.seek
        self.seek = begin + length
        return self.jsonstr[begin : begin + length].encode("utf-8")
class FileBytesIO:
    """Re-readable, chunkable wrapper around a private copy of a BytesIO.

    The wrapped stream is deep-copied on construction, so later writes to the
    caller's stream do not affect the data served here.
    """

    def __init__(self, iobytes):
        """Copy *iobytes* and place the read cursor at the start."""
        self.iobytes = deepcopy(iobytes)
        # Cursor (in bytes) advanced by chunked reads.
        self.seek = 0

    def read(self, length=0):
        """Return the whole buffer, or the next chunk of it.

        Args:
            length: Chunk size in bytes. Zero or negative means "return the
                entire buffer" (cursor untouched).

        Returns:
            ``bytes`` with the requested data, or ``None`` once the cursor has
            reached the end (the cursor is then rewound so the data can be
            re-read).
        """
        view = self.iobytes.getbuffer()
        if length <= 0:
            return bytes(view)

        if self.seek >= view.nbytes:
            # Exhausted: rewind for the next consumer and signal end of data.
            self.seek = 0
            return None

        begin = self.seek
        self.seek = begin + length
        return bytes(view[begin : begin + length])

    def __len__(self):
        """Return the size of the buffered data in bytes."""
        return self.iobytes.getbuffer().nbytes
def get_delta_time(events):
    """Return the smallest gap between consecutive bank-0 event timestamps.

    Events are grouped by ``cu`` index (0..15), keeping only those whose
    ``bank`` is 0. Groups with fewer than three samples are ignored. For each
    remaining group the absolute differences between consecutive ``time``
    values are taken, and the overall minimum is returned.

    Args:
        events: Iterable of objects exposing ``time``, ``cu`` and ``bank``.

    Returns:
        The minimum absolute time difference (a NumPy integer), or ``1`` as a
        best-effort fallback when no group qualifies or the input cannot be
        processed.
    """
    try:
        per_cu_times = [
            [e.time for e in events if e.cu == cu and e.bank == 0]
            for cu in range(16)
        ]
        per_cu_times = [
            np.asarray(times).astype(np.int64)
            for times in per_cu_times
            if len(times) > 2
        ]
        # np.min over an empty list raises ValueError, which lands in the
        # fallback below together with malformed events.
        return np.min([np.min(np.abs(np.diff(times))) for times in per_cu_times])
    except Exception:
        # Deliberate best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return 1
def draw_wave_metrics(selections, normalize, TIMELINES, EVENTS, EVENT_NAMES):
    """Plot the sampled event counters over time and return the image.

    Every event contributes the four values ``event.toTuple()[1:5]`` to rows
    ``bank * 4 .. bank * 4 + 3`` of a ``16 x maxtime`` accumulation grid, over
    ``delta_step`` consecutive time bins starting at the event's scaled time.
    Each row is then smoothed with a 3-tap constant kernel and plotted.

    Args:
        selections: Per-row flags; a row is plotted only when its flag is
            truthy.
        normalize: When truthy, each row is rescaled to a percentage of its
            own maximum.
        TIMELINES: Not used by this function; kept so the signature stays
            compatible with existing callers.
        EVENTS: Sequence of event lists. Events must expose ``time``, ``bank``
            and ``toTuple()`` (and whatever ``get_delta_time`` reads).
        EVENT_NAMES: Legend labels, one per row.

    Returns:
        Tuple ``(EVENT_NAMES, FileBytesIO)`` where the second item wraps the
        rendered figure (saved at 150 dpi).
    """
    figure = plt.figure(figsize=(15, 4))
    try:
        delta_step = 8
        quad_delta_time = max(
            delta_step,
            int(0.5 + np.min([get_delta_time(events) for events in EVENTS])),
        )

        # Empty lists carry no timestamps; skipping them avoids np.max([])
        # raising ValueError.
        populated = [events for events in EVENTS if len(events) > 0]
        maxtime = (
            np.max([np.max([e.time for e in events]) for events in populated])
            / quad_delta_time
            + 1
        )

        # Keep the number of bins within COUNTERS_MAX_CAPTURES: first drop the
        # per-event spread, then coarsen the time step by powers of two.
        if maxtime * delta_step >= COUNTERS_MAX_CAPTURES:
            delta_step = 1
        while maxtime >= COUNTERS_MAX_CAPTURES:
            quad_delta_time *= 2
            maxtime /= 2
        maxtime = int(min(maxtime * delta_step, COUNTERS_MAX_CAPTURES))

        event_timeline = np.zeros((16, maxtime), dtype=np.int32)
        print("Delta:", quad_delta_time)
        print("Max_cycles:", maxtime * quad_delta_time * 4 // delta_step)
        cycles = 4 * quad_delta_time // delta_step * np.arange(maxtime)
        kernel = len(EVENTS) * quad_delta_time

        # All events, including the last one of each list, are binned with the
        # same time scale and with their own bank. The previous special case
        # for the last event indexed the grid with an unscaled time and reused
        # the bank of the preceding event (unbound for one-element lists).
        time_scale = quad_delta_time // delta_step
        for events in EVENTS:
            for event in events:
                bk = event.bank * 4
                start = event.time // time_scale
                event_timeline[bk : bk + 4, start : start + delta_step] += np.asarray(
                    event.toTuple()[1:5]
                )[:, None]

        # 3-tap constant kernel; dropping one sample at each end of the full
        # convolution keeps every row at its original length.
        smoothing = [kernel] * 3
        event_timeline = [np.convolve(row, smoothing)[1:-1] for row in event_timeline]
        if normalize:
            event_timeline = [100 * row / max(row.max(), 1e-5) for row in event_timeline]

        colors = [
            "blue",
            "green",
            "gray",
            "red",
            "orange",
            "cyan",
            "black",
            "darkviolet",
            "yellow",
            "darkred",
            "pink",
            "lime",
            "gold",
            "tan",
            "aqua",
            "olive",
        ]
        for row, label, color, selected in zip(
            event_timeline, EVENT_NAMES, colors, selections
        ):
            if selected:
                plt.plot(cycles, row, "-", label=label, color=color)

        plt.legend()
        if normalize:
            plt.ylabel("As % of maximum")
        else:
            plt.ylabel("Value")
        plt.xlabel("Cycle")
        plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)

        figure_bytes = BytesIO()
        plt.savefig(figure_bytes, dpi=150)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(figure)
    return EVENT_NAMES, FileBytesIO(figure_bytes)
def draw_wave_states(selections, normalize, TIMELINES):
    """Plot the per-state wave timelines and return the image.

    Timelines at indices 1..4 of ``TIMELINES`` (labelled Idle, Exec, Wait and
    Stall) are cut at the highest index holding a non-zero sample, optionally
    normalized so the four states sum to 100 at every sample, averaged in
    groups of ``trim`` samples so at most about 5000 points are drawn,
    smoothed with a normalized exponential kernel, and plotted against an
    x axis scaled by 16.

    Args:
        selections: Per-state flags; a state is plotted only when its flag is
            truthy.
        normalize: When truthy, plot each state as a percentage of the sum of
            the four plotted states.
        TIMELINES: Indexable collection whose entries 1..4 support ``.size``,
            slicing and element-wise comparison (presumably NumPy arrays;
            verify against the caller).

    Returns:
        Tuple ``(STATES, FileBytesIO)``: the list of plotted state names and
        the rendered figure (saved at 150 dpi).
    """
    plot_indices = [1, 2, 3, 4]
    state_names = ["Empty", "Idle", "Exec", "Wait", "Stall"]
    state_colors = ["gray", "orange", "green", "red", "blue"]
    STATES = [state_names[k] for k in plot_indices]
    colors = [state_colors[k] for k in plot_indices]

    figure = plt.figure(figsize=(15, 4))
    try:
        # Highest index with a non-zero sample across the plotted timelines.
        maxtime = max(
            [
                np.max((TIMELINES[k] != 0) * np.arange(0, TIMELINES[k].size))
                for k in plot_indices
            ]
        )
        maxtime = max(maxtime, 1)

        timelines = [deepcopy(TIMELINES[k][:maxtime]) for k in plot_indices]
        # Pad shorter timelines with zeros so all four share one length.
        timelines = [np.pad(t, [0, maxtime - t.size]) for t in timelines]

        if normalize:
            timelines = np.array(timelines) / np.maximum(
                np.sum(timelines, 0) * 1e-2, 1e-7
            )

        # Average groups of `trim` samples to bound the number of points.
        trim = max(maxtime // 5000, 1)
        cycles = np.arange(0, timelines[0].size // trim, 1) * trim
        timelines = [
            t[: trim * (t.size // trim)].reshape((-1, trim)).mean(-1)
            if len(t) > 0
            else cycles * 0
            for t in timelines
        ]

        # Exponentially decaying smoothing kernel, normalized to sum to 1.
        kernsize = 15
        kernel = np.asarray(
            [
                np.exp(-abs(10 * k / kernsize))
                for k in range(-kernsize // 2, kernsize // 2 + 1)
            ]
        )
        kernel /= np.sum(kernel)
        # Slicing the full convolution restores the pre-smoothing length.
        timelines = [
            np.convolve(t, kernel)[kernsize // 2 : -kernsize // 2]
            for t in timelines
            if len(t) > 0
        ]

        maxtime *= 16
        cycles *= 16

        for series, state, color, selected in zip(timelines, STATES, colors, selections):
            if selected:
                plt.plot(cycles, series, label="State " + state, linewidth=1.1, color=color)

        plt.legend()
        if normalize:
            plt.ylabel("Waves state %")
        else:
            plt.ylabel("Waves state total")
        plt.xlabel("Cycle")
        plt.ylim(-1)
        plt.xlim(-maxtime // 200, maxtime + maxtime // 200 + 1)
        plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)

        figure_bytes = BytesIO()
        plt.savefig(figure_bytes, dpi=150)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(figure)
    return STATES, FileBytesIO(figure_bytes)
def draw_occupancy_per_dispatch(selections, normalize, OCCUPANCY, dispatchnames):
    """Plot occupancy over time, one curve per dispatch, and return the image.

    Each entry of ``OCCUPANCY`` is a list of ``(time, value, cu, kid)``
    records; lists with 16 or fewer records are skipped. For every remaining
    list a running total per ``kid`` is accumulated into time bins of width
    ``delta`` and divided by the number of contributions per bin; the results
    of all lists are summed into one chart with one row per dispatch.

    Args:
        selections: Not used by this function; kept for a uniform signature.
        normalize: Only selects the y-axis label; the data is not rescaled
            here.
        OCCUPANCY: Sequence of record lists as described above.
        dispatchnames: Mapping of dispatch id to name (``.items()`` is used),
            in the same order as the ``kid`` indices.

    Returns:
        Tuple ``(dispatchnames, FileBytesIO)``: the mapping as passed in and
        the rendered figure (saved at 150 dpi).
    """
    figure = plt.figure(figsize=(15, 4))
    try:
        num_dispatches = len(dispatchnames)

        # Time span: the largest timestamp of the last record of any list that
        # is long enough to be plotted.
        maxtime = 1
        for occ in OCCUPANCY:
            if len(occ) <= 16:
                continue
            maxtime = max(maxtime, occ[-1][0])

        NUM_DOTS = 1600
        delta = max(1, maxtime // NUM_DOTS)
        chart = np.zeros((num_dispatches, maxtime // delta + 2), dtype=np.float32)

        for occ in OCCUPANCY:
            if len(occ) <= 16:
                continue
            small_chart = np.zeros_like(chart)
            norm_fact = np.zeros_like(chart)
            norm_fact += 1E-6  # avoids division by zero for untouched bins

            # The first 16 records seed the per-CU values of dispatch 0.
            current_occ = [[0 for _ in range(16)] for _ in range(num_dispatches)]
            current_occ[0] = [record[1] for record in occ[:16]]
            current_time = [0 for _ in range(num_dispatches)]
            total_value = [0 for _ in range(num_dispatches)]
            total_value[0] = np.sum(current_occ[0])

            for time, value, cu, kid in occ:
                # Fill the bins since this dispatch's previous record with its
                # running total, then apply the new per-CU value.
                b = current_time[kid]
                e = max(b + 1, time // delta)
                small_chart[kid][b:e] += total_value[kid]
                norm_fact[kid][b:e] += 1
                total_value[kid] += value - current_occ[kid][cu]
                current_occ[kid][cu] = value
                current_time[kid] = time // delta

            # Close each dispatch's curve at its last seen bin.
            # NOTE(review): the normalizer is incremented by `value`, not 1;
            # kept as in the original -- confirm this is intended.
            for small, norm, time, value in zip(
                small_chart, norm_fact, current_time, total_value
            ):
                small[time] += value
                norm[time] += value

            chart += small_chart / norm_fact

        for (dispatch_id, name), occ in zip(dispatchnames.items(), chart):
            plt.plot(
                np.arange(occ.size) * delta,
                occ,
                label=str(dispatch_id) + '#' + name,
                linewidth=1.1,
            )

        plt.legend()
        if normalize:
            plt.ylabel("Occupancy %")
        else:
            plt.ylabel("Occupancy total")
        plt.xlabel("Cycle")
        plt.ylim(-1)
        plt.xlim(-maxtime // 200, maxtime + maxtime // 200 + delta + 1)
        plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)

        figure_bytes = BytesIO()
        plt.savefig(figure_bytes, dpi=150)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(figure)
    return dispatchnames, FileBytesIO(figure_bytes)
def draw_occupancy(selections, normalize, OCCUPANCY, shadernames, numdispatchid):
    """Plot total occupancy over time, one curve per trace, and return the image.

    ``shadernames`` and ``OCCUPANCY`` are walked in lockstep. Each occupancy
    entry is a list of ``(time, value, cu, kid)`` records; entries with 16 or
    fewer records are skipped. A running total over all ``(kid, cu)`` slots is
    rebuilt from the records, resampled into at most about 1500 time bins
    (averaging the totals that fall into a bin) and plotted.

    Args:
        selections: Not used by this function; kept for a uniform signature.
        normalize: When truthy, each curve is divided by its own maximum and
            the y-axis label changes.
        OCCUPANCY: Sequence of record lists as described above.
        shadernames: Names paired with the entries of ``OCCUPANCY``. A name
            containing ``_se<X>.att`` is shown as ``SE<X>``; any other name is
            shown unchanged.
        numdispatchid: Number of ``kid`` slots to track.

    Returns:
        Tuple ``(names, FileBytesIO)``: the legend labels of the plotted
        curves and the rendered figure (saved at 150 dpi).
    """
    figure = plt.figure(figsize=(15, 4))
    try:
        names = []
        g_maxtime = 1
        g_delta = 1

        for name, occ in zip(shadernames, OCCUPANCY):
            if len(occ) <= 16:
                continue

            # The first 16 records seed the per-CU values of dispatch 0.
            current_occ = [[0 for _ in range(16)] for _ in range(numdispatchid)]
            current_occ[0] = [record[1] for record in occ[:16]]
            occ_values = [np.sum(current_occ[0])]
            occ_times = [0]
            for time, value, cu, kid in occ:
                occ_times.append(time)
                occ_values.append(occ_values[-1] + value - current_occ[kid][cu])
                current_occ[kid][cu] = value

            try:
                names.append('SE' + name.split('_se')[1].split('.att')[0])
            except (AttributeError, IndexError, TypeError):
                # Name does not follow the "<...>_se<X>.att" pattern (or is
                # not a str): fall back to showing it unchanged.
                names.append(name)

            NUM_DOTS = 1500
            maxtime = occ_times[-1] + 1
            delta = max(1, maxtime // NUM_DOTS)
            g_maxtime = max(g_maxtime, maxtime)
            g_delta = max(g_delta, delta)

            chart = np.zeros((maxtime // delta + 1), dtype=np.float32)
            norm_fact = np.zeros_like(chart)
            norm_fact += 1E-6  # avoids division by zero for untouched bins
            # Each total holds from its own timestamp until the next record.
            for i in range(len(occ_times) - 1):
                b = occ_times[i] // delta
                e = max(b + 1, occ_times[i + 1] // delta)
                chart[b:e] += occ_values[i]
                norm_fact[b:e] += 1
            chart /= norm_fact

            if normalize:
                chart /= max(chart.max(), 1e-6)

            plt.plot(
                np.arange(chart.size) * delta, chart, label=names[-1], linewidth=1.1
            )

        plt.legend()
        if normalize:
            plt.ylabel("Occupancy %")
        else:
            plt.ylabel("Occupancy total")
        plt.xlabel("Cycle")
        plt.ylim(-1)
        plt.xlim(-g_maxtime // 200, g_maxtime + g_maxtime // 200 + g_delta + 1)
        plt.subplots_adjust(left=0.04, right=1, top=1, bottom=0.1)

        figure_bytes = BytesIO()
        plt.savefig(figure_bytes, dpi=150)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(figure)
    return names, FileBytesIO(figure_bytes)
def GeneratePIC(drawinfo, selections=None, normalize=False):
    """Render every available chart for a trace and describe them as JSON.

    Args:
        drawinfo: Mapping providing ``"EVENTS"``, ``"OCCUPANCY"``,
            ``"ShaderNames"``, ``"DispatchNames"``, ``"TIMELINES"`` and, when
            counters are drawn, ``"EVENT_NAMES"``.
        selections: Per-series enable flags forwarded to the drawing
            functions. ``None`` (the default) enables 16 series.
        normalize: Forwarded to the drawing functions.

    Returns:
        Tuple ``(Readable, figures)``. The ``Readable`` serves a JSON object
        mapping each image name to the series names it shows; ``figures`` maps
        the same image names to ``FileBytesIO`` objects holding the images.
        ``"counters.png"`` is included only when ``EVENTS`` holds more than 32
        events in total.
    """
    if selections is None:
        # Fresh list per call instead of a shared mutable default argument.
        selections = [True] * 16

    EVENTS = drawinfo["EVENTS"]
    response = {}
    figures = {}

    # Unpack each packed word into the (time, value, cu, kid) records that the
    # occupancy drawing functions iterate over: bits 23+ (scaled by 16),
    # bits 12-18, bits 19-22 and bits 0-11 respectively.
    OCCUPANCY = [
        [
            (16 * int(u >> 23), (u >> 12) & 0x7F, (u >> 19) & 0xF, u & 0xFFF)
            for u in packed
        ]
        for packed in drawinfo["OCCUPANCY"]
    ]

    states, figure = draw_occupancy(
        selections,
        normalize,
        OCCUPANCY,
        drawinfo["ShaderNames"],
        len(drawinfo["DispatchNames"]),
    )
    response["occupancy.png"] = states
    figures["occupancy.png"] = figure

    states, figure = draw_occupancy_per_dispatch(
        selections, normalize, OCCUPANCY, drawinfo["DispatchNames"]
    )
    response["dispatches.png"] = states
    figures["dispatches.png"] = figure

    states, figure = draw_wave_states(selections, normalize, drawinfo["TIMELINES"])
    response["timeline.png"] = states
    figures["timeline.png"] = figure

    # Counters are drawn only when there is a meaningful number of samples.
    if len(EVENTS) > 0 and sum(len(e) for e in EVENTS) > 32:
        EVENT_NAMES, figure = draw_wave_metrics(
            selections, normalize, drawinfo["TIMELINES"], EVENTS, drawinfo["EVENT_NAMES"]
        )
        response["counters.png"] = EVENT_NAMES
        figures["counters.png"] = figure

    return Readable(response), figures