Files
rocm-systems/src/utils/perfagg.py
T

464 خطوط
14 KiB
Python

2023-02-13 09:26:12 -06:00
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
2022-11-04 14:49:36 -05:00
import sys, os, pathlib, shutil, subprocess, argparse, glob, re
import numpy as np
import math
2023-05-16 15:39:45 -05:00
import warnings
2023-05-05 15:07:20 -05:00
import pandas as pd
2022-11-04 14:49:36 -05:00
prog = "omniperf"

# Per IP block max number of simultaneous counters (GFX IP blocks).
# These limits are identical for every SoC listed in ``perfmon_config``;
# only the number of TCC channels differs between them.
_GFX_COUNTER_LIMITS = {
    "SQ": 8,
    "TA": 2,
    "TD": 2,
    "TCP": 4,
    "TCC": 4,
    "CPC": 2,
    "CPF": 2,
    "SPI": 2,
    "GRBM": 2,
    "GDS": 4,
}

# Perfmon limits keyed by SoC name. Every entry is an independent dict (the
# shared table is copied, not referenced), so one SoC can be given different
# limits later without affecting the others.
perfmon_config = {
    "vega10": {**_GFX_COUNTER_LIMITS, "TCC_channels": 16},
    "mi50": {**_GFX_COUNTER_LIMITS, "TCC_channels": 16},
    "mi100": {**_GFX_COUNTER_LIMITS, "TCC_channels": 32},
    "mi200": {**_GFX_COUNTER_LIMITS, "TCC_channels": 32},
}
2023-05-08 11:56:49 -05:00
2023-05-16 15:39:45 -05:00
def test_df_column_equality(df):
return df.eq(df.iloc[:, 0], axis=0).all(1).all()
2023-05-05 15:07:20 -05:00
# joins disparate runs less dumbly than rocprof
2023-05-16 15:39:45 -05:00
def join_prof(workload_dir, join_type, log_file, verbose, out=None):
    """Join the per-run ``pmc_perf_*.csv`` files of a workload into one CSV.

    Every input file is read with pandas and given a ``key`` column that
    identifies the n-th dispatch of a kernel (and, for ``"grid"`` joins, of a
    kernel/grid-size pair). The files are inner-merged on that key, duplicated
    bookkeeping columns are dropped, Begin/End timestamps are averaged across
    runs, and the result is written to *out*.

    Args:
        workload_dir: Directory containing the ``pmc_perf_*.csv`` files.
        join_type: ``"kernel"`` to match dispatches by ``KernelName`` only, or
            ``"grid"`` to match by ``KernelName`` and ``grd``.
        log_file: Object with a ``write`` method; join diagnostics are
            appended to it.
        verbose: When true, successful joins are reported and the per-run
            input files are kept; otherwise the inputs are deleted.
        out: Output CSV path. Defaults to ``<workload_dir>/pmc_perf.csv``.

    Exits the process with status 1 if *join_type* is not recognized or no
    input file is found.
    """
    # Set default output path if not specified
    if out is None:
        out = workload_dir + "/pmc_perf.csv"

    # Validate once up front instead of once per input file
    if join_type not in ("kernel", "grid"):
        print("ERROR: Unrecognized --join-type")
        sys.exit(1)

    # glob order depends on the filesystem; sort so that the column suffixes
    # and the "original" (un-suffixed) columns are reproducible
    files = sorted(glob.glob(workload_dir + "/" + "pmc_perf_*.csv"))
    if not files:
        print("ERROR: No pmc_perf_*.csv files found in " + workload_dir)
        sys.exit(1)

    df = None
    for i, file in enumerate(files):
        _df = pd.read_csv(file)
        if join_type == "kernel":
            occurrence = _df.groupby("KernelName").cumcount()
            _df["key"] = _df.KernelName + " - " + occurrence.astype(str)
        else:
            # The occurrence counter restarts for every (kernel, grid) pair,
            # so the grid size must be part of the key to keep it unique.
            occurrence = _df.groupby(["KernelName", "grd"]).cumcount()
            _df["key"] = (
                _df.KernelName
                + " - "
                + _df.grd.astype(str)
                + " - "
                + occurrence.astype(str)
            )

        if df is None:
            df = _df
        else:
            # join by unique index of kernel
            df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}"))

    # TODO: check for any mismatch in joins
    duplicate_cols = {
        "gpu": [col for col in df.columns if "gpu" in col],
        "grd": [col for col in df.columns if "grd" in col],
        "wgr": [col for col in df.columns if "wgr" in col],
        "lds": [col for col in df.columns if "lds" in col],
        "scr": [col for col in df.columns if "scr" in col],
        "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
        "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
        "sgpr": [col for col in df.columns if "sgpr" in col],
    }
    for name, cols in duplicate_cols.items():
        # Column family absent from the inputs: nothing to compare
        if not cols:
            continue
        if test_df_column_equality(df[cols]):
            if verbose:
                msg = "Successfully joined {} in pmc_perf.csv".format(name)
                print(msg)
                log_file.write(msg + "\n")
        else:
            msg = (
                "WARNING: Detected differing {} values while joining pmc_perf.csv".format(
                    name
                )
            )
            warnings.warn(msg)
            log_file.write(msg + "\n")

    # now, we can throw away:
    #   A) any of the "boring" duplicates
    #   B) any timestamps that are _not_ Begin/End, which are the ones we
    #      care about
    unwanted = [
        # removed merged counters, keep original
        "gpu-id_",
        "grd_",
        "wgr_",
        "lds_",
        "scr_",
        "vgpr_",
        "sgpr_",
        "Index_",
        # un-mergable, remove all
        "queue-id",
        "queue-index",
        "pid",
        "tid",
        "fbar",
        "sig",
        "obj",
        # timestamps other than Begin/End
        "DispatchNs",
        "CompleteNs",
    ]
    df = df[[k for k in df.keys() if not any(check in k for check in unwanted)]]

    #   C) sanity check the name and key
    namekeys = [k for k in df.keys() if "KernelName" in k]
    assert len(namekeys), "no KernelName column found in joined data"
    for k in namekeys[1:]:
        assert (df[namekeys[0]] == df[k]).all(), "kernel names differ across runs"
    df = df.drop(columns=namekeys[1:])

    # now take the mean of the begin and end timestamps across runs
    bkeys = [k for k in df.keys() if "Begin" in k]
    ekeys = [k for k in df.keys() if "End" in k]
    endNs = df[ekeys].mean(axis=1)
    beginNs = df[bkeys].mean(axis=1)
    # and replace
    df = df.drop(columns=bkeys)
    df = df.drop(columns=ekeys)
    df["BeginNs"] = beginNs
    df["EndNs"] = endNs

    # finally, drop the join key
    df = df.drop(columns=["key"])
    # and save to file
    df.to_csv(out, index=False)
    # and delete old file(s)
    if not verbose:
        for file in files:
            os.remove(file)
2023-05-05 15:07:20 -05:00
2023-05-08 11:56:49 -05:00
2023-05-05 15:07:20 -05:00
def pmc_perf_split(workload_dir):
    """Split ``perfmon/pmc_perf.txt`` into one input file per ``pmc:`` line.

    Each ``pmc:`` line of ``<workload_dir>/perfmon/pmc_perf.txt`` (comments
    stripped) is written to its own ``pmc_perf_<i>.txt`` in the same
    directory, followed by empty ``gpu:``, ``range:`` and ``kernel:``
    sections. The combined ``pmc_perf.txt`` is removed afterwards.

    Args:
        workload_dir: Workload directory containing the ``perfmon`` folder.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"
    combined_path = workload_perfmon_dir + "/pmc_perf.txt"
    with open(combined_path, "r") as fd:
        lines = fd.read().splitlines()

    # Iterate over each line in pmc_perf.txt
    mpattern = r"^pmc:(.*)"
    i = 0
    for line in lines:
        # Strip comments, skip lines that are empty afterwards
        stext = line.split("#")[0].strip()
        if not stext:
            continue

        # all pmc counters start with "pmc:"
        if re.match(mpattern, stext) is None:
            continue

        # Create separate file for each line
        with open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") as fd:
            fd.write(stext + "\n\n")
            fd.write("gpu:\n")
            fd.write("range:\n")
            fd.write("kernel:\n")
        i += 1

    # Remove old pmc_perf.txt input from perfmon dir
    os.remove(combined_path)
2022-11-04 14:49:36 -05:00
def perfmon_coalesce(pmc_files_list, workload_dir, soc):
    """Collect the unique PMC counters of several perfmon files per IP block.

    Every ``pmc:`` line of every input file is parsed. Lines containing
    ``SQ_ACCUM_PREV_HIRES`` are written verbatim to their own file in
    ``<workload_dir>/perfmon`` (which must already exist); all other counters
    are de-duplicated into per-IP-block buckets.

    Args:
        pmc_files_list: Paths of the perfmon input files to read.
        workload_dir: Workload directory containing the ``perfmon`` folder.
        soc: Key into ``perfmon_config`` selecting the target SoC.

    Returns:
        dict: IP block name -> list of counter names, in first-seen order.
        The extra ``"TCC2"`` entry maps a TCC channel id (as a string) to the
        sorted list of per-channel counters for that channel.

    Raises:
        KeyError: If a counter's prefix is not one of the known IP blocks.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"

    # match pattern for pmc counters
    pmc_line = re.compile(r"^pmc:(.*)")
    # per-channel TCC counters carry their channel id in square brackets
    tcc_channel = re.compile(r"[\s\S]+\[(\d+)\]")

    # Bucket order matters: it is the order in which callers iterating the
    # returned dict see the IP blocks.
    pmc_list = {
        "SQ": [],
        "GRBM": [],
        "TCP": [],
        "TA": [],
        "TD": [],
        "TCC": [],
        "SPI": [],
        "CPC": [],
        "CPF": [],
        "GDS": [],
        "TCC2": {},  # per-channel TCC perfmon
    }
    for ch in range(perfmon_config[soc]["TCC_channels"]):
        pmc_list["TCC2"][str(ch)] = []

    # Extract all PMC counters and store in separate buckets
    for fname in pmc_files_list:
        with open(fname, "r") as fd:
            lines = fd.read().splitlines()

        for line in lines:
            # Strip all comments, skip empty lines
            stext = line.split("#")[0].strip()
            if not stext:
                continue

            # all pmc counters start with "pmc:"
            m = pmc_line.match(stext)
            if m is None:
                continue

            # we have found all the counters, store them in buckets
            counters = m.group(1).split()

            if "SQ_ACCUM_PREV_HIRES" in counters:
                # save all level counters separately; the level counter is
                # the one listed right before SQ_ACCUM_PREV_HIRES
                # NOTE(review): if SQ_ACCUM_PREV_HIRES is listed first, the
                # index below wraps around to the last counter on the line.
                nindex = counters.index("SQ_ACCUM_PREV_HIRES")
                level_counter = counters[nindex - 1]

                # Save to level counter file, file name = level counter name
                with open(
                    workload_perfmon_dir + "/" + level_counter + ".txt", "w"
                ) as fd:
                    fd.write(stext + "\n\n")
                    fd.write("gpu:\n")
                    fd.write("range:\n")
                    fd.write("kernel:\n")
                continue

            # save normal pmc counters in matching buckets
            for counter in counters:
                IP_block = counter.split(sep="_")[0].upper()
                # SQC and SQ belong to the same IP block, coalesce them
                if IP_block == "SQC":
                    IP_block = "SQ"

                if IP_block != "TCC":
                    bucket = pmc_list[IP_block]
                else:
                    # TCC counters processing
                    m = tcc_channel.match(counter)
                    if m is None:
                        # Aggregated TCC counters
                        bucket = pmc_list["TCC"]
                    else:
                        # fake IP block for per channel TCC, keyed by the
                        # channel id; unknown channels get a fresh bucket
                        bucket = pmc_list["TCC2"].setdefault(m.group(1), [])

                # Insert unique pmc counters into their bucket
                if counter not in bucket:
                    bucket.append(counter)

    # sort the per channel counters, so that the same counter in all channels
    # can be aligned
    for channel_counters in pmc_list["TCC2"].values():
        channel_counters.sort()

    return pmc_list
def perfmon_emit(pmc_list, workload_dir, soc):
    """Write the coalesced counters to ``<workload_dir>/perfmon/pmc_perf.txt``.

    The counters are spread over as few ``pmc:`` lines as possible: each line
    takes at most ``perfmon_config[soc][block]`` counters from every IP block.
    Aggregated TCC counters are emitted first; once a line would carry none
    of them, per-channel TCC counters (``pmc_list["TCC2"]``) are emitted
    instead, the same slice for every channel. Empty ``gpu:``, ``range:`` and
    ``kernel:`` sections close the file.

    Args:
        pmc_list: Counter buckets as returned by ``perfmon_coalesce``.
        workload_dir: Workload directory containing the ``perfmon`` folder.
        soc: Key into ``perfmon_config`` selecting the target SoC.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"
    soc_config = perfmon_config[soc]
    tcc_channels = soc_config["TCC_channels"]
    tcc_limit = soc_config["TCC"]

    # Calculate the minimum number of iterations to save the pmc counters
    # non-TCC counters
    pmc_cnt = [
        len(pmc_list[key]) / soc_config[key]
        for key in pmc_list
        if key not in ["TCC", "TCC2"]
    ]

    # TCC counters
    tcc_cnt = len(pmc_list["TCC"]) / tcc_limit
    tcc2_cnt = (
        np.array([len(pmc_list["TCC2"][str(ch)]) for ch in range(tcc_channels)])
        / tcc_limit
    )

    # Total number of iterations to write pmc: counters lines
    niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))

    # Emit PMC counters into pmc config file
    with open(workload_perfmon_dir + "/pmc_perf.txt", "w") as fd:
        tcc2_index = 0
        for pass_index in range(niter):
            # Prefix
            line = "pmc: "

            # Add all non-TCC counters
            for key in pmc_list:
                if key not in ["TCC", "TCC2"]:
                    N = soc_config[key]
                    ip_counters = pmc_list[key][pass_index * N : pass_index * N + N]
                    if ip_counters:
                        line = line + " " + " ".join(ip_counters)

            # Add TCC aggregated counters
            N = tcc_limit
            tcc_counters = pmc_list["TCC"][pass_index * N : pass_index * N + N]

            if not tcc_counters:
                # Aggregated counters exhausted: emit the next slice of the
                # per-channel counters, for every channel
                for ch in range(tcc_channels):
                    tcc_counters += pmc_list["TCC2"][str(ch)][
                        tcc2_index * N : tcc2_index * N + N
                    ]
                tcc2_index += 1

            line = line + " " + " ".join(tcc_counters)
            fd.write(line + "\n")

        fd.write("\ngpu:\n")
        fd.write("range:\n")
        fd.write("kernel:\n")
def perfmon_filter(workload_dir, perfmon_dir, args):
    """Build the workload-specific ``pmc_perf.txt`` from reference perfmons.

    The workload directory is wiped (if present) and recreated with an empty
    ``perfmon`` folder. Reference perfmon files are collected from
    *perfmon_dir* and its ``<soc>`` subdirectory, optionally restricted to
    the IP blocks listed in ``args.ipblocks``, then coalesced and written
    back through ``perfmon_coalesce`` and ``perfmon_emit``.

    Args:
        workload_dir: Workload directory to (re)create.
        perfmon_dir: Directory holding the reference ``pmc_*perf*.txt`` files.
        args: Object providing ``target`` (SoC name) and ``ipblocks`` (list
            of IP block names, or None to take all perfmon files). The
            entries of ``args.ipblocks`` are lower-cased in place.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"
    soc = args.target

    # Initialize directories
    # TODO: Modify this so that data is appended to previous?
    if not os.path.isdir(workload_dir):
        os.makedirs(workload_dir)
    else:
        shutil.rmtree(workload_dir)

    os.makedirs(workload_perfmon_dir)

    ref_pmc_files_list = glob.glob(perfmon_dir + "/" + "pmc_*perf*.txt")
    ref_pmc_files_list += glob.glob(perfmon_dir + "/" + soc + "/pmc_*_perf*.txt")

    # Perfmon list filtering
    if args.ipblocks is not None:
        # Normalize the requested IP blocks in place (same list object)
        args.ipblocks[:] = [ip.lower() for ip in args.ipblocks]
        mpattern = re.compile("pmc_([a-zA-Z0-9_]+)_perf*")

        pmc_files_list = []
        for fname in ref_pmc_files_list:
            fbase = os.path.splitext(os.path.basename(fname))[0]
            # Files without an IP block in their name (e.g. pmc_perf.txt)
            # do not match the pattern and are skipped like any other
            # non-requested file
            m = mpattern.match(fbase)
            ip = m.group(1) if m is not None else None
            if ip in args.ipblocks:
                pmc_files_list.append(fname)
                print("fname: " + fbase + ": Added")
            else:
                print("fname: " + fbase + ": Skipped")
    else:
        # default: take all perfmons
        pmc_files_list = ref_pmc_files_list

    # Coalesce and writeback workload specific perfmon
    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
    perfmon_emit(pmc_list, workload_dir, soc)
def pmc_filter(workload_dir, perfmon_dir, soc):
    """Build the workload-specific ``pmc_perf.txt`` for the roofline perfmon.

    The ``<workload_dir>/perfmon`` folder is emptied (or created), then the
    counters of ``<perfmon_dir>/roofline/pmc_roof_perf.txt`` are coalesced
    and written back through ``perfmon_coalesce`` and ``perfmon_emit``.

    Args:
        workload_dir: Workload directory that holds the ``perfmon`` folder.
        perfmon_dir: Directory holding the reference perfmon files.
        soc: Key into ``perfmon_config`` selecting the target SoC.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"

    # Always leave an existing, empty perfmon directory behind: the coalesce
    # and emit steps below write into it, so removing a stale directory
    # without recreating it would make them fail.
    if os.path.isdir(workload_perfmon_dir):
        shutil.rmtree(workload_perfmon_dir)
    os.makedirs(workload_perfmon_dir)

    pmc_files_list = glob.glob(perfmon_dir + "/roofline/" + "pmc_roof_perf.txt")

    # Coalesce and writeback workload specific perfmon
    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
    perfmon_emit(pmc_list, workload_dir, soc)