Files
rocm-systems/src/utils/perfagg.py
T
coleramos425 44edef4635 Enhance logging and warning reporting
Signed-off-by: coleramos425 <colramos@amd.com>
2023-05-16 15:39:45 -05:00

464 строки
14 KiB
Python
Исполняемый файл
Исходник Ответственный История

Этот файл содержит невидимые символы Юникода
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import sys, os, pathlib, shutil, subprocess, argparse, glob, re
import numpy as np
import math
import warnings
import pandas as pd
prog = "omniperf"
# Per IP block max number of simulutaneous counters
# GFX IP Blocks
perfmon_config = {
"vega10": {
"SQ": 8,
"TA": 2,
"TD": 2,
"TCP": 4,
"TCC": 4,
"CPC": 2,
"CPF": 2,
"SPI": 2,
"GRBM": 2,
"GDS": 4,
"TCC_channels": 16,
},
"mi50": {
"SQ": 8,
"TA": 2,
"TD": 2,
"TCP": 4,
"TCC": 4,
"CPC": 2,
"CPF": 2,
"SPI": 2,
"GRBM": 2,
"GDS": 4,
"TCC_channels": 16,
},
"mi100": {
"SQ": 8,
"TA": 2,
"TD": 2,
"TCP": 4,
"TCC": 4,
"CPC": 2,
"CPF": 2,
"SPI": 2,
"GRBM": 2,
"GDS": 4,
"TCC_channels": 32,
},
"mi200": {
"SQ": 8,
"TA": 2,
"TD": 2,
"TCP": 4,
"TCC": 4,
"CPC": 2,
"CPF": 2,
"SPI": 2,
"GRBM": 2,
"GDS": 4,
"TCC_channels": 32,
},
}
def test_df_column_equality(df):
return df.eq(df.iloc[:, 0], axis=0).all(1).all()
# joins disparate runs less dumbly than rocprof
def join_prof(workload_dir, join_type, log_file, verbose, out=None):
# Set default output directory if not specified
if out == None:
out = workload_dir + "/pmc_perf.csv"
files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv")
df = None
for i, file in enumerate(files):
_df = pd.read_csv(file)
if join_type == "kernel":
key = _df.groupby("KernelName").cumcount()
elif join_type == "grid":
key = _df.groupby(["KernelName", "grd"]).cumcount()
else:
print("ERROR: Unrecognized --join-type")
sys.exit(1)
_df["key"] = _df.KernelName + " - " + key.astype(str)
if df is None:
df = _df
else:
# join by unique index of kernel
df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}"))
# TODO: check for any mismatch in joins
duplicate_cols = {
"gpu": [col for col in df.columns if "gpu" in col],
"grd": [col for col in df.columns if "grd" in col],
"wpr": [col for col in df.columns if "wgr" in col],
"lds": [col for col in df.columns if "lds" in col],
"scr": [col for col in df.columns if "scr" in col],
"arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
"accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
"spgr": [col for col in df.columns if "sgpr" in col],
}
for key, cols in duplicate_cols.items():
_df = df[cols]
if not test_df_column_equality(_df):
msg = (
"WARNING: Detected differing {} values while joining pmc_perf.csv".format(
key
)
)
warnings.warn(msg)
log_file.write(msg + "\n")
if test_df_column_equality(_df) and verbose:
msg = "Successfully joined {} in pmc_perf.csv".format(key)
print(msg)
log_file.write(msg + "\n")
# now, we can:
#   A) throw away any of the "boring" duplicats
df = df[
[
k
for k in df.keys()
if not any(
check in k
for check in [
# removed merged counters, keep original
"gpu-id_",
"grd_",
"wgr_",
"lds_",
"scr_",
"vgpr_",
"sgpr_",
"Index_",
# un-mergable, remove all
"queue-id",
"queue-index",
"pid",
"tid",
"fbar",
"sig",
"obj",
]
)
]
]
#   B) any timestamps that are _not_ the duration, which is the one we care
#   about
df = df[
[
k
for k in df.keys()
if not any(check in k for check in ["DispatchNs", "CompleteNs"])
]
]
#   C) sanity check the name and key
namekeys = [k for k in df.keys() if "KernelName" in k]
assert len(namekeys)
for k in namekeys[1:]:
assert (df[namekeys[0]] == df[k]).all()
df = df.drop(columns=namekeys[1:])
# now take the median of the durations
bkeys = []
ekeys = []
for k in df.keys():
if "Begin" in k:
bkeys.append(k)
if "End" in k:
ekeys.append(k)
# compute mean begin and end timestamps
endNs = df[ekeys].mean(axis=1)
beginNs = df[bkeys].mean(axis=1)
# and replace
df = df.drop(columns=bkeys)
df = df.drop(columns=ekeys)
df["BeginNs"] = beginNs
df["EndNs"] = endNs
# finally, join the drop key
df = df.drop(columns=["key"])
# and save to file
df.to_csv(out, index=False)
# and delete old file(s)
if not verbose:
for file in files:
os.remove(file)
def pmc_perf_split(workload_dir):
workload_perfmon_dir = workload_dir + "/perfmon"
lines = open(workload_perfmon_dir + "/pmc_perf.txt", "r").read().splitlines()
# Iterate over each line in pmc_perf.txt
mpattern = r"^pmc:(.*)"
i = 0
for line in lines:
# Verify no comments
stext = line.split("#")[0].strip()
if not stext:
continue
# all pmc counters start with "pmc:"
m = re.match(mpattern, stext)
if m is None:
continue
# Create separate file for each line
fd = open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w")
fd.write(stext + "\n\n")
fd.write("gpu:\n")
fd.write("range:\n")
fd.write("kernel:\n")
fd.close()
i += 1
# Remove old pmc_perf.txt input from perfmon dir
os.remove(workload_perfmon_dir + "/pmc_perf.txt")
def perfmon_coalesce(pmc_files_list, workload_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
# match pattern for pmc counters
mpattern = r"^pmc:(.*)"
pmc_list = dict(
[
("SQ", []),
("GRBM", []),
("TCP", []),
("TA", []),
("TD", []),
("TCC", []),
("SPI", []),
("CPC", []),
("CPF", []),
("GDS", []),
("TCC2", {}), # per-channel TCC perfmon
]
)
for ch in range(perfmon_config[soc]["TCC_channels"]):
pmc_list["TCC2"][str(ch)] = []
# Extract all PMC counters and store in separate buckets
for fname in pmc_files_list:
lines = open(fname, "r").read().splitlines()
for line in lines:
# Strip all comements, skip empty lines
stext = line.split("#")[0].strip()
if not stext:
continue
# all pmc counters start with "pmc:"
m = re.match(mpattern, stext)
if m is None:
continue
# we have found all the counters, store them in buckets
counters = m.group(1).split()
if "SQ_ACCUM_PREV_HIRES" in counters:
# save all level counters separately
nindex = counters.index("SQ_ACCUM_PREV_HIRES")
level_counter = counters[nindex - 1]
# Save to level counter file, file name = level counter name
fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w")
fd.write(stext + "\n\n")
fd.write("gpu:\n")
fd.write("range:\n")
fd.write("kernel:\n")
fd.close()
continue
# save normal pmc counters in matching buckets
for counter in counters:
IP_block = counter.split(sep="_")[0].upper()
# SQC and SQ belong to the IP block, coalesce them
if IP_block == "SQC":
IP_block = "SQ"
if IP_block != "TCC":
# Insert unique pmc counters into its bucket
if counter not in pmc_list[IP_block]:
pmc_list[IP_block].append(counter)
else:
# TCC counters processing
m = re.match(r"[\s\S]+\[(\d+)\]", counter)
if m is None:
# Aggregated TCC counters
if counter not in pmc_list[IP_block]:
pmc_list[IP_block].append(counter)
else:
# TCC channel ID
ch = m.group(1)
# fake IP block for per channel TCC
if str(ch) in pmc_list["TCC2"]:
# append unique counter into the channel
if counter not in pmc_list["TCC2"][str(ch)]:
pmc_list["TCC2"][str(ch)].append(counter)
else:
# initial counter in this channel
pmc_list["TCC2"][str(ch)] = [counter]
# sort the per channel counter, so that same counter in all channels can be aligned
for ch in range(perfmon_config[soc]["TCC_channels"]):
pmc_list["TCC2"][str(ch)].sort()
return pmc_list
def perfmon_emit(pmc_list, workload_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
# Calculate the minimum number of iteration to save the pmc counters
# non-TCC counters
pmc_cnt = [
len(pmc_list[key]) / perfmon_config[soc][key]
for key in pmc_list
if key not in ["TCC", "TCC2"]
]
# TCC counters
tcc_channels = perfmon_config[soc]["TCC_channels"]
tcc_cnt = len(pmc_list["TCC"]) / perfmon_config[soc]["TCC"]
tcc2_cnt = (
np.array([len(pmc_list["TCC2"][str(ch)]) for ch in range(tcc_channels)])
/ perfmon_config[soc]["TCC"]
)
# Total number iterations to write pmc: counters line
niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
# Emit PMC counters into pmc config file
fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
tcc2_index = 0
for iter in range(niter):
# Prefix
line = "pmc: "
# Add all non-TCC counters
for key in pmc_list:
if key not in ["TCC", "TCC2"]:
N = perfmon_config[soc][key]
ip_counters = pmc_list[key][iter * N : iter * N + N]
if ip_counters:
line = line + " " + " ".join(ip_counters)
# Add TCC counters
N = perfmon_config[soc]["TCC"]
tcc_counters = pmc_list["TCC"][iter * N : iter * N + N]
if not tcc_counters:
# TCC per-channel counters
for ch in range(perfmon_config[soc]["TCC_channels"]):
tcc_counters += pmc_list["TCC2"][str(ch)][
tcc2_index * N : tcc2_index * N + N
]
tcc2_index += 1
# TCC aggregated counters
line = line + " " + " ".join(tcc_counters)
fd.write(line + "\n")
fd.write("\ngpu:\n")
fd.write("range:\n")
fd.write("kernel:\n")
fd.close()
def perfmon_filter(workload_dir, perfmon_dir, args):
workload_perfmon_dir = workload_dir + "/perfmon"
soc = args.target
# Initialize directories
# TODO: Modify this so that data is appended to previous?
if not os.path.isdir(workload_dir):
os.makedirs(workload_dir)
else:
shutil.rmtree(workload_dir)
os.makedirs(workload_perfmon_dir)
ref_pmc_files_list = glob.glob(perfmon_dir + "/" + "pmc_*perf*.txt")
ref_pmc_files_list += glob.glob(perfmon_dir + "/" + soc + "/pmc_*_perf*.txt")
# Perfmon list filtering
if args.ipblocks != None:
for i in range(len(args.ipblocks)):
args.ipblocks[i] = args.ipblocks[i].lower()
mpattern = "pmc_([a-zA-Z0-9_]+)_perf*"
pmc_files_list = []
for fname in ref_pmc_files_list:
fbase = os.path.splitext(os.path.basename(fname))[0]
ip = re.match(mpattern, fbase).group(1)
if ip in args.ipblocks:
pmc_files_list.append(fname)
print("fname: " + fbase + ": Added")
else:
print("fname: " + fbase + ": Skipped")
else:
# default: take all perfmons
pmc_files_list = ref_pmc_files_list
# Coalesce and writeback workload specific perfmon
pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
perfmon_emit(pmc_list, workload_dir, soc)
def pmc_filter(workload_dir, perfmon_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
if not os.path.isdir(workload_perfmon_dir):
os.makedirs(workload_perfmon_dir)
else:
shutil.rmtree(workload_perfmon_dir)
ref_pmc_files_list = glob.glob(perfmon_dir + "/roofline/" + "pmc_roof_perf.txt")
# ref_pmc_files_list += glob.glob(perfmon_dir + "/" + soc + "/pmc_*_perf*.txt")
pmc_files_list = ref_pmc_files_list
# Coalesce and writeback workload specific perfmon
pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
perfmon_emit(pmc_list, workload_dir, soc)