Merge pull request #125 from AMDResearch/separate-pmc-perf
Manually join rocprof runs
Этот коммит содержится в:
+43
-17
@@ -38,7 +38,7 @@ import warnings
|
||||
|
||||
from parser import parse
|
||||
from utils import specs
|
||||
from utils.perfagg import perfmon_filter, pmc_filter
|
||||
from utils.perfagg import perfmon_filter, pmc_filter, pmc_perf_split, join_prof
|
||||
from utils import remove_workload
|
||||
from utils import csv_converter # Import workload
|
||||
from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline
|
||||
@@ -160,18 +160,22 @@ def isWorkloadEmpty(my_parser, path):
|
||||
)
|
||||
|
||||
|
||||
def replace_timestamps(workload_dir):
|
||||
def replace_timestamps(workload_dir, log_file):
|
||||
df_stamps = pd.read_csv(workload_dir + "/timestamps.csv")
|
||||
if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns:
|
||||
df_pmc_perf = pd.read_csv(workload_dir + "/pmc_perf.csv")
|
||||
# Update timestamps for all *.csv output files
|
||||
for fname in glob.glob(workload_dir + "/" + "*.csv"):
|
||||
df_pmc_perf = pd.read_csv(fname)
|
||||
|
||||
df_pmc_perf["BeginNs"] = df_stamps["BeginNs"]
|
||||
df_pmc_perf["EndNs"] = df_stamps["EndNs"]
|
||||
df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False)
|
||||
df_pmc_perf["BeginNs"] = df_stamps["BeginNs"]
|
||||
df_pmc_perf["EndNs"] = df_stamps["EndNs"]
|
||||
df_pmc_perf.to_csv(fname, index=False)
|
||||
else:
|
||||
warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps."
|
||||
warnings.warn(
|
||||
"WARNING: Incomplete profiling data detected. Unable to update timestamps."
|
||||
warning
|
||||
)
|
||||
log_file.write(warning + "\n")
|
||||
|
||||
|
||||
def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
|
||||
@@ -395,6 +399,9 @@ def characterize_app(args, VER):
|
||||
# Perfmon filtering
|
||||
pmc_filter(workload_dir, perfmon_dir, args.target)
|
||||
|
||||
# Separate pmc_perf runs
|
||||
pmc_perf_split(workload_dir)
|
||||
|
||||
# Set up a log file
|
||||
log = open(workload_dir + "/log.txt", "w")
|
||||
print("Log: ", workload_dir + "/log.txt\n")
|
||||
@@ -403,7 +410,7 @@ def characterize_app(args, VER):
|
||||
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
|
||||
# Kernel filtering (in-place replacement)
|
||||
if not args.kernel == None:
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
@@ -412,10 +419,11 @@ def characterize_app(args, VER):
|
||||
fname,
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
|
||||
# Dispatch filtering (inplace replacement)
|
||||
if not args.dispatch == None:
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
@@ -424,17 +432,17 @@ def characterize_app(args, VER):
|
||||
fname,
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
print(fname)
|
||||
if args.use_rocscope == True:
|
||||
run_rocscope(args, fname)
|
||||
else:
|
||||
run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)
|
||||
|
||||
# Close log
|
||||
log.close()
|
||||
|
||||
|
||||
# run again with timestamps
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
rocprof_cmd,
|
||||
# "-i", fname,
|
||||
@@ -446,8 +454,16 @@ def characterize_app(args, VER):
|
||||
'"' + app_cmd + '"',
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
# Update pmc_perf.csv timestamps
|
||||
replace_timestamps(workload_dir)
|
||||
replace_timestamps(workload_dir, log)
|
||||
|
||||
# Manually join each pmc_perf*.csv output
|
||||
if args.use_rocscope == False:
|
||||
join_prof(workload_dir, args.join_type, log, args.verbose)
|
||||
|
||||
# Close log
|
||||
log.close()
|
||||
|
||||
|
||||
################################################
|
||||
@@ -551,6 +567,9 @@ def omniperf_profile(args, VER):
|
||||
# Perfmon filtering
|
||||
perfmon_filter(workload_dir, perfmon_dir, args)
|
||||
|
||||
# Separate pmc_perf runs
|
||||
pmc_perf_split(workload_dir)
|
||||
|
||||
# Set up a log file
|
||||
log = open(workload_dir + "/log.txt", "w")
|
||||
print("Log: ", workload_dir + "/log.txt\n")
|
||||
@@ -628,7 +647,7 @@ def omniperf_profile(args, VER):
|
||||
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
|
||||
# Kernel filtering (in-place replacement)
|
||||
if not args.kernel == None:
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
@@ -637,10 +656,11 @@ def omniperf_profile(args, VER):
|
||||
fname,
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
|
||||
# Dispatch filtering (inplace replacement)
|
||||
if not args.dispatch == None:
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
@@ -649,6 +669,7 @@ def omniperf_profile(args, VER):
|
||||
fname,
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
print(fname)
|
||||
if args.use_rocscope == True:
|
||||
run_rocscope(args, fname)
|
||||
@@ -656,7 +677,7 @@ def omniperf_profile(args, VER):
|
||||
run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
|
||||
|
||||
# run again with timestamps
|
||||
run_subprocess(
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
rocprof_cmd,
|
||||
# "-i", fname,
|
||||
@@ -668,8 +689,13 @@ def omniperf_profile(args, VER):
|
||||
'"' + args.remaining + '"',
|
||||
]
|
||||
)
|
||||
log.write(output)
|
||||
# Update pmc_perf.csv timestamps
|
||||
replace_timestamps(workload_dir)
|
||||
replace_timestamps(workload_dir, log)
|
||||
|
||||
# Manually join each pmc_perf*.csv output
|
||||
if args.use_rocscope == False:
|
||||
join_prof(workload_dir, args.join_type, log, args.verbose)
|
||||
|
||||
# Generate sysinfo
|
||||
gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
|
||||
|
||||
@@ -182,6 +182,14 @@ def parse(my_parser):
|
||||
required=False,
|
||||
help="\t\t\tDispatch ID filtering.",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--join-type",
|
||||
metavar="",
|
||||
required=False,
|
||||
choices=["kernel", "grid"],
|
||||
default="grid",
|
||||
help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. By unique kernel name + grid size dispatches)",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--no-roof",
|
||||
required=False,
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
import sys, os, pathlib, shutil, subprocess, argparse, glob, re
|
||||
import numpy as np
|
||||
import math
|
||||
import warnings
|
||||
import pandas as pd
|
||||
|
||||
prog = "omniperf"
|
||||
|
||||
@@ -86,6 +88,164 @@ perfmon_config = {
|
||||
}
|
||||
|
||||
|
||||
def test_df_column_equality(df):
|
||||
return df.eq(df.iloc[:, 0], axis=0).all(1).all()
|
||||
|
||||
|
||||
# joins disparate runs less dumbly than rocprof
|
||||
def join_prof(workload_dir, join_type, log_file, verbose, out=None):
    """Join the per-run ``pmc_perf_*.csv`` files into one ``pmc_perf.csv``.

    Every ``pmc_perf_*.csv`` in ``workload_dir`` is read and inner-joined on
    a per-dispatch key. Columns duplicated by the join are checked for
    consistency and collapsed, Begin/End timestamps are averaged across the
    runs, and the result is written to ``out``.

    Args:
        workload_dir: Directory holding the ``pmc_perf_*.csv`` inputs.
        join_type: ``"kernel"`` keys dispatches by kernel name and
            occurrence; ``"grid"`` keys them by kernel name, grid size and
            occurrence. Any other value prints an error and exits.
        log_file: Writable text stream; only ``write()`` is called on it.
        verbose: When true, successful group joins are reported and the
            per-run input files are kept; otherwise they are deleted.
        out: Output CSV path. Defaults to ``<workload_dir>/pmc_perf.csv``.

    Returns:
        None. If no input file is found a warning is issued and logged, and
        nothing is written.
    """
    if out is None:
        out = workload_dir + "/pmc_perf.csv"

    # glob order is filesystem dependent; sort so the merge order and the
    # "_<i>" column suffixes are reproducible between runs.
    files = sorted(glob.glob(workload_dir + "/" + "pmc_perf_*.csv"))
    if not files:
        msg = "WARNING: No pmc_perf_*.csv files found. Unable to join rocprof runs."
        warnings.warn(msg)
        log_file.write(msg + "\n")
        return

    df = None
    for i, file in enumerate(files):
        _df = pd.read_csv(file)
        if join_type == "kernel":
            key = _df.groupby("KernelName").cumcount()
            _df["key"] = _df.KernelName + " - " + key.astype(str)
        elif join_type == "grid":
            key = _df.groupby(["KernelName", "grd"]).cumcount()
            # The counter restarts for every (KernelName, grd) pair, so the
            # grid size must be part of the key. Without it, one kernel
            # launched with two grid sizes yields duplicate keys and the
            # inner merge multiplies those rows.
            _df["key"] = (
                _df.KernelName + " - " + _df.grd.astype(str) + " - " + key.astype(str)
            )
        else:
            print("ERROR: Unrecognized --join-type")
            sys.exit(1)

        if df is None:
            df = _df
        else:
            # join by unique index of kernel
            df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}"))

    # TODO: check for any mismatch in joins
    # Each group gathers the original column plus its "_<i>" copies produced
    # by the merges; all copies are expected to hold identical values.
    duplicate_groups = (
        "gpu",
        "grd",
        "wgr",
        "lds",
        "scr",
        "arch_vgpr",
        "accum_vgpr",
        "sgpr",
    )
    for group in duplicate_groups:
        cols = [col for col in df.columns if group in col]
        if not cols:
            # This rocprof output does not carry the column; nothing to check.
            continue
        if not test_df_column_equality(df[cols]):
            msg = (
                "WARNING: Detected differing {} values while joining pmc_perf.csv".format(
                    group
                )
            )
            warnings.warn(msg)
            log_file.write(msg + "\n")
        elif verbose:
            msg = "Successfully joined {} in pmc_perf.csv".format(group)
            print(msg)
            log_file.write(msg + "\n")

    # now, we can:
    #   A) throw away any of the "boring" duplicates
    #   B) drop any timestamps that are _not_ the Begin/End pair we care about
    dropped_markers = [
        # removed merged counters, keep original
        "gpu-id_",
        "grd_",
        "wgr_",
        "lds_",
        "scr_",
        "vgpr_",
        "sgpr_",
        "Index_",
        # un-mergable, remove all
        "queue-id",
        "queue-index",
        "pid",
        "tid",
        "fbar",
        "sig",
        "obj",
        # timestamps other than Begin/End
        "DispatchNs",
        "CompleteNs",
    ]
    df = df[
        [k for k in df.keys() if not any(marker in k for marker in dropped_markers)]
    ]

    #   C) sanity check the name and key
    namekeys = [k for k in df.keys() if "KernelName" in k]
    assert len(namekeys), "no KernelName column found while joining pmc_perf.csv"
    for k in namekeys[1:]:
        assert (
            df[namekeys[0]] == df[k]
        ).all(), "kernel names differ between joined rocprof runs"
    df = df.drop(columns=namekeys[1:])

    # now average the timestamps collected by the individual runs
    bkeys = [k for k in df.keys() if "Begin" in k]
    ekeys = [k for k in df.keys() if "End" in k]
    # compute mean begin and end timestamps
    endNs = df[ekeys].mean(axis=1)
    beginNs = df[bkeys].mean(axis=1)
    # and replace
    df = df.drop(columns=bkeys)
    df = df.drop(columns=ekeys)
    df["BeginNs"] = beginNs
    df["EndNs"] = endNs
    # finally, drop the join key
    df = df.drop(columns=["key"])
    # and save to file
    df.to_csv(out, index=False)
    # and delete old file(s)
    if not verbose:
        for file in files:
            os.remove(file)
|
||||
|
||||
|
||||
def pmc_perf_split(workload_dir):
    """Split ``perfmon/pmc_perf.txt`` into one input file per ``pmc:`` line.

    Each non-comment line of ``<workload_dir>/perfmon/pmc_perf.txt`` that
    starts with ``pmc:`` is written to its own
    ``<workload_dir>/perfmon/pmc_perf_<i>.txt`` (numbered from 0 in file
    order), followed by empty ``gpu:``, ``range:`` and ``kernel:`` entries.
    The original ``pmc_perf.txt`` is removed afterwards.

    Args:
        workload_dir: Workload directory containing the ``perfmon`` folder.

    Raises:
        OSError: If ``pmc_perf.txt`` cannot be read or removed, or an output
            file cannot be written.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"
    source_path = workload_perfmon_dir + "/pmc_perf.txt"

    with open(source_path, "r") as source:
        lines = source.read().splitlines()

    # Strip trailing "#" comments and surrounding whitespace, then keep only
    # the counter lines; all pmc counters start with "pmc:".
    stripped = (line.split("#")[0].strip() for line in lines)
    pmc_lines = [text for text in stripped if text.startswith("pmc:")]

    # Create a separate file for each counter line
    for i, text in enumerate(pmc_lines):
        target_path = workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt"
        with open(target_path, "w") as fd:
            fd.write(text + "\n\n")
            fd.write("gpu:\n")
            fd.write("range:\n")
            fd.write("kernel:\n")

    # Remove old pmc_perf.txt input from perfmon dir
    os.remove(source_path)
|
||||
|
||||
|
||||
def perfmon_coalesce(pmc_files_list, workload_dir, soc):
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user