diff --git a/src/omniperf b/src/omniperf index f3388bdf42..71208c1777 100755 --- a/src/omniperf +++ b/src/omniperf @@ -38,7 +38,7 @@ import warnings from parser import parse from utils import specs -from utils.perfagg import perfmon_filter, pmc_filter +from utils.perfagg import perfmon_filter, pmc_filter, pmc_perf_split, join_prof from utils import remove_workload from utils import csv_converter # Import workload from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline @@ -160,18 +160,22 @@ def isWorkloadEmpty(my_parser, path): ) -def replace_timestamps(workload_dir): +def replace_timestamps(workload_dir, log_file): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns: - df_pmc_perf = pd.read_csv(workload_dir + "/pmc_perf.csv") + # Update timestamps for all *.csv output files + for fname in glob.glob(workload_dir + "/" + "*.csv"): + df_pmc_perf = pd.read_csv(fname) - df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] - df_pmc_perf["EndNs"] = df_stamps["EndNs"] - df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False) + df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] + df_pmc_perf["EndNs"] = df_stamps["EndNs"] + df_pmc_perf.to_csv(fname, index=False) else: + warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps." warnings.warn( - "WARNING: Incomplete profiling data detected. Unable to update timestamps." 
+ warning ) + log_file.write(warning + "\n") def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): @@ -395,6 +399,9 @@ def characterize_app(args, VER): # Perfmon filtering pmc_filter(workload_dir, perfmon_dir, args.target) + # Separate pmc_perf runs + pmc_perf_split(workload_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", workload_dir + "/log.txt\n") @@ -403,7 +410,7 @@ def characterize_app(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -412,10 +419,11 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -424,17 +432,17 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) else: run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose) - # Close log - log.close() + # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -446,8 +454,16 @@ def characterize_app(args, VER): '"' + app_cmd + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) + + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, args.join_type, log, args.verbose) + + # Close log + log.close() ################################################ @@ -551,6 +567,9 @@ def omniperf_profile(args, VER): # Perfmon filtering perfmon_filter(workload_dir, perfmon_dir, args) + # Separate pmc_perf runs + pmc_perf_split(workload_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", 
workload_dir + "/log.txt\n") @@ -628,7 +647,7 @@ def omniperf_profile(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -637,10 +656,11 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -649,6 +669,7 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) @@ -656,7 +677,7 @@ def omniperf_profile(args, VER): run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose) # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -668,8 +689,13 @@ def omniperf_profile(args, VER): '"' + args.remaining + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) + + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, args.join_type, log, args.verbose) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/parser.py b/src/parser.py index 7d7b414a03..da018ba944 100644 --- a/src/parser.py +++ b/src/parser.py @@ -182,6 +182,14 @@ def parse(my_parser): required=False, help="\t\t\tDispatch ID filtering.", ) + profile_group.add_argument( + "--join-type", + metavar="", + required=False, + choices=["kernel", "grid"], + default="grid", + help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. 
# Modules required by the helpers below (additions to src/utils/perfagg.py).
import glob
import os
import re
import sys
import warnings

import pandas as pd


def test_df_column_equality(df):
    """Return True when every column of ``df`` matches its first column.

    The comparison is element-wise per row (``DataFrame.eq``), so NaN cells
    never compare equal. A frame without any columns is trivially consistent
    and yields True instead of raising ``IndexError``.
    """
    if df.shape[1] == 0:
        return True
    return df.eq(df.iloc[:, 0], axis=0).all(axis=1).all()


# The "test_" prefix would otherwise make pytest collect this helper as a test
# whenever the module is star-imported into a test file.
test_df_column_equality.__test__ = False


def _pmc_perf_run_index(path):
    """Sort key placing pmc_perf_<N>.csv files in numeric run order."""
    name = os.path.basename(path)
    m = re.match(r"^pmc_perf_(\d+)\.csv$", name)
    if m is None:
        # Unexpected names go last, ordered alphabetically
        return (1, 0, name)
    return (0, int(m.group(1)), name)


# joins disparate runs less dumbly than rocprof
def join_prof(workload_dir, join_type, log_file, verbose, out=None):
    """Join every ``pmc_perf_*.csv`` in ``workload_dir`` into a single CSV.

    Each per-run file is read and its rows are keyed by kernel dispatch; the
    runs are then inner-joined on that key so the counters collected in
    separate runs end up side by side.

    Parameters:
        workload_dir: directory holding the ``pmc_perf_*.csv`` inputs.
        join_type: ``"kernel"`` keys a dispatch by kernel name plus its
            occurrence count; ``"grid"`` keys it by kernel name, grid size
            (``grd``) and the occurrence count within that pair.
        log_file: writable text stream that receives warnings and status lines.
        verbose: when true, print/log a line per consistent column family and
            keep the per-run input files; otherwise the inputs are deleted.
        out: output path; defaults to ``<workload_dir>/pmc_perf.csv``.

    Exits the process with status 1 on an unrecognized ``join_type``.
    Returns None.
    """
    # Validate up front so a bad option is reported before any file is read
    if join_type not in ("kernel", "grid"):
        print("ERROR: Unrecognized --join-type")
        sys.exit(1)

    # Set default output directory if not specified
    if out is None:
        out = workload_dir + "/pmc_perf.csv"

    # glob order is unspecified; sort so the run supplying the un-suffixed
    # ("original") columns is the same on every invocation
    files = sorted(
        glob.glob(workload_dir + "/" + "pmc_perf_*.csv"), key=_pmc_perf_run_index
    )
    if not files:
        msg = "WARNING: No pmc_perf_*.csv files found in {}. Nothing to join.".format(
            workload_dir
        )
        warnings.warn(msg)
        log_file.write(msg + "\n")
        return

    df = None
    for i, file in enumerate(files):
        _df = pd.read_csv(file)
        if join_type == "kernel":
            occurrence = _df.groupby("KernelName").cumcount()
            _df["key"] = _df.KernelName + " - " + occurrence.astype(str)
        else:
            # The occurrence count restarts for every (KernelName, grd) pair,
            # so the grid size has to be part of the key; without it the key
            # repeats and the inner merge emits cross-product rows.
            occurrence = _df.groupby(["KernelName", "grd"]).cumcount()
            _df["key"] = (
                _df.KernelName
                + " - "
                + _df.grd.astype(str)
                + " - "
                + occurrence.astype(str)
            )

        if df is None:
            df = _df
        else:
            # join by unique index of kernel
            df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}"))

    # TODO: check for any mismatch in joins
    duplicate_cols = {
        "gpu": [col for col in df.columns if "gpu" in col],
        "grd": [col for col in df.columns if "grd" in col],
        "wgr": [col for col in df.columns if "wgr" in col],
        "lds": [col for col in df.columns if "lds" in col],
        "scr": [col for col in df.columns if "scr" in col],
        "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
        "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
        "sgpr": [col for col in df.columns if "sgpr" in col],
    }
    for name, cols in duplicate_cols.items():
        if not cols:
            # Column family absent from this output; nothing to compare
            continue
        if test_df_column_equality(df[cols]):
            if verbose:
                msg = "Successfully joined {} in pmc_perf.csv".format(name)
                print(msg)
                log_file.write(msg + "\n")
        else:
            msg = (
                "WARNING: Detected differing {} values while joining pmc_perf.csv".format(
                    name
                )
            )
            warnings.warn(msg)
            log_file.write(msg + "\n")

    # now, we can:
    #   A) throw away any of the "boring" duplicates
    dropped_markers = [
        # removed merged counters, keep original
        "gpu-id_",
        "grd_",
        "wgr_",
        "lds_",
        "scr_",
        "vgpr_",
        "sgpr_",
        "Index_",
        # un-mergable, remove all
        "queue-id",
        "queue-index",
        "pid",
        "tid",
        "fbar",
        "sig",
        "obj",
    ]
    df = df[[k for k in df.keys() if not any(m in k for m in dropped_markers)]]
    #   B) any timestamps that are _not_ the duration, which is the one we care
    #   about
    df = df[
        [
            k
            for k in df.keys()
            if not any(check in k for check in ["DispatchNs", "CompleteNs"])
        ]
    ]
    #   C) sanity check the name and key
    namekeys = [k for k in df.keys() if "KernelName" in k]
    assert len(namekeys), "no KernelName column survived the join"
    for k in namekeys[1:]:
        assert (df[namekeys[0]] == df[k]).all(), "kernel names differ across runs"
    df = df.drop(columns=namekeys[1:])

    # now collapse the per-run begin/end timestamps
    bkeys = [k for k in df.keys() if "Begin" in k]
    ekeys = [k for k in df.keys() if "End" in k]
    # compute mean begin and end timestamps
    endNs = df[ekeys].mean(axis=1)
    beginNs = df[bkeys].mean(axis=1)
    # and replace
    df = df.drop(columns=bkeys)
    df = df.drop(columns=ekeys)
    df["BeginNs"] = beginNs
    df["EndNs"] = endNs
    # finally, drop the join key
    df = df.drop(columns=["key"])
    # and save to file
    df.to_csv(out, index=False)
    # and delete old file(s)
    if not verbose:
        for file in files:
            os.remove(file)


def pmc_perf_split(workload_dir):
    """Split ``perfmon/pmc_perf.txt`` into one input file per ``pmc:`` line.

    Every non-comment line that starts with ``pmc:`` is written to its own
    ``perfmon/pmc_perf_<i>.txt`` (numbered from 0), followed by empty
    ``gpu:``, ``range:`` and ``kernel:`` entries. The original
    ``pmc_perf.txt`` is removed afterwards.
    """
    workload_perfmon_dir = workload_dir + "/perfmon"
    pmc_perf_path = workload_perfmon_dir + "/pmc_perf.txt"
    with open(pmc_perf_path, "r") as src:
        lines = src.read().splitlines()

    # Iterate over each line in pmc_perf.txt
    i = 0
    for line in lines:
        # Strip comments
        stext = line.split("#")[0].strip()
        if not stext:
            continue

        # all pmc counters start with "pmc:"
        if not stext.startswith("pmc:"):
            continue

        # Create separate file for each line
        with open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") as fd:
            fd.write(stext + "\n\n")
            fd.write("gpu:\n")
            fd.write("range:\n")
            fd.write("kernel:\n")

        i += 1

    # Remove old pmc_perf.txt input from perfmon dir
    os.remove(pmc_perf_path)