From a9d82759cadff575dd6ee37b22b8c7cb20888208 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 5 May 2023 15:07:20 -0500 Subject: [PATCH 1/7] Implement custom merge utility for rocprof Signed-off-by: coleramos425 --- src/omniperf | 26 ++++++++++--- src/utils/perfagg.py | 89 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/src/omniperf b/src/omniperf index f3388bdf42..41fce2fbbe 100755 --- a/src/omniperf +++ b/src/omniperf @@ -38,7 +38,7 @@ import warnings from parser import parse from utils import specs -from utils.perfagg import perfmon_filter, pmc_filter +from utils.perfagg import perfmon_filter, pmc_filter, pmc_perf_split, join_prof from utils import remove_workload from utils import csv_converter # Import workload from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline @@ -163,11 +163,13 @@ def isWorkloadEmpty(my_parser, path): def replace_timestamps(workload_dir): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns: - df_pmc_perf = pd.read_csv(workload_dir + "/pmc_perf.csv") + # Update timestamps for all *.csv output files + for fname in glob.glob(workload_dir + "/" + "*.csv"): + df_pmc_perf = pd.read_csv(fname) - df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] - df_pmc_perf["EndNs"] = df_stamps["EndNs"] - df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False) + df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] + df_pmc_perf["EndNs"] = df_stamps["EndNs"] + df_pmc_perf.to_csv(fname, index=False) else: warnings.warn( "WARNING: Incomplete profiling data detected. Unable to update timestamps." @@ -395,6 +397,9 @@ def characterize_app(args, VER): # Perfmon filtering pmc_filter(workload_dir, perfmon_dir, args.target) + # Separate pmc_perf runs + pmc_perf_split(workload_dir, perfmon_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", workload_dir + "/log.txt\n") @@ -449,6 +454,10 @@ def characterize_app(args, VER): # Update pmc_perf.csv timestamps replace_timestamps(workload_dir) + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, workload_dir + "/pmc_perf_NEW.csv") + ################################################ # Profiling Helpers @@ -551,6 +560,9 @@ def omniperf_profile(args, VER): # Perfmon filtering perfmon_filter(workload_dir, perfmon_dir, args) + # Separate pmc_perf runs + pmc_perf_split(workload_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", workload_dir + "/log.txt\n") @@ -670,6 +682,10 @@ def omniperf_profile(args, VER): ) # Update pmc_perf.csv timestamps replace_timestamps(workload_dir) + + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, workload_dir + "/pmc_perf.csv") # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 0606b4dc4b..6dd5eb4f47 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -25,6 +25,7 @@ import sys, os, pathlib, shutil, subprocess, argparse, glob, re import numpy as np import math +import pandas as pd prog = "omniperf" @@ -85,6 +86,94 @@ perfmon_config = { }, } +# joins disparate runs less dumbly than rocprof +def join_prof(workload_dir, out): + files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") + df = None + + for i, file in enumerate(files): + #_df = parse_rocprof_kernels(file) + _df = pd.read_csv(file) + key = _df.groupby("KernelName").cumcount() + _df['key'] = _df.KernelName + ' - ' + key.astype(str) + + if df is None: + df = _df + else: + # join by unique index of kernel + df = pd.merge(df, _df, how='inner', on='key', suffixes=('', f'_{i}')) + # now, we can: + #   A) throw away any of the "boring" duplicats + df = df[[k for k in df.keys() if not any( + check in k for check in [ + 'gpu', 'queue-id', 'queue-index', 'pid', 'tid', 'grd', 'wgr', + 'lds', 'scr', 'vgpr', 'sgpr', 'fbar', 'sig', 'obj'])]] + #   B) any timestamps that are _not_ the duration, which is the one we care + #   about + df = df[[k for k in df.keys() if not any( + check in k for check in [ + 'stop', 'start', 'DispatchNs', 'CompleteNs'])]] + #   C) sanity check the name and key + namekeys = [k for k in df.keys() if 'KernelName' in k] + assert len(namekeys) + for k in namekeys[1:]: + assert (df[namekeys[0]] == df[k]).all() + df = df.drop(columns=namekeys[1:]) + # now take the median of the durations + dkeys = [k for k in df.keys() if 'duration' in k] + duration = df[dkeys].median(axis=1) + # compute min and max, just for sanity + min_duration = df[dkeys].min(axis=1) + max_duration = df[dkeys].max(axis=1) + std_duration = df[dkeys].std(axis=1) + mean_duration = df[dkeys].mean(axis=1) + # and replace + df = df.drop(columns=dkeys) + df['duration'] = duration + df['duration[max]'] = max_duration + df['duration[min]'] = min_duration + df['duration[std]'] = std_duration + df['duration[mean]'] = mean_duration + # finally, join the drop key + df = df.drop(columns=['key']) + # and save to file + df.to_csv(out, index=False) + # and delete old file(s) + for file in files: + os.remove(file) + +def pmc_perf_split(workload_dir): + workload_perfmon_dir = workload_dir + "/perfmon" + lines = open(workload_perfmon_dir + "/pmc_perf.txt", "r").read().splitlines() + + # Iterate over each line in pmc_perf.txt + mpattern = r"^pmc:(.*)" + i = 0 + for line in lines: + # Verify no comments + stext = line.split("#")[0].strip() + if not stext: + continue + + # all pmc counters start with "pmc:" + m = re.match(mpattern, stext) + if m is None: + continue + + # Create separate file for each line + fd = open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") + fd.write(stext + "\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + + i += 1 + + # Remove old pmc_perf.txt input from perfmon dir + os.remove(workload_perfmon_dir + "/pmc_perf.txt") + + def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" From 298271e1d083c75c0da11dea120a0f31fac553eb Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:25:05 -0500 Subject: [PATCH 2/7] Update mean timestamp calculation Signed-off-by: coleramos425 --- src/utils/perfagg.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 6dd5eb4f47..3c09a648ba 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -112,7 +112,7 @@ def join_prof(workload_dir, out): #   about df = df[[k for k in df.keys() if not any( check in k for check in [ - 'stop', 'start', 'DispatchNs', 'CompleteNs'])]] + 'DispatchNs', 'CompleteNs'])]] #   C) sanity check the name and key namekeys = [k for k in df.keys() if 'KernelName' in k] assert len(namekeys) @@ -120,20 +120,21 @@ def join_prof(workload_dir, out): assert (df[namekeys[0]] == df[k]).all() df = df.drop(columns=namekeys[1:]) # now take the median of the durations - dkeys = [k for k in df.keys() if 'duration' in k] - duration = df[dkeys].median(axis=1) - # compute min and max, just for sanity - min_duration = df[dkeys].min(axis=1) - max_duration = df[dkeys].max(axis=1) - std_duration = df[dkeys].std(axis=1) - mean_duration = df[dkeys].mean(axis=1) + bkeys = [] + ekeys = [] + for k in df.keys(): + if 'Begin' in k: + bkeys.append(k) + if 'End' in k: + ekeys.append(k) + # compute mean begin and end timestamps + endNs = df[ekeys].mean(axis=1) + beginNs = df[bkeys].mean(axis=1) # and replace - df = df.drop(columns=dkeys) - df['duration'] = duration - df['duration[max]'] = max_duration - df['duration[min]'] = min_duration - df['duration[std]'] = std_duration - df['duration[mean]'] = mean_duration + df= df.drop(columns=bkeys) + df= df.drop(columns=ekeys) + df['BeginNs'] = beginNs + df['EndNs'] = endNs # finally, join the drop key df = df.drop(columns=['key']) # and save to file From f499408c7b042120a8f6501de6546b21a70fddd8 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:55:45 -0500 Subject: [PATCH 3/7] Fix join_prof() call for --roof-only mode Signed-off-by: coleramos425 --- src/omniperf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omniperf b/src/omniperf index 41fce2fbbe..520af84468 100755 --- a/src/omniperf +++ b/src/omniperf @@ -398,7 +398,7 @@ def characterize_app(args, VER): pmc_filter(workload_dir, perfmon_dir, args.target) # Separate pmc_perf runs - pmc_perf_split(workload_dir, perfmon_dir) + pmc_perf_split(workload_dir) # Set up a log file log = open(workload_dir + "/log.txt", "w") @@ -456,7 +456,7 @@ def characterize_app(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf_NEW.csv") + join_prof(workload_dir, workload_dir + "/pmc_perf.csv") ################################################ From 8c173446d2909db48c6cfe024ab54205fbfef1e8 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:56:49 -0500 Subject: [PATCH 4/7] Comply to Python formatting Signed-off-by: coleramos425 --- src/utils/perfagg.py | 80 +++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 3c09a648ba..81537d6261 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -86,35 +86,61 @@ perfmon_config = { }, } + # joins disparate runs less dumbly than rocprof def join_prof(workload_dir, out): files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") df = None - + for i, file in enumerate(files): - #_df = parse_rocprof_kernels(file) + # _df = parse_rocprof_kernels(file) _df = pd.read_csv(file) key = _df.groupby("KernelName").cumcount() - _df['key'] = _df.KernelName + ' - ' + key.astype(str) - + _df["key"] = _df.KernelName + " - " + key.astype(str) + if df is None: df = _df else: # join by unique index of kernel - df = pd.merge(df, _df, how='inner', on='key', suffixes=('', f'_{i}')) + df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}")) # now, we can: - #   A) throw away any of the "boring" duplicats - df = df[[k for k in df.keys() if not any( - check in k for check in [ - 'gpu', 'queue-id', 'queue-index', 'pid', 'tid', 'grd', 'wgr', - 'lds', 'scr', 'vgpr', 'sgpr', 'fbar', 'sig', 'obj'])]] - #   B) any timestamps that are _not_ the duration, which is the one we care - #   about - df = df[[k for k in df.keys() if not any( - check in k for check in [ - 'DispatchNs', 'CompleteNs'])]] - #   C) sanity check the name and key - namekeys = [k for k in df.keys() if 'KernelName' in k] + #   A) throw away any of the "boring" duplicats + df = df[ + [ + k + for k in df.keys() + if not any( + check in k + for check in [ + "gpu", + "queue-id", + "queue-index", + "pid", + "tid", + "grd", + "wgr", + "lds", + "scr", + "vgpr", + "sgpr", + "fbar", + "sig", + "obj", + ] + ) + ] + ] + #   B) any timestamps that are _not_ the duration, which is the one we care + #   about + df = df[ + [ + k + for k in df.keys() + if not any(check in k for check in ["DispatchNs", "CompleteNs"]) + ] + ] + #   C) sanity check the name and key + namekeys = [k for k in df.keys() if "KernelName" in k] assert len(namekeys) for k in namekeys[1:]: assert (df[namekeys[0]] == df[k]).all() @@ -123,26 +149,27 @@ def join_prof(workload_dir, out): bkeys = [] ekeys = [] for k in df.keys(): - if 'Begin' in k: + if "Begin" in k: bkeys.append(k) - if 'End' in k: + if "End" in k: ekeys.append(k) # compute mean begin and end timestamps endNs = df[ekeys].mean(axis=1) beginNs = df[bkeys].mean(axis=1) # and replace - df= df.drop(columns=bkeys) - df= df.drop(columns=ekeys) - df['BeginNs'] = beginNs - df['EndNs'] = endNs + df = df.drop(columns=bkeys) + df = df.drop(columns=ekeys) + df["BeginNs"] = beginNs + df["EndNs"] = endNs # finally, join the drop key - df = df.drop(columns=['key']) + df = df.drop(columns=["key"]) # and save to file df.to_csv(out, index=False) # and delete old file(s) for file in files: os.remove(file) + def pmc_perf_split(workload_dir): workload_perfmon_dir = workload_dir + "/perfmon" lines = open(workload_perfmon_dir + "/pmc_perf.txt", "r").read().splitlines() @@ -160,7 +187,7 @@ def pmc_perf_split(workload_dir): m = re.match(mpattern, stext) if m is None: continue - + # Create separate file for each line fd = open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") fd.write(stext + "\n\n") @@ -170,12 +197,11 @@ def pmc_perf_split(workload_dir): fd.close() i += 1 - + # Remove old pmc_perf.txt input from perfmon dir os.remove(workload_perfmon_dir + "/pmc_perf.txt") - def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" From 419e2fcac6ef38bc6f96ff3d8d56c1773e4213ff Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 11 May 2023 13:00:30 -0500 Subject: [PATCH 5/7] Add grid size option and cmd line option for --join-type Signed-off-by: coleramos425 --- src/omniperf | 4 ++-- src/parser.py | 8 ++++++++ src/utils/perfagg.py | 16 ++++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/omniperf b/src/omniperf index 520af84468..c22aaa4e67 100755 --- a/src/omniperf +++ b/src/omniperf @@ -456,7 +456,7 @@ def characterize_app(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf.csv") + join_prof(workload_dir, args.join_type) ################################################ @@ -685,7 +685,7 @@ def omniperf_profile(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf.csv") + join_prof(workload_dir, args.join_type) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/parser.py b/src/parser.py index a18ad5d22b..65b29f12e0 100644 --- a/src/parser.py +++ b/src/parser.py @@ -182,6 +182,14 @@ def parse(my_parser): required=False, help="\t\t\tDispatch ID filtering.", ) + profile_group.add_argument( + "--join-type", + metavar="", + required=False, + choices=["kernel", "grid"], + default="grid", + help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. By unique kernel name + grid size dispatches)", + ) profile_group.add_argument( "--no-roof", required=False, diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 81537d6261..333498b2c6 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -88,15 +88,27 @@ perfmon_config = { # joins disparate runs less dumbly than rocprof -def join_prof(workload_dir, out): +def join_prof(workload_dir, join_type, out=None): + # Set default output directory if not specified + if out == None: + out = workload_dir + "/pmc_perf.csv" files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") df = None for i, file in enumerate(files): # _df = parse_rocprof_kernels(file) + _df = pd.read_csv(file) key = _df.groupby("KernelName").cumcount() - _df["key"] = _df.KernelName + " - " + key.astype(str) + if join_type == "kernel": + _df["key"] = _df.KernelName + " - " + key.astype(str) + elif join_type == "grid": + _df["key"] = ( + _df.KernelName + " - " + key.astype(str) + " - " + _df.grd.astype(str) + ) + else: + print("ERROR: Unrecognized --join-type") + sys.exit(1) if df is None: df = _df From 444102d92726baf286d1cb321fc2eb3f1bc30ee3 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 11 May 2023 14:04:02 -0500 Subject: [PATCH 6/7] Redefine boring merge values Signed-off-by: coleramos425 --- src/utils/perfagg.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 333498b2c6..8d77ff12d5 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -124,17 +124,17 @@ def join_prof(workload_dir, join_type, out=None): if not any( check in k for check in [ - "gpu", + # "gpu", "queue-id", "queue-index", "pid", "tid", - "grd", - "wgr", - "lds", - "scr", - "vgpr", - "sgpr", + # "grd", + # "wgr", + # "lds", + # "scr", + # "vgpr", + # "sgpr", "fbar", "sig", "obj", From 44edef4635c18ee3f2bb764f7078db05dfbbecd6 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 16 May 2023 15:39:45 -0500 Subject: [PATCH 7/7] Enhance logging and warning reporting Signed-off-by: coleramos425 --- src/omniperf | 38 +++++++++++++++---------- src/utils/perfagg.py | 66 ++++++++++++++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 31 deletions(-) diff --git a/src/omniperf b/src/omniperf index c22aaa4e67..71208c1777 100755 --- a/src/omniperf +++ b/src/omniperf @@ -160,7 +160,7 @@ def isWorkloadEmpty(my_parser, path): ) -def replace_timestamps(workload_dir): +def replace_timestamps(workload_dir, log_file): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns: # Update timestamps for all *.csv output files @@ -171,9 +171,11 @@ def replace_timestamps(workload_dir): df_pmc_perf["EndNs"] = df_stamps["EndNs"] df_pmc_perf.to_csv(fname, index=False) else: + warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps." warnings.warn( - "WARNING: Incomplete profiling data detected. Unable to update timestamps." + warning ) + log_file.write(warning + "\n") def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): @@ -408,7 +410,7 @@ def characterize_app(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -417,10 +419,11 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -429,17 +432,17 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) else: run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose) - # Close log - log.close() + # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -451,12 +454,16 @@ def characterize_app(args, VER): '"' + app_cmd + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, args.join_type) + join_prof(workload_dir, args.join_type, log, args.verbose) + + # Close log + log.close() ################################################ @@ -640,7 +647,7 @@ def omniperf_profile(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -649,10 +656,11 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -661,6 +669,7 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) @@ -668,7 +677,7 @@ def omniperf_profile(args, VER): run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose) # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -680,12 +689,13 @@ def omniperf_profile(args, VER): '"' + args.remaining + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, args.join_type) + join_prof(workload_dir, args.join_type, log, args.verbose) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 8d77ff12d5..fa14a60b02 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -25,6 +25,7 @@ import sys, os, pathlib, shutil, subprocess, argparse, glob, re import numpy as np import math +import warnings import pandas as pd prog = "omniperf" @@ -87,8 +88,12 @@ perfmon_config = { } +def test_df_column_equality(df): + return df.eq(df.iloc[:, 0], axis=0).all(1).all() + + # joins disparate runs less dumbly than rocprof -def join_prof(workload_dir, join_type, out=None): +def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Set default output directory if not specified if out == None: out = workload_dir + "/pmc_perf.csv" @@ -96,25 +101,48 @@ def join_prof(workload_dir, join_type, out=None): df = None for i, file in enumerate(files): - # _df = parse_rocprof_kernels(file) - _df = pd.read_csv(file) - key = _df.groupby("KernelName").cumcount() if join_type == "kernel": - _df["key"] = _df.KernelName + " - " + key.astype(str) + key = _df.groupby("KernelName").cumcount() elif join_type == "grid": - _df["key"] = ( - _df.KernelName + " - " + key.astype(str) + " - " + _df.grd.astype(str) - ) + key = _df.groupby(["KernelName", "grd"]).cumcount() else: print("ERROR: Unrecognized --join-type") sys.exit(1) + _df["key"] = _df.KernelName + " - " + key.astype(str) if df is None: df = _df else: # join by unique index of kernel df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}")) + + # TODO: check for any mismatch in joins + duplicate_cols = { + "gpu": [col for col in df.columns if "gpu" in col], + "grd": [col for col in df.columns if "grd" in col], + "wpr": [col for col in df.columns if "wgr" in col], + "lds": [col for col in df.columns if "lds" in col], + "scr": [col for col in df.columns if "scr" in col], + "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], + "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], + "spgr": [col for col in df.columns if "sgpr" in col], + } + for key, cols in duplicate_cols.items(): + _df = df[cols] + if not test_df_column_equality(_df): + msg = ( + "WARNING: Detected differing {} values while joining pmc_perf.csv".format( + key + ) + ) + warnings.warn(msg) + log_file.write(msg + "\n") + if test_df_column_equality(_df) and verbose: + msg = "Successfully joined {} in pmc_perf.csv".format(key) + print(msg) + log_file.write(msg + "\n") + # now, we can: #   A) throw away any of the "boring" duplicats df = df[ @@ -124,17 +152,20 @@ def join_prof(workload_dir, join_type, out=None): if not any( check in k for check in [ - # "gpu", + # removed merged counters, keep original + "gpu-id_", + "grd_", + "wgr_", + "lds_", + "scr_", + "vgpr_", + "sgpr_", + "Index_", + # un-mergable, remove all "queue-id", "queue-index", "pid", "tid", - # "grd", - # "wgr", - # "lds", - # "scr", - # "vgpr", - # "sgpr", "fbar", "sig", "obj", @@ -178,8 +209,9 @@ def join_prof(workload_dir, join_type, out=None): # and save to file df.to_csv(out, index=False) # and delete old file(s) - for file in files: - os.remove(file) + if not verbose: + for file in files: + os.remove(file) def pmc_perf_split(workload_dir):