From 89f91b59bface639a88444ea3bee2ab1247221d6 Mon Sep 17 00:00:00 2001 From: Keith Lowery Date: Wed, 9 Nov 2022 11:28:00 -0600 Subject: [PATCH] Added a new feature --i-feel-lucky that discovers and profiles only the top 5 kernels Signed-off-by: Keith Lowery Signed-off-by: coleramos425 [ROCm/rocprofiler-compute commit: 4c6a64f4e569c0ce2cca942508d922e20513337c] --- projects/rocprofiler-compute/src/omniperf | 41 +++++++++++++++++++++- projects/rocprofiler-compute/src/parser.py | 9 +++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/projects/rocprofiler-compute/src/omniperf b/projects/rocprofiler-compute/src/omniperf index d483f0bc75..fd3b705767 100755 --- a/projects/rocprofiler-compute/src/omniperf +++ b/projects/rocprofiler-compute/src/omniperf @@ -318,6 +318,7 @@ def omniperf_profile(args): print("Command: ", args.remaining) print("Kernel Selection: ", args.kernel) print("Dispatch Selection: ", args.dispatch) + if args.ipblocks == None: print("IP Blocks: All") else: @@ -326,10 +327,48 @@ def omniperf_profile(args): # Set up directories workload_dir = args.path + "/" + args.name + "/" + args.target perfmon_dir = str(OMNIPERF_HOME) + "/perfmon_pub" + + if not args.lucky == None and args.lucky == True: + print("You're feeling lucky - only profiling top N kernels") + #look for whether workload_dir exists - create if not + try: + os.makedirs(workload_dir, exist_ok = True) + except Exception as e: + print("Unable to create workload directory: ", workload_dir) + print(e) + sys.exit(1) + subprocess.run( + [ + rocprof_cmd, + # "-i", fname, + # "-m", perfmon_dir + "/" + "metrics.xml", + "--timestamp", + "on", + "-o", + workload_dir + "/" + "timestamps.csv", + '"' + args.remaining + '"', + ] + ) + df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") + df_elapsed = df_stamps['EndNs'] - df_stamps['BeginNs'] + df_stamps['Duration'] = df_elapsed + if args.dispatch == None: + args.dispatch = list(map(lambda x: str(x) + ':' + str(x), df_stamps.sort_values(by=['Duration'], ascending=False)['Index'].iloc[0:5].tolist())) + else: + ids = df_stamps.sort_values(by=['Duration'], ascending=False)['Index'].iloc[0:5].tolist() + dispatch = {} + for d in ids: + dispatch[d] = d + for d in args.dispatch: + dispatch[d] = d + args.dispatch = list(map(lambda x: str(x) + ':' + str(x),dispatch.values())) + + # Perfmon filtering perfmon_filter(workload_dir, perfmon_dir, args) + # Workload profiling for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) @@ -351,7 +390,7 @@ def omniperf_profile(args): "sed", "-i", "-r", - "s%^(range:).*%" + "range: " + ",".join(args.dispatch) + "%g", + "s%^(range:).*%" + "range: " + " ".join(args.dispatch) + "%g", fname, ] ) diff --git a/projects/rocprofiler-compute/src/parser.py b/projects/rocprofiler-compute/src/parser.py index 6559e7dc0b..ac62fd2995 100644 --- a/projects/rocprofiler-compute/src/parser.py +++ b/projects/rocprofiler-compute/src/parser.py @@ -116,6 +116,15 @@ def parse(my_parser): default=None, help="\t\t\tKernel filtering.", ) + profile_group.add_argument( + "-l", + "--i-feel-lucky", + required=False, + default=False, + action="store_true", + dest="lucky", + help="\t\t\tProfile only the most time consuming kernels.", + ) profile_group.add_argument( "-b", "--ipblocks",