From 44edef4635c18ee3f2bb764f7078db05dfbbecd6 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Tue, 16 May 2023 15:39:45 -0500
Subject: [PATCH] Enhance logging and warning reporting

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf         | 38 +++++++++++++++----------
 src/utils/perfagg.py | 66 ++++++++++++++++++++++++++++++++------------
 2 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index c22aaa4e67..71208c1777 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -160,7 +160,7 @@ def isWorkloadEmpty(my_parser, path):
         )
 
 
-def replace_timestamps(workload_dir):
+def replace_timestamps(workload_dir, log_file):
     df_stamps = pd.read_csv(workload_dir + "/timestamps.csv")
     if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns:
         # Update timestamps for all *.csv output files
@@ -171,9 +171,11 @@ def replace_timestamps(workload_dir):
             df_pmc_perf["EndNs"] = df_stamps["EndNs"]
             df_pmc_perf.to_csv(fname, index=False)
     else:
+        warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps."
         warnings.warn(
-            "WARNING: Incomplete profiling data detected. Unable to update timestamps."
+            warning
         )
+        log_file.write(warning + "\n")
 
 
 def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
@@ -408,7 +410,7 @@ def characterize_app(args, VER):
     for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
         # Kernel filtering (in-place replacement)
         if not args.kernel == None:
-            run_subprocess(
+            success, output = capture_subprocess_output(
                 [
                     "sed",
                     "-i",
@@ -417,10 +419,11 @@ def characterize_app(args, VER):
                     fname,
                 ]
             )
+            log.write(output)
 
         # Dispatch filtering (inplace replacement)
         if not args.dispatch == None:
-            run_subprocess(
+            success, output = capture_subprocess_output(
                 [
                     "sed",
                     "-i",
@@ -429,17 +432,17 @@ def characterize_app(args, VER):
                     fname,
                 ]
             )
+            log.write(output)
         print(fname)
         if args.use_rocscope == True:
             run_rocscope(args, fname)
         else:
             run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)
     
-    # Close log
-    log.close()
+    
 
     # run again with timestamps
-    run_subprocess(
+    success, output = capture_subprocess_output(
         [
             rocprof_cmd,
             # "-i", fname,
@@ -451,12 +454,16 @@ def characterize_app(args, VER):
             '"' + app_cmd + '"',
         ]
     )
+    log.write(output)
     # Update pmc_perf.csv timestamps
-    replace_timestamps(workload_dir)
+    replace_timestamps(workload_dir, log)
 
     # Manually join each pmc_perf*.csv output
     if args.use_rocscope == False:
-        join_prof(workload_dir, args.join_type)
+        join_prof(workload_dir, args.join_type, log, args.verbose)
+
+    # Close log
+    log.close()
 
 
 ################################################
@@ -640,7 +647,7 @@ def omniperf_profile(args, VER):
         for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
             # Kernel filtering (in-place replacement)
             if not args.kernel == None:
-                run_subprocess(
+                success, output = capture_subprocess_output(
                     [
                         "sed",
                         "-i",
@@ -649,10 +656,11 @@ def omniperf_profile(args, VER):
                         fname,
                     ]
                 )
+                log.write(output)
 
             # Dispatch filtering (inplace replacement)
             if not args.dispatch == None:
-                run_subprocess(
+                success, output = capture_subprocess_output(
                     [
                         "sed",
                         "-i",
@@ -661,6 +669,7 @@ def omniperf_profile(args, VER):
                         fname,
                     ]
                 )
+                log.write(output)
             print(fname)
             if args.use_rocscope == True:
                 run_rocscope(args, fname)
@@ -668,7 +677,7 @@ def omniperf_profile(args, VER):
                 run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
 
         # run again with timestamps
-        run_subprocess(
+        success, output = capture_subprocess_output(
             [
                 rocprof_cmd,
                 # "-i", fname,
@@ -680,12 +689,13 @@ def omniperf_profile(args, VER):
                 '"' + args.remaining + '"',
             ]
         )
+        log.write(output)
         # Update pmc_perf.csv timestamps
-        replace_timestamps(workload_dir)
+        replace_timestamps(workload_dir, log)
         
         # Manually join each pmc_perf*.csv output
         if args.use_rocscope == False:
-            join_prof(workload_dir, args.join_type)
+            join_prof(workload_dir, args.join_type, log, args.verbose)
 
     # Generate sysinfo
     gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 8d77ff12d5..fa14a60b02 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -25,6 +25,7 @@
 import sys, os, pathlib, shutil, subprocess, argparse, glob, re
 import numpy as np
 import math
+import warnings
 import pandas as pd
 
 prog = "omniperf"
@@ -87,8 +88,12 @@ perfmon_config = {
 }
 
 
+def test_df_column_equality(df):
+    return df.shape[1] == 0 or df.eq(df.iloc[:, 0], axis=0).all(1).all()  # no columns -> True (iloc[:, 0] would raise)
+
+
 # joins disparate runs less dumbly than rocprof
-def join_prof(workload_dir, join_type, out=None):
+def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     # Set default output directory if not specified
     if out == None:
         out = workload_dir + "/pmc_perf.csv"
@@ -96,25 +101,48 @@ def join_prof(workload_dir, join_type, out=None):
     df = None
 
     for i, file in enumerate(files):
-        # _df = parse_rocprof_kernels(file)
-
         _df = pd.read_csv(file)
-        key = _df.groupby("KernelName").cumcount()
         if join_type == "kernel":
-            _df["key"] = _df.KernelName + " - " + key.astype(str)
+            key = _df.groupby("KernelName").cumcount()
         elif join_type == "grid":
-            _df["key"] = (
-                _df.KernelName + " - " + key.astype(str) + " - " + _df.grd.astype(str)
-            )
+            key = _df.grd.astype(str) + " - " + _df.groupby(["KernelName", "grd"]).cumcount().astype(str)  # grd must be in the key to keep it unique
         else:
             print("ERROR: Unrecognized --join-type")
             sys.exit(1)
 
+        _df["key"] = _df.KernelName + " - " + key.astype(str)
         if df is None:
             df = _df
         else:
             # join by unique index of kernel
             df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}"))
+
+    # Dispatch metadata repeated by the merge should agree across runs; report any that differ
+    duplicate_cols = {
+        "gpu": [col for col in df.columns if "gpu" in col],
+        "grd": [col for col in df.columns if "grd" in col],
+        "wgr": [col for col in df.columns if "wgr" in col],
+        "lds": [col for col in df.columns if "lds" in col],
+        "scr": [col for col in df.columns if "scr" in col],
+        "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
+        "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
+        "sgpr": [col for col in df.columns if "sgpr" in col],
+    }
+    for key, cols in duplicate_cols.items():
+        if not cols:
+            # no matching columns in the joined data; nothing to compare
+            continue
+        # evaluate once; both branches below reuse the result
+        cols_match = test_df_column_equality(df[cols])
+        if not cols_match:
+            msg = f"WARNING: Detected differing {key} values while joining pmc_perf.csv"
+            warnings.warn(msg)
+            log_file.write(msg + "\n")
+        elif verbose:
+            msg = "Successfully joined {} in pmc_perf.csv".format(key)
+            print(msg)
+            log_file.write(msg + "\n")
+
     # now, we can:
     #   A) throw away any of the "boring" duplicats
     df = df[
@@ -124,17 +152,20 @@ def join_prof(workload_dir, join_type, out=None):
             if not any(
                 check in k
                 for check in [
-                    # "gpu",
+                    # drop the suffixed duplicates created by the merge, keep the original column
+                    "gpu-id_",
+                    "grd_",
+                    "wgr_",
+                    "lds_",
+                    "scr_",
+                    "vgpr_",
+                    "sgpr_",
+                    "Index_",
+                    # un-mergable, remove all
                     "queue-id",
                     "queue-index",
                     "pid",
                     "tid",
-                    # "grd",
-                    # "wgr",
-                    # "lds",
-                    # "scr",
-                    # "vgpr",
-                    # "sgpr",
                     "fbar",
                     "sig",
                     "obj",
@@ -178,8 +209,9 @@ def join_prof(workload_dir, join_type, out=None):
     # and save to file
     df.to_csv(out, index=False)
     # and delete old file(s)
-    for file in files:
-        os.remove(file)
+    if not verbose:
+        for file in files:
+            os.remove(file)
 
 
 def pmc_perf_split(workload_dir):