Optimizations to run_prof() utility

Signed-off-by: colramos-amd <colramos@amd.com>


[ROCm/rocprofiler-compute commit: a1b5c252e2]
Этот коммит содержится в:
colramos-amd
2024-01-11 12:52:27 -06:00
коммит произвёл Cole Ramos
родитель 5a1281e375
Коммит 3bc71d87e9
8 изменённых файлов: 90 добавлений и 46 удалений
+3 -3
Просмотреть файл
@@ -197,13 +197,13 @@ class Omniperf:
# instantiate desired profiler
if self.__profiler_mode == "rocprofv1":
from omniperf_profile.profiler_rocprof_v1 import rocprof_v1_profiler
profiler = rocprof_v1_profiler(self.__args, self.__profiler_mode, self.__soc)
profiler = rocprof_v1_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch])
elif self.__profiler_mode == "rocprofv2":
from omniperf_profile.profiler_rocprof_v2 import rocprof_v2_profiler
profiler = rocprof_v2_profiler(self.__args, self.__profiler_mode, self.__soc)
profiler = rocprof_v2_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch])
elif self.__profiler_mode == "rocscope":
from omniperf_profile.profiler_rocscope import rocscope_profiler
profiler = rocscope_profiler(self.__args, self.__profiler_mode, self.__soc)
profiler = rocscope_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch])
else:
logging.error("Unsupported profiler")
sys.exit(1)
+21 -6
Просмотреть файл
@@ -33,15 +33,20 @@ import config
import pandas as pd
class OmniProfiler_Base():
def __init__(self,args, profiler_mode,soc):
def __init__(self, args, profiler_mode, soc):
self.__args = args
self.__profiler = profiler_mode
self.__soc = soc
self._soc = soc # OmniSoC obj
self.__perfmon_dir = os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs")
def get_args(self):
return self.__args
def get_profiler_options(self, fname):
    """Return profiler-version specific arguments for the given input file.

    This base implementation contributes nothing; profiler subclasses
    override it to supply their own arguments. ``fname`` is accepted only
    to keep the signature uniform with those overrides.
    """
    # Hand back a brand-new list each call: the caller extends it in place.
    no_extra_options = list()
    return no_extra_options
@demarcate
def pmc_perf_split(self):
@@ -328,9 +333,20 @@ class OmniProfiler_Base():
else:
logging.debug(output)
logging.info("\nCurrent input file: %s" % fname)
if self.__profiler == "rocprofv1":
#TODO: Look back at run_prof() definition. We may want to separate this based on SoC
run_prof(fname, self.get_args().path, self.__perfmon_dir, self.__args.remaining, self.__args.target, self.__args.verbose)
options = self.get_profiler_options(fname)
options += self._soc.get_profiler_options()
print("options are ", options)
if self.__profiler == "rocprofv1" or self.__profiler == "rocprofv2":
run_prof(
fname=fname,
# workload_dir=self.get_args().path,
# perfmon_dir=self.__perfmon_dir,
# cmd=self.__args.remaining,
# target=self.__args.target,
profiler_options=options
)
elif self.__profiler == "rocscope":
run_rocscope(self.__args, fname)
@@ -355,4 +371,3 @@ class OmniProfiler_Base():
def test_df_column_equality(df):
return df.eq(df.iloc[:, 0], axis=0).all(1).all()
+14
Просмотреть файл
@@ -36,6 +36,19 @@ class rocprof_v1_profiler(OmniProfiler_Base):
self.ready_to_profile = (self.get_args().roof_only and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv"))
or not self.get_args().roof_only)
def get_profiler_options(self, fname):
    """Assemble the rocprof v1 specific arguments for one input file.

    The output file is named after ``fname`` (directory and extension
    stripped) and placed under the workload path taken from the parsed
    arguments. The application command comes from ``remaining``.
    """
    # Base name of the input file, without its extension
    stem, _ = os.path.splitext(os.path.basename(fname))
    workload_cmd = self.get_args().remaining
    # v1 requires csv extension on the output file
    csv_target = self.get_args().path + "/" + stem + ".csv"
    # v1 does require quotes on app cmd
    quoted_cmd = '"' + workload_cmd + '"'
    # v1 requires request for timestamps
    return ["--timestamp", "on", "-o", csv_target, quoted_cmd]
#-----------------------
# Required child methods
#-----------------------
@@ -54,6 +67,7 @@ class rocprof_v1_profiler(OmniProfiler_Base):
if self.ready_to_profile:
if self.get_args().roof_only:
logging.info("[roofline] Generating pmc_perf.csv")
# Log profiling options and setup filtering
super().run_profiling(version, prog)
else:
logging.info("[roofline] Detected existing pmc_perf.csv")
+31
Просмотреть файл
@@ -34,6 +34,18 @@ class rocprof_v2_profiler(OmniProfiler_Base):
self.ready_to_profile = (self.get_args().roof_only and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv"))
or not self.get_args().roof_only)
def get_profiler_options(self, fname):
    """Assemble the rocprof v2 specific arguments for one input file.

    The output name is ``fname`` with directory and extension stripped;
    the output directory is ``out`` under the workload path taken from the
    parsed arguments. The application command comes from ``remaining``.
    """
    # Base name of the input file, without its extension
    stem, _ = os.path.splitext(os.path.basename(fname))
    workload_cmd = self.get_args().remaining
    # v2 requires output directory argument
    out_dir = self.get_args().path + "/" + "out"
    return [
        "-d",
        out_dir,
        # v2 does not require csv extension
        "-o",
        stem,
        # v2 does not require quotes on cmd
        workload_cmd,
    ]
#-----------------------
# Required child methods
#-----------------------
@@ -56,6 +68,25 @@ class rocprof_v2_profiler(OmniProfiler_Base):
else:
logging.info("[roofline] Detected existing pmc_perf.csv")
# [Run] Get any SoC specific rocprof options
# Pass profiler name and throw error if not supported
soc_options = self._soc.get_rocprof_options(rocprof_version)
# [Run] Load any rocprof version rocprof options
# -i
# -d
# -o
profiler_options = [
"-i", fname,
"-d", workload_dir,
"-o", fbase,
cmd
]
# [Run] Call run_prof() util
@demarcate
def post_processing(self):
+1 -1
Просмотреть файл
@@ -33,7 +33,7 @@ class gfx906_soc (OmniSoC_Base):
soc = "gfx906"
self.set_soc(soc)
self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", soc))
# Per IP block max number of simulutaneous counters. GFX IP Blocks
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
"SQ": 8,
+5 -1
Просмотреть файл
@@ -33,7 +33,7 @@ class gfx908_soc (OmniSoC_Base):
soc = "gfx908"
self.set_soc(soc)
self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", soc))
# Per IP block max number of simulutaneous counters. GFX IP Blocks
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
"SQ": 8,
@@ -63,6 +63,10 @@ class gfx908_soc (OmniSoC_Base):
}
)
@demarcate
def get_profiler_options(self):
    """Return the extra profiler arguments specific to this SoC.

    Points the profiler at the ``metrics.xml`` file located in this SoC's
    perfmon directory via the ``-m`` flag.
    """
    # Mi100 requires a custom xml config
    metrics_xml = self.get_perfmon_dir() + "/" + "metrics.xml"
    return ["-m", metrics_xml]
#-----------------------
# Required child methods
+1 -1
Просмотреть файл
@@ -38,7 +38,7 @@ class gfx90a_soc (OmniSoC_Base):
self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "roofline"))
else:
self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", soc))
# Per IP block max number of simulutaneous counters. GFX IP Blocks
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
{
"SQ": 8,
+14 -34
Просмотреть файл
@@ -178,46 +178,26 @@ def capture_subprocess_output(subprocess_args):
return (success, output)
def run_prof(fname, profiler_options):
    """Run rocprof once for a single counter input file.

    Args:
        fname: Path of the input file handed to rocprof through ``-i``.
        profiler_options: Sequence of additional command-line arguments
            (profiler-version and SoC specific) appended after the
            standard options.

    The command output is written to the log at info level. When the
    subprocess reports failure, ``error(output)`` is called first
    (NOTE(review): ``error`` is defined elsewhere; presumably it aborts --
    confirm).
    """
    # Use a %-style placeholder: passing the value as a bare extra argument
    # makes the logging module fail to format the record.
    logging.debug("pmc file: %s", os.path.basename(fname))

    # standard rocprof options
    default_options = ["-i", fname]
    options = default_options + list(profiler_options)

    # profile the app; "-i fname" is already part of options, so it must not
    # be repeated when the command line is assembled
    success, output = capture_subprocess_output([rocprof_cmd] + options)
    if not success:
        error(output)

    # write rocprof output to logging
    logging.info(output)