From 5cc49d6e90ac787186790ddee82679ab751f4855 Mon Sep 17 00:00:00 2001 From: ywang103-amd Date: Mon, 10 Feb 2025 17:59:03 -0500 Subject: [PATCH] fix crash for running rocprofv3 on mi100 (#557) * initial hack to fix for v3 getting stuck on mi100 because of -m parameter and missing counter csv file * proper formatting * refactored profiler option function to take soc arch * resolve missing step that caused error for profiler option * fix typo of arch name * change method of putting soc info into profiler option * isort and black format * add comment for the part that handles missing counter csv file * remove unnecessary import --------- Co-authored-by: YANG WANG [ROCm/rocprofiler-compute commit: 5ee37b33534f9e1b7031e4aa3cce20e39944117e] --- .../src/rocprof_compute_profile/profiler_base.py | 6 ++---- .../src/rocprof_compute_profile/profiler_rocprof_v1.py | 10 ++++++++-- .../src/rocprof_compute_profile/profiler_rocprof_v2.py | 10 ++++++++-- .../src/rocprof_compute_profile/profiler_rocprof_v3.py | 2 +- .../src/rocprof_compute_soc/soc_base.py | 6 ------ .../src/rocprof_compute_soc/soc_gfx908.py | 5 ----- projects/rocprofiler-compute/src/utils/utils.py | 9 ++++++++- 7 files changed, 27 insertions(+), 21 deletions(-) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index 68e92bb105..14e0e4a594 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -63,7 +63,7 @@ class RocProfCompute_Base: def get_args(self): return self.__args - def get_profiler_options(self, fname): + def get_profiler_options(self, fname, soc): """Fetch any version specific arguments required by profiler""" # assume no SoC specific options and return empty list by default return [] @@ -361,9 +361,7 @@ class RocProfCompute_Base: console_debug(output) console_log("profiling", "Current 
input file: %s" % fname) - # Fetch any SoC/profiler specific profiling options - options = self._soc.get_profiler_options() - options += self.get_profiler_options(fname) + options = self.get_profiler_options(fname, self._soc) if ( self.__profiler == "rocprofv1" or self.__profiler == "rocprofv2" diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v1.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v1.py index e2be4e2480..e4fa44cdfd 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v1.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v1.py @@ -38,10 +38,16 @@ class rocprof_v1_profiler(RocProfCompute_Base): or not self.get_args().roof_only ) - def get_profiler_options(self, fname): + def get_profiler_options(self, fname, soc): fbase = Path(fname).stem app_cmd = self.get_args().remaining - args = [ + + args = [] + # can be removed in the future. It supports gfx908 + v1 + if soc.get_arch() == "gfx908": + args += ["-m", soc.get_workload_perfmon_dir() + "/" + "metrics.xml"] + + args += [ # v1 requires request for timestamps "--timestamp", "on", diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v2.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v2.py index ae320b7c2d..4ed8b2ba9f 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v2.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v2.py @@ -39,10 +39,16 @@ class rocprof_v2_profiler(RocProfCompute_Base): or not self.get_args().roof_only ) - def get_profiler_options(self, fname): + def get_profiler_options(self, fname, soc): fbase = Path(fname).stem app_cmd = shlex.split(self.get_args().remaining) - args = [ + + args = [] + # can be removed in the future. 
It supports gfx908 + v2 + if soc.get_arch() == "gfx908": + args += ["-m", soc.get_workload_perfmon_dir() + "/" + "metrics.xml"] + + args += [ # v2 requires output directory argument "-d", self.get_args().path + "/" + "out", diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py index b257d75ae9..24267c9f29 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_rocprof_v3.py @@ -40,7 +40,7 @@ class rocprof_v3_profiler(RocProfCompute_Base): or not self.get_args().roof_only ) - def get_profiler_options(self, fname): + def get_profiler_options(self, fname, soc): app_cmd = shlex.split(self.get_args().remaining) trace_option = "--kernel-trace" rocprof_out_format = "json" diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py index 85af78cd66..88f5a22095 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_base.py @@ -97,12 +97,6 @@ class OmniSoC_Base: def get_compatible_profilers(self): return self.__compatible_profilers - @demarcate - def get_profiler_options(self): - """Fetch any SoC specific arguments required by the profiler""" - # assume no SoC specific options and return empty list by default - return [] - @demarcate def populate_mspec(self): from utils.specs import run, search, total_sqc, total_xcds diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py index 15371c035a..9cf8a46d8a 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py @@ -69,11 +69,6 @@ class 
gfx908_soc(OmniSoC_Base): self._mspec.max_mclk = 1200 self._mspec.cur_mclk = 1200 - @demarcate - def get_profiler_options(self): - # Mi100 requires a custom xml config - return ["-m", self.get_workload_perfmon_dir() + "/" + "metrics.xml"] - # ----------------------- # Required child methods # ----------------------- diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index 65699702d3..929767b76c 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -596,6 +596,9 @@ def run_prof( new_env = os.environ.copy() new_env["ROCPROFILER_INDIVIDUAL_XCC_MODE"] = "1" + is_timestamps = False + if path(fname).name == "timestamps.txt": + is_timestamps = True time_1 = time.time() # profile the app @@ -686,10 +689,14 @@ def run_prof( results_files_csv = glob.glob( workload_dir + "/out/pmc_1/*/*_converted.csv" ) - else: + elif is_timestamps: + # when the input is timestamps, we know counter csv file is not generated and will instead parse kernel trace file results_files_csv = glob.glob( workload_dir + "/out/pmc_1/*/*_kernel_trace.csv" ) + else: + # when the input is not for timestamps, and counter csv file is not generated, we assume failed rocprof run and will completely bypass the file generation and merging for current pmc + return else: console_error("The output file of rocprofv3 can only support json or csv!!!")