diff --git a/projects/rocprofiler-compute/src/omniperf_profile/profiler_base.py b/projects/rocprofiler-compute/src/omniperf_profile/profiler_base.py index 18fdd02d20..e841c13f96 100644 --- a/projects/rocprofiler-compute/src/omniperf_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/omniperf_profile/profiler_base.py @@ -106,7 +106,10 @@ class OmniProfiler_Base: if type(self.__args.path) == str: if out is None: out = self.__args.path + "/pmc_perf.csv" + # we sort so that we have a consistent ordering of files between runs + # regardless of the file-system, etc. files = glob.glob(self.__args.path + "/" + "pmc_perf_*.csv") + file.sort() elif type(self.__args.path) == list: files = self.__args.path else: @@ -339,6 +342,8 @@ class OmniProfiler_Base: disable_tqdm = False # Run profiling on each input file + # we sort so that we have a consistent ordering of files between runs + # regardless of the file-system, etc. input_files = glob.glob(self.get_args().path + "/perfmon/*.txt") input_files.sort() diff --git a/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py b/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py index 1e007035a7..8177dbff6f 100644 --- a/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py +++ b/projects/rocprofiler-compute/src/omniperf_soc/soc_base.py @@ -23,6 +23,7 @@ ##############################################################################el from abc import ABC, abstractmethod +from collections import OrderedDict import os import math import shutil @@ -212,10 +213,12 @@ class OmniSoC_Base: os.makedirs(workload_perfmon_dir) if not roofline_perfmon_only: - ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt") - ref_pmc_files_list += glob.glob( + # we sort so that we have a consistent ordering of files between runs + # regardless of the file-system, etc. + ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt")) + ref_pmc_files_list += sorted(glob.glob( self.__perfmon_dir + "/" + self.__arch + "/pmc_*_perf*.txt" - ) + )) # Perfmon list filtering if self.__args.ipblocks != None: @@ -237,7 +240,9 @@ class OmniSoC_Base: # default: take all perfmons pmc_files_list = ref_pmc_files_list else: - ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt") + # we sort so that we have a consistent ordering of files between runs + # regardless of the file-system, etc. + ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt")) pmc_files_list = ref_pmc_files_list # Coalesce and writeback workload specific perfmon @@ -272,7 +277,8 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir): # match pattern for pmc counters mpattern = r"^pmc:(.*)" - pmc_list = dict( + # ordered dict again to ensure consistent ordering between runs + pmc_list = OrderedDict( [ ("SQ", []), ("GRBM", []), @@ -284,7 +290,7 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir): ("CPC", []), ("CPF", []), ("GDS", []), - ("TCC2", {}), # per-channel TCC perfmon + ("TCC2", OrderedDict()), # per-channel TCC perfmon ] ) for ch in range(perfmon_config["TCC_channels"]): @@ -348,7 +354,8 @@ def update_pmc_bucket( ) if pmc_list is None: detected_external_call = True - pmc_list = dict( + # ordered dict again to ensure consistent ordering between runs + pmc_list = OrderedDict( [ ("SQ", []), ("GRBM", []), @@ -360,7 +367,7 @@ def update_pmc_bucket( ("CPC", []), ("CPF", []), ("GDS", []), - ("TCC2", {}), # per-channel TCC perfmon + ("TCC2", OrderedDict()), # per-channel TCC perfmon ] ) for ch in range(perfmon_config["TCC_channels"]): @@ -391,16 +398,25 @@ def update_pmc_bucket( if IP_block != "TCC": # Insert unique pmc counters into its bucket - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) - + # NOTE: we specifically do _not_ exclude multiple versions of + # the same counter, because some counters are particularly + # sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES). + # Often, the resulting metrics do not make sense if they + # are taken from different runs, see: + # https://github.com/ROCm/omniperf/issues/332 + pmc_list[IP_block].append(counter) else: # TCC counters processing m = re.match(r"[\s\S]+\[(\d+)\]", counter) if m is None: # Aggregated TCC counters - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) + # NOTE: we specifically do _not_ exclude multiple versions of + # the same counter, because some counters are particularly + # sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES). + # Often, the resulting metrics do not make sense if they + # are taken from different runs, see: + # https://github.com/ROCm/omniperf/issues/332 + pmc_list[IP_block].append(counter) else: # TCC channel ID @@ -409,8 +425,13 @@ def update_pmc_bucket( # fake IP block for per channel TCC if str(ch) in pmc_list["TCC2"]: # append unique counter into the channel - if counter not in pmc_list["TCC2"][str(ch)]: - pmc_list["TCC2"][str(ch)].append(counter) + # NOTE: we specifically do _not_ exclude multiple versions of + # the same counter, because some counters are particularly + # sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES). + # Often, the resulting metrics do not make sense if they + # are taken from different runs, see: + # https://github.com/ROCm/omniperf/issues/332 + pmc_list["TCC2"][str(ch)].append(counter) else: # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter]