Implement deterministic ordering of perfmon

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>


[ROCm/rocprofiler-compute commit: 97af96e435]
Этот коммит содержится в:
Nicholas Curtis
2024-03-22 11:06:30 -04:00
коммит произвёл Cole Ramos
родитель e55b9b1d34
Коммит 3ff4117c4e
2 изменённых файлов: 41 добавлений и 15 удалений
+5
Просмотреть файл
@@ -106,7 +106,10 @@ class OmniProfiler_Base:
if type(self.__args.path) == str:
if out is None:
out = self.__args.path + "/pmc_perf.csv"
# we sort so that we have a consistent ordering of files between runs
# regardless of the file-system, etc.
files = glob.glob(self.__args.path + "/" + "pmc_perf_*.csv")
file.sort()
elif type(self.__args.path) == list:
files = self.__args.path
else:
@@ -339,6 +342,8 @@ class OmniProfiler_Base:
disable_tqdm = False
# Run profiling on each input file
# we sort so that we have a consistent ordering of files between runs
# regardless of the file-system, etc.
input_files = glob.glob(self.get_args().path + "/perfmon/*.txt")
input_files.sort()
+36 -15
Просмотреть файл
@@ -23,6 +23,7 @@
##############################################################################el
from abc import ABC, abstractmethod
from collections import OrderedDict
import os
import math
import shutil
@@ -212,10 +213,12 @@ class OmniSoC_Base:
os.makedirs(workload_perfmon_dir)
if not roofline_perfmon_only:
ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt")
ref_pmc_files_list += glob.glob(
# we sort so that we have a consistent ordering of files between runs
# regardless of the file-system, etc.
ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt"))
ref_pmc_files_list += sorted(glob.glob(
self.__perfmon_dir + "/" + self.__arch + "/pmc_*_perf*.txt"
)
))
# Perfmon list filtering
if self.__args.ipblocks != None:
@@ -237,7 +240,9 @@ class OmniSoC_Base:
# default: take all perfmons
pmc_files_list = ref_pmc_files_list
else:
ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt")
# we sort so that we have a consistent ordering of files between runs
# regardless of the file-system, etc.
ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt"))
pmc_files_list = ref_pmc_files_list
# Coalesce and writeback workload specific perfmon
@@ -272,7 +277,8 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir):
# match pattern for pmc counters
mpattern = r"^pmc:(.*)"
pmc_list = dict(
# ordered dict again to ensure consistent ordering between runs
pmc_list = OrderedDict(
[
("SQ", []),
("GRBM", []),
@@ -284,7 +290,7 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir):
("CPC", []),
("CPF", []),
("GDS", []),
("TCC2", {}), # per-channel TCC perfmon
("TCC2", OrderedDict()), # per-channel TCC perfmon
]
)
for ch in range(perfmon_config["TCC_channels"]):
@@ -348,7 +354,8 @@ def update_pmc_bucket(
)
if pmc_list is None:
detected_external_call = True
pmc_list = dict(
# ordered dict again to ensure consistent ordering between runs
pmc_list = OrderedDict(
[
("SQ", []),
("GRBM", []),
@@ -360,7 +367,7 @@ def update_pmc_bucket(
("CPC", []),
("CPF", []),
("GDS", []),
("TCC2", {}), # per-channel TCC perfmon
("TCC2", OrderedDict()), # per-channel TCC perfmon
]
)
for ch in range(perfmon_config["TCC_channels"]):
@@ -391,16 +398,25 @@ def update_pmc_bucket(
if IP_block != "TCC":
# Insert unique pmc counters into its bucket
if counter not in pmc_list[IP_block]:
pmc_list[IP_block].append(counter)
# NOTE: we specifically do _not_ exclude multiple versions of
# the same counter, because some counters are particularly
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
# Often, the resulting metrics do not make sense if they
# are taken from different runs, see:
# https://github.com/ROCm/omniperf/issues/332
pmc_list[IP_block].append(counter)
else:
# TCC counters processing
m = re.match(r"[\s\S]+\[(\d+)\]", counter)
if m is None:
# Aggregated TCC counters
if counter not in pmc_list[IP_block]:
pmc_list[IP_block].append(counter)
# NOTE: we specifically do _not_ exclude multiple versions of
# the same counter, because some counters are particularly
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
# Often, the resulting metrics do not make sense if they
# are taken from different runs, see:
# https://github.com/ROCm/omniperf/issues/332
pmc_list[IP_block].append(counter)
else:
# TCC channel ID
@@ -409,8 +425,13 @@ def update_pmc_bucket(
# fake IP block for per channel TCC
if str(ch) in pmc_list["TCC2"]:
# append unique counter into the channel
if counter not in pmc_list["TCC2"][str(ch)]:
pmc_list["TCC2"][str(ch)].append(counter)
# NOTE: we specifically do _not_ exclude multiple versions of
# the same counter, because some counters are particularly
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
# Often, the resulting metrics do not make sense if they
# are taken from different runs, see:
# https://github.com/ROCm/omniperf/issues/332
pmc_list["TCC2"][str(ch)].append(counter)
else:
# initial counter in this channel
pmc_list["TCC2"][str(ch)] = [counter]