Implement deterministic ordering of perfmon
Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
[ROCm/rocprofiler-compute commit: 97af96e435]
Этот коммит содержится в:
коммит произвёл
Cole Ramos
родитель
e55b9b1d34
Коммит
3ff4117c4e
@@ -106,7 +106,10 @@ class OmniProfiler_Base:
|
||||
if type(self.__args.path) == str:
|
||||
if out is None:
|
||||
out = self.__args.path + "/pmc_perf.csv"
|
||||
# we sort so that we have a consistent ordering of files between runs
|
||||
# regardless of the file-system, etc.
|
||||
files = glob.glob(self.__args.path + "/" + "pmc_perf_*.csv")
|
||||
file.sort()
|
||||
elif type(self.__args.path) == list:
|
||||
files = self.__args.path
|
||||
else:
|
||||
@@ -339,6 +342,8 @@ class OmniProfiler_Base:
|
||||
disable_tqdm = False
|
||||
|
||||
# Run profiling on each input file
|
||||
# we sort so that we have a consistent ordering of files between runs
|
||||
# regardless of the file-system, etc.
|
||||
input_files = glob.glob(self.get_args().path + "/perfmon/*.txt")
|
||||
input_files.sort()
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
##############################################################################el
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import OrderedDict
|
||||
import os
|
||||
import math
|
||||
import shutil
|
||||
@@ -212,10 +213,12 @@ class OmniSoC_Base:
|
||||
os.makedirs(workload_perfmon_dir)
|
||||
|
||||
if not roofline_perfmon_only:
|
||||
ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt")
|
||||
ref_pmc_files_list += glob.glob(
|
||||
# we sort so that we have a consistent ordering of files between runs
|
||||
# regardless of the file-system, etc.
|
||||
ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt"))
|
||||
ref_pmc_files_list += sorted(glob.glob(
|
||||
self.__perfmon_dir + "/" + self.__arch + "/pmc_*_perf*.txt"
|
||||
)
|
||||
))
|
||||
|
||||
# Perfmon list filtering
|
||||
if self.__args.ipblocks != None:
|
||||
@@ -237,7 +240,9 @@ class OmniSoC_Base:
|
||||
# default: take all perfmons
|
||||
pmc_files_list = ref_pmc_files_list
|
||||
else:
|
||||
ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt")
|
||||
# we sort so that we have a consistent ordering of files between runs
|
||||
# regardless of the file-system, etc.
|
||||
ref_pmc_files_list = sorted(glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt"))
|
||||
pmc_files_list = ref_pmc_files_list
|
||||
|
||||
# Coalesce and writeback workload specific perfmon
|
||||
@@ -272,7 +277,8 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir):
|
||||
|
||||
# match pattern for pmc counters
|
||||
mpattern = r"^pmc:(.*)"
|
||||
pmc_list = dict(
|
||||
# ordered dict again to ensure consistent ordering between runs
|
||||
pmc_list = OrderedDict(
|
||||
[
|
||||
("SQ", []),
|
||||
("GRBM", []),
|
||||
@@ -284,7 +290,7 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir):
|
||||
("CPC", []),
|
||||
("CPF", []),
|
||||
("GDS", []),
|
||||
("TCC2", {}), # per-channel TCC perfmon
|
||||
("TCC2", OrderedDict()), # per-channel TCC perfmon
|
||||
]
|
||||
)
|
||||
for ch in range(perfmon_config["TCC_channels"]):
|
||||
@@ -348,7 +354,8 @@ def update_pmc_bucket(
|
||||
)
|
||||
if pmc_list is None:
|
||||
detected_external_call = True
|
||||
pmc_list = dict(
|
||||
# ordered dict again to ensure consistent ordering between runs
|
||||
pmc_list = OrderedDict(
|
||||
[
|
||||
("SQ", []),
|
||||
("GRBM", []),
|
||||
@@ -360,7 +367,7 @@ def update_pmc_bucket(
|
||||
("CPC", []),
|
||||
("CPF", []),
|
||||
("GDS", []),
|
||||
("TCC2", {}), # per-channel TCC perfmon
|
||||
("TCC2", OrderedDict()), # per-channel TCC perfmon
|
||||
]
|
||||
)
|
||||
for ch in range(perfmon_config["TCC_channels"]):
|
||||
@@ -391,16 +398,25 @@ def update_pmc_bucket(
|
||||
|
||||
if IP_block != "TCC":
|
||||
# Insert unique pmc counters into its bucket
|
||||
if counter not in pmc_list[IP_block]:
|
||||
pmc_list[IP_block].append(counter)
|
||||
|
||||
# NOTE: we specifically do _not_ exclude multiple versions of
|
||||
# the same counter, because some counters are particularly
|
||||
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
|
||||
# Often, the resulting metrics do not make sense if they
|
||||
# are taken from different runs, see:
|
||||
# https://github.com/ROCm/omniperf/issues/332
|
||||
pmc_list[IP_block].append(counter)
|
||||
else:
|
||||
# TCC counters processing
|
||||
m = re.match(r"[\s\S]+\[(\d+)\]", counter)
|
||||
if m is None:
|
||||
# Aggregated TCC counters
|
||||
if counter not in pmc_list[IP_block]:
|
||||
pmc_list[IP_block].append(counter)
|
||||
# NOTE: we specifically do _not_ exclude multiple versions of
|
||||
# the same counter, because some counters are particularly
|
||||
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
|
||||
# Often, the resulting metrics do not make sense if they
|
||||
# are taken from different runs, see:
|
||||
# https://github.com/ROCm/omniperf/issues/332
|
||||
pmc_list[IP_block].append(counter)
|
||||
|
||||
else:
|
||||
# TCC channel ID
|
||||
@@ -409,8 +425,13 @@ def update_pmc_bucket(
|
||||
# fake IP block for per channel TCC
|
||||
if str(ch) in pmc_list["TCC2"]:
|
||||
# append unique counter into the channel
|
||||
if counter not in pmc_list["TCC2"][str(ch)]:
|
||||
pmc_list["TCC2"][str(ch)].append(counter)
|
||||
# NOTE: we specifically do _not_ exclude multiple versions of
|
||||
# the same counter, because some counters are particularly
|
||||
# sensitive to run-to-run variation (e.g., SQ_WAVE_CYCLES).
|
||||
# Often, the resulting metrics do not make sense if they
|
||||
# are taken from different runs, see:
|
||||
# https://github.com/ROCm/omniperf/issues/332
|
||||
pmc_list["TCC2"][str(ch)].append(counter)
|
||||
else:
|
||||
# initial counter in this channel
|
||||
pmc_list["TCC2"][str(ch)] = [counter]
|
||||
|
||||
Ссылка в новой задаче
Block a user