Handle mutually exclusive report section filters (#710)

* Make --roof-only, --block and --set mutually exclusive from each other

* Update help output and documentation
  * Add sanitize function for checking profiler options

* Update filter blocks arguments when --set or --roof-only is provided

* Update filter_blocks in profiling_config.yaml based on --set option
  * Log Filtered Sections instead of Report Sections and Set Selection

* Move soc class function calls from rocprof compute base class to profiler base class

* Fix bug in panel level filtering using --filter-block option

* Remove roofline specific pmc files

* Move microbenchmark entry point from gfx specific soc class to base soc class

* Run microbenchmarks only if block 4 or --roof-only is selected; skip for MI100
This commit is contained in:
vedithal-amd
2025-09-09 17:48:20 -04:00
committed by GitHub
parent 5738922604
commit 85a557673d
21 changed files with 202 additions and 450 deletions
@@ -269,6 +269,7 @@ Filtering options
``-b``, ``--block <block-name>``
Allows system profiling on one or more selected analysis report blocks to speed
up the profiling process. See :ref:`profiling-hw-component-filtering`.
Note that this option cannot be used with ``--roof-only`` or ``--set``.
``-k``, ``--kernel <kernel-substr>``
Allows for kernel filtering. Usage is equivalent with the current ``rocprof``
@@ -480,6 +481,7 @@ If you want to focus only on roofline-specific performance data and reduce the t
This option checks if there is existing profiling data in the workload directory (``pmc_perf.csv`` and ``roofline.csv``):
a) If found, uses the data files with the provided arguments to create another roofline PDF output; otherwise,
b) Profile mode runs but is limited to collecting only roofline performance counters.
Note that ``--roof-only`` cannot be used with the ``--block`` or ``--set`` options.
Roofline options
----------------
@@ -232,7 +232,8 @@ Examples:
help=(
"\t\t\tSpecify metric id(s) from --list-metrics for filtering "
"(e.g. 12, 12.1, 12.1.1).\n"
"\t\t\tCan provide multiple space separated arguments."
"\t\t\tCan provide multiple space separated arguments.\n"
"\t\t\tCannot be used with --set or --roof-only"
),
)
profile_group.add_argument(
@@ -244,8 +245,12 @@ Examples:
"--set",
default=None,
dest="set_selected",
help="\t\t\tProfile a set of metrics of topic of interest by collecting "
"counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
help=(
"\t\t\tProfile a set of metrics of topic of interest by collecting "
"counters in a single pass.\n"
"\t\t\tFor available sets, see --list-sets\n"
"\t\t\tCannot be used with --block or --roof-only"
),
)
profile_group.add_argument(
@@ -345,7 +350,10 @@ Examples:
required=False,
default=False,
action="store_true",
help="\t\t\tProfile roofline data only.",
help=(
"\t\t\tProfile roofline data only.\n"
"\t\t\tCannot be used with --block or --set"
),
)
roofline_group.add_argument(
"--sort",
@@ -31,8 +31,6 @@ import sys
import time
from pathlib import Path
import yaml
import config
from argparser import omniarg_parser
from utils import file_io, parser, schema
@@ -221,17 +219,17 @@ class RocProfCompute:
"rocprof-compute requires you to pass a valid mode. Detected None."
)
elif self.__args.mode == "profile":
# FIXME:
# Might want to get host name from detected spec
# Add --name to workload path if --path is not given
if self.__args.path == str(Path(os.getcwd()) / "workloads"):
self.__args.path = str(Path(self.__args.path) / self.__args.name)
# Add node name to workload path
if self.__args.subpath == "node_name":
self.__args.path = str(
Path(self.__args.path).joinpath(socket.gethostname())
)
self.__args.path = str(Path(self.__args.path) / socket.gethostname())
# Or, add gpu model name to workload path
elif self.__args.subpath == "gpu_model":
self.__args.path = str(
Path(self.__args.path).joinpath(self.__mspec.gpu_model)
)
self.__args.path = str(Path(self.__args.path) / self.__mspec.gpu_model)
# Create workload directory if it does not exist
p = Path(self.__args.path)
if not p.exists():
try:
@@ -267,8 +265,9 @@ class RocProfCompute:
)
if arch in self.__supported_archs.keys():
ac = schema.ArchConfig()
config_dir = Path(self.__args.config_dir)
ac.panel_configs = file_io.load_panel_configs([config_dir.joinpath(arch)])
ac.panel_configs = file_io.load_panel_configs([
Path(self.__args.config_dir) / arch
])
sys_info = (
self.__mspec.get_class_members().iloc[0] if for_current_arch else None
)
@@ -350,18 +349,6 @@ class RocProfCompute:
if self.__args.name.find("/") != -1:
console_error("'/' not permitted in profile name")
# FIXME:
# Changing default path should be done at the end of arg parsing stage,
# unless there is a specific reason to do here.
# Update default path
if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
self.__args.path = str(
Path(self.__args.path).joinpath(
self.__args.name, self.__mspec.gpu_model
)
)
# instantiate desired profiler
if self.__profiler_mode == "rocprofv1":
from rocprof_compute_profile.profiler_rocprof_v1 import rocprof_v1_profiler
@@ -370,7 +357,6 @@ class RocProfCompute:
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
elif self.__profiler_mode == "rocprofv2":
from rocprof_compute_profile.profiler_rocprof_v2 import rocprof_v2_profiler
@@ -379,7 +365,6 @@ class RocProfCompute:
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
elif self.__profiler_mode == "rocprofv3":
from rocprof_compute_profile.profiler_rocprof_v3 import rocprof_v3_profiler
@@ -388,7 +373,6 @@ class RocProfCompute:
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
elif self.__profiler_mode == "rocprofiler-sdk":
from rocprof_compute_profile.profiler_rocprofiler_sdk import (
@@ -399,7 +383,6 @@ class RocProfCompute:
self.__args,
self.__profiler_mode,
self.__soc[self.__mspec.gpu_arch],
self.__supported_archs,
)
else:
console_error("Unsupported profiler")
@@ -408,12 +391,8 @@ class RocProfCompute:
# run profiling workflow
# -----------------------
self.__soc[self.__mspec.gpu_arch].profiling_setup()
# Write profiling configuration as yaml file
with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
args_dict = vars(self.__args)
args_dict["config_dir"] = str(args_dict["config_dir"])
yaml.dump(args_dict, f)
profiler.sanitize()
# enable file-based logging
setup_file_handler(self.__args.loglevel, self.__args.path)
@@ -438,9 +417,6 @@ class RocProfCompute:
int(time_end_post - time_end_prof)
)
)
self.__soc[self.__mspec.gpu_arch].post_profiling()
return
@demarcate
def update_db(self):
@@ -508,7 +484,7 @@ class RocProfCompute:
and analyzer.get_args().spatial_multiplexing is not True
else file_io.find_1st_sub_dir(d[0])
)
sys_info = file_io.load_sys_info(sysinfo_path.joinpath("sysinfo.csv"))
sys_info = file_io.load_sys_info(sysinfo_path / "sysinfo.csv")
sys_info = sys_info.to_dict("list")
sys_info = {key: value[0] for key, value in sys_info.items()}
@@ -34,6 +34,7 @@ from abc import abstractmethod
from pathlib import Path
import pandas as pd
import yaml
from utils.logger import (
console_debug,
@@ -53,10 +54,9 @@ from utils.utils import (
class RocProfCompute_Base:
def __init__(self, args, profiler_mode, soc, supported_archs):
def __init__(self, args, profiler_mode, soc):
self.__args = args
self.__profiler = profiler_mode
self.__supported_archs = supported_archs
self._soc = soc # OmniSoC obj
def get_args(self):
@@ -67,6 +67,50 @@ class RocProfCompute_Base:
# assume no SoC specific options and return empty list by default
return []
@demarcate
def sanitize(self):
    """Validate profile-mode arguments before profiling starts.

    Checks performed, in order:

    1. ``--block``, ``--set`` and ``--roof-only`` are mutually exclusive;
       at most one of them may be truthy.
    2. The workload path must not contain a ``..`` path component.
    3. A profiling command must be present, and its first word must be
       resolvable by ``shutil.which``.

    Side effect: ``args.remaining`` is rewritten in place. Its first
    element is dropped and, when anything is left, the rest is joined
    into a single space-separated string.

    Every failure is reported through ``console_error``.
    """
    args = self.get_args()

    # Count how many of the three section filters were supplied.
    selected_filters = [
        bool(args.filter_blocks),
        bool(args.set_selected),
        bool(args.roof_only),
    ]
    if sum(selected_filters) > 1:
        console_error(
            "--block, --set, and --roof-only are mutually exclusive options. "
            "Please use only one of them."
        )

    # Verify not accessing parent directories. Compare whole path
    # components rather than substrings, so a directory or workload name
    # that merely contains two dots (e.g. "results..v2") is not rejected.
    if ".." in Path(str(args.path)).parts:
        console_error(
            "Access denied. Cannot access parent directories in path (i.e. ../)"
        )

    # Verify correct formatting for application binary.
    # NOTE(review): the dropped first element is presumably the "--"
    # separator left by the argument parser -- confirm against argparser.
    args.remaining = args.remaining[1:]
    if args.remaining:
        # Ensure that command points to an executable
        if not shutil.which(args.remaining[0]):
            console_error(
                f"Your command {args.remaining[0]} doesn't point to a executable. "
                "Please verify."
            )
        args.remaining = " ".join(args.remaining)
    else:
        console_error(
            (
                "Profiling command required. Pass application executable after -- "
                "at the end of options.\n"
                "\t\ti.e. rocprof-compute profile -n vcopy -- "
                "./vcopy -n 1048576 -b 256"
            )
        )
@demarcate
def join_prof(self, out=None):
"""Manually join separated rocprof runs"""
@@ -299,6 +343,16 @@ class RocProfCompute_Base:
"""Perform any pre-processing steps prior to profiling."""
console_debug("profiling", "pre-processing using %s profiler" % self.__profiler)
self._filter_blocks = self._soc.profiling_setup()
# Write profiling configuration as yaml file
with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
args_dict = vars(self.__args)
# Override filter_blocks when writing profiling config yaml
args_dict["filter_blocks"] = self._filter_blocks
args_dict["config_dir"] = str(args_dict["config_dir"])
yaml.dump(args_dict, f)
# verify soc compatibility
if self.__profiler not in self._soc.get_compatible_profilers():
console_error(
@@ -309,31 +363,6 @@ class RocProfCompute_Base:
self._soc.get_compatible_profilers(),
)
)
# verify not accessing parent directories
if ".." in str(self.__args.path):
console_error(
"Access denied. Cannot access parent directories in path (i.e. ../)"
)
# verify correct formatting for application binary
self.__args.remaining = self.__args.remaining[1:]
if self.__args.remaining:
# Ensure that command points to an executable
if not shutil.which(self.__args.remaining[0]):
console_error(
"Your command %s doesn't point to a executable. Please verify."
% self.__args.remaining[0]
)
self.__args.remaining = " ".join(self.__args.remaining)
else:
console_error(
(
"Profiling command required. Pass application executable after -- "
"at the end of options.\n"
"\t\ti.e. rocprof-compute profile -n vcopy -- "
"./vcopy -n 1048576 -b 256"
)
)
gen_sysinfo(
workload_name=self.__args.name,
@@ -359,14 +388,10 @@ class RocProfCompute_Base:
console_log("Command: " + str(self.__args.remaining))
console_log("Kernel Selection: " + str(self.__args.kernel))
console_log("Dispatch Selection: " + str(self.__args.dispatch))
if self.get_args().set_selected:
console_log("Set Selection: " + str(self.__args.set_selected))
if self.get_args().filter_blocks is None:
console_log("Report Sections: All")
if self._filter_blocks:
console_log(f"Filtered sections: {str(self._filter_blocks)}")
else:
console_log("Report Sections: " + str(self.get_args().filter_blocks))
console_log("Filtered sections: All")
msg = "Collecting Performance Counters"
(
@@ -500,6 +525,7 @@ class RocProfCompute_Base:
"profiling",
"performing post-processing using %s profiler" % self.__profiler,
)
self._soc.post_profiling()
def test_df_column_equality(df):
@@ -32,8 +32,8 @@ from utils.utils import replace_timestamps, store_app_cmd
class rocprof_v1_profiler(RocProfCompute_Base):
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
def __init__(self, profiling_args, profiler_mode, soc):
super().__init__(profiling_args, profiler_mode, soc)
self.ready_to_profile = (
self.get_args().roof_only
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
@@ -98,10 +98,12 @@ class rocprof_v1_profiler(RocProfCompute_Base):
@demarcate
def post_processing(self):
"""Perform any post-processing steps prior to profiling."""
super().post_processing()
if self.ready_to_profile:
# Manually join each pmc_perf*.csv output
self.join_prof()
# Run roofline microbenchmark
super().post_processing()
# Replace timestamp data to solve a known rocprof bug
replace_timestamps(self.get_args().path)
else:
console_log("roofline", "Detected existing pmc_perf.csv")
@@ -33,8 +33,8 @@ from utils.utils import replace_timestamps, store_app_cmd
class rocprof_v2_profiler(RocProfCompute_Base):
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
def __init__(self, profiling_args, profiler_mode, soc):
super().__init__(profiling_args, profiler_mode, soc)
self.ready_to_profile = (
self.get_args().roof_only
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
@@ -88,10 +88,12 @@ class rocprof_v2_profiler(RocProfCompute_Base):
@demarcate
def post_processing(self):
"""Perform any post-processing steps prior to profiling."""
super().post_processing()
if self.ready_to_profile:
# Manually join each pmc_perf*.csv output
self.join_prof()
# Run roofline microbenchmark
super().post_processing()
# Replace timestamp data to solve a known rocprof bug
replace_timestamps(self.get_args().path)
else:
console_log("roofline", "Detected existing pmc_perf.csv")
@@ -31,8 +31,8 @@ from utils.logger import console_error, console_log, demarcate
class rocprof_v3_profiler(RocProfCompute_Base):
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
def __init__(self, profiling_args, profiler_mode, soc):
super().__init__(profiling_args, profiler_mode, soc)
self.ready_to_profile = (
self.get_args().roof_only
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
@@ -106,10 +106,10 @@ class rocprof_v3_profiler(RocProfCompute_Base):
@demarcate
def post_processing(self):
"""Perform any post-processing steps prior to profiling."""
super().post_processing()
if self.ready_to_profile:
# Manually join each pmc_perf*.csv output
self.join_prof()
# Replace timestamp data to solve a known rocprof bug
# replace_timestamps(self.get_args().path)
# Run roofline microbenchmark
super().post_processing()
else:
console_log("roofline", "Detected existing pmc_perf.csv")
@@ -31,8 +31,8 @@ from utils.logger import console_error, console_log, demarcate
class rocprofiler_sdk_profiler(RocProfCompute_Base):
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
def __init__(self, profiling_args, profiler_mode, soc):
super().__init__(profiling_args, profiler_mode, soc)
self.ready_to_profile = (
self.get_args().roof_only
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
@@ -116,10 +116,10 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
@demarcate
def post_processing(self):
"""Perform any post-processing steps prior to profiling."""
super().post_processing()
if self.ready_to_profile:
# Manually join each pmc_perf*.csv output
self.join_prof()
# Replace timestamp data to solve a known rocprof bug
# replace_timestamps(self.get_args().path)
# Run roofline microbenchmark
super().post_processing()
else:
console_log("roofline", "Detected existing pmc_perf.csv")
@@ -1,9 +0,0 @@
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA_RDREQ_32B_sum TCC_EA_RDREQ_sum TCC_EA_WRREQ_64B_sum TCC_EA_WRREQ_sum
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
range:
gpu:
kernels:
@@ -1,12 +0,0 @@
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
#ROOF:MI300
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
range:
gpu:
kernels:
@@ -1,12 +0,0 @@
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
#ROOF:MI300 series and above
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
range:
gpu:
kernels:
@@ -28,7 +28,6 @@ import json
import math
import os
import re
import shutil
import sys
from abc import abstractmethod
from pathlib import Path
@@ -36,6 +35,7 @@ from pathlib import Path
import yaml
import config
from roofline import Roofline
from utils.logger import (
console_debug,
console_error,
@@ -52,6 +52,7 @@ from utils.utils import (
detect_rocprof,
get_submodules,
is_tcc_channel_counter,
mibench,
parse_sets_yaml,
using_v3,
)
@@ -65,12 +66,14 @@ class OmniSoC_Base:
self.__args = args
self.__arch = None
self._mspec = mspec
self.__perfmon_dir = None
# Per IP block, max number of simultaneous counters. GFX IP Blocks.
self.__perfmon_config = {}
self.__soc_params = {} # SoC specifications
self.__compatible_profilers = [] # Store profilers compatible with SoC
self.populate_mspec()
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.__args, "mode") and self.__args.mode:
self.roofline_obj = Roofline(args, self._mspec)
def __hash__(self):
return hash(self.__arch)
@@ -80,9 +83,6 @@ class OmniSoC_Base:
return NotImplemented
return self.__arch == other.get_soc()
def set_perfmon_dir(self, path: str):
self.__perfmon_dir = path
def set_perfmon_config(self, config: dict):
self.__perfmon_config = config
@@ -280,40 +280,37 @@ class OmniSoC_Base:
Parse analysis report configuration files based on the selected report
sections to be filtered.
"""
# Read the analysis config files and filter
config_root_dir = f"{self.get_args().config_dir}/{self.__arch}"
args = self.get_args()
# File id dict
config_root_dir = f"{args.config_dir}/{self.__arch}"
config_filename_dict = {
Path(filename).name.split("_")[0]: filename
for filename in glob.glob(f"{config_root_dir}/*.yaml")
}
texts = list()
set_selected = self.get_args().set_selected
if set_selected:
# NOTE: --blocks and --set are mutually exclusive
if self.get_args().filter_blocks:
console_error("--block and --set are exclusive options.")
filter_blocks = args.filter_blocks
if args.set_selected:
sets_info = parse_sets_yaml(self.__arch)
if set_selected not in set(sets_info.keys()):
if args.set_selected not in set(sets_info.keys()):
console_error(
f"argument --set: invalid choice: '{set_selected}' "
f"argument --set: invalid choice: '{args.set_selected}' "
f"(choose from {sets_info.keys()})"
)
self.__args.filter_blocks = [
filter_blocks = [
next(iter(metric.keys()))
for metric in sets_info[set_selected]["metric"]
for metric in sets_info[args.set_selected]["metric"]
]
elif args.roof_only:
filter_blocks = ["4"]
if not self.get_args().filter_blocks:
texts = list()
if not filter_blocks:
# Select all sections by default
for filename in config_filename_dict.values():
with open(filename, "r") as stream:
texts.append(stream.read())
for block_id in self.get_args().filter_blocks:
for block_id in filter_blocks:
file_id, panel_id, metric_id = convert_metric_id_to_panel_info(block_id)
# File id filtering
@@ -349,6 +346,7 @@ class OmniSoC_Base:
if metric_id is None:
# If no metric id level filtering, then read the whole panel
texts.append(yaml.dump(panel_dict[panel_id], sort_keys=False))
continue
# Metric id filtering
metric_dict = {
@@ -383,34 +381,12 @@ class OmniSoC_Base:
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
})
return counters
return counters, filter_blocks
@demarcate
def perfmon_filter(self, roofline_perfmon_only: bool):
def perfmon_filter(self):
"""Filter default performance counter set based on user arguments"""
if (
roofline_perfmon_only
and Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
):
return
if roofline_perfmon_only:
counters = set()
for fname in glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt"):
lines = open(fname, "r").read().splitlines()
for line in lines:
# Strip all comments, skip empty lines
stext = line.split("#")[0].strip()
if not stext:
continue
# all pmc counters start with "pmc:"
m = re.match(r"^pmc:(.*)", stext)
if m is None:
continue
# de-duplicate counters
counters = counters.union(set(m.group(1).split()))
else:
counters = self.detect_counters()
counters, filter_blocks = self.detect_counters()
if not using_v3():
# Counters not supported in rocprof v1 / v2
@@ -434,6 +410,8 @@ class OmniSoC_Base:
# Coalesce and writeback workload specific perfmon
self.perfmon_coalesce(counters)
return filter_blocks
@demarcate
def parse_counters(self, config_text):
"""
@@ -586,29 +564,8 @@ class OmniSoC_Base:
Sort and bucket all related performance counters to minimize required
application passes
"""
# Create workload directory
# In some cases (i.e. --specs) path will not be given
if hasattr(self.get_args(), "path"):
if self.get_args().path == str(Path(os.getcwd()).joinpath("workloads")):
workload_dir = str(
Path(self.get_args().path).joinpath(
self.get_args().name, self._mspec.gpu_model
)
)
else:
workload_dir = self.get_args().path
# Initialize directories
if not Path(workload_dir).is_dir():
os.makedirs(workload_dir)
elif not Path(workload_dir).is_symlink():
shutil.rmtree(workload_dir)
else:
os.unlink(workload_dir)
workload_perfmon_dir = workload_dir + "/perfmon"
os.makedirs(workload_perfmon_dir)
workload_perfmon_dir = self.get_args().path + "/perfmon"
Path(workload_perfmon_dir).mkdir(parents=True, exist_ok=True)
# Sanity check whether counters are supported by underlying rocprof tool
rocprof_counters = self.get_rocprof_supported_counters()
@@ -845,11 +802,40 @@ class OmniSoC_Base:
def post_profiling(self):
"""Perform any SoC-specific post profiling activities.

Logs a debug message, then either skips roofline or runs the roofline
post-processing step. Roofline is skipped when ``no_roof`` is set, when
the architecture is ``gfx908``, or when ``filter_blocks`` is non-empty
and does not contain ``"4"``. Otherwise the method requires
``pmc_perf.csv`` in the workload path; if it is missing, a warning is
emitted and the method returns early.
"""
console_debug("profiling", "perform SoC post processing for %s" % self.__arch)
# Roofline can be skipped via --no-roof
# Roofline is not supported on MI100 (checked below as arch "gfx908")
# If filter blocks are provided, the roofline block (block 4) must be listed
# NOTE(review): assuming filter_blocks is a list, the membership test is an
# exact match on "4"; confirm whether sub-block ids should also count.
if (
self.get_args().no_roof
or self.__arch == "gfx908"
or (
self.get_args().filter_blocks
and "4" not in self.get_args().filter_blocks
)
):
console_log("roofline", "Skipping roofline")
else:
# Roofline needs the collected counter data; bail out if it is absent.
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
if not Path(pmc_path).is_file():
console_warning(
"Incomplete or missing profiling data. Skipping roofline."
)
return
console_log(
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
)
# mibench is invoked only when roofline.csv is not already present.
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
mibench(self.get_args(), self._mspec)
# NOTE(review): indentation is lost in this view, so it is unclear whether
# the call below is nested under the roofline.csv check -- confirm.
self.roofline_obj.post_processing()
@abstractmethod
def analysis_setup(self):
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
console_debug("analysis", "perform SoC analysis setup for %s" % self.__arch)
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
# Set with limited size
@@ -51,7 +51,8 @@ class gfx908_soc(OmniSoC_Base):
if self.get_args().roof_only:
console_error("%s does not support roofline analysis" % self.get_arch())
# Perfmon filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
@@ -59,6 +60,6 @@ class gfx908_soc(OmniSoC_Base):
super().post_profiling()
@demarcate
def analysis_setup(self):
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -23,31 +23,16 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_log, console_warning, demarcate
from utils.logger import demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
class gfx90a_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx90a")
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
self.get_arch(),
"roofline",
)
)
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
@@ -56,9 +41,6 @@ class gfx90a_soc(OmniSoC_Base):
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx90a"))
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.get_args(), "mode") and self.get_args().mode:
self.roofline_obj = Roofline(args, self._mspec)
# Set arch specific specs
self._mspec._l2_banks = 32
@@ -73,35 +55,15 @@ class gfx90a_soc(OmniSoC_Base):
"""Perform any SoC-specific setup prior to profiling."""
super().profiling_setup()
# Performance counter filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
"""Perform any SoC-specific post profiling activities."""
super().post_profiling()
if not self.get_args().no_roof:
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
if not Path(pmc_path).is_file():
console_warning(
"Incomplete or missing profiling data. Skipping roofline."
)
return
console_log(
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
)
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
mibench(self.get_args(), self._mspec)
self.roofline_obj.post_processing()
else:
console_log("roofline", "Skipping roofline")
@demarcate
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
# configure roofline for analysis
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -23,31 +23,16 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_log, console_warning, demarcate
from utils.logger import demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
class gfx940_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx940")
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx940",
"roofline",
)
)
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
@@ -56,9 +41,6 @@ class gfx940_soc(OmniSoC_Base):
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx940"))
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.get_args(), "mode") and self.get_args().mode:
self.roofline_obj = Roofline(args, self._mspec)
# Set arch specific specs
self._mspec._l2_banks = 16
@@ -73,35 +55,15 @@ class gfx940_soc(OmniSoC_Base):
"""Perform any SoC-specific setup prior to profiling."""
super().profiling_setup()
# Performance counter filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
"""Perform any SoC-specific post profiling activities."""
super().post_profiling()
if not self.get_args().no_roof:
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
if not Path(pmc_path).is_file():
console_warning(
"Incomplete or missing profiling data. Skipping roofline."
)
return
console_log(
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
)
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
mibench(self.get_args(), self._mspec)
self.roofline_obj.post_processing()
else:
console_log("roofline", "Skipping roofline")
@demarcate
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
# configure roofline for analysis
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -23,31 +23,16 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_log, console_warning, demarcate
from utils.logger import demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
class gfx941_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx941")
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx940",
"roofline",
)
)
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
@@ -56,9 +41,6 @@ class gfx941_soc(OmniSoC_Base):
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx941"))
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.get_args(), "mode") and self.get_args().mode:
self.roofline_obj = Roofline(args, self._mspec)
# Set arch specific specs
self._mspec._l2_banks = 16
@@ -73,35 +55,15 @@ class gfx941_soc(OmniSoC_Base):
"""Perform any SoC-specific setup prior to profiling."""
super().profiling_setup()
# Performance counter filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
"""Perform any SoC-specific post profiling activities."""
super().post_profiling()
if not self.get_args().no_roof:
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
if not Path(pmc_path).is_file():
console_warning(
"Incomplete or missing profiling data. Skipping roofline."
)
return
console_log(
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
)
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
mibench(self.get_args(), self._mspec)
self.roofline_obj.post_processing()
else:
console_log("roofline", "Skipping roofline")
@demarcate
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
# configure roofline for analysis
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -23,31 +23,16 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_log, console_warning, demarcate
from utils.logger import demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
class gfx942_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx942")
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx940",
"roofline",
)
)
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
@@ -56,9 +41,6 @@ class gfx942_soc(OmniSoC_Base):
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx942"))
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.get_args(), "mode") and self.get_args().mode:
self.roofline_obj = Roofline(args, self._mspec)
# Set arch specific specs
self._mspec._l2_banks = 16
@@ -73,36 +55,15 @@ class gfx942_soc(OmniSoC_Base):
"""Perform any SoC-specific setup prior to profiling."""
super().profiling_setup()
# Performance counter filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
    """Run the parent post-profiling step, then the roofline stage.

    If ``no_roof`` is set on the parsed arguments, the roofline stage is
    skipped with a log message. Otherwise ``pmc_perf.csv`` must exist in the
    workload path (a warning is issued and the method returns if it does
    not), ``mibench`` is run when ``roofline.csv`` is absent, and finally the
    roofline object's ``post_processing()`` is invoked.
    """
    super().post_profiling()

    # Roofline explicitly disabled: nothing more to do.
    if self.get_args().no_roof:
        console_log("roofline", "Skipping roofline")
        return

    # The roofline stage needs the collected counter data; bail out early
    # when it is missing.
    counters_csv = Path(self.get_args().path) / "pmc_perf.csv"
    if not counters_csv.is_file():
        console_warning("Incomplete or missing profiling data. Skipping roofline.")
        return

    console_log(
        "roofline", "Checking for roofline.csv in " + str(self.get_args().path)
    )
    benchmark_csv = Path(self.get_args().path) / "roofline.csv"
    if not benchmark_csv.is_file():
        # No roofline benchmark file yet, so run mibench first.
        mibench(self.get_args(), self._mspec)

    self.roofline_obj.post_processing()
@demarcate
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
# configure roofline for analysis
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -23,37 +23,19 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_log, console_warning, demarcate
from utils.logger import demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
class gfx950_soc(OmniSoC_Base):
def __init__(self, args, mspec):
super().__init__(args, mspec)
self.set_arch("gfx950")
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx950",
"roofline",
)
)
)
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx950"))
# Create roofline object if mode is provided; skip for --specs
if hasattr(self.get_args(), "mode") and self.get_args().mode:
self.roofline_obj = Roofline(args, self._mspec)
# Set arch specific specs
self._mspec._l2_banks = 16
@@ -68,35 +50,15 @@ class gfx950_soc(OmniSoC_Base):
"""Perform any SoC-specific setup prior to profiling."""
super().profiling_setup()
# Performance counter filtering
self.perfmon_filter(self.get_args().roof_only)
filter_blocks = self.perfmon_filter()
return filter_blocks
@demarcate
def post_profiling(self):
    """Run the parent post-profiling step, then the roofline stage.

    If ``no_roof`` is set on the parsed arguments, the roofline stage is
    skipped with a log message. Otherwise ``pmc_perf.csv`` must exist in the
    workload path (a warning is issued and the method returns if it does
    not), ``mibench`` is run when ``roofline.csv`` is absent, and finally the
    roofline object's ``post_processing()`` is invoked.
    """
    super().post_profiling()

    # Roofline explicitly disabled: nothing more to do.
    if self.get_args().no_roof:
        console_log("roofline", "Skipping roofline")
        return

    # The roofline stage needs the collected counter data; bail out early
    # when it is missing.
    counters_csv = Path(self.get_args().path) / "pmc_perf.csv"
    if not counters_csv.is_file():
        console_warning("Incomplete or missing profiling data. Skipping roofline.")
        return

    console_log(
        "roofline", "Checking for roofline.csv in " + str(self.get_args().path)
    )
    benchmark_csv = Path(self.get_args().path) / "roofline.csv"
    if not benchmark_csv.is_file():
        # No roofline benchmark file yet, so run mibench first.
        mibench(self.get_args(), self._mspec)

    self.roofline_obj.post_processing()
@demarcate
def analysis_setup(self, roofline_parameters=None):
"""Perform any SoC-specific setup prior to analysis."""
super().analysis_setup()
# configure roofline for analysis
if roofline_parameters:
self.roofline_obj = Roofline(
self.get_args(), self._mspec, roofline_parameters
)
super().analysis_setup(roofline_parameters=roofline_parameters)
@@ -52,7 +52,6 @@ from utils.roofline_calc import (
calc_ai_profile,
constuct_roof,
)
from utils.utils import mibench
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
@@ -966,38 +965,6 @@ class Roofline:
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
self.empirical_roofline(ret_df=t_df)
@abstractmethod
def profile(self):
    """Check the inputs the roofline stage needs and benchmark if required.

    Behavior depends on the parsed arguments:

    * ``roof_only``: look for ``roofline.csv`` in the workload path and run
      ``mibench`` when it is absent; then look for ``pmc_perf.csv`` and, when
      it is absent and no application command (``remaining``) was supplied,
      report an error.
    * ``no_roof``: log that the roofline stage is skipped.
    * otherwise: run ``mibench`` unconditionally.
    """
    if self.__args.roof_only:
        workload_dir = Path(self.__args.path)

        # check for roofline benchmark
        console_log(
            "roofline", "Checking for roofline.csv in " + str(self.__args.path)
        )
        if not (workload_dir / "roofline.csv").is_file():
            mibench(self.__args, self.__mspec)

        # check for profiling data
        console_log(
            "roofline", "Checking for pmc_perf.csv in " + str(self.__args.path)
        )
        if not (workload_dir / "pmc_perf.csv").is_file():
            console_log("roofline", "pmc_perf.csv not found. Generating...")
            if not self.__args.remaining:
                # The comma after "profiling" matters: without it the adjacent
                # literals are concatenated into a single argument that starts
                # with "profilingAn <app_cmd> ...".
                # NOTE(review): assumes console_error accepts
                # (category, message) like console_log does -- confirm.
                console_error(
                    "profiling",
                    "An <app_cmd> is required to run.\r"
                    "rocprof-compute profile -n test -- <app_cmd>",
                )
            # TODO: Add an equivalent of characterize_app() to run profiling
            # directly out of this module
    elif self.__args.no_roof:
        console_log("roofline", "Skipping roofline.")
    else:
        mibench(self.__args, self.__mspec)
# NB: Currently the post_processing() method is the only one being used by
# rocprofiler-compute; we include pre_processing() and profile() methods for
# those who wish to borrow the roofline module
@@ -2068,13 +2068,15 @@ def format_value_for_display(value, max_length=6):
if isinstance(value, str):
try:
if "." in value:
# when dot is in the string, we know it's a float number and convert with "float"
# when dot is in the string, we know it's a
# float number and convert with "float"
value = float(value)
else:
# without dot, we assume it's an integer and convert with "int"
value = int(value)
except ValueError:
# when conversion fails, the string is neither a legit float nor an int, so assume it's invalid and display "N/A"
# when conversion fails, the string is neither a legit float nor an int,
# so assume it's invalid and display "N/A"
return "N/A"
if isinstance(value, (int, float)):
@@ -1923,6 +1923,10 @@ class TestSetsIntegration:
assert test_utils.get_num_pmc_file(workload_dir) == 1
assert test_utils.check_file_pattern(
"- 11.2.3", f"{workload_dir}/profiling_config.yaml"
)
test_utils.clean_output_dir(config["cleanup"], workload_dir)
def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):