Handle mutually exclusive report section filters (#710)
* Make --roof-only, --block and --set mutually exclusive
* Update help output and documentation
* Add sanitize function for checking profiler options
* Update filter blocks arguments when --set or --roof-only is provided
* Update filter_blocks in profiling_config.yaml based on --set option
* Log Filtered Sections instead of Report Sections and Set Selection
* Move soc class function calls from rocprof compute base class to profiler base class
* Fix bug in panel level filtering using --filter-block option
* Remove roofline specific pmc files
* Move microbenchmark entry point from gfx specific soc class to base soc class
* Run microbenchmarks only if block 4 is selected or roof only is selected; skip for mi100
This commit is contained in:
committed by
GitHub
parent
5738922604
commit
85a557673d
@@ -269,6 +269,7 @@ Filtering options
|
||||
``-b``, ``--block <block-name>``
|
||||
Allows system profiling on one or more selected analysis report blocks to speed
|
||||
up the profiling process. See :ref:`profiling-hw-component-filtering`.
|
||||
Note that this option cannot be used with ``--roof-only`` or ``--set``.
|
||||
|
||||
``-k``, ``--kernel <kernel-substr>``
|
||||
Allows for kernel filtering. Usage is equivalent with the current ``rocprof``
|
||||
@@ -480,6 +481,7 @@ If you want to focus only on roofline-specific performance data and reduce the t
|
||||
This option checks if there is existing profiling data in the workload directory (``pmc_perf.csv`` and ``roofline.csv``):
|
||||
a) If found, uses the data files with the provided arguments to create another roofline PDF output; otherwise,
|
||||
b) Profile mode runs but is limited to collecting only roofline performance counters.
|
||||
Note that ``--roof-only`` cannot be used with ``--block`` or ``--set`` options.
|
||||
|
||||
Roofline options
|
||||
----------------
|
||||
|
||||
@@ -232,7 +232,8 @@ Examples:
|
||||
help=(
|
||||
"\t\t\tSpecify metric id(s) from --list-metrics for filtering "
|
||||
"(e.g. 12, 12.1, 12.1.1).\n"
|
||||
"\t\t\tCan provide multiple space separated arguments."
|
||||
"\t\t\tCan provide multiple space separated arguments.\n"
|
||||
"\t\t\tCannot be used with --set or --roof-only"
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
@@ -244,8 +245,12 @@ Examples:
|
||||
"--set",
|
||||
default=None,
|
||||
dest="set_selected",
|
||||
help="\t\t\tProfile a set of metrics of topic of interest by collecting "
|
||||
"counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
|
||||
help=(
|
||||
"\t\t\tProfile a set of metrics of topic of interest by collecting "
|
||||
"counters in a single pass.\n"
|
||||
"\t\t\tFor available sets, see --list-sets\n"
|
||||
"\t\t\tCannot be used with --block or --roof-only"
|
||||
),
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
@@ -345,7 +350,10 @@ Examples:
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="\t\t\tProfile roofline data only.",
|
||||
help=(
|
||||
"\t\t\tProfile roofline data only.\n"
|
||||
"\t\t\tCannot be used with --block or --set"
|
||||
),
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"--sort",
|
||||
|
||||
@@ -31,8 +31,6 @@ import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
import config
|
||||
from argparser import omniarg_parser
|
||||
from utils import file_io, parser, schema
|
||||
@@ -221,17 +219,17 @@ class RocProfCompute:
|
||||
"rocprof-compute requires you to pass a valid mode. Detected None."
|
||||
)
|
||||
elif self.__args.mode == "profile":
|
||||
# FIXME:
|
||||
# Might want to get host name from detected spec
|
||||
# Add --name to workload path if --path is not given
|
||||
if self.__args.path == str(Path(os.getcwd()) / "workloads"):
|
||||
self.__args.path = str(Path(self.__args.path) / self.__args.name)
|
||||
# Add node name to workload path
|
||||
if self.__args.subpath == "node_name":
|
||||
self.__args.path = str(
|
||||
Path(self.__args.path).joinpath(socket.gethostname())
|
||||
)
|
||||
self.__args.path = str(Path(self.__args.path) / socket.gethostname())
|
||||
# Or, add gpu model name to workload path
|
||||
elif self.__args.subpath == "gpu_model":
|
||||
self.__args.path = str(
|
||||
Path(self.__args.path).joinpath(self.__mspec.gpu_model)
|
||||
)
|
||||
self.__args.path = str(Path(self.__args.path) / self.__mspec.gpu_model)
|
||||
|
||||
# Create workload directory if it does not exist
|
||||
p = Path(self.__args.path)
|
||||
if not p.exists():
|
||||
try:
|
||||
@@ -267,8 +265,9 @@ class RocProfCompute:
|
||||
)
|
||||
if arch in self.__supported_archs.keys():
|
||||
ac = schema.ArchConfig()
|
||||
config_dir = Path(self.__args.config_dir)
|
||||
ac.panel_configs = file_io.load_panel_configs([config_dir.joinpath(arch)])
|
||||
ac.panel_configs = file_io.load_panel_configs([
|
||||
Path(self.__args.config_dir) / arch
|
||||
])
|
||||
sys_info = (
|
||||
self.__mspec.get_class_members().iloc[0] if for_current_arch else None
|
||||
)
|
||||
@@ -350,18 +349,6 @@ class RocProfCompute:
|
||||
if self.__args.name.find("/") != -1:
|
||||
console_error("'/' not permitted in profile name")
|
||||
|
||||
# FIXME:
|
||||
# Changing default path should be done at the end of arg parsing stage,
|
||||
# unless there is a specific reason to do here.
|
||||
|
||||
# Update default path
|
||||
if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
|
||||
self.__args.path = str(
|
||||
Path(self.__args.path).joinpath(
|
||||
self.__args.name, self.__mspec.gpu_model
|
||||
)
|
||||
)
|
||||
|
||||
# instantiate desired profiler
|
||||
if self.__profiler_mode == "rocprofv1":
|
||||
from rocprof_compute_profile.profiler_rocprof_v1 import rocprof_v1_profiler
|
||||
@@ -370,7 +357,6 @@ class RocProfCompute:
|
||||
self.__args,
|
||||
self.__profiler_mode,
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
elif self.__profiler_mode == "rocprofv2":
|
||||
from rocprof_compute_profile.profiler_rocprof_v2 import rocprof_v2_profiler
|
||||
@@ -379,7 +365,6 @@ class RocProfCompute:
|
||||
self.__args,
|
||||
self.__profiler_mode,
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
elif self.__profiler_mode == "rocprofv3":
|
||||
from rocprof_compute_profile.profiler_rocprof_v3 import rocprof_v3_profiler
|
||||
@@ -388,7 +373,6 @@ class RocProfCompute:
|
||||
self.__args,
|
||||
self.__profiler_mode,
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
elif self.__profiler_mode == "rocprofiler-sdk":
|
||||
from rocprof_compute_profile.profiler_rocprofiler_sdk import (
|
||||
@@ -399,7 +383,6 @@ class RocProfCompute:
|
||||
self.__args,
|
||||
self.__profiler_mode,
|
||||
self.__soc[self.__mspec.gpu_arch],
|
||||
self.__supported_archs,
|
||||
)
|
||||
else:
|
||||
console_error("Unsupported profiler")
|
||||
@@ -408,12 +391,8 @@ class RocProfCompute:
|
||||
# run profiling workflow
|
||||
# -----------------------
|
||||
|
||||
self.__soc[self.__mspec.gpu_arch].profiling_setup()
|
||||
# Write profiling configuration as yaml file
|
||||
with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
|
||||
args_dict = vars(self.__args)
|
||||
args_dict["config_dir"] = str(args_dict["config_dir"])
|
||||
yaml.dump(args_dict, f)
|
||||
profiler.sanitize()
|
||||
|
||||
# enable file-based logging
|
||||
setup_file_handler(self.__args.loglevel, self.__args.path)
|
||||
|
||||
@@ -438,9 +417,6 @@ class RocProfCompute:
|
||||
int(time_end_post - time_end_prof)
|
||||
)
|
||||
)
|
||||
self.__soc[self.__mspec.gpu_arch].post_profiling()
|
||||
|
||||
return
|
||||
|
||||
@demarcate
|
||||
def update_db(self):
|
||||
@@ -508,7 +484,7 @@ class RocProfCompute:
|
||||
and analyzer.get_args().spatial_multiplexing is not True
|
||||
else file_io.find_1st_sub_dir(d[0])
|
||||
)
|
||||
sys_info = file_io.load_sys_info(sysinfo_path.joinpath("sysinfo.csv"))
|
||||
sys_info = file_io.load_sys_info(sysinfo_path / "sysinfo.csv")
|
||||
|
||||
sys_info = sys_info.to_dict("list")
|
||||
sys_info = {key: value[0] for key, value in sys_info.items()}
|
||||
|
||||
@@ -34,6 +34,7 @@ from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import yaml
|
||||
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
@@ -53,10 +54,9 @@ from utils.utils import (
|
||||
|
||||
|
||||
class RocProfCompute_Base:
|
||||
def __init__(self, args, profiler_mode, soc, supported_archs):
|
||||
def __init__(self, args, profiler_mode, soc):
|
||||
self.__args = args
|
||||
self.__profiler = profiler_mode
|
||||
self.__supported_archs = supported_archs
|
||||
self._soc = soc # OmniSoC obj
|
||||
|
||||
def get_args(self):
|
||||
@@ -67,6 +67,50 @@ class RocProfCompute_Base:
|
||||
# assume no SoC specific options and return empty list by default
|
||||
return []
|
||||
|
||||
@demarcate
|
||||
def sanitize(self):
|
||||
"""Perform sanitization of inputs"""
|
||||
args = self.get_args()
|
||||
|
||||
if (
|
||||
sum((
|
||||
bool(args.filter_blocks),
|
||||
bool(args.set_selected),
|
||||
bool(args.roof_only),
|
||||
))
|
||||
> 1
|
||||
):
|
||||
console_error(
|
||||
"--block, --set, and --roof-only are mutually exclusive options. "
|
||||
"Please use only one of them."
|
||||
)
|
||||
|
||||
# verify not accessing parent directories
|
||||
if ".." in str(args.path):
|
||||
console_error(
|
||||
"Access denied. Cannot access parent directories in path (i.e. ../)"
|
||||
)
|
||||
|
||||
# verify correct formatting for application binary
|
||||
args.remaining = args.remaining[1:]
|
||||
if args.remaining:
|
||||
# Ensure that command points to an executable
|
||||
if not shutil.which(args.remaining[0]):
|
||||
console_error(
|
||||
f"Your command {args.remaining[0]} doesn't point to a executable. "
|
||||
"Please verify."
|
||||
)
|
||||
args.remaining = " ".join(args.remaining)
|
||||
else:
|
||||
console_error(
|
||||
(
|
||||
"Profiling command required. Pass application executable after -- "
|
||||
"at the end of options.\n"
|
||||
"\t\ti.e. rocprof-compute profile -n vcopy -- "
|
||||
"./vcopy -n 1048576 -b 256"
|
||||
)
|
||||
)
|
||||
|
||||
@demarcate
|
||||
def join_prof(self, out=None):
|
||||
"""Manually join separated rocprof runs"""
|
||||
@@ -299,6 +343,16 @@ class RocProfCompute_Base:
|
||||
"""Perform any pre-processing steps prior to profiling."""
|
||||
console_debug("profiling", "pre-processing using %s profiler" % self.__profiler)
|
||||
|
||||
self._filter_blocks = self._soc.profiling_setup()
|
||||
|
||||
# Write profiling configuration as yaml file
|
||||
with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
|
||||
args_dict = vars(self.__args)
|
||||
# Override filter_blocks when writing profiling config yaml
|
||||
args_dict["filter_blocks"] = self._filter_blocks
|
||||
args_dict["config_dir"] = str(args_dict["config_dir"])
|
||||
yaml.dump(args_dict, f)
|
||||
|
||||
# verify soc compatibility
|
||||
if self.__profiler not in self._soc.get_compatible_profilers():
|
||||
console_error(
|
||||
@@ -309,31 +363,6 @@ class RocProfCompute_Base:
|
||||
self._soc.get_compatible_profilers(),
|
||||
)
|
||||
)
|
||||
# verify not accessing parent directories
|
||||
if ".." in str(self.__args.path):
|
||||
console_error(
|
||||
"Access denied. Cannot access parent directories in path (i.e. ../)"
|
||||
)
|
||||
|
||||
# verify correct formatting for application binary
|
||||
self.__args.remaining = self.__args.remaining[1:]
|
||||
if self.__args.remaining:
|
||||
# Ensure that command points to an executable
|
||||
if not shutil.which(self.__args.remaining[0]):
|
||||
console_error(
|
||||
"Your command %s doesn't point to a executable. Please verify."
|
||||
% self.__args.remaining[0]
|
||||
)
|
||||
self.__args.remaining = " ".join(self.__args.remaining)
|
||||
else:
|
||||
console_error(
|
||||
(
|
||||
"Profiling command required. Pass application executable after -- "
|
||||
"at the end of options.\n"
|
||||
"\t\ti.e. rocprof-compute profile -n vcopy -- "
|
||||
"./vcopy -n 1048576 -b 256"
|
||||
)
|
||||
)
|
||||
|
||||
gen_sysinfo(
|
||||
workload_name=self.__args.name,
|
||||
@@ -359,14 +388,10 @@ class RocProfCompute_Base:
|
||||
console_log("Command: " + str(self.__args.remaining))
|
||||
console_log("Kernel Selection: " + str(self.__args.kernel))
|
||||
console_log("Dispatch Selection: " + str(self.__args.dispatch))
|
||||
|
||||
if self.get_args().set_selected:
|
||||
console_log("Set Selection: " + str(self.__args.set_selected))
|
||||
|
||||
if self.get_args().filter_blocks is None:
|
||||
console_log("Report Sections: All")
|
||||
if self._filter_blocks:
|
||||
console_log(f"Filtered sections: {str(self._filter_blocks)}")
|
||||
else:
|
||||
console_log("Report Sections: " + str(self.get_args().filter_blocks))
|
||||
console_log("Filtered sections: All")
|
||||
|
||||
msg = "Collecting Performance Counters"
|
||||
(
|
||||
@@ -500,6 +525,7 @@ class RocProfCompute_Base:
|
||||
"profiling",
|
||||
"performing post-processing using %s profiler" % self.__profiler,
|
||||
)
|
||||
self._soc.post_profiling()
|
||||
|
||||
|
||||
def test_df_column_equality(df):
|
||||
|
||||
+6
-4
@@ -32,8 +32,8 @@ from utils.utils import replace_timestamps, store_app_cmd
|
||||
|
||||
|
||||
class rocprof_v1_profiler(RocProfCompute_Base):
|
||||
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
|
||||
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
|
||||
def __init__(self, profiling_args, profiler_mode, soc):
|
||||
super().__init__(profiling_args, profiler_mode, soc)
|
||||
self.ready_to_profile = (
|
||||
self.get_args().roof_only
|
||||
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
|
||||
@@ -98,10 +98,12 @@ class rocprof_v1_profiler(RocProfCompute_Base):
|
||||
@demarcate
|
||||
def post_processing(self):
|
||||
"""Perform any post-processing steps prior to profiling."""
|
||||
super().post_processing()
|
||||
|
||||
if self.ready_to_profile:
|
||||
# Manually join each pmc_perf*.csv output
|
||||
self.join_prof()
|
||||
# Run roofline microbenchmark
|
||||
super().post_processing()
|
||||
# Replace timestamp data to solve a known rocprof bug
|
||||
replace_timestamps(self.get_args().path)
|
||||
else:
|
||||
console_log("roofline", "Detected existing pmc_perf.csv")
|
||||
|
||||
+6
-4
@@ -33,8 +33,8 @@ from utils.utils import replace_timestamps, store_app_cmd
|
||||
|
||||
|
||||
class rocprof_v2_profiler(RocProfCompute_Base):
|
||||
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
|
||||
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
|
||||
def __init__(self, profiling_args, profiler_mode, soc):
|
||||
super().__init__(profiling_args, profiler_mode, soc)
|
||||
self.ready_to_profile = (
|
||||
self.get_args().roof_only
|
||||
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
|
||||
@@ -88,10 +88,12 @@ class rocprof_v2_profiler(RocProfCompute_Base):
|
||||
@demarcate
|
||||
def post_processing(self):
|
||||
"""Perform any post-processing steps prior to profiling."""
|
||||
super().post_processing()
|
||||
|
||||
if self.ready_to_profile:
|
||||
# Manually join each pmc_perf*.csv output
|
||||
self.join_prof()
|
||||
# Run roofline microbenchmark
|
||||
super().post_processing()
|
||||
# Replace timestamp data to solve a known rocprof bug
|
||||
replace_timestamps(self.get_args().path)
|
||||
else:
|
||||
console_log("roofline", "Detected existing pmc_perf.csv")
|
||||
|
||||
+6
-6
@@ -31,8 +31,8 @@ from utils.logger import console_error, console_log, demarcate
|
||||
|
||||
|
||||
class rocprof_v3_profiler(RocProfCompute_Base):
|
||||
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
|
||||
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
|
||||
def __init__(self, profiling_args, profiler_mode, soc):
|
||||
super().__init__(profiling_args, profiler_mode, soc)
|
||||
self.ready_to_profile = (
|
||||
self.get_args().roof_only
|
||||
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
|
||||
@@ -106,10 +106,10 @@ class rocprof_v3_profiler(RocProfCompute_Base):
|
||||
@demarcate
|
||||
def post_processing(self):
|
||||
"""Perform any post-processing steps prior to profiling."""
|
||||
super().post_processing()
|
||||
|
||||
if self.ready_to_profile:
|
||||
# Manually join each pmc_perf*.csv output
|
||||
self.join_prof()
|
||||
# Replace timestamp data to solve a known rocprof bug
|
||||
# replace_timestamps(self.get_args().path)
|
||||
# Run roofline microbenchmark
|
||||
super().post_processing()
|
||||
else:
|
||||
console_log("roofline", "Detected existing pmc_perf.csv")
|
||||
|
||||
+6
-6
@@ -31,8 +31,8 @@ from utils.logger import console_error, console_log, demarcate
|
||||
|
||||
|
||||
class rocprofiler_sdk_profiler(RocProfCompute_Base):
|
||||
def __init__(self, profiling_args, profiler_mode, soc, supported_archs):
|
||||
super().__init__(profiling_args, profiler_mode, soc, supported_archs)
|
||||
def __init__(self, profiling_args, profiler_mode, soc):
|
||||
super().__init__(profiling_args, profiler_mode, soc)
|
||||
self.ready_to_profile = (
|
||||
self.get_args().roof_only
|
||||
and not Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
|
||||
@@ -116,10 +116,10 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
|
||||
@demarcate
|
||||
def post_processing(self):
|
||||
"""Perform any post-processing steps prior to profiling."""
|
||||
super().post_processing()
|
||||
|
||||
if self.ready_to_profile:
|
||||
# Manually join each pmc_perf*.csv output
|
||||
self.join_prof()
|
||||
# Replace timestamp data to solve a known rocprof bug
|
||||
# replace_timestamps(self.get_args().path)
|
||||
# Run roofline microbenchmark
|
||||
super().post_processing()
|
||||
else:
|
||||
console_log("roofline", "Detected existing pmc_perf.csv")
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
|
||||
|
||||
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA_RDREQ_32B_sum TCC_EA_RDREQ_sum TCC_EA_WRREQ_64B_sum TCC_EA_WRREQ_sum
|
||||
|
||||
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
|
||||
|
||||
range:
|
||||
gpu:
|
||||
kernels:
|
||||
@@ -1,12 +0,0 @@
|
||||
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
|
||||
|
||||
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
|
||||
|
||||
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
|
||||
|
||||
#ROOF:MI300
|
||||
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
|
||||
|
||||
range:
|
||||
gpu:
|
||||
kernels:
|
||||
@@ -1,12 +0,0 @@
|
||||
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
|
||||
|
||||
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
|
||||
|
||||
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
|
||||
|
||||
#ROOF:MI300 series and above
|
||||
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
|
||||
|
||||
range:
|
||||
gpu:
|
||||
kernels:
|
||||
@@ -28,7 +28,6 @@ import json
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
@@ -36,6 +35,7 @@ from pathlib import Path
|
||||
import yaml
|
||||
|
||||
import config
|
||||
from roofline import Roofline
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
console_error,
|
||||
@@ -52,6 +52,7 @@ from utils.utils import (
|
||||
detect_rocprof,
|
||||
get_submodules,
|
||||
is_tcc_channel_counter,
|
||||
mibench,
|
||||
parse_sets_yaml,
|
||||
using_v3,
|
||||
)
|
||||
@@ -65,12 +66,14 @@ class OmniSoC_Base:
|
||||
self.__args = args
|
||||
self.__arch = None
|
||||
self._mspec = mspec
|
||||
self.__perfmon_dir = None
|
||||
# Per IP block, max number of simultaneous counters. GFX IP Blocks.
|
||||
self.__perfmon_config = {}
|
||||
self.__soc_params = {} # SoC specifications
|
||||
self.__compatible_profilers = [] # Store profilers compatible with SoC
|
||||
self.populate_mspec()
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.__args, "mode") and self.__args.mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.__arch)
|
||||
@@ -80,9 +83,6 @@ class OmniSoC_Base:
|
||||
return NotImplemented
|
||||
return self.__arch == other.get_soc()
|
||||
|
||||
def set_perfmon_dir(self, path: str):
|
||||
self.__perfmon_dir = path
|
||||
|
||||
def set_perfmon_config(self, config: dict):
|
||||
self.__perfmon_config = config
|
||||
|
||||
@@ -280,40 +280,37 @@ class OmniSoC_Base:
|
||||
Parse analysis report configuration files based on the selected report
|
||||
sections to be filtered.
|
||||
"""
|
||||
# Read the analysis config files and filter
|
||||
config_root_dir = f"{self.get_args().config_dir}/{self.__arch}"
|
||||
args = self.get_args()
|
||||
|
||||
# File id dict
|
||||
config_root_dir = f"{args.config_dir}/{self.__arch}"
|
||||
config_filename_dict = {
|
||||
Path(filename).name.split("_")[0]: filename
|
||||
for filename in glob.glob(f"{config_root_dir}/*.yaml")
|
||||
}
|
||||
|
||||
texts = list()
|
||||
|
||||
set_selected = self.get_args().set_selected
|
||||
|
||||
if set_selected:
|
||||
# NOTE: --blocks and --set are mutually exclusive
|
||||
if self.get_args().filter_blocks:
|
||||
console_error("--block and --set are exclusive options.")
|
||||
|
||||
filter_blocks = args.filter_blocks
|
||||
if args.set_selected:
|
||||
sets_info = parse_sets_yaml(self.__arch)
|
||||
if set_selected not in set(sets_info.keys()):
|
||||
if args.set_selected not in set(sets_info.keys()):
|
||||
console_error(
|
||||
f"argument --set: invalid choice: '{set_selected}' "
|
||||
f"argument --set: invalid choice: '{args.set_selected}' "
|
||||
f"(choose from {sets_info.keys()})"
|
||||
)
|
||||
self.__args.filter_blocks = [
|
||||
filter_blocks = [
|
||||
next(iter(metric.keys()))
|
||||
for metric in sets_info[set_selected]["metric"]
|
||||
for metric in sets_info[args.set_selected]["metric"]
|
||||
]
|
||||
elif args.roof_only:
|
||||
filter_blocks = ["4"]
|
||||
|
||||
if not self.get_args().filter_blocks:
|
||||
texts = list()
|
||||
if not filter_blocks:
|
||||
# Select all sections by default
|
||||
for filename in config_filename_dict.values():
|
||||
with open(filename, "r") as stream:
|
||||
texts.append(stream.read())
|
||||
|
||||
for block_id in self.get_args().filter_blocks:
|
||||
for block_id in filter_blocks:
|
||||
file_id, panel_id, metric_id = convert_metric_id_to_panel_info(block_id)
|
||||
|
||||
# File id filtering
|
||||
@@ -349,6 +346,7 @@ class OmniSoC_Base:
|
||||
if metric_id is None:
|
||||
# If no metric id level filtering, then read the whole panel
|
||||
texts.append(yaml.dump(panel_dict[panel_id], sort_keys=False))
|
||||
continue
|
||||
|
||||
# Metric id filtering
|
||||
metric_dict = {
|
||||
@@ -383,34 +381,12 @@ class OmniSoC_Base:
|
||||
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
|
||||
})
|
||||
|
||||
return counters
|
||||
return counters, filter_blocks
|
||||
|
||||
@demarcate
|
||||
def perfmon_filter(self, roofline_perfmon_only: bool):
|
||||
def perfmon_filter(self):
|
||||
"""Filter default performance counter set based on user arguments"""
|
||||
if (
|
||||
roofline_perfmon_only
|
||||
and Path(self.get_args().path).joinpath("pmc_perf.csv").is_file()
|
||||
):
|
||||
return
|
||||
|
||||
if roofline_perfmon_only:
|
||||
counters = set()
|
||||
for fname in glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt"):
|
||||
lines = open(fname, "r").read().splitlines()
|
||||
for line in lines:
|
||||
# Strip all comments, skip empty lines
|
||||
stext = line.split("#")[0].strip()
|
||||
if not stext:
|
||||
continue
|
||||
# all pmc counters start with "pmc:"
|
||||
m = re.match(r"^pmc:(.*)", stext)
|
||||
if m is None:
|
||||
continue
|
||||
# de-duplicate counters
|
||||
counters = counters.union(set(m.group(1).split()))
|
||||
else:
|
||||
counters = self.detect_counters()
|
||||
counters, filter_blocks = self.detect_counters()
|
||||
|
||||
if not using_v3():
|
||||
# Counters not supported in rocprof v1 / v2
|
||||
@@ -434,6 +410,8 @@ class OmniSoC_Base:
|
||||
# Coalesce and writeback workload specific perfmon
|
||||
self.perfmon_coalesce(counters)
|
||||
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def parse_counters(self, config_text):
|
||||
"""
|
||||
@@ -586,29 +564,8 @@ class OmniSoC_Base:
|
||||
Sort and bucket all related performance counters to minimize required
|
||||
application passes
|
||||
"""
|
||||
|
||||
# Create workload directory
|
||||
# In some cases (i.e. --specs) path will not be given
|
||||
if hasattr(self.get_args(), "path"):
|
||||
if self.get_args().path == str(Path(os.getcwd()).joinpath("workloads")):
|
||||
workload_dir = str(
|
||||
Path(self.get_args().path).joinpath(
|
||||
self.get_args().name, self._mspec.gpu_model
|
||||
)
|
||||
)
|
||||
else:
|
||||
workload_dir = self.get_args().path
|
||||
|
||||
# Initialize directories
|
||||
if not Path(workload_dir).is_dir():
|
||||
os.makedirs(workload_dir)
|
||||
elif not Path(workload_dir).is_symlink():
|
||||
shutil.rmtree(workload_dir)
|
||||
else:
|
||||
os.unlink(workload_dir)
|
||||
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
os.makedirs(workload_perfmon_dir)
|
||||
workload_perfmon_dir = self.get_args().path + "/perfmon"
|
||||
Path(workload_perfmon_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Sanity check whether counters are supported by underlying rocprof tool
|
||||
rocprof_counters = self.get_rocprof_supported_counters()
|
||||
@@ -845,11 +802,40 @@ class OmniSoC_Base:
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
console_debug("profiling", "perform SoC post processing for %s" % self.__arch)
|
||||
# Roofline can be skipped via --no-roof
|
||||
# Roofline not supported on MI 100
|
||||
# If --filter-blocks is provided, roofline block (block 4) should be mentioned
|
||||
if (
|
||||
self.get_args().no_roof
|
||||
or self.__arch == "gfx908"
|
||||
or (
|
||||
self.get_args().filter_blocks
|
||||
and "4" not in self.get_args().filter_blocks
|
||||
)
|
||||
):
|
||||
console_log("roofline", "Skipping roofline")
|
||||
else:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
|
||||
@abstractmethod
|
||||
def analysis_setup(self):
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
console_debug("analysis", "perform SoC analysis setup for %s" % self.__arch)
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
|
||||
|
||||
# Set with limited size
|
||||
|
||||
@@ -51,7 +51,8 @@ class gfx908_soc(OmniSoC_Base):
|
||||
if self.get_args().roof_only:
|
||||
console_error("%s does not support roofline analysis" % self.get_arch())
|
||||
# Perfmon filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
@@ -59,6 +60,6 @@ class gfx908_soc(OmniSoC_Base):
|
||||
super().post_profiling()
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self):
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -23,31 +23,16 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.logger import demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
class gfx90a_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx90a")
|
||||
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
self.get_arch(),
|
||||
"roofline",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
@@ -56,9 +41,6 @@ class gfx90a_soc(OmniSoC_Base):
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx90a"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.get_args(), "mode") and self.get_args().mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
# Set arch specific specs
|
||||
self._mspec._l2_banks = 32
|
||||
@@ -73,35 +55,15 @@ class gfx90a_soc(OmniSoC_Base):
|
||||
"""Perform any SoC-specific setup prior to profiling."""
|
||||
super().profiling_setup()
|
||||
# Performance counter filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
super().post_profiling()
|
||||
|
||||
if not self.get_args().no_roof:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
else:
|
||||
console_log("roofline", "Skipping roofline")
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
# configure roofline for analysis
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -23,31 +23,16 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.logger import demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
class gfx940_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx940")
|
||||
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"gfx940",
|
||||
"roofline",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
@@ -56,9 +41,6 @@ class gfx940_soc(OmniSoC_Base):
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx940"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.get_args(), "mode") and self.get_args().mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
# Set arch specific specs
|
||||
self._mspec._l2_banks = 16
|
||||
@@ -73,35 +55,15 @@ class gfx940_soc(OmniSoC_Base):
|
||||
"""Perform any SoC-specific setup prior to profiling."""
|
||||
super().profiling_setup()
|
||||
# Performance counter filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
super().post_profiling()
|
||||
|
||||
if not self.get_args().no_roof:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
else:
|
||||
console_log("roofline", "Skipping roofline")
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
# configure roofline for analysis
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -23,31 +23,16 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.logger import demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
class gfx941_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx941")
|
||||
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"gfx940",
|
||||
"roofline",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
@@ -56,9 +41,6 @@ class gfx941_soc(OmniSoC_Base):
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx941"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.get_args(), "mode") and self.get_args().mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
# Set arch specific specs
|
||||
self._mspec._l2_banks = 16
|
||||
@@ -73,35 +55,15 @@ class gfx941_soc(OmniSoC_Base):
|
||||
"""Perform any SoC-specific setup prior to profiling."""
|
||||
super().profiling_setup()
|
||||
# Performance counter filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
super().post_profiling()
|
||||
|
||||
if not self.get_args().no_roof:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
else:
|
||||
console_log("roofline", "Skipping roofline")
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
# configure roofline for analysis
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -23,31 +23,16 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.logger import demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
class gfx942_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx942")
|
||||
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"gfx940",
|
||||
"roofline",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
@@ -56,9 +41,6 @@ class gfx942_soc(OmniSoC_Base):
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx942"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.get_args(), "mode") and self.get_args().mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
# Set arch specific specs
|
||||
self._mspec._l2_banks = 16
|
||||
@@ -73,36 +55,15 @@ class gfx942_soc(OmniSoC_Base):
|
||||
"""Perform any SoC-specific setup prior to profiling."""
|
||||
super().profiling_setup()
|
||||
# Performance counter filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
super().post_profiling()
|
||||
|
||||
if not self.get_args().no_roof:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
else:
|
||||
console_log("roofline", "Skipping roofline")
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
# configure roofline for analysis
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -23,37 +23,19 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.logger import demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
class gfx950_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx950")
|
||||
if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only:
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"gfx950",
|
||||
"roofline",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx950"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
if hasattr(self.get_args(), "mode") and self.get_args().mode:
|
||||
self.roofline_obj = Roofline(args, self._mspec)
|
||||
|
||||
# Set arch specific specs
|
||||
self._mspec._l2_banks = 16
|
||||
@@ -68,35 +50,15 @@ class gfx950_soc(OmniSoC_Base):
|
||||
"""Perform any SoC-specific setup prior to profiling."""
|
||||
super().profiling_setup()
|
||||
# Performance counter filtering
|
||||
self.perfmon_filter(self.get_args().roof_only)
|
||||
filter_blocks = self.perfmon_filter()
|
||||
return filter_blocks
|
||||
|
||||
@demarcate
|
||||
def post_profiling(self):
|
||||
"""Perform any SoC-specific post profiling activities."""
|
||||
super().post_profiling()
|
||||
|
||||
if not self.get_args().no_roof:
|
||||
pmc_path = str(Path(self.get_args().path).joinpath("pmc_perf.csv"))
|
||||
if not Path(pmc_path).is_file():
|
||||
console_warning(
|
||||
"Incomplete or missing profiling data. Skipping roofline."
|
||||
)
|
||||
return
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.get_args().path)
|
||||
)
|
||||
if not Path(self.get_args().path).joinpath("roofline.csv").is_file():
|
||||
mibench(self.get_args(), self._mspec)
|
||||
self.roofline_obj.post_processing()
|
||||
else:
|
||||
console_log("roofline", "Skipping roofline")
|
||||
|
||||
@demarcate
|
||||
def analysis_setup(self, roofline_parameters=None):
|
||||
"""Perform any SoC-specific setup prior to analysis."""
|
||||
super().analysis_setup()
|
||||
# configure roofline for analysis
|
||||
if roofline_parameters:
|
||||
self.roofline_obj = Roofline(
|
||||
self.get_args(), self._mspec, roofline_parameters
|
||||
)
|
||||
super().analysis_setup(roofline_parameters=roofline_parameters)
|
||||
|
||||
@@ -52,7 +52,6 @@ from utils.roofline_calc import (
|
||||
calc_ai_profile,
|
||||
constuct_roof,
|
||||
)
|
||||
from utils.utils import mibench
|
||||
|
||||
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
|
||||
|
||||
@@ -966,38 +965,6 @@ class Roofline:
|
||||
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
|
||||
self.empirical_roofline(ret_df=t_df)
|
||||
|
||||
@abstractmethod
|
||||
def profile(self):
|
||||
if self.__args.roof_only:
|
||||
# check for roofline benchmark
|
||||
console_log(
|
||||
"roofline", "Checking for roofline.csv in " + str(self.__args.path)
|
||||
)
|
||||
roof_path = str(Path(self.__args.path).joinpath("roofline.csv"))
|
||||
if not Path(roof_path).is_file():
|
||||
mibench(self.__args, self.__mspec)
|
||||
|
||||
# check for profiling data
|
||||
console_log(
|
||||
"roofline", "Checking for pmc_perf.csv in " + str(self.__args.path)
|
||||
)
|
||||
app_path = str(Path(self.__args.path).joinpath("pmc_perf.csv"))
|
||||
if not Path(app_path).is_file():
|
||||
console_log("roofline", "pmc_perf.csv not found. Generating...")
|
||||
if not self.__args.remaining:
|
||||
console_error(
|
||||
"profiling"
|
||||
"An <app_cmd> is required to run.\r"
|
||||
"rocprof-compute profile -n test -- <app_cmd>"
|
||||
)
|
||||
# TODO: Add an equivalent of characterize_app() to run profiling
|
||||
# directly out of this module
|
||||
|
||||
elif self.__args.no_roof:
|
||||
console_log("roofline", "Skipping roofline.")
|
||||
else:
|
||||
mibench(self.__args, self.__mspec)
|
||||
|
||||
# NB: Currently the post_processing() method is the only one being used by
|
||||
# rocprofiler-compute, we include pre_processing() and profile() methods for
|
||||
# those who wish to borrow the roofline module
|
||||
|
||||
@@ -2068,13 +2068,15 @@ def format_value_for_display(value, max_length=6):
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
if "." in value:
|
||||
# when dot is in the string, we know it's a float number and convert with "float"
|
||||
# when dot is in the string, we know it's a
|
||||
# float number and convert with "float"
|
||||
value = float(value)
|
||||
else:
|
||||
# without dot, we assume it's an integer and convert with "int"
|
||||
value = int(value)
|
||||
except ValueError:
|
||||
# when conversion fails, the string is neither legit float or int, then assume it's invalid and display "N/A"
|
||||
# when conversion fails, the string is neither legit float or int,
|
||||
# then assume it's invalid and display "N/A"
|
||||
return "N/A"
|
||||
|
||||
if isinstance(value, (int, float)):
|
||||
|
||||
@@ -1923,6 +1923,10 @@ class TestSetsIntegration:
|
||||
|
||||
assert test_utils.get_num_pmc_file(workload_dir) == 1
|
||||
|
||||
assert test_utils.check_file_pattern(
|
||||
"- 11.2.3", f"{workload_dir}/profiling_config.yaml"
|
||||
)
|
||||
|
||||
test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):
|
||||
|
||||
Verwijs in nieuw issue
Block a user