Remove hardware IP block based filtering (#820)
* Analysis report block based filtering is the default now
* Update documentation
* Update CHANGELOG
* Fix tests
* Replace hardware block based filtering tests with report block
based filtering tests
[ROCm/rocprofiler-compute commit: 98bb0f4237]
This commit is contained in:
@@ -116,6 +116,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* Roofline support for Ubuntu 20.04 and SLES below 15.6
|
||||
* Usage of rocm-smi
|
||||
* Remove support for MI50/MI60 in accordance with the documentation
|
||||
* Hardware IP block based filtering has been removed in favor of analysis report block based filtering
|
||||
|
||||
## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0
|
||||
|
||||
|
||||
@@ -214,14 +214,6 @@ add_test(
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_ipblocks
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
set_property(TEST test_profile_ipblocks PROPERTY COST 11)
|
||||
|
||||
add_test(
|
||||
NAME test_profile_dispatch
|
||||
COMMAND
|
||||
|
||||
@@ -232,7 +232,7 @@ Filtering options
|
||||
-----------------
|
||||
|
||||
``-b``, ``--block <block-name>``
|
||||
Allows system profiling on one or more selected hardware report blocks to speed
|
||||
Allows system profiling on one or more selected analysis report blocks to speed
|
||||
up the profiling process. See :ref:`profiling-hw-component-filtering`.
|
||||
|
||||
``-k``, ``--kernel <kernel-substr>``
|
||||
@@ -253,11 +253,11 @@ Filtering options
|
||||
|
||||
.. _profiling-hw-component-filtering:
|
||||
|
||||
Hardware report block filtering
|
||||
Analysis report block filtering
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can profile specific analysis report blocks to speed up the profiling process.
|
||||
In ROCm Compute Profiler, the term hardware report block refers to a section of the
|
||||
In ROCm Compute Profiler, the term analysis report block refers to a section of the
|
||||
analysis report which focuses on metrics associated with a hardware component or
|
||||
a group of hardware components. All profiling results are accumulated in the same
|
||||
target directory without overwriting those for other hardware components.
|
||||
@@ -332,54 +332,6 @@ To see a list of available hardware report blocks, use the ``--list-metrics`` op
|
||||
6.1.2 -> Workgroup Manager Utilization
|
||||
|
||||
|
||||
It is also possible to filter counter collection by hardware component such as Shader Sequencer (SQ)
|
||||
and L2 cache (TCC) as shown below.
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ rocprof-compute profile --name vcopy -b 10 7 -- ./vcopy -n 1048576 -b 256
|
||||
|
||||
__ _
|
||||
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
|
||||
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
|
||||
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|
||||
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|
||||
|_| |_|
|
||||
|
||||
fname: pmc_cpc_perf: Skipped
|
||||
fname: pmc_spi_perf: Skipped
|
||||
fname: pmc_cpf_perf: Skipped
|
||||
fname: pmc_tcp_perf: Skipped
|
||||
fname: pmc_sq_perf4: Added
|
||||
fname: pmc_tcc_perf: Added
|
||||
fname: pmc_sq_perf8: Added
|
||||
fname: pmc_ta_perf: Skipped
|
||||
fname: pmc_sq_perf1: Added
|
||||
fname: pmc_sq_perf3: Added
|
||||
fname: pmc_td_perf: Skipped
|
||||
fname: pmc_tcc2_perf: Skipped
|
||||
fname: pmc_sqc_perf1: Skipped
|
||||
fname: pmc_sq_perf6: Added
|
||||
fname: pmc_sq_perf2: Added
|
||||
rocprofiler-compute version: 2.0.0
|
||||
Profiler choice: rocprofv1
|
||||
Path: /home/auser/repos/rocprofiler-compute/sample/workloads/vcopy/MI200
|
||||
Target: MI200
|
||||
Command: ./vcopy -n 1048576 -b 256
|
||||
Kernel Selection: None
|
||||
Dispatch Selection: None
|
||||
Hardware Blocks: ['sq', 'tcc']
|
||||
Report Sections: []
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Collecting Performance Counters
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
...
|
||||
|
||||
.. warning::
|
||||
|
||||
Filtering by hardware components (e.g. SQ, TCC) will soon be deprecated.
|
||||
It is recommended to use hardware report block based filtering.
|
||||
|
||||
.. _profiling-kernel-filtering:
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ Common filters to customize data collection include:
|
||||
Enables filtering based on dispatch ID.
|
||||
|
||||
``-b``, ``--block``
|
||||
Enables collection metrics for only the specified hardware report blocks.
|
||||
Enables metric collection for only the specified analysis report blocks.
|
||||
|
||||
See :ref:`Filtering <filtering>` for an in-depth walkthrough.
|
||||
|
||||
@@ -80,7 +80,7 @@ interface with profiling results. View different metrics derived from your
|
||||
profiled results and get immediate access to all metrics organized by hardware
|
||||
blocks.
|
||||
|
||||
If you don't apply kernel, dispatch, or hardware report block filters at this stage,
|
||||
If you don't apply kernel, dispatch, or analysis report block filters at this stage,
|
||||
analysis is reflective of the entirety of the profiling data.
|
||||
|
||||
To interact with profiling results from a different session, provide the
|
||||
|
||||
@@ -52,7 +52,6 @@ pythonpath = [
|
||||
markers = [
|
||||
"section",
|
||||
"kernel_execution",
|
||||
"block",
|
||||
"misc",
|
||||
"mem",
|
||||
"sort",
|
||||
|
||||
@@ -178,50 +178,24 @@ Examples:
|
||||
help="\t\t\tDispatch ID filtering.",
|
||||
)
|
||||
|
||||
class AggregateDict(argparse.Action):
|
||||
def __call__(self, parser, namespace, values, option_string=None):
|
||||
aggregated_dict = getattr(namespace, self.dest, {})
|
||||
if aggregated_dict is None:
|
||||
aggregated_dict = {}
|
||||
for key, value in values:
|
||||
aggregated_dict[key] = value
|
||||
setattr(namespace, self.dest, aggregated_dict)
|
||||
|
||||
def validate_block(value):
|
||||
# Metric id regex, for example, 10, 4, 4.3, 4.32
|
||||
# Dont allow more than two digits after decimal point
|
||||
metric_id_pattern = re.compile(r"^\d+$|^\d+\.\d$|^\d+\.\d\d$")
|
||||
# Allow only the following hardware blocks
|
||||
hardware_block_pattern = re.compile(r"^(SQ|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF)$")
|
||||
if metric_id_pattern.match(value):
|
||||
return (str(value), "metric_id")
|
||||
if hardware_block_pattern.match(value):
|
||||
return (str(value), "hardware_block")
|
||||
raise argparse.ArgumentTypeError(f"Invalid hardware block or metric id: {value}")
|
||||
return value
|
||||
raise argparse.ArgumentTypeError(f"Invalid metric id: {value}")
|
||||
|
||||
profile_group.add_argument(
|
||||
"-b",
|
||||
"--block",
|
||||
type=validate_block,
|
||||
action=AggregateDict,
|
||||
dest="filter_blocks",
|
||||
metavar="",
|
||||
nargs="+",
|
||||
required=False,
|
||||
default={},
|
||||
help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 10, 4, 4.3).
|
||||
\t\t\tCan provide multiple space separated arguments.
|
||||
\t\t\tCan also accept Hardware blocks.
|
||||
\t\t\tHardware block filtering (to be deprecated soon):
|
||||
\t\t\t SQ
|
||||
\t\t\t SQC
|
||||
\t\t\t TA
|
||||
\t\t\t TD
|
||||
\t\t\t TCP
|
||||
\t\t\t TCC
|
||||
\t\t\t SPI
|
||||
\t\t\t CPC
|
||||
\t\t\t CPF""",
|
||||
default=[],
|
||||
help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 10, 4, 4.3).\n\t\t\tCan provide multiple space separated arguments.""",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--list-metrics",
|
||||
|
||||
@@ -246,14 +246,6 @@ class RocProfCompute:
|
||||
if self.__args.name.find("/") != -1:
|
||||
console_error("'/' not permitted in profile name")
|
||||
|
||||
# Deprecation warning for hardware blocks
|
||||
if [
|
||||
name
|
||||
for name, type in self.__args.filter_blocks.items()
|
||||
if type == "hardware_block"
|
||||
]:
|
||||
console_warning("Hardware block based filtering will be deprecated soon")
|
||||
|
||||
# FIXME:
|
||||
# Changing default path should be done at the end of arg parsing stage,
|
||||
# unless there is a specific reason to do here.
|
||||
|
||||
@@ -56,14 +56,6 @@ class RocProfCompute_Base:
|
||||
self.__profiler = profiler_mode
|
||||
self.__supported_archs = supported_archs
|
||||
self._soc = soc # OmniSoC obj
|
||||
self.__filter_hardware_blocks = [
|
||||
name for name, type in args.filter_blocks.items() if type == "hardware_block"
|
||||
]
|
||||
self.__filter_metric_ids = [
|
||||
name for name, type in args.filter_blocks.items() if type == "metric_id"
|
||||
]
|
||||
# Fixme: remove the hack code "21" after we could enable pc sampling as default
|
||||
self.__pc_sampling = True if "21" in self.__filter_metric_ids else False
|
||||
|
||||
def get_args(self):
|
||||
return self.__args
|
||||
@@ -309,14 +301,8 @@ class RocProfCompute_Base:
|
||||
gen_sysinfo(
|
||||
workload_name=self.__args.name,
|
||||
workload_dir=self.get_args().path,
|
||||
ip_blocks=[
|
||||
name
|
||||
for name, type in self.__args.filter_blocks.items()
|
||||
if type == "hardware_block"
|
||||
],
|
||||
app_cmd=self.__args.remaining,
|
||||
skip_roof=self.__args.no_roof,
|
||||
roof_only=self.__args.roof_only,
|
||||
mspec=self._soc._mspec,
|
||||
soc=self._soc,
|
||||
)
|
||||
@@ -336,14 +322,10 @@ class RocProfCompute_Base:
|
||||
console_log("Command: " + str(self.__args.remaining))
|
||||
console_log("Kernel Selection: " + str(self.__args.kernel))
|
||||
console_log("Dispatch Selection: " + str(self.__args.dispatch))
|
||||
if self.__filter_hardware_blocks == None:
|
||||
console_log("Hardware Blocks: All")
|
||||
else:
|
||||
console_log("Hardware Blocks: " + str(self.__filter_hardware_blocks))
|
||||
if self.__filter_metric_ids == None:
|
||||
if self.get_args().filter_blocks is None:
|
||||
console_log("Report Sections: All")
|
||||
else:
|
||||
console_log("Report Sections: " + str(self.__filter_metric_ids))
|
||||
console_log("Report Sections: " + str(self.get_args().filter_blocks))
|
||||
|
||||
msg = "Collecting Performance Counters"
|
||||
(
|
||||
@@ -443,7 +425,8 @@ class RocProfCompute_Base:
|
||||
else:
|
||||
console_error("Profiler not supported")
|
||||
total_profiling_time_so_far += actual_profiling_duration
|
||||
if self.__pc_sampling == True and self.__profiler in (
|
||||
# PC sampling data is only collected when block "21" is specified
|
||||
if "21" in self.get_args().filter_blocks and self.__profiler in (
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
):
|
||||
@@ -460,8 +443,8 @@ class RocProfCompute_Base:
|
||||
pc_sampling_duration = end_run_prof - start_run_prof
|
||||
console_debug(
|
||||
"The time of pc sampling profiling is {} m {} sec".format(
|
||||
int((end_run_prof - start_run_prof) / 60),
|
||||
str((end_run_prof - start_run_prof) % 60),
|
||||
int((pc_sampling_duration) / 60),
|
||||
str((pc_sampling_duration) % 60),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -267,13 +267,8 @@ class OmniSoC_Base:
|
||||
)
|
||||
if filename.endswith(".yaml")
|
||||
}
|
||||
metric_ids = [
|
||||
name
|
||||
for name, type in self.get_args().filter_blocks.items()
|
||||
if type == "metric_id"
|
||||
]
|
||||
file_ids = []
|
||||
for section in metric_ids:
|
||||
for section in self.get_args().filter_blocks:
|
||||
section_num = convert_metric_id_to_panel_idx(section)
|
||||
file_id = str(section_num // 100)
|
||||
# Convert "4" to "04"
|
||||
@@ -282,16 +277,17 @@ class OmniSoC_Base:
|
||||
file_ids.append(file_id)
|
||||
# Apply sub section filtering
|
||||
for config_filename in config_filenames:
|
||||
if config_filename.startswith(file_id) and section_num % 100:
|
||||
# If first two characters of the config filename match the file_id
|
||||
if config_filename[:2].startswith(file_id) and section_num % 100:
|
||||
config_filenames[config_filename].append(section_num)
|
||||
|
||||
# Apply section filters only if metric ids have been provided for filtering
|
||||
if metric_ids:
|
||||
if self.get_args().filter_blocks:
|
||||
# Identify yaml files corresponding to file_ids
|
||||
config_filenames = {
|
||||
filename: subsections
|
||||
for filename, subsections in config_filenames.items()
|
||||
if filename.startswith(tuple(file_ids))
|
||||
if filename[:2].startswith(tuple(file_ids))
|
||||
}
|
||||
|
||||
for config_filename, subsections in config_filenames.items():
|
||||
@@ -362,18 +358,6 @@ class OmniSoC_Base:
|
||||
counters = counters.union(set(m.group(1).split()))
|
||||
else:
|
||||
counters = self.detect_counters()
|
||||
# Perfmon hardware block filtering
|
||||
filter_hardware_blocks = [
|
||||
name
|
||||
for name, type in self.get_args().filter_blocks.items()
|
||||
if type == "hardware_block"
|
||||
]
|
||||
if filter_hardware_blocks:
|
||||
counters = {
|
||||
counter_name
|
||||
for counter_name in counters
|
||||
if counter_name.startswith(tuple(filter_hardware_blocks))
|
||||
}
|
||||
|
||||
if not using_v3():
|
||||
# Counters not supported in rocprof v1 / v2
|
||||
|
||||
@@ -306,13 +306,14 @@ def process_panels_to_dataframes(
|
||||
"""
|
||||
|
||||
comparable_columns = build_comparable_columns(args.time_unit)
|
||||
filter_panel_ids = [
|
||||
convert_metric_id_to_panel_idx(section)
|
||||
for section in [
|
||||
name
|
||||
for name, type in profiling_config.get("filter_blocks", {}).items()
|
||||
if type == "metric_id"
|
||||
filter_panel_ids = profiling_config.get("filter_blocks", [])
|
||||
if isinstance(filter_panel_ids, dict):
|
||||
# For backward compatibility
|
||||
filter_panel_ids = [
|
||||
name for name, type in filter_panel_ids.items() if type == "metric_id"
|
||||
]
|
||||
filter_panel_ids = [
|
||||
convert_metric_id_to_panel_idx(section) for section in filter_panel_ids
|
||||
]
|
||||
|
||||
# Initialize the result structure
|
||||
|
||||
@@ -64,13 +64,14 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
Show all panels with their data in plain text mode.
|
||||
"""
|
||||
comparable_columns = parser.build_comparable_columns(args.time_unit)
|
||||
filter_panel_ids = [
|
||||
convert_metric_id_to_panel_idx(section)
|
||||
for section in [
|
||||
name
|
||||
for name, type in profiling_config.get("filter_blocks", {}).items()
|
||||
if type == "metric_id"
|
||||
filter_panel_ids = profiling_config.get("filter_blocks", [])
|
||||
if isinstance(filter_panel_ids, dict):
|
||||
# For backward compatibility
|
||||
filter_panel_ids = [
|
||||
name for name, type in filter_panel_ids.items() if type == "metric_id"
|
||||
]
|
||||
filter_panel_ids = [
|
||||
convert_metric_id_to_panel_idx(section) for section in filter_panel_ids
|
||||
]
|
||||
comparable_columns = parser.build_comparable_columns(args.time_unit)
|
||||
|
||||
|
||||
@@ -1142,9 +1142,7 @@ def replace_timestamps(workload_dir):
|
||||
)
|
||||
|
||||
|
||||
def gen_sysinfo(
|
||||
workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only, mspec, soc
|
||||
):
|
||||
def gen_sysinfo(workload_name, workload_dir, app_cmd, skip_roof, mspec, soc):
|
||||
console_debug("[gen_sysinfo]")
|
||||
df = mspec.get_class_members()
|
||||
|
||||
@@ -1152,12 +1150,7 @@ def gen_sysinfo(
|
||||
df["command"] = app_cmd
|
||||
df["workload_name"] = workload_name
|
||||
|
||||
blocks = []
|
||||
if not ip_blocks:
|
||||
t = ["SQ", "LDS", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"]
|
||||
blocks += t
|
||||
else:
|
||||
blocks += ip_blocks
|
||||
blocks = ["SQ", "LDS", "SQC", "TA", "TD", "TCP", "TCC", "SPI", "CPC", "CPF"]
|
||||
if hasattr(soc, "roofline_obj") and (not skip_roof):
|
||||
blocks.append("roofline")
|
||||
df["ip_blocks"] = "|".join(blocks)
|
||||
@@ -1550,10 +1543,9 @@ def convert_metric_id_to_panel_idx(metric_id):
|
||||
tokens = metric_id.split(".")
|
||||
if len(tokens) == 1:
|
||||
return int(tokens[0]) * 100
|
||||
elif len(tokens) == 2:
|
||||
if len(tokens) == 2:
|
||||
return int(tokens[0]) * 100 + int(tokens[1])
|
||||
else:
|
||||
raise Exception(f"Invalid metric id: {metric_id}")
|
||||
raise Exception(f"Invalid metric id: {metric_id}")
|
||||
|
||||
|
||||
def format_time(seconds):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -1,7 +1,7 @@
|
||||
config_dir: /app/src/rocprof_compute_soc/analysis_configs
|
||||
device: -1
|
||||
dispatch: null
|
||||
filter_blocks: {}
|
||||
filter_blocks: []
|
||||
format_rocprof_output: csv
|
||||
hip_trace: false
|
||||
join_type: grid
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
config_dir: /app/src/rocprof_compute_soc/analysis_configs
|
||||
device: -1
|
||||
dispatch: null
|
||||
filter_blocks: {}
|
||||
filter_blocks: []
|
||||
format_rocprof_output: csv
|
||||
hip_trace: false
|
||||
join_type: grid
|
||||
|
||||
Reference in New Issue
Block a user