Enable rocpd output format with rocprofiler sdk (#790)
* Add `rocpd` choice for `--format-rocprof-output` option
* Add rocpd_data.py which defines SQL queries to extract data from rocpd database
* Use sqlite3 package to read the database
* Add `--retain-rocpd-output` option in profile mode to retain raw
rocpd database
* Add a warning notice that `--format-rocprof-output rocpd` will be
the default in a future release
For rocpd output:
* Use only `pmc_perf.csv` instead of reading individual coll_level results csv files
* Post process csv files using pandas in analysis mode instead of profile mode
* Use ACCUM counters instead of SQ_ACCUM_PREV_HIRES
* Add test cases for rocpd output format
* Fix code formatting issues
* Update CHANGELOG
[ROCm/rocprofiler-compute commit: 03d27c0ba0]
This commit is contained in:
@@ -6,6 +6,26 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
### Added
|
||||
|
||||
* Add `rocpd` choice for `--format-rocprof-output` option in profile mode
|
||||
* Add `--retain-rocpd-output` option in profile mode to save large raw rocpd databases in workload directory
|
||||
|
||||
### Changed
|
||||
|
||||
* Add notice for change in default output format to `rocpd` in a future release
|
||||
* This is displayed when `--format-rocprof-output rocpd` is not used in profile mode
|
||||
|
||||
* When `--format-rocprof-output rocpd` is used, only pmc_perf.csv will be written to the workload directory instead of multiple csv files.
|
||||
|
||||
### Resolved issues
|
||||
|
||||
### Known issues
|
||||
|
||||
### Removed
|
||||
|
||||
## ROCm Compute Profiler 3.2.0 for ROCm 7.0.0
|
||||
|
||||
### Added
|
||||
|
||||
* Support Roofline plot on CLI (single run)
|
||||
|
||||
* Stochastic (hardware-based) PC sampling has been enabled for AMD Instinct MI300X series and later accelerators.
|
||||
|
||||
@@ -249,7 +249,7 @@ Examples:
|
||||
required=False,
|
||||
metavar="",
|
||||
dest="format_rocprof_output",
|
||||
choices=["json", "csv"],
|
||||
choices=["json", "csv", "rocpd"],
|
||||
default="csv",
|
||||
help="\t\t\tSet the format of output file of rocprof.",
|
||||
)
|
||||
@@ -280,6 +280,13 @@ Examples:
|
||||
default="/opt/rocm/lib/librocprofiler-sdk.so",
|
||||
help="\t\t\tSet the path to rocprofiler SDK library.",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--retain-rocpd-output",
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="\t\t\tRetain the large raw rocpd database in workload directory.\n\t\t\tThis option requires --format-rocprof-output rocpd.",
|
||||
)
|
||||
|
||||
## Roofline Command Line Options
|
||||
roofline_group.add_argument(
|
||||
|
||||
@@ -128,11 +128,13 @@ class OmniAnalyze_Base:
|
||||
if not normalization_filter:
|
||||
for k, v in self._arch_configs.items():
|
||||
parser.build_metric_value_string(
|
||||
v.dfs, v.dfs_type, self.__args.normal_unit
|
||||
v.dfs, v.dfs_type, self.__args.normal_unit, self._profiling_config
|
||||
)
|
||||
else:
|
||||
for k, v in self._arch_configs.items():
|
||||
parser.build_metric_value_string(v.dfs, v.dfs_type, normalization_filter)
|
||||
parser.build_metric_value_string(
|
||||
v.dfs, v.dfs_type, normalization_filter, self._profiling_config
|
||||
)
|
||||
|
||||
args = self.__args
|
||||
# Error checking for multiple runs and multiple kernel filters
|
||||
|
||||
@@ -47,6 +47,7 @@ class cli_analysis(OmniAnalyze_Base):
|
||||
self.get_args().spatial_multiplexing,
|
||||
self.get_args().kernel_verbose,
|
||||
self.get_args().verbose,
|
||||
self._profiling_config,
|
||||
)
|
||||
|
||||
if self.get_args().spatial_multiplexing:
|
||||
@@ -72,7 +73,11 @@ class cli_analysis(OmniAnalyze_Base):
|
||||
|
||||
# create the loaded table
|
||||
parser.load_table_data(
|
||||
workload=self._runs[d[0]], dir=d[0], is_gui=False, args=self.get_args()
|
||||
workload=self._runs[d[0]],
|
||||
dir=d[0],
|
||||
is_gui=False,
|
||||
args=self.get_args(),
|
||||
config=self._profiling_config,
|
||||
)
|
||||
|
||||
@demarcate
|
||||
|
||||
@@ -120,6 +120,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
self.get_args().spatial_multiplexing,
|
||||
self.get_args().kernel_verbose,
|
||||
self.get_args().verbose,
|
||||
self._profiling_config,
|
||||
)
|
||||
|
||||
if self.get_args().spatial_multiplexing:
|
||||
@@ -168,6 +169,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
dir=self.dest_dir,
|
||||
is_gui=True,
|
||||
args=self.get_args(),
|
||||
config=self._profiling_config,
|
||||
)
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -300,6 +302,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
self.get_args().spatial_multiplexing,
|
||||
self.get_args().kernel_verbose,
|
||||
args.verbose,
|
||||
self._profiling_config,
|
||||
)
|
||||
|
||||
if self.get_args().spatial_multiplexing:
|
||||
|
||||
@@ -169,6 +169,14 @@ class RocProfCompute:
|
||||
)
|
||||
self.__args = parser.parse_args()
|
||||
|
||||
if (
|
||||
"format_rocprof_output" in self.__args
|
||||
and self.__args.format_rocprof_output != "rocpd"
|
||||
):
|
||||
console_warning(
|
||||
f"The option --format-rocprof-output currently set to {self.__args.format_rocprof_output} will default to rocpd in a future release."
|
||||
)
|
||||
|
||||
if self.__args.mode == None:
|
||||
if self.__args.specs:
|
||||
print(generate_machine_specs(self.__args))
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
# SOFTWARE.
|
||||
##############################################################################
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
@@ -68,6 +69,30 @@ class RocProfCompute_Base:
|
||||
@demarcate
|
||||
def join_prof(self, out=None):
|
||||
"""Manually join separated rocprof runs"""
|
||||
if self.get_args().format_rocprof_output == "rocpd":
|
||||
# Vertically concat (by rows) results_*.csv into pmc_perf.csv
|
||||
result_files = glob.glob(self.get_args().path + "/results_*.csv")
|
||||
if out is None:
|
||||
out = self.__args.path + "/pmc_perf.csv"
|
||||
with open(out, "w", newline="") as outfile:
|
||||
writer = None
|
||||
for file in result_files:
|
||||
with open(file, "r", newline="") as infile:
|
||||
reader = csv.reader(infile)
|
||||
header = next(reader)
|
||||
# Write header only once
|
||||
if writer is None:
|
||||
writer = csv.writer(outfile)
|
||||
writer.writerow(header)
|
||||
for row in reader:
|
||||
writer.writerow(row)
|
||||
console_debug(f"Created file: {out}")
|
||||
# Delete results_*.csv files
|
||||
for file in result_files:
|
||||
os.remove(file)
|
||||
console_debug(f"Deleted file: {file}")
|
||||
return
|
||||
|
||||
# Set default output directory if not specified
|
||||
if type(self.__args.path) == str:
|
||||
if out is None:
|
||||
@@ -412,6 +437,7 @@ class RocProfCompute_Base:
|
||||
mspec=self._soc._mspec,
|
||||
loglevel=self.get_args().loglevel,
|
||||
format_rocprof_output=self.get_args().format_rocprof_output,
|
||||
retain_rocpd_output=self.get_args().retain_rocpd_output,
|
||||
)
|
||||
end_run_prof = time.time()
|
||||
actual_profiling_duration = end_run_prof - start_run_prof
|
||||
|
||||
@@ -43,11 +43,6 @@ class rocprof_v3_profiler(RocProfCompute_Base):
|
||||
def get_profiler_options(self, fname, soc):
|
||||
app_cmd = shlex.split(self.get_args().remaining)
|
||||
trace_option = "--kernel-trace"
|
||||
rocprof_out_format = "json"
|
||||
|
||||
if self.get_args().format_rocprof_output == "csv":
|
||||
rocprof_out_format = "csv"
|
||||
|
||||
if self.get_args().kokkos_trace:
|
||||
trace_option = "--kokkos-trace"
|
||||
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
|
||||
@@ -63,7 +58,7 @@ class rocprof_v3_profiler(RocProfCompute_Base):
|
||||
self.get_args().path + "/" + "out",
|
||||
trace_option,
|
||||
"--output-format",
|
||||
rocprof_out_format,
|
||||
self.get_args().format_rocprof_output,
|
||||
]
|
||||
# Kernel filtering
|
||||
if self.get_args().kernel:
|
||||
|
||||
+1
-4
@@ -55,13 +55,10 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
|
||||
"ROCP_TOOL_LIBRARIES": rocprofiler_sdk_tool_path,
|
||||
"LD_LIBRARY_PATH": rocm_libdir,
|
||||
"ROCPROF_KERNEL_TRACE": "1",
|
||||
"ROCPROF_OUTPUT_FORMAT": "json",
|
||||
"ROCPROF_OUTPUT_FORMAT": self.get_args().format_rocprof_output,
|
||||
"ROCPROF_OUTPUT_PATH": self.get_args().path + "/out/pmc_1",
|
||||
}
|
||||
|
||||
if self.get_args().format_rocprof_output == "csv":
|
||||
options["ROCPROF_OUTPUT_FORMAT"] = "csv"
|
||||
|
||||
if self.get_args().kokkos_trace:
|
||||
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
|
||||
console_error(
|
||||
|
||||
@@ -61,6 +61,7 @@ class tui_analysis(OmniAnalyze_Base):
|
||||
self.get_args().spatial_multiplexing,
|
||||
self.get_args().kernel_verbose,
|
||||
self.get_args().verbose,
|
||||
self._profiling_config,
|
||||
)
|
||||
|
||||
if self.get_args().spatial_multiplexing:
|
||||
@@ -90,6 +91,7 @@ class tui_analysis(OmniAnalyze_Base):
|
||||
dir=self.path,
|
||||
is_gui=False,
|
||||
args=self.get_args(),
|
||||
config=self._profiling_config,
|
||||
)
|
||||
|
||||
def initalize_runs(self, normalization_filter=None):
|
||||
|
||||
@@ -35,6 +35,7 @@ import plotext as plt
|
||||
import plotly.graph_objects as go
|
||||
from dash import dcc, html
|
||||
|
||||
from utils import file_io, rocpd_data
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
console_error,
|
||||
@@ -673,6 +674,9 @@ class Roofline:
|
||||
console_error("roofline", "{} does not exist".format(pmc_perf_csv))
|
||||
t_df = OrderedDict()
|
||||
t_df["pmc_perf"] = pd.read_csv(pmc_perf_csv)
|
||||
profiling_config = file_io.load_profiling_config(self.__args.path[0][0])
|
||||
if profiling_config.get("format_rocprof_output") == "rocpd":
|
||||
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
|
||||
|
||||
color_scheme = {
|
||||
"HBM": "blue+",
|
||||
@@ -861,6 +865,9 @@ class Roofline:
|
||||
console_error("roofline", "{} does not exist".format(app_path))
|
||||
t_df = OrderedDict()
|
||||
t_df["pmc_perf"] = pd.read_csv(app_path)
|
||||
profiling_config = file_io.load_profiling_config(self.__args.path)
|
||||
if profiling_config.get("format_rocprof_output") == "rocpd":
|
||||
t_df["pmc_perf"] = rocpd_data.process_rocpd_csv(t_df["pmc_perf"])
|
||||
self.empirical_roofline(ret_df=t_df)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -31,7 +31,7 @@ import pandas as pd
|
||||
import yaml
|
||||
|
||||
import config
|
||||
from utils import schema
|
||||
from utils import rocpd_data, schema
|
||||
from utils.kernel_name_shortener import kernel_name_shortener
|
||||
from utils.logger import console_debug, console_error, console_log, demarcate
|
||||
|
||||
@@ -95,9 +95,7 @@ def load_profiling_config(config_dir):
|
||||
prof_config = yaml.safe_load(file)
|
||||
return prof_config
|
||||
except FileNotFoundError:
|
||||
console_log(
|
||||
f"Could not find profiling_config.yaml in {config_dir} for filtering analysis report"
|
||||
)
|
||||
console_log(f"Could not find profiling_config.yaml in {config_dir}")
|
||||
return dict()
|
||||
|
||||
|
||||
@@ -195,7 +193,7 @@ def create_df_kernel_top_stats(
|
||||
|
||||
@demarcate
|
||||
def create_df_pmc(
|
||||
raw_data_root_dir, nodes, spatial_multiplexing, kernel_verbose, verbose
|
||||
raw_data_root_dir, nodes, spatial_multiplexing, kernel_verbose, verbose, config
|
||||
):
|
||||
"""
|
||||
Load all raw pmc counters and join into one df.
|
||||
@@ -214,6 +212,8 @@ def create_df_pmc(
|
||||
f == schema.pmc_perf_file_prefix + ".csv"
|
||||
):
|
||||
tmp_df = pd.read_csv(str(Path(root).joinpath(f)))
|
||||
if config.get("format_rocprof_output") == "rocpd":
|
||||
tmp_df = rocpd_data.process_rocpd_csv(tmp_df)
|
||||
# Demangle original KernelNames
|
||||
kernel_name_shortener(tmp_df, kernel_verbose)
|
||||
|
||||
|
||||
@@ -271,7 +271,7 @@ class CodeTransformer(ast.NodeTransformer):
|
||||
return node
|
||||
|
||||
|
||||
def build_eval_string(equation, coll_level):
|
||||
def build_eval_string(equation, coll_level, config):
|
||||
"""
|
||||
Convert user defined equation string to eval executable string
|
||||
For example,
|
||||
@@ -314,7 +314,14 @@ def build_eval_string(equation, coll_level):
|
||||
# use .get() to catch any potential KeyErrors
|
||||
s = re.sub(r"raw_pmc_df\['(.*?)']", r'raw_pmc_df.get("\1")', s)
|
||||
# apply coll_level
|
||||
s = re.sub(r"raw_pmc_df", "raw_pmc_df.get('" + coll_level + "')", s)
|
||||
if config.get("format_rocprof_output") == "rocpd":
|
||||
# Replace SQ_ACCUM_PREV_HIRES with coll_level_ACCUM then ignore coll_level df
|
||||
s = re.sub(f"SQ_ACCUM_PREV_HIRES", f"{coll_level}_ACCUM", s)
|
||||
s = re.sub(
|
||||
r"raw_pmc_df", "raw_pmc_df.get('" + schema.pmc_perf_file_prefix + "')", s
|
||||
)
|
||||
else:
|
||||
s = re.sub(r"raw_pmc_df", "raw_pmc_df.get('" + coll_level + "')", s)
|
||||
# print("--- build_eval_string, return: ", s)
|
||||
return s
|
||||
|
||||
@@ -653,7 +660,7 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
|
||||
setattr(archConfigs, "metric_counters", metric_counters)
|
||||
|
||||
|
||||
def build_metric_value_string(dfs, dfs_type, normal_unit):
|
||||
def build_metric_value_string(dfs, dfs_type, normal_unit, profiling_config):
|
||||
"""
|
||||
Apply the real eval string to its field in the metric_table df.
|
||||
"""
|
||||
@@ -674,6 +681,7 @@ def build_metric_value_string(dfs, dfs_type, normal_unit):
|
||||
df.at[row_idx_label, expr] = build_eval_string(
|
||||
df.at[row_idx_label, expr],
|
||||
df.at[row_idx_label, "coll_level"],
|
||||
profiling_config,
|
||||
)
|
||||
|
||||
elif expr.lower() == "unit" or expr.lower() == "units":
|
||||
@@ -683,7 +691,7 @@ def build_metric_value_string(dfs, dfs_type, normal_unit):
|
||||
|
||||
|
||||
@demarcate
|
||||
def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug):
|
||||
def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
"""
|
||||
Execute the expr string for each metric in the df.
|
||||
"""
|
||||
@@ -784,7 +792,7 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug):
|
||||
if "PER_XCD" not in key:
|
||||
continue
|
||||
# NB: assume all built-in vars from pmc_perf.csv for now
|
||||
s = build_eval_string(value, schema.pmc_perf_file_prefix)
|
||||
s = build_eval_string(value, schema.pmc_perf_file_prefix, config)
|
||||
try:
|
||||
ammolite__build_in[key] = eval(compile(s, "<string>", "eval"))
|
||||
except TypeError:
|
||||
@@ -801,7 +809,7 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug):
|
||||
if "PER_XCD" in key:
|
||||
continue
|
||||
# NB: assume all built-in vars from pmc_perf.csv for now
|
||||
s = build_eval_string(value, schema.pmc_perf_file_prefix)
|
||||
s = build_eval_string(value, schema.pmc_perf_file_prefix, config)
|
||||
try:
|
||||
ammolite__build_in[key] = eval(compile(s, "<string>", "eval"))
|
||||
except TypeError:
|
||||
@@ -1437,7 +1445,7 @@ def load_kernel_top(workload, dir, args):
|
||||
|
||||
|
||||
@demarcate
|
||||
def load_table_data(workload, dir, is_gui, args, skipKernelTop=False):
|
||||
def load_table_data(workload, dir, is_gui, args, config, skipKernelTop=False):
|
||||
"""
|
||||
- Load data for all "raw_csv_table"
|
||||
- Load dat for "pc_sampling_table"
|
||||
@@ -1452,6 +1460,7 @@ def load_table_data(workload, dir, is_gui, args, skipKernelTop=False):
|
||||
workload.sys_info.iloc[0],
|
||||
apply_filters(workload, dir, is_gui, args.debug),
|
||||
args.debug,
|
||||
config,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
import csv
|
||||
import sqlite3
|
||||
from contextlib import closing
|
||||
|
||||
from utils.logger import console_error
|
||||
|
||||
# Query that exports every row of the `counters_collection` view of a rocpd database.
# From schema definition in source/share/rocprofiler-sdk-rocpd/data_views.sql in rocprofiler-sdk repository
# Each selected column is aliased to the name used as the CSV header by
# convert_db_to_csv, so one output row carries one (Counter_Name, Counter_Value)
# pair together with the metadata of the dispatch it was sampled for.
COUNTERS_COLLECTION_QUERY = """
SELECT
agent_id as GPU_ID,
dispatch_id as Dispatch_ID,
grid_size as Grid_Size,
workgroup_size as Workgroup_Size,
lds_block_size as LDS_Per_Workgroup,
scratch_size as Scratch_Per_Workitem,
vgpr_count as Arch_VGPR,
accum_vgpr_count as Accum_VGPR,
sgpr_count as SGPR,
kernel_name as Kernel_Name,
start as Start_Timestamp,
end as End_Timestamp,
kernel_id as Kernel_ID,
counter_name as Counter_Name,
value as Counter_Value
FROM counters_collection
"""
|
||||
|
||||
|
||||
def convert_db_to_csv(
    db_path: str,
    csv_file_path: str,
) -> None:
    """
    Read rocpd database and write to CSV file

    Runs COUNTERS_COLLECTION_QUERY against the SQLite database at ``db_path``
    and writes the result to ``csv_file_path``: first a header row holding the
    result column names, then one CSV row per result row.

    Args:
        db_path: Path of the rocpd SQLite database to read.
        csv_file_path: Path of the CSV file to create (overwritten if present).

    Database errors and file errors are reported through console_error.
    """
    # Read counters_collection view from the database and write to CSV
    try:
        with closing(sqlite3.connect(db_path)) as conn:
            with closing(conn.execute(COUNTERS_COLLECTION_QUERY)) as cursor:
                # Write UTF-8 explicitly: the CSV is read back with
                # pandas.read_csv, whose default encoding is UTF-8, so the
                # file must not depend on the platform's locale encoding.
                with open(
                    csv_file_path, "w", newline="", encoding="utf-8"
                ) as csvfile:
                    writer = csv.writer(csvfile)
                    # Header: the aliased column names of the query result
                    writer.writerow(
                        [description[0] for description in cursor.description]
                    )
                    # Stream rows straight from the cursor to the file
                    writer.writerows(cursor)
    except (sqlite3.DatabaseError, OSError) as e:
        console_error(f"Error converting database to CSV: {e}")
|
||||
|
||||
|
||||
def process_rocpd_csv(df):
    """
    Merge counters across unique dispatches from the input dataframe and return processed dataframe.

    The input holds one row per (dispatch, counter) pair. Rows are grouped by
    dispatch and every group is collapsed into a single output row in which
    each counter name becomes its own column.

    Args:
        df: DataFrame with the columns GPU_ID, Dispatch_ID, Grid_Size,
            Workgroup_Size, LDS_Per_Workgroup, Scratch_Per_Workitem, Arch_VGPR,
            Accum_VGPR, SGPR, Kernel_Name, Start_Timestamp, End_Timestamp,
            Kernel_ID, Counter_Name and Counter_Value.

    Returns:
        A new DataFrame with one row per dispatch group. Metadata columns are
        taken from the first row of each group, Start_Timestamp is 0.0,
        End_Timestamp is the median duration of the group, GPU_ID is replaced
        by its dense rank starting at 0 and Dispatch_ID is renumbered from 0.
        An input without rows yields an empty DataFrame that still carries the
        metadata, timestamp and Dispatch_ID columns.
    """
    # Only import pandas if needed
    import pandas as pd

    # Columns that identify one dispatch
    group_keys = [
        "Dispatch_ID",
        "Kernel_Name",
        "Grid_Size",
        "Workgroup_Size",
        "LDS_Per_Workgroup",
    ]
    # Columns copied unchanged from the first row of every group
    metadata_columns = [
        "GPU_ID",
        "Grid_Size",
        "Workgroup_Size",
        "LDS_Per_Workgroup",
        "Scratch_Per_Workitem",
        "Arch_VGPR",
        "Accum_VGPR",
        "SGPR",
        "Kernel_Name",
        "Kernel_ID",
    ]

    data = list()
    # Group by unique dispatch and merge each group into a single row
    for _, group_df in df.groupby(group_keys):
        row = {column: group_df[column].iloc[0] for column in metadata_columns}
        # Each counter will become its own column; if a counter name repeats
        # inside a group, the last value wins
        row.update(dict(zip(group_df["Counter_Name"], group_df["Counter_Value"])))
        # Replace end timestamp with median of durations of group, start timestamp is set to 0
        row["End_Timestamp"] = (
            group_df["End_Timestamp"] - group_df["Start_Timestamp"]
        ).median()
        row["Start_Timestamp"] = 0.0
        data.append(row)

    if not data:
        # No dispatches (e.g. header-only CSV): building the frame from an
        # empty list would produce no columns and the GPU_ID lookup below
        # would raise KeyError, so return an empty frame with known columns.
        return pd.DataFrame(
            columns=metadata_columns
            + ["End_Timestamp", "Start_Timestamp", "Dispatch_ID"]
        )

    df = pd.DataFrame(data)
    # Rank GPU IDs, map lowest number to 0, next to 1, etc.
    df["GPU_ID"] = df["GPU_ID"].rank(method="dense").astype(int) - 1
    # Reset dispatch IDs
    df["Dispatch_ID"] = range(len(df))
    return df
|
||||
@@ -45,6 +45,7 @@ import pandas as pd
|
||||
import yaml
|
||||
|
||||
import config
|
||||
from utils import rocpd_data
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
console_error,
|
||||
@@ -707,9 +708,14 @@ def parse_text(text_file):
|
||||
|
||||
|
||||
def run_prof(
|
||||
fname, profiler_options, workload_dir, mspec, loglevel, format_rocprof_output
|
||||
fname,
|
||||
profiler_options,
|
||||
workload_dir,
|
||||
mspec,
|
||||
loglevel,
|
||||
format_rocprof_output,
|
||||
retain_rocpd_output=False,
|
||||
):
|
||||
time_0 = time.time()
|
||||
fbase = path(fname).stem
|
||||
|
||||
console_debug("pmc file: %s" % path(fname).name)
|
||||
@@ -831,7 +837,29 @@ def run_prof(
|
||||
|
||||
results_files = []
|
||||
|
||||
if rocprof_cmd.endswith("v2"):
|
||||
if format_rocprof_output == "rocpd":
|
||||
if rocprof_cmd == "rocprofiler-sdk" or rocprof_cmd.endswith("v3"):
|
||||
# Write results_fbase.csv
|
||||
rocpd_data.convert_db_to_csv(
|
||||
glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0],
|
||||
workload_dir + f"/results_{fbase}.csv",
|
||||
)
|
||||
if retain_rocpd_output:
|
||||
shutil.copyfile(
|
||||
glob.glob(workload_dir + "/out/pmc_1/*/*.db")[0],
|
||||
workload_dir + "/" + fbase + ".db",
|
||||
)
|
||||
console_warning(
|
||||
f"Retaining large raw rocpd database: {workload_dir}/{fbase}.db"
|
||||
)
|
||||
# Remove temp directory
|
||||
shutil.rmtree(workload_dir + "/" + "out")
|
||||
return
|
||||
else:
|
||||
console_error(
|
||||
"rocpd output format is only supported with rocprofiler-sdk or rocprofv3."
|
||||
)
|
||||
elif rocprof_cmd.endswith("v2"):
|
||||
# rocprofv2 has separate csv files for each process
|
||||
results_files = glob.glob(workload_dir + "/out/pmc_1/results_*.csv")
|
||||
|
||||
@@ -1058,7 +1086,6 @@ def process_rocprofv3_output(rocprof_output, workload_dir, is_timestamps):
|
||||
else:
|
||||
# when the input is not for timestamps, and counter csv file is not generated, we assume failed rocprof run and will completely bypass the file generation and merging for current pmc
|
||||
results_files_csv = []
|
||||
|
||||
else:
|
||||
console_error("The output file of rocprofv3 can only support json or csv!!!")
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ indirs = [
|
||||
"tests/workloads/vcopy/MI200",
|
||||
"tests/workloads/vcopy/MI300A_A1",
|
||||
"tests/workloads/vcopy/MI300X_A1",
|
||||
"tests/workloads/vcopy/MI300X_A1_rocpd",
|
||||
"tests/workloads/vcopy/MI350",
|
||||
]
|
||||
|
||||
@@ -266,7 +267,11 @@ def test_dispatch_5(binary_handler_analyze_rocprof_compute):
|
||||
@pytest.mark.misc
|
||||
def test_gpu_ids(binary_handler_analyze_rocprof_compute):
|
||||
for dir in indirs:
|
||||
if dir.endswith("MI350"):
|
||||
# if dir.endswith("MI350") or dir.endswith("MI300X_A1_rocpd"):
|
||||
if dir in (
|
||||
"tests/workloads/vcopy/MI350",
|
||||
"tests/workloads/vcopy/MI300X_A1_rocpd",
|
||||
):
|
||||
gpu_id = "0"
|
||||
else:
|
||||
gpu_id = "2"
|
||||
@@ -783,12 +788,12 @@ def test_parser_error_handling():
|
||||
from utils.parser import build_eval_string, calc_builtin_var, update_denom_string
|
||||
|
||||
try:
|
||||
build_eval_string("AVG(SQ_WAVES)", None)
|
||||
build_eval_string("AVG(SQ_WAVES)", None, config={})
|
||||
assert False, "Should have raised exception for None coll_level"
|
||||
except Exception as e:
|
||||
assert "coll_level can not be None" in str(e)
|
||||
|
||||
assert build_eval_string("", "pmc_perf") == ""
|
||||
assert build_eval_string("", "pmc_perf", config={}) == ""
|
||||
assert update_denom_string("", "per_wave") == ""
|
||||
|
||||
class MockSysInfo:
|
||||
@@ -813,12 +818,12 @@ def test_parser_error_handling():
|
||||
from utils.parser import build_eval_string, calc_builtin_var, update_denom_string
|
||||
|
||||
try:
|
||||
build_eval_string("AVG(SQ_WAVES)", None)
|
||||
build_eval_string("AVG(SQ_WAVES)", None, config={})
|
||||
assert False, "Should have raised exception for None coll_level"
|
||||
except Exception as e:
|
||||
assert "coll_level can not be None" in str(e)
|
||||
|
||||
assert build_eval_string("", "pmc_perf") == ""
|
||||
assert build_eval_string("", "pmc_perf", config={}) == ""
|
||||
assert update_denom_string("", "per_wave") == ""
|
||||
|
||||
class MockSysInfo:
|
||||
@@ -943,7 +948,7 @@ def test_analyze_with_debug_mode(binary_handler_analyze_rocprof_compute):
|
||||
}
|
||||
|
||||
try:
|
||||
eval_metric(mock_dfs, mock_dfs_type, sys_info, raw_pmc_df, debug=True)
|
||||
eval_metric(mock_dfs, mock_dfs_type, sys_info, raw_pmc_df, debug=True, config={})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
|
||||
@@ -45,6 +45,13 @@ def test_analyze_vcopy_MI200(binary_handler_analyze_rocprof_compute):
|
||||
assert code == 0
|
||||
|
||||
|
||||
def test_analyze_vcopy_MI300_rocpd(binary_handler_analyze_rocprof_compute):
    """Analyze mode must exit with code 0 on the rocpd-format MI300X vcopy workload."""
    analyze_argv = ["analyze", "--path", "tests/workloads/vcopy/MI300X_A1_rocpd"]
    exit_code = binary_handler_analyze_rocprof_compute(analyze_argv)
    assert exit_code == 0
|
||||
|
||||
|
||||
def test_analyze_ipblocks_TCP_MI300X_A1(binary_handler_analyze_rocprof_compute):
|
||||
code = binary_handler_analyze_rocprof_compute(
|
||||
["analyze", "--path", "tests/workloads/ipblocks_TCP/MI300X_A1"]
|
||||
|
||||
@@ -572,6 +572,21 @@ def test_path(binary_handler_profile_rocprof_compute):
|
||||
test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
def test_path_rocpd(binary_handler_profile_rocprof_compute):
    """
    Profile with `--format-rocprof-output rocpd` and verify the workload directory.

    Checks that pmc_perf.csv exists, that profiling_config.yaml records the
    rocpd format, and that pmc_perf.csv contains a Counter_Name column header.
    """
    workload_dir = test_utils.get_output_dir()
    binary_handler_profile_rocprof_compute(
        config, workload_dir, ["--format-rocprof-output", "rocpd"]
    )

    pmc_perf_csv = Path(workload_dir) / "pmc_perf.csv"
    assert pmc_perf_csv.exists()

    config_records_rocpd = test_utils.check_file_pattern(
        "format_rocprof_output: rocpd", f"{workload_dir}/profiling_config.yaml"
    )
    assert config_records_rocpd

    csv_has_counter_column = test_utils.check_file_pattern(
        "Counter_Name", f"{workload_dir}/pmc_perf.csv"
    )
    assert csv_has_counter_column

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
|
||||
if soc in ("MI100"):
|
||||
@@ -711,6 +726,45 @@ def test_roof_file_validation(binary_handler_profile_rocprof_compute):
|
||||
test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
def test_roof_rocpd(binary_handler_profile_rocprof_compute):
    """
    Profile in roofline-only mode with `--format-rocprof-output rocpd`.

    Checks that pmc_perf.csv and roofline.csv exist, that
    profiling_config.yaml records the rocpd format, and that pmc_perf.csv
    contains a Counter_Name column header.
    """
    if soc == "MI100":
        # roofline is not supported on MI100; skip like the other roofline tests
        pytest.skip("Skipping roofline test for MI100")

    workload_dir = test_utils.get_output_dir()
    options = ["--device", "0", "--roof-only", "--format-rocprof-output", "rocpd"]
    binary_handler_profile_rocprof_compute(config, workload_dir, options, roof=True)

    assert (Path(workload_dir) / "pmc_perf.csv").exists()
    assert (Path(workload_dir) / "roofline.csv").exists()
    assert test_utils.check_file_pattern(
        "format_rocprof_output: rocpd", f"{workload_dir}/profiling_config.yaml"
    )
    assert test_utils.check_file_pattern("Counter_Name", f"{workload_dir}/pmc_perf.csv")

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
def test_roofline_kernel_names_validation_error(binary_handler_profile_rocprof_compute):
    """
    Test validate_parameters() error: --roof-only is required for --kernel-names
    This should trigger console_error("--roof-only is required for --kernel-names")

    The profile run is launched with `--kernel-names` but without
    `--roof-only` and is expected to finish with a non-zero return code.
    """
    # Compare by equality: `soc in ("MI100")` is a substring test against a
    # plain string, not membership in a tuple.
    if soc == "MI100":
        # roofline is not supported on MI100
        # pytest.skip raises, so nothing after it would run
        pytest.skip("Skipping roofline test for MI100")

    options = ["--device", "0", "--kernel-names"]  # missing --roof-only
    workload_dir = test_utils.get_output_dir()

    returncode = binary_handler_profile_rocprof_compute(
        config, workload_dir, options, check_success=False, roof=True
    )

    assert returncode != 0

    test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
def test_roofline_workload_dir_not_set_error():
|
||||
"""
|
||||
@@ -1612,7 +1666,7 @@ def test_comprehensive_error_paths():
|
||||
assert result == 16
|
||||
|
||||
try:
|
||||
build_eval_string("test", None)
|
||||
build_eval_string("test", None, config={})
|
||||
assert False, "Should raise exception for None coll_level"
|
||||
except Exception as e:
|
||||
assert "coll_level can not be None" in str(e)
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
[profiling] pre-processing using rocprofv3 profiler
|
||||
[gen_sysinfo]
|
||||
Incomplete class definition for gfx942. Expecting populated max_mclk but detected None.
|
||||
Incomplete class definition for gfx942. Expecting populated cur_mclk but detected None.
|
||||
Missing specs fields for gfx942
|
||||
starting "run_profiling" and about to start rocprof's workload
|
||||
[profiling] performing profiling using rocprofv3 profiler
|
||||
Rocprofiler-Compute version: 3.2.0
|
||||
Profiler choice: rocprofv3
|
||||
Path: /app/workloads/vcopy/MI300X_A1
|
||||
Target: MI300X_A1
|
||||
Command: sample/vcopy -n 1048576 -b 256 -i 3
|
||||
Kernel Selection: None
|
||||
Dispatch Selection: None
|
||||
Hardware Blocks: []
|
||||
Report Sections: []
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Collecting Performance Counters
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
[Run 1/12][Approximate profiling time left: pending first measurement...]
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/SQ_IFETCH_LEVEL.txt
|
||||
pmc file: SQ_IFETCH_LEVEL.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_w2ranh0p
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/SQ_IFETCH_LEVEL.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/SQ_IFETCH_LEVEL.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:25.713947 140172743004992 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.166549 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:25.714215 140172743004992 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:25.941891 140172743004992 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.335688 140172743004992 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.621474 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.351137 140172743004992 generateRocpd.cpp:580] writing SQL database for process 3554 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:26.351912 140172743004992 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3554_results.db (UUID=00031de8-fbce-7bce-9912-6a8f9645ae7f)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.391902 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.014565 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.392284 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000363 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.393046 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000752 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:26.406145 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012446 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.791204 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 1.385042 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.792304 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001065 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.792315 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.798288 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005969 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.827005 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.028702 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.827022 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.827026 140172743004992 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.827218 140172743004992 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000190 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.827762 140172743004992 simple_timer.cpp:55] SQLite3 generation :: total :: 1.476625 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:27.832297 140172743004992 simple_timer.cpp:55] [rocprofv3] tool finalization :: 1.495785 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/SQ_IFETCH_LEVEL.txt, the time it takes was 0 m 2.7194788455963135 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/SQ_IFETCH_LEVEL.txt is 0 m 3.8476221561431885 sec
|
||||
[Run 2/12][Approximate profiling time left: 38 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_LDS.txt
|
||||
pmc file: SQ_INST_LEVEL_LDS.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_wp1tt6p6
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_LDS.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_LDS.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:29.552773 140238077793088 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.168769 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:29.553044 140238077793088 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:29.736798 140238077793088 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.154960 140238077793088 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.601916 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.170439 140238077793088 generateRocpd.cpp:580] writing SQL database for process 3561 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:30.171201 140238077793088 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3561_results.db (UUID=00031de9-0ace-7ace-9f47-5cc55ac49931)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.211098 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.014766 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.211463 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000347 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.212224 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000751 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:30.224678 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011904 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.620420 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 1.395725 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.621558 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001104 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.621569 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.627417 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005843 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.658664 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.031232 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.658684 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.658688 140238077793088 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.658887 140238077793088 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000198 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.659456 140238077793088 simple_timer.cpp:55] SQLite3 generation :: total :: 1.489016 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:31.664027 140238077793088 simple_timer.cpp:55] [rocprofv3] tool finalization :: 1.508410 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_LDS.txt, the time it takes was 0 m 2.7174251079559326 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_LDS.txt is 0 m 3.833453416824341 sec
|
||||
[Run 3/12][Approximate profiling time left: 34 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_SMEM.txt
|
||||
pmc file: SQ_INST_LEVEL_SMEM.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_dl9hsbp8
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_SMEM.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_SMEM.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:33.415380 140711413728064 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.170888 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:33.415655 140711413728064 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:33.649099 140711413728064 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.063816 140711413728064 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.648161 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.079514 140711413728064 generateRocpd.cpp:580] writing SQL database for process 3568 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:34.080280 140711413728064 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3568_results.db (UUID=00031de9-19e2-79e2-befc-6c52d81fce09)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.120796 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.014830 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.121112 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000299 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.121859 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000738 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:34.134532 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012161 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.536345 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 1.401796 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.537554 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001175 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.537565 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.543474 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005905 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.574172 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.030682 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.574191 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.574194 140711413728064 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.574389 140711413728064 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000193 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.574956 140711413728064 simple_timer.cpp:55] SQLite3 generation :: total :: 1.495442 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:35.579445 140711413728064 simple_timer.cpp:55] [rocprofv3] tool finalization :: 1.514702 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_SMEM.txt, the time it takes was 0 m 2.7634034156799316 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_SMEM.txt is 0 m 3.9122095108032227 sec
|
||||
[Run 4/12][Approximate profiling time left: 30 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_VMEM.txt
|
||||
pmc file: SQ_INST_LEVEL_VMEM.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_20mmgyh1
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_VMEM.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_VMEM.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:36.918470 140619354510144 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.104106 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:36.918753 140619354510144 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.102945 140619354510144 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.299363 140619354510144 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.380610 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.314710 140619354510144 generateRocpd.cpp:580] writing SQL database for process 3575 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:37.315474 140619354510144 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3575_results.db (UUID=00031de9-27d0-77d0-99c1-6bed5f3fcae1)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.350755 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010948 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.351102 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000328 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.351614 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000503 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:37.364570 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012404 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.033478 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.668889 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.034553 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001043 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.034565 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.040348 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005778 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.053614 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.013255 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.053627 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.053630 140619354510144 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.053815 140619354510144 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000183 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.054183 140619354510144 simple_timer.cpp:55] SQLite3 generation :: total :: 0.739472 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:38.057863 140619354510144 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.757852 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_VMEM.txt, the time it takes was 0 m 1.6674315929412842 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/SQ_INST_LEVEL_VMEM.txt is 0 m 2.43190336227417 sec
|
||||
[Run 5/12][Approximate profiling time left: 24 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/SQ_LEVEL_WAVES.txt
|
||||
pmc file: SQ_LEVEL_WAVES.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_4_rrldsg
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/SQ_LEVEL_WAVES.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/SQ_LEVEL_WAVES.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:39.561464 139705878659904 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.135631 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:39.561722 139705878659904 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:39.744744 139705878659904 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.044125 139705878659904 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.482403 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.059697 139705878659904 generateRocpd.cpp:580] writing SQL database for process 3582 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:40.060465 139705878659904 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3582_results.db (UUID=00031de9-3205-7205-869c-ec5f02149d7e)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.097027 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.012617 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.097312 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000266 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.097962 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000641 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:40.110168 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011707 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.116521 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 1.006337 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.117628 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001074 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.117640 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.123520 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005876 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.143166 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.019632 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.143182 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.143186 139705878659904 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.143382 139705878659904 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000194 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.143922 139705878659904 simple_timer.cpp:55] SQLite3 generation :: total :: 1.084224 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:41.148481 139705878659904 simple_timer.cpp:55] [rocprofv3] tool finalization :: 1.103637 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/SQ_LEVEL_WAVES.txt, the time it takes was 0 m 2.1514015197753906 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/SQ_LEVEL_WAVES.txt is 0 m 3.113880157470703 sec
|
||||
[Run 6/12][Approximate profiling time left: 20 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_0.txt
|
||||
pmc file: pmc_perf_0.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_b7cf79sp
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_0.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_0.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:42.470522 139873853399872 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.104173 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.470776 139873853399872 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.655197 139873853399872 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.849277 139873853399872 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.378502 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.865054 139873853399872 generateRocpd.cpp:580] writing SQL database for process 3589 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:42.865845 139873853399872 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3589_results.db (UUID=00031de9-3d82-7d82-96dc-2dcf0a0631dc)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.903272 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.011102 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.903557 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000267 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.904080 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000512 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:42.916637 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012050 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.580464 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.663812 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.581638 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001143 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.581649 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.587455 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005801 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.599109 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011640 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.599125 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.599128 139873853399872 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.599317 139873853399872 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000186 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.599679 139873853399872 simple_timer.cpp:55] SQLite3 generation :: total :: 0.734625 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:43.603395 139873853399872 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.753253 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_0.txt, the time it takes was 0 m 1.6624979972839355 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_0.txt is 0 m 2.4318082332611084 sec
|
||||
[Run 7/12][Approximate profiling time left: 16 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_1.txt
|
||||
pmc file: pmc_perf_1.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_i5zi8g1c
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_1.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_1.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:44.921375 140329810051904 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.104611 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:44.921646 140329810051904 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.127446 140329810051904 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.322745 140329810051904 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.401100 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.338315 140329810051904 generateRocpd.cpp:580] writing SQL database for process 3596 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:45.339069 140329810051904 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3596_results.db (UUID=00031de9-4713-7713-aca4-263b5bf68657)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.374491 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010790 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.374878 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000369 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.375436 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000547 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:45.388444 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012406 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.060238 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.671777 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.061333 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001063 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.061344 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.067223 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005874 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.078638 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011402 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.078650 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.078653 140329810051904 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.078838 140329810051904 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000182 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.079192 140329810051904 simple_timer.cpp:55] SQLite3 generation :: total :: 0.740877 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:46.082891 140329810051904 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.759142 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_1.txt, the time it takes was 0 m 1.6950774192810059 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_1.txt is 0 m 2.480154514312744 sec
|
||||
[Run 8/12][Approximate profiling time left: 12 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_2.txt
|
||||
pmc file: pmc_perf_2.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_tv_uqwoh
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_2.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_2.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:47.392035 140613043073856 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.103348 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.392296 140613043073856 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.575114 140613043073856 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.769792 140613043073856 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.377496 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.785435 140613043073856 generateRocpd.cpp:580] writing SQL database for process 3603 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:47.786191 140613043073856 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3603_results.db (UUID=00031de9-50bd-70bd-ae26-e3dd40c28d4c)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.820358 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010878 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.820688 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000313 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.821208 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000510 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:47.833615 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011855 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.485409 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.651779 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.486280 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.000839 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.486291 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.492148 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005851 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.503742 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011578 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.503758 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.503762 140613043073856 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.503952 140613043073856 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000188 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.504347 140613043073856 simple_timer.cpp:55] SQLite3 generation :: total :: 0.718912 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:48.508036 140613043073856 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.737530 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_2.txt, the time it takes was 0 m 1.6442956924438477 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_2.txt is 0 m 2.423243761062622 sec
|
||||
[Run 9/12][Approximate profiling time left: 9 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_3.txt
|
||||
pmc file: pmc_perf_3.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_d6edxy7z
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_3.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_3.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:49.825653 139970175718208 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.103156 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:49.825903 139970175718208 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.011954 139970175718208 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.207081 139970175718208 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.381179 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.222580 139970175718208 generateRocpd.cpp:580] writing SQL database for process 3610 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:50.223351 139970175718208 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3610_results.db (UUID=00031de9-5a3e-7a3e-8845-b724df0c5328)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.277220 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.030272 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.277672 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000428 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.278189 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000509 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.290370 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011683 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.954577 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.664191 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.955670 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.001063 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.955682 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.961629 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005942 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.973364 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011722 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.973378 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.973381 139970175718208 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.973569 139970175718208 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000187 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.973960 139970175718208 simple_timer.cpp:55] SQLite3 generation :: total :: 0.751380 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:50.977619 139970175718208 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.769843 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_3.txt, the time it takes was 0 m 1.6794917583465576 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_3.txt is 0 m 2.4662797451019287 sec
|
||||
[Run 10/12][Approximate profiling time left: 5 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_4.txt
|
||||
pmc file: pmc_perf_4.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_7pmy1aeg
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_4.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_4.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:52.291220 140318446199616 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.102577 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.291470 140318446199616 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.492822 140318446199616 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.684795 140318446199616 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.393325 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.700354 140318446199616 generateRocpd.cpp:580] writing SQL database for process 3617 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:52.701113 140318446199616 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3617_results.db (UUID=00031de9-63e2-73e2-b1de-4720cbe72cf4)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.757125 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010891 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.757435 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000290 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.757948 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000505 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:52.771027 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.012490 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.432385 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.661342 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.433363 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.000947 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.433374 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.439104 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005725 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.450240 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011123 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.450253 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.450257 140318446199616 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.450440 140318446199616 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000181 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.450806 140318446199616 simple_timer.cpp:55] SQLite3 generation :: total :: 0.750452 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:53.454365 140318446199616 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.768517 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_4.txt, the time it takes was 0 m 1.689640760421753 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_4.txt is 0 m 2.470271110534668 sec
|
||||
[Run 11/12][Approximate profiling time left: 2 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_5.txt
|
||||
pmc file: pmc_perf_5.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_i1z59usc
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_5.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_5.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:54.741603 140680114004800 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.103912 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:54.741856 140680114004800 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:54.934468 140680114004800 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.127192 140680114004800 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.385336 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.142986 140680114004800 generateRocpd.cpp:580] writing SQL database for process 3624 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:55.143809 140680114004800 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3624_results.db (UUID=00031de9-6d70-7d70-801f-957d2f8ca86a)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.177382 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010768 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.177741 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000341 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.178266 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000516 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.190617 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011831 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.867254 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.676620 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.868249 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.000964 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.868260 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.874104 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005839 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.885401 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.011285 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.885413 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.885417 140680114004800 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.885599 140680114004800 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000180 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.885979 140680114004800 simple_timer.cpp:55] SQLite3 generation :: total :: 0.742993 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:55.889649 140680114004800 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.761385 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_5.txt, the time it takes was 0 m 1.6850097179412842 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_5.txt is 0 m 2.4484713077545166 sec
|
||||
[Run 12/12][Approximate profiling time left: 0 seconds]...
|
||||
[profiling] Current input file: /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_6.txt
|
||||
pmc file: pmc_perf_6.txt
|
||||
Adding env var for counter definitions: ROCPROFILER_METRICS_PATH=/tmp/rocprof_counter_defs_8km5kicc
|
||||
rocprof command: ['rocprofv3', '-A', 'absolute', '-i', '/app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_6.txt', '-d', '/app/workloads/vcopy/MI300X_A1/out', '--kernel-trace', '--output-format', 'rocpd', '--', 'sample/vcopy', '-n', '1048576', '-b', '256', '-i', '3']
|
||||
[subprocess] Running: rocprofv3 -A absolute -i /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_6.txt -d /app/workloads/vcopy/MI300X_A1/out --kernel-trace --output-format rocpd -- sample/vcopy -n 1048576 -b 256 -i 3
|
||||
|-> [rocprofv3] [0;33mW20250710 21:33:57.212530 139907727749952 simple_timer.cpp:55] [rocprofv3] tool initialization :: 0.103224 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.212781 139907727749952 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.411536 139907727749952 tool.cpp:2150] HSA version 8.18.0 initialized (instance=0)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.602202 139907727749952 simple_timer.cpp:55] [rocprofv3] 'sample/vcopy -n 1048576 -b 256 -i 3' :: 0.389421 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.617817 139907727749952 generateRocpd.cpp:580] writing SQL database for process 3631 on node 3224294684
|
||||
|-> [rocprofv3] [m[0;31mE20250710 21:33:57.618592 139907727749952 generateRocpd.cpp:603] Opened result file: /app/workloads/vcopy/MI300X_A1/out/pmc_1/7f6eaef84eaf/3631_results.db (UUID=00031de9-771a-771a-95de-02702fbe6904)
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.682260 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_string :: 0.010946 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.682562 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_node :: 0.000283 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.683093 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_process :: 0.000521 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:57.695274 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_agent :: 0.011651 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.358124 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_info_pmc :: 0.662833 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.358983 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd kernel info :: 0.000831 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.358994 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_region :: 0.000002 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.364913 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_kernel_dispatch :: 0.005914 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.372546 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_pmc_event :: 0.007617 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.372562 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_copy :: 0.000000 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.372566 139907727749952 simple_timer.cpp:55] SQLite3 generation :: rocpd_memory_allocate :: 0.000001 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.372750 139907727749952 simple_timer.cpp:55] SQLite3 generation :: SQL indexing :: 0.000182 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.373145 139907727749952 simple_timer.cpp:55] SQLite3 generation :: total :: 0.755328 sec
|
||||
|-> [rocprofv3] [m[0;33mW20250710 21:33:58.376827 139907727749952 simple_timer.cpp:55] [rocprofv3] tool finalization :: 0.773574 sec
|
||||
|-> [rocprofv3] [mvcopy testing on GCD 0
|
||||
|-> [rocprofv3] Finished allocating vectors on the CPU
|
||||
Finishing subprocess of fname /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_6.txt, the time it takes was 0 m 1.6893463134765625 sec
|
||||
The time of run_prof of /app/workloads/vcopy/MI300X_A1/perfmon/pmc_perf_6.txt is 0 m 2.4789364337921143 sec
|
||||
finished "run_profiling" and finished rocprof's workload, time taken was 0 m 34.34300780296326 sec
|
||||
[profiling] performing post-processing using rocprofv3 profiler
|
||||
Created file: /app/workloads/vcopy/MI300X_A1/pmc_perf.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_6.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_SQ_IFETCH_LEVEL.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_2.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_SQ_INST_LEVEL_VMEM.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_SQ_INST_LEVEL_LDS.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_SQ_INST_LEVEL_SMEM.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_5.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_3.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_0.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_4.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_pmc_perf_1.csv
|
||||
Deleted file: /app/workloads/vcopy/MI300X_A1/results_SQ_LEVEL_WAVES.csv
|
||||
time taken for "post_processing" was 0 seconds
|
||||
[profiling] perform SoC post processing for gfx942
|
||||
[roofline] Skipping roofline
|
||||
+5
File diff soppresso perché una o più righe sono troppo lunghe
+3594
File diff soppresso perché troppo grande
Carica Diff
+5
File diff soppresso perché una o più righe sono troppo lunghe
+3594
File diff soppresso perché troppo grande
Carica Diff
+5
File diff soppresso perché una o più righe sono troppo lunghe
+3594
File diff soppresso perché troppo grande
Carica Diff
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_INST_LEVEL_VMEM SQ_INST_LEVEL_VMEM_ACCUM SQ_LDS_UNALIGNED_STALL SQ_INSTS_VALU_MFMA_F32 SQ_INSTS SQ_WAIT_ANY SQC_ICACHE_MISSES SQ_INSTS_VALU_MFMA_BF16 TA_TOTAL_WAVEFRONTS_sum TA_BUFFER_READ_WAVEFRONTS_sum TCP_TCC_RW_READ_REQ_sum TCP_UTCL1_TRANSLATION_MISS_sum TCP_TCC_CC_READ_REQ_sum TCP_UTCL1_PERMISSION_MISS_sum TCC_EA0_RDREQ_LEVEL_sum TCC_ALL_TC_OP_INV_EVICT_sum TCC_EA0_WRREQ_64B_sum TCC_MISS_sum CPC_ME1_DC0_SPI_BUSY CPC_CPC_STAT_BUSY
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
rocprofiler-sdk:
|
||||
counters-schema-version: 1
|
||||
counters:
|
||||
- name: SQ_INST_LEVEL_VMEM_ACCUM
|
||||
description: SQ_INST_LEVEL_VMEM accumulation
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
expression: accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_LEVEL_WAVES SQ_LEVEL_WAVES_ACCUM SQ_INSTS_VMEM SQC_TC_DATA_READ_REQ SQ_INSTS_VALU_MFMA_F64 SQ_INSTS_VALU_MFMA_F16 SQ_ACTIVE_INST_FLAT SQC_DCACHE_HITS TA_BUFFER_ATOMIC_WAVEFRONTS_sum TA_ADDR_STALLED_BY_TC_CYCLES_sum TD_SPI_STALL_sum TCP_TCC_UC_ATOMIC_REQ_sum TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum TCP_TOTAL_READ_sum TCP_TOTAL_CACHE_ACCESSES_sum TCC_WRITE[99] TCC_REQ[28] TCC_REQ[31] TCC_WRITE[100] TCC_WRITE[112] TCC_WRITE[75] TCC_WRITE[114] TCC_EA0_WRREQ_DRAM_sum TCC_REQ[9] TCC_WRITE[39] TCC_REQ[122] TCC_WRITE[76] TCC_NORMAL_EVICT_sum TCC_WRITE[66] TCC_REQ[90] TCC_WRITE[61] TCC_REQ[45] TCC_WRITE[74] TCC_WRITE[108] TCC_REQ[76] TCC_WRITE[85] TCC_WRITE[111] TCC_REQ[46] TCC_WRITE[33] TCC_WRITE[0] TCC_WRITE[113] TCC_WRITE[86] TCC_REQ[37] TCC_REQ[77] TCC_REQ[13] TCC_WRITE[55] TCC_WRITE[101] TCC_REQ[27] TCC_REQ[89] TCC_WRITE[126] TCC_WRITE[34] TCC_REQ[112] TCC_WRITE[105] TCC_WRITE[2] TCC_REQ[39] TCC_WRITE[31] TCC_REQ[18] TCC_REQ[120] TCC_WRITE[15] TCC_WRITE[77] TCC_WRITE[68] TCC_REQ[51] TCC_WRITE[115] TCC_WRITE[63] TCC_REQ[23] TCC_REQ[32] TCC_REQ[85] TCC_REQ[95] TCC_WRITE[8] TCC_WRITE[98] TCC_WRITE[53] TCC_WRITE[51] TCC_WRITE[67] TCC_WRITE[1] TCC_REQ[110] TCC_REQ[65] TCC_WRITE[7] TCC_REQ[107] TCC_WRITE[89] TCC_WRITE[123] TCC_WRITE[23] TCC_REQ[116] TCC_WRITE[110] TCC_REQ[36] TCC_WRITE[107] TCC_REQ[74] TCC_REQ[12] TCC_WRITE[124] TCC_REQ[71] TCC_WRITE[4] TCC_WRITE[71] TCC_WRITE[103] TCC_WRITE[93] TCC_REQ[6] TCC_WRITE[6] TCC_REQ[96] TCC_REQ[99] TCC_WRITE[37] TCC_WRITE[22] TCC_WRITE[12] TCC_WRITE[122] TCC_REQ[127] TCC_REQ[68] TCC_WRITE[82] TCC_REQ[30] TCC_WRITE[45] TCC_REQ[38] TCC_WRITE[11] TCC_REQ[35] TCC_REQ[126] TCC_REQ[69] TCC_REQ[98] TCC_WRITE[3] TCC_WRITE[38] TCC_REQ[15] TCC_WRITE[49] TCC_WRITE[72] TCC_WRITE[116] TCC_REQ[59] TCC_WRITE[47] TCC_REQ[40] TCC_REQ[78] TCC_WRITE[70] TCC_WRITE[88] TCC_WRITE[17] TCC_WRITE[48] TCC_REQ[123] TCC_WRITE[102] TCC_REQ[53] TCC_REQ[19] TCC_REQ[108] TCC_REQ[11] TCC_REQ[64] TCC_WRITE[95] TCC_REQ[58] TCC_REQ[106] TCC_WRITE[81] 
TCC_WRITE[32] TCC_REQ[117] TCC_REQ[24] TCC_REQ[82] TCC_WRITE[65] TCC_WRITE[24] TCC_WRITE[19] TCC_WRITE[60] TCC_WRITE[58] TCC_REQ[66] TCC_WRITE[30] TCC_WRITE[118] TCC_REQ[80] TCC_REQ[94] TCC_REQ[20] TCC_REQ[102] TCC_WRITE[5] TCC_REQ[33] TCC_REQ[3] TCC_WRITE[25] TCC_REQ[118] TCC_REQ[52] TCC_REQ[17] TCC_REQ[8] TCC_WRITE[56] TCC_REQ[125] TCC_WRITE[97] TCC_WRITE[90] TCC_WRITE[20] TCC_WRITE[64] TCC_REQ[54] TCC_REQ[4] TCC_WRITE[84] TCC_REQ[29] TCC_REQ[92] TCC_REQ[101] TCC_WRITE[13] TCC_WRITE[96] TCC_REQ[7] TCC_WRITE[80] TCC_WRITE[46] TCC_REQ[62] TCC_WRITE[83] TCC_REQ[49] TCC_REQ[88] TCC_WRITE[104] TCC_REQ[84] TCC_REQ[41] TCC_REQ[44] TCC_REQ[60] TCC_REQ[119] TCC_REQ[0] TCC_REQ[55] TCC_REQ[26] TCC_REQ[81] TCC_WRITE[59] TCC_WRITE[42] TCC_WRITE[119] TCC_REQ[109] TCC_WRITE[50] TCC_WRITE[121] TCC_WRITE[18] TCC_REQ[70] TCC_WRITE[125] TCC_REQ[48] TCC_REQ[16] TCC_REQ[72] TCC_REQ[91] TCC_REQ[22] TCC_WRITE[40] TCC_REQ[75] TCC_WRITE[79] TCC_REQ[93] TCC_WRITE[78] TCC_WRITE[92] TCC_REQ[25] TCC_WRITE[9] TCC_REQ[43] TCC_WRITE[28] TCC_REQ[124] TCC_WRITE[52] TCC_WRITE[27] TCC_WRITE[87] TCC_WRITE[29] TCC_WRITE[73] TCC_WRITE[44] TCC_WRITE[41] TCC_REQ[73] TCC_REQ[111] TCC_REQ[87] TCC_REQ[86] TCC_REQ[5] TCC_WRITE[120] TCC_REQ[114] TCC_REQ[1] TCC_REQ[67] TCC_REQ[42] TCC_WRITE[10] TCC_REQ[2] TCC_WRITE[62] TCC_WRITE[106] TCC_REQ[97] TCC_REQ[57] TCC_WRITE[54] TCC_WRITE[117] TCC_WRITE[109] TCC_WRITE[43] TCC_REQ[115] TCC_REQ[100] TCC_REQ[121] TCC_WRITE[127] TCC_REQ[10] TCC_REQ[61] TCC_REQ[50] TCC_REQ[56] TCC_WRITE[94] TCC_REQ[103] TCC_REQ[21] TCC_WRITE[36] TCC_REQ[47] TCC_WRITE[26] TCC_REQ[79] TCC_WRITE[14] TCC_WRITE[91] TCC_WRITE[35] TCC_WRITE[21] TCC_WRITE[69] TCC_REQ[34] TCC_REQ[14] TCC_WRITE[57] TCC_REQ[105] TCC_REQ[83] TCC_REQ[113] TCC_REQ[63] TCC_REQ[104] TCC_WRITE[16] CPC_CPC_STAT_IDLE CPC_UTCL1_STALL_ON_TRANSLATION CPF_CPF_TCIU_IDLE
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+1802
File diff soppresso perché troppo grande
Carica Diff
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_ACTIVE_INST_VMEM SQC_DCACHE_REQ_READ_1 SQ_ACTIVE_INST_ANY SQC_TC_DATA_ATOMIC_REQ SQ_INSTS_LDS SQ_LDS_IDX_ACTIVE SQ_LDS_MEM_VIOLATIONS SQ_INSTS_SENDMSG TA_BUFFER_COALESCED_READ_CYCLES_sum TA_FLAT_WRITE_WAVEFRONTS_sum TCP_GATE_EN2_sum TCP_TCC_WRITE_REQ_sum TCP_UTCL1_REQUEST_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCC_TOO_MANY_EA_WRREQS_STALL_sum TCC_HIT_sum TCC_NORMAL_WRITEBACK_sum TCC_UC_REQ_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_INSTS_VALU_INT64 SQ_INSTS_SALU SQ_INSTS_VALU_MFMA_F8 SQ_INSTS_VALU_MUL_F64 SQ_WAVES SQ_WAVES_RESTORED SQC_DCACHE_MISSES_DUPLICATE SQ_INSTS_VSKIPPED TA_FLAT_READ_WAVEFRONTS_sum TA_BUFFER_WAVEFRONTS_sum TCP_TOTAL_WRITEBACK_INVALIDATES_sum TCP_TOTAL_ACCESSES_sum TCP_TCR_TCP_STALL_CYCLES_sum TCP_TCC_NC_WRITE_REQ_sum TCC_EA0_RDREQ_sum TCC_WRITEBACK_sum TCC_READ_sum TCC_EA0_RD_UNCACHED_32B_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQC_TC_INST_REQ SQ_ACTIVE_INST_LDS SQC_DCACHE_MISSES SQ_BUSY_CU_CYCLES SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MFMA_I8 SQ_LDS_ADDR_CONFLICT SQ_IFETCH TA_DATA_STALLED_BY_TC_CYCLES_sum TA_TA_BUSY_sum TCP_TCC_RW_ATOMIC_REQ_sum TCP_GATE_EN1_sum TCP_TA_TCP_STATE_READ_sum TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum TCC_BUSY_sum TCC_BUBBLE_sum TCC_WRITE_sum TCC_STREAMING_REQ_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQC_DCACHE_REQ_READ_8 SQC_ICACHE_HITS SQ_ACTIVE_INST_VALU SQ_INSTS_VALU_FMA_F16 TCP_TCC_CC_ATOMIC_REQ_sum TCP_TCC_CC_WRITE_REQ_sum TCP_TOTAL_WRITE_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_WR_UNCACHED_32B_sum TCC_NC_REQ_sum TCC_EA0_WRREQ_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_WAIT_INST_ANY SQ_INSTS_GDS SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MFMA_MOPS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_INT32 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ TCC_EA0_ATOMIC_sum TCC_CC_REQ_sum TCC_ATOMIC_sum TCC_PROBE_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_INSTS_MFMA SQ_INSTS_VALU_TRANS_F16 SQ_BUSY_CYCLES SQC_DCACHE_ATOMIC SQ_INSTS_VALU_CVT SQC_TC_DATA_WRITE_REQ SQ_INSTS_VALU_FMA_F32 SQ_INSTS_BRANCH TCC_EA0_ATOMIC_LEVEL_sum TCC_EA0_RDREQ_DRAM_sum TCC_REQ_sum TCC_ALL_TC_OP_WB_WRITEBACK_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8 SQ_LDS_ATOMIC_RETURN SQ_INSTS_VALU_MFMA_MOPS_I8 TCC_RW_REQ_sum TCC_EA0_WRREQ_LEVEL_sum
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
File diff soppresso perché troppo grande
Carica Diff
+40
@@ -0,0 +1,40 @@
|
||||
config_dir: /app/src/rocprof_compute_soc/analysis_configs
|
||||
device: -1
|
||||
dispatch: null
|
||||
filter_blocks: {}
|
||||
format_rocprof_output: rocpd
|
||||
hip_trace: false
|
||||
join_type: grid
|
||||
kernel: null
|
||||
kernel_names: false
|
||||
kokkos_trace: false
|
||||
list_metrics: null
|
||||
loglevel: 10
|
||||
mem_level: ALL
|
||||
mode: profile
|
||||
name: vcopy
|
||||
no_roof: true
|
||||
path: /app/workloads/vcopy/MI300X_A1
|
||||
pc_sampling_interval: 1048576
|
||||
pc_sampling_method: stochastic
|
||||
quiet: false
|
||||
remaining:
|
||||
- --
|
||||
- sample/vcopy
|
||||
- -n
|
||||
- '1048576'
|
||||
- -b
|
||||
- '256'
|
||||
- -i
|
||||
- '3'
|
||||
retain_rocpd_output: false
|
||||
rocprofiler_sdk_library_path: /opt/rocm/lib/librocprofiler-sdk.so
|
||||
roof_only: false
|
||||
roofline_data_type:
|
||||
- FP32
|
||||
sort: kernels
|
||||
spatial_multiplexing: null
|
||||
specs: false
|
||||
subpath: gpu
|
||||
target: null
|
||||
verbose: 1
|
||||
@@ -0,0 +1,2 @@
|
||||
workload_name,command,ip_blocks,timestamp,version,hostname,cpu_model,sbios,linux_distro,linux_kernel_version,amd_gpu_kernel_version,cpu_memory,gpu_memory,rocm_version,vbios,compute_partition,memory_partition,gpu_series,gpu_model,gpu_arch,gpu_chip_id,gpu_l1,gpu_l2,cu_per_gpu,simd_per_cu,se_per_gpu,wave_size,workgroup_max_size,max_waves_per_cu,max_sclk,max_mclk,cur_sclk,cur_mclk,total_l2_chan,lds_banks_per_cu,sqc_per_gpu,pipes_per_gpu,num_xcd,num_hbm_channels
|
||||
vcopy,sample/vcopy -n 1048576 -b 256 -i 3,SQ|LDS|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF,Thu Jul 10 21:33:20 2025 (UTC),3,7f6eaef84eaf,AMD EPYC 9354 32-Core Processor,"American Megatrends International, LLC.1.8",Ubuntu 22.04.5 LTS,5.15.0-70-generic,,1584988420,,7.0.0,113-M3000100-103,SPX,NPS1,MI300,MI300X_A1,gfx942,29857,32,4096,304,4,32,64,1024,32,2100,,2100,,128,32,160,4,8,128
|
||||
|
Fai riferimento in un nuovo problema
Block a user