From 7522233da84002041ac8eb5ca63e12aca65a7457 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:00:56 -0500 Subject: [PATCH 01/28] Fix VGPR issue (#139) Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: acb972954091d14bf91797383ebf0017810a4168] --- projects/rocprofiler-compute/src/utils/perfagg.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index e965b784c6..651bcb86d8 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): "wgr": [col for col in df.columns if "wgr" in col], "lds": [col for col in df.columns if "lds" in col], "scr": [col for col in df.columns if "scr" in col], - "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], - "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], "spgr": [col for col in df.columns if "sgpr" in col], } + # Check for vgpr counter in ROCm < 5.3 + if "vgpr" in df.columns: + duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col] + # Check for vgpr counter in ROCm >= 5.3 + else: + duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): + print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( From 2b360f4257e8f0701990104a3c9c67dde5cb9e57 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:01:37 -0500 Subject: [PATCH 02/28] Omniperf rocomni changes Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 5f6c776170f01bd62c4eac16a0ec4257583c32c3] --- .../src/omniperf_analyze/omniperf_analyze.py | 38 +++++++++++------ .../src/omniperf_analyze/utils/parser.py | 41 +++++++++++++++++-- 
.../src/omniperf_analyze/utils/schema.py | 2 + 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py b/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py index 58991e8b37..c15181c6f6 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py @@ -47,36 +47,50 @@ from omniperf_analyze.utils import parser, file_io from omniperf_analyze.utils.gui_components.roofline import get_roofline -def initialize_run(args, normalization_filter=None): - import pandas as pd - from collections import OrderedDict +################################################ +# Helper Functions +################################################ +def generate_configs(config_dir, list_kernels, filter_metrics): from omniperf_analyze.utils import schema - from tabulate import tabulate - # Fixme: cur_root.parent.joinpath('soc_params') - soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params") - soc_spec_df = file_io.load_soc_params(soc_params_dir) - - single_panel_config = file_io.is_single_panel_config(Path(args.config_dir)) + single_panel_config = file_io.is_single_panel_config(Path(config_dir)) global archConfigs archConfigs = {} for arch in file_io.supported_arch.keys(): ac = schema.ArchConfig() - if args.list_kernels: + if list_kernels: ac.panel_configs = file_io.top_stats_build_in_config else: arch_panel_config = ( - args.config_dir if single_panel_config else args.config_dir.joinpath(arch) + config_dir if single_panel_config else config_dir.joinpath(arch) ) ac.panel_configs = file_io.load_panel_configs(arch_panel_config) # TODO: filter_metrics should/might be one per arch # print(ac) - parser.build_dfs(ac, args.filter_metrics) + parser.build_dfs(ac, filter_metrics) archConfigs[arch] = ac + return archConfigs # Note: This return comes in handy for rocScope which borrows 
generate_configs() in its rocomni plugin + + +################################################ +# Core Functions +################################################ +def initialize_run(args, normalization_filter=None): + import pandas as pd + from collections import OrderedDict + from tabulate import tabulate + from omniperf_analyze.utils import schema + + # Fixme: cur_root.parent.joinpath('soc_params') + soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params") + soc_spec_df = file_io.load_soc_params(soc_params_dir) + + generate_configs(args.config_dir, args.list_kernels, args.filter_metrics) + if args.list_metrics in file_io.supported_arch.keys(): print( tabulate( diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py index d11cbbbfcf..5fb03c39a1 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py @@ -320,6 +320,26 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() +def gen_counter_list(formula): + function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + + counters = [] + if not isinstance(formula,str): + return counters + try: + tree = ast.parse( + formula + .replace("$normUnit", "SQ_WAVES") + .replace("$denom", "SQ_WAVES") + .replace("$","") + ) + for node in ast.walk(tree): + if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + counters.append(node.id.rstrip("_sum")) + except: + pass + return counters + def build_dfs(archConfigs, filter_metrics): """ @@ -338,6 +358,7 @@ def build_dfs(archConfigs, filter_metrics): d = {} metric_list = {} dfs_type = {} + metric_counters = {} for panel_id, panel in 
archConfigs.panel_configs.items(): for data_source in panel["data source"]: for type, data_cofig in data_source.items(): @@ -362,6 +383,7 @@ def build_dfs(archConfigs, filter_metrics): ) metric_idx = data_source_idx + "." + str(i) values = [] + eqn_content = [] if ( (not filter_metrics) @@ -378,6 +400,7 @@ def build_dfs(archConfigs, filter_metrics): for k, v in entries.items(): if k != "tips" and k != "coll_level" and k != "alias": values.append(v) + eqn_content.append(v) if "alias" in entries.keys(): values.append(entries["alias"]) @@ -396,6 +419,15 @@ def build_dfs(archConfigs, filter_metrics): # collect metric_list metric_list[metric_idx] = key.replace(" ", "_") + # generate mapping of counters and metrics + filter = {} + for formula in eqn_content: + if formula is not None and formula != "None": + for k in gen_counter_list(formula): + filter[k] = None + if len(filter) > 0: + metric_counters[key] = list(filter) + i += 1 df.set_index("Index", inplace=True) @@ -431,6 +463,7 @@ def build_dfs(archConfigs, filter_metrics): setattr(archConfigs, "dfs", d) setattr(archConfigs, "metric_list", metric_list) setattr(archConfigs, "dfs_type", dfs_type) + setattr(archConfigs, "metric_counters", metric_counters) def build_metric_value_string(dfs, dfs_type, normal_unit): @@ -469,7 +502,8 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" - if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + rocscope_run = sys_info.ip_blocks == "rocscope" + if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) @@ -711,12 +745,13 @@ def load_kernel_top(workload, dir): workload.dfs.update(tmp) -def load_table_data(workload, dir, is_gui, debug, verbose): +def load_table_data(workload, dir, is_gui, 
debug, verbose, skipKernelTop=False): """ Load data for all "raw_csv_table". Calculate mertric value for all "metric_table". """ - load_kernel_top(workload, dir) + if not skipKernelTop: + load_kernel_top(workload, dir) eval_metric( workload.dfs, diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py index bcfc0bff5d..6e147fcae7 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py @@ -52,6 +52,8 @@ class ArchConfig: # [Index: Metric name] pairs metric_list: Dict[str, str] = field(default_factory=dict) + # [Metric name: Counters] pairs + metric_counters: Dict[str, list] = field(default_factory=dict) @dataclass class Workload: From 7e1a29299d6faf2d34a2f017405302f09f071cfa Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:04:32 -0500 Subject: [PATCH 03/28] Comply to Python formatting Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 79eecb445e4cc4fdc02bdf20fe638bb9c10f755d] --- .../src/omniperf_analyze/omniperf_analyze.py | 2 +- .../src/omniperf_analyze/utils/parser.py | 41 +++++++++++++++---- .../src/omniperf_analyze/utils/schema.py | 1 + .../rocprofiler-compute/src/utils/perfagg.py | 2 +- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py b/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py index c15181c6f6..6415ed285c 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/omniperf_analyze.py @@ -73,7 +73,7 @@ def generate_configs(config_dir, list_kernels, filter_metrics): archConfigs[arch] = ac - return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin + return archConfigs # Note: This return comes in 
handy for rocScope which borrows generate_configs() in its rocomni plugin ################################################ diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py index 5fb03c39a1..b6573566bf 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py @@ -320,26 +320,47 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() + def gen_counter_list(formula): - function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + function_filter = { + "MIN": None, + "MAX": None, + "AVG": None, + "ROUND": None, + "TO_INT": None, + "GB": None, + "STD": None, + "GFLOP": None, + "GOP": None, + "OP": None, + "CU": None, + "NC": None, + "UC": None, + "CC": None, + "RW": None, + "GIOP": None, + } counters = [] - if not isinstance(formula,str): + if not isinstance(formula, str): return counters try: tree = ast.parse( - formula - .replace("$normUnit", "SQ_WAVES") + formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") - .replace("$","") + .replace("$", "") ) for node in ast.walk(tree): - if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + if ( + isinstance(node, ast.Name) + and node.id.rstrip("_sum").isupper() + and node.id not in function_filter + ): counters.append(node.id.rstrip("_sum")) except: pass return counters - + def build_dfs(archConfigs, filter_metrics): """ @@ -503,7 +524,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" rocscope_run = sys_info.ip_blocks == "rocscope" - if not 
rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + if ( + not rocscope_run + and not roof_only_run + and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any() + ): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py index 6e147fcae7..f9b59868f5 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/schema.py @@ -55,6 +55,7 @@ class ArchConfig: # [Metric name: Counters] pairs metric_counters: Dict[str, list] = field(default_factory=dict) + @dataclass class Workload: sys_info: pd.DataFrame = None diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 651bcb86d8..109fdecda1 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -135,7 +135,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Check for vgpr counter in ROCm >= 5.3 else: duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] - duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): print("Key is ", key) _df = df[cols] From 493d1d2628c983866e887bd2670482b8fc012c22 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Wed, 21 Jun 2023 11:06:03 -0500 Subject: [PATCH 04/28] Add subsection title to System Speed-of-Light Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 049ba12f6994cbf617e69980d1a2f5b897e306a7] --- .../configs/gfx906/0200_system-speed-of-light.yaml | 1 + .../configs/gfx908/0200_system-speed-of-light.yaml | 1 + 
.../configs/gfx90a/0200_system-speed-of-light.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml index 74de040b27..986b2f0aec 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml index 74de040b27..986b2f0aec 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml index f10d7630f0..20721ee1f6 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value From 8f287bb7f90d0b759a58bab04fa0c73dccfa7954 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 26 Jun 2023 15:30:38 -0500 Subject: [PATCH 05/28] Extend filtering 
into timestamps.csv (#80) Signed-off-by: coleramos425 [ROCm/rocprofiler-compute commit: a89cb96b69ca9969bdb182d6d21f214494ee1a98] --- projects/rocprofiler-compute/src/omniperf | 34 ++----------------- .../rocprofiler-compute/src/utils/perfagg.py | 9 +++++ 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf b/projects/rocprofiler-compute/src/omniperf index 3b38e419b1..e611547d4d 100755 --- a/projects/rocprofiler-compute/src/omniperf +++ b/projects/rocprofiler-compute/src/omniperf @@ -439,23 +439,7 @@ def characterize_app(args, VER): else: run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose) - - - # run again with timestamps - success, output = capture_subprocess_output( - [ - rocprof_cmd, - # "-i", fname, - # "-m", perfmon_dir + "/" + "metrics.xml", - "--timestamp", - "on", - "-o", - workload_dir + "/" + "timestamps.csv", - '"' + app_cmd + '"', - ] - ) - log.write(output) - # Update pmc_perf.csv timestamps + # Update timestamps replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output @@ -676,21 +660,7 @@ def omniperf_profile(args, VER): else: run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose) - # run again with timestamps - success, output = capture_subprocess_output( - [ - rocprof_cmd, - # "-i", fname, - # "-m", perfmon_dir + "/" + "metrics.xml", - "--timestamp", - "on", - "-o", - workload_dir + "/" + "timestamps.csv", - '"' + args.remaining + '"', - ] - ) - log.write(output) - # Update pmc_perf.csv timestamps + # Update timestamps replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 109fdecda1..1c80a22a96 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -345,6 +345,15 @@ def 
perfmon_coalesce(pmc_files_list, workload_dir, soc): # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] + + # add a timestamp file + fd = open(workload_perfmon_dir + "/timestamps.txt", "w") + fd.write("pmc:\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From 4c607e2741b549097c2c22d8a7934a6cd1f43658 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 26 Jun 2023 15:38:51 -0500 Subject: [PATCH 06/28] Comply to Python formatting Signed-off-by: coleramos425 [ROCm/rocprofiler-compute commit: f91de7d2f7478ac143b77914ad6560c5a5816f23] --- projects/rocprofiler-compute/src/utils/perfagg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 1c80a22a96..59460bc80a 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -345,7 +345,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] - # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -353,7 +352,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): fd.write("range:\n") fd.write("kernel:\n") fd.close() - + # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From 9edbf34b0f1a16d851d714e39231881cb5cfb5f5 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 11:01:37 -0400 Subject: [PATCH 07/28] Incorporate review comments Signed-off-by: Nicholas Curtis [ROCm/rocprofiler-compute commit: 54bc0580850095a91a60c9115934f5e747774426] --- 
.../rocprofiler-compute/src/docs/analysis.md | 16 +++++++-------- projects/rocprofiler-compute/src/docs/conf.py | 4 ++++ .../src/docs/getting_started.md | 20 +++++++++++-------- .../src/docs/high_level_design.md | 4 ++-- .../src/docs/installation.md | 14 ++++++++----- .../src/docs/introduction.md | 9 ++++----- .../rocprofiler-compute/src/docs/profiling.md | 15 ++++++++------ projects/rocprofiler-compute/src/parser.py | 2 +- 8 files changed, 49 insertions(+), 35 deletions(-) diff --git a/projects/rocprofiler-compute/src/docs/analysis.md b/projects/rocprofiler-compute/src/docs/analysis.md index 2321ddabd3..9feff1f64b 100644 --- a/projects/rocprofiler-compute/src/docs/analysis.md +++ b/projects/rocprofiler-compute/src/docs/analysis.md @@ -26,7 +26,7 @@ Run `omniperf analyze -h` for more details. ### Recommended workflow 1) Do a comprehensive analysis with Omniperf CLI at the beginning. -```shell +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ -------- @@ -108,7 +108,7 @@ Analyze .... ``` 2. Use `--list-metrics` to generate a list of availible metrics for inspection - ```shell + ```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a ╒═════════╤═════════════════════════════╕ │ │ Metric │ @@ -172,7 +172,7 @@ $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a ... ``` 2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light). -```shell +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ -b 2 -------- Analyze @@ -286,7 +286,7 @@ Analyze - Filter kernels First, list the top kernels in your application using `--list-kernels`. 
- ```shell + ```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels -------- @@ -306,7 +306,7 @@ Analyze Second, select the index of the kernel you'd like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__). Then, use this index to apply the filter via `-k/--kernels`. - ```shell + ```shell-session $ omniperf -p workloads/vcopy/mi200/ -k 0 -------- @@ -372,7 +372,7 @@ See [FAQ](https://amdresearch.github.io/omniperf/faq.html) for more details on S To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example: -```bash +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --gui -------- @@ -499,7 +499,7 @@ e.g., omniperf_asw_vcopy_mi200. Below is the sample command to import the *vcopy* profiling data. -```shell +```shell-session $ omniperf database --help ROC Profiler: /usr/bin/rocprof @@ -544,7 +544,7 @@ Connection Options: ``` **omniperf import for vcopy:** -```shell +```shell-session $ omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/ ROC Profiler: /usr/bin/rocprof diff --git a/projects/rocprofiler-compute/src/docs/conf.py b/projects/rocprofiler-compute/src/docs/conf.py index 48d4c5596f..b659553f9a 100644 --- a/projects/rocprofiler-compute/src/docs/conf.py +++ b/projects/rocprofiler-compute/src/docs/conf.py @@ -53,6 +53,10 @@ extensions = [ ] myst_heading_anchors = 2 +# enable replacement of (tm) & friends +myst_enable_extensions = [ + "replacements" +] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/projects/rocprofiler-compute/src/docs/getting_started.md b/projects/rocprofiler-compute/src/docs/getting_started.md index c75bda9abe..80ae888f06 100644 --- a/projects/rocprofiler-compute/src/docs/getting_started.md +++ b/projects/rocprofiler-compute/src/docs/getting_started.md @@ -10,24 +10,28 @@ 1. 
**Launch & Profile the target application with the command line profiler** - The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or ipblock’s. + The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or IP blocks. If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable. - To collect the default set of data for all kernels in the target application, launch: + To collect the default set of data for all kernels in the target application, launch, e.g.: ```shell - $ omniperf profile -n vcopy -- ./vcopy 1048576 256 + $ omniperf profile -n vcopy_data -- ./vcopy 1048576 256 ``` - The app runs, each kernel is launched, and profiling results are generated. By default, results are written to ./workloads/\. To collect all requested profile information, it may be required to replay kernels multiple times. + The app runs, each kernel is launched, and profiling results are generated. By default, results are written to (e.g.,) ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times. 2. **Customize data collection** - Options are available to specify for which kernels data should be collected. - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID. `-b`/`--ipblocks` enables profiling on one or more IP Block(s). + Options are available to specify for which kernels/metrics data should be collected. + Note that filtering can be applied either in the profiling or analysis stage, however filtering during profiling collection will often speed up your overall profiling run time. 
- To view available metrics by IP Block you can always use `--list-metrics` to view a list of all available metrics organized by IP Block. + Some common filters include: + + - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID + - `-b`/`--ipblocks` collects metrics for only the specified (one or more) IP Blocks. + + To view available metrics by IP Block you can use the `--list-metrics` argument to view a list of all available metrics organized by IP Block. ```shell $ omniperf analyze --list-metrics ``` - Note that filtering can also be applied after the fact, at the analysis stage, however filtering at the profiling level will often speed up your overall profiling run time. 3. **Analyze at the command line** diff --git a/projects/rocprofiler-compute/src/docs/high_level_design.md b/projects/rocprofiler-compute/src/docs/high_level_design.md index 28c09ff129..6168b7ace9 100644 --- a/projects/rocprofiler-compute/src/docs/high_level_design.md +++ b/projects/rocprofiler-compute/src/docs/high_level_design.md @@ -8,10 +8,10 @@ The [Omniperf](https://github.com/AMDResearch/omniperf) Tool is architecturally composed of three major components, as shown in the following figure. -- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. +- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). The counters are stored in a comma-separated value (CSV) format for further analysis. A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators. 
- **Omniperf Grafana Analyzer**: - - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. + - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed. - *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database. It also creates the relevant performance metrics and visualization. - **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database. diff --git a/projects/rocprofiler-compute/src/docs/installation.md b/projects/rocprofiler-compute/src/docs/installation.md index ee2489710c..e550669b7e 100644 --- a/projects/rocprofiler-compute/src/docs/installation.md +++ b/projects/rocprofiler-compute/src/docs/installation.md @@ -109,7 +109,7 @@ ROC Profiler: /opt/rocm-5.1.0/bin/rocprof omniperf (v{__VERSION__}) ``` -```{tip} Sites relying on an Lmod Python module locally may wish to +```{tip} Users relying on an Lmod Python module locally may wish to customize the resulting Omniperf modulefile post-installation to include additional module dependencies. ``` @@ -129,8 +129,9 @@ export PYTHONPATH=$INSTALL_DIR/python-libs Omniperf relies on a rocprof binary during the profiling process. Normally the path to this binary will be detected -automatically, but it can also be overridden via the use of an -optional `ROCPROF` environment variable. +automatically, but it can also be overridden by setting the +optional `ROCPROF` environment variable to the path of the binary the user +wishes to use instead. 
@@ -162,9 +163,12 @@ Omniperf uses [mongoimport](https://www.mongodb.com/docs/database-tools/mongoimp $ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb $ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb ``` -> Find install for alternative distros [here](https://www.mongodb.com/download-center/database-tools/releases/archive) +> Installation instructions for alternative distributions can be found [here](https://www.mongodb.com/download-center/database-tools/releases/archive) + +### Persistent Storage + +The user may also choose to bind MongoDB to a directory on the host OS to create a local backup in case of a crash or reset: -### Persist Storage ```bash $ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/ $ sudo mkdir -p grafana-storage mongodb diff --git a/projects/rocprofiler-compute/src/docs/introduction.md b/projects/rocprofiler-compute/src/docs/introduction.md index 436146db8b..6b39d4088a 100644 --- a/projects/rocprofiler-compute/src/docs/introduction.md +++ b/projects/rocprofiler-compute/src/docs/introduction.md @@ -10,17 +10,17 @@ ## Scope -MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD MI GPUs. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. The Omniperf tool primarily targets MI100 and MI200 silicon. Development is in progress to support MI300 and NAVI GPUs. +MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD Instinct (tm) Accelerators. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. 
The Omniperf tool primarily targets accelerators in the MI100 and MI200 families. Development is in progress to support MI300 and Radeon (tm) RDNA (tm) GPUs. ## Features -The Omniperf tool performs system profiling based on all approved hardware counters for MI200. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... +The Omniperf tool performs system profiling based on all available hardware counters for the target accelerator. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... Both command line analysis and GUI analysis are supported. Detailed Feature List: -- MI200 support - MI100 support +- MI200 support - Standalone GUI Analyzer - Grafana/MongoDB GUI Analyzer - Dispatch Filtering @@ -50,8 +50,7 @@ Detailed Feature List: | Platform | Status | | :------- | :------------- | -| Vega 20 | No | -| MI50 | No | +| Vega 20 (MI-50/60) | No | | MI100 | Supported | | MI200 | Supported | | MI300 | In development | diff --git a/projects/rocprofiler-compute/src/docs/profiling.md b/projects/rocprofiler-compute/src/docs/profiling.md index 6776097c92..1a95477758 100644 --- a/projects/rocprofiler-compute/src/docs/profiling.md +++ b/projects/rocprofiler-compute/src/docs/profiling.md @@ -19,7 +19,7 @@ the MI200 platform. ## Workload Compilation **vcopy compilation:** -```shell +```shell-session $ hipcc vcopy.cpp -o vcopy $ ls vcopy vcopy.cpp @@ -40,7 +40,7 @@ Releasing CPU memory The *omniperf* script, availible through the [Omniperf](https://github.com/AMDResearch/omniperf) repository, is used to aquire all necessary perfmon data through analysis of compute workloads. 
**omniperf help:** -```shell +```shell-session $ omniperf profile --help ROC Profiler: /usr/bin/rocprof @@ -56,7 +56,7 @@ Examples: omniperf profile -n vcopy_all -- ./vcopy 1048576 256 - omniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256 + omniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256 omniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256 @@ -111,7 +111,7 @@ Standalone Roofline Options: The following sample command profiles the *vcopy* workload. **vcopy profiling:** -```shell +```shell-session $ omniperf profile --name vcopy -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof @@ -206,7 +206,10 @@ Peak MFMA IOPs (I8), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments ``` You'll notice two stages in *default* Omniperf profiling. The first stage collects all the counters needed for Omniperf analysis (omitting any filters you've provided). The second stage collects data for the roofline analysis (this stage can be disabled using `--no-roof`) -At the end of the profiling, all resulting csv files should be located in the SOC specific target directory, e.g., mi200. +At the end of the profiling, all resulting csv files should be located in a SOC specific target directory, e.g.: + - "mi200" for the AMD Instinct (tm) MI-200 family of accelerators + - "mi100" for the AMD Instinct (tm) MI-100 family of accelerators +etc. The SOC names are generated as a part of Omniperf, and do not necessarily distinguish between different accelerators in the same family (e.g., an AMD Instinct (tm) MI-210 vs an MI-250) > Note: Additionally, you'll notice a few extra files. An SoC parameters file, *sysinfo.csv*, is created to reflect the target device settings. All profiling output is stored in *log.txt*. Roofline specific benchmark results are stored in *roofline.csv*. 
@@ -316,7 +319,7 @@ ROCProfiler: input from "/tmp/rpl_data_230411_170300_29696/input0.xml" #### Dispatch Filtering The following example demonstrates profiling on selected dispatches: -```shell +```shell-session $ omniperf profile --name vcopy -d 0 -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof diff --git a/projects/rocprofiler-compute/src/parser.py b/projects/rocprofiler-compute/src/parser.py index da018ba944..9d6dd8f6f2 100644 --- a/projects/rocprofiler-compute/src/parser.py +++ b/projects/rocprofiler-compute/src/parser.py @@ -66,7 +66,7 @@ def parse(my_parser): \n\n------------------------------------------------------------------------------- \nExamples: \n\tomniperf profile -n vcopy_all -- ./vcopy 1048576 256 - \n\tomniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256 + \n\tomniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_disp -d 0 -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_roof --roof-only -- ./vcopy 1048576 256 From c051beba86eed3b0adbd1283f3fb8f9c3dd5d1aa Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 12:03:55 -0400 Subject: [PATCH 08/28] fix formatting Signed-off-by: Nicholas Curtis [ROCm/rocprofiler-compute commit: aaed37d00417014e35bb2dc97cc95a2aa89b6bf9] --- projects/rocprofiler-compute/src/docs/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/projects/rocprofiler-compute/src/docs/conf.py b/projects/rocprofiler-compute/src/docs/conf.py index b659553f9a..014ae77527 100644 --- a/projects/rocprofiler-compute/src/docs/conf.py +++ b/projects/rocprofiler-compute/src/docs/conf.py @@ -54,9 +54,7 @@ extensions = [ myst_heading_anchors = 2 # enable replacement of (tm) & friends -myst_enable_extensions = [ - "replacements" -] +myst_enable_extensions = ["replacements"] # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] From f74f9269513b1b429abb0306676fcb2dbf311717 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 12:25:40 -0400 Subject: [PATCH 09/28] fix missing Signed-off-by: Nicholas Curtis [ROCm/rocprofiler-compute commit: 8857393571a97f2550ce012098893c3be4751de5] --- projects/rocprofiler-compute/src/docs/profiling.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/projects/rocprofiler-compute/src/docs/profiling.md b/projects/rocprofiler-compute/src/docs/profiling.md index 1a95477758..b0b56dc91a 100644 --- a/projects/rocprofiler-compute/src/docs/profiling.md +++ b/projects/rocprofiler-compute/src/docs/profiling.md @@ -370,7 +370,7 @@ Standalone Roofline Options: #### Roofline Only The following example demonstrates profiling roofline data only: -```shell +```shell-session $ omniperf profile --name vcopy --roof-only -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof @@ -394,7 +394,8 @@ Checking for pmc_perf.csv in /home/colramos/GitHub/omniperf-pub/workloads/mix/m Empirical Roofline PDFs saved! ``` An inspection of our workload output folder shows .pdf plots were generated successfully -```shell +```shell-session +$ ls workloads/vcopy/mi200/ total 176 drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:18 . drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:15 .. 
@@ -409,4 +410,4 @@ drwxrwxr-x 2 colramos colramos 4096 Apr 11 17:16 perfmon ``` A sample *empirRoof_gpu-ALL_fp32.pdf* looks something like this: -![Sample Standalone Roof Plot](images/sample-roof-plot.png) +![Sample Standalone Roof Plot](images/sample-roof-plot.png) \ No newline at end of file From 8e71e8a44e6a1952ac42f4d8197c4130700c02e5 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 7 Jun 2023 10:23:49 -0400 Subject: [PATCH 10/28] Add options to enable latexpdf builds Signed-off-by: Nicholas Curtis [ROCm/rocprofiler-compute commit: 60d4a425366ddf96fb14554564e6d8412d5e8e3c] --- projects/rocprofiler-compute/src/docs/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/rocprofiler-compute/src/docs/conf.py b/projects/rocprofiler-compute/src/docs/conf.py index 014ae77527..d97f79bb7b 100644 --- a/projects/rocprofiler-compute/src/docs/conf.py +++ b/projects/rocprofiler-compute/src/docs/conf.py @@ -89,6 +89,10 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None +# options for latex output +latex_engine = 'lualatex' +latex_show_urls = 'footnote' + # -- Options for HTML output ------------------------------------------------- From 1c6b676ea139328aa8e9dc89a630c656efc21b29 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 7 Jun 2023 15:19:56 -0400 Subject: [PATCH 11/28] apply formatting Signed-off-by: Nicholas Curtis [ROCm/rocprofiler-compute commit: be1eeee370cbbccb4c5667d2f359f9fd125431b6] --- projects/rocprofiler-compute/src/docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/src/docs/conf.py b/projects/rocprofiler-compute/src/docs/conf.py index d97f79bb7b..af0003fb73 100644 --- a/projects/rocprofiler-compute/src/docs/conf.py +++ b/projects/rocprofiler-compute/src/docs/conf.py @@ -90,8 +90,8 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] pygments_style = None # options for latex output -latex_engine = 'lualatex' -latex_show_urls = 'footnote' +latex_engine = "lualatex" +latex_show_urls = "footnote" # -- Options for HTML output ------------------------------------------------- From e7dbe462097135c50cc10e04cabe3b290a50e7dd Mon Sep 17 00:00:00 2001 From: "Karl W. Schulz" Date: Fri, 30 Jun 2023 15:01:57 -0500 Subject: [PATCH 12/28] updating path for rocm repo to supported rhel8 release (8.8) Signed-off-by: Karl W. 
Schulz [ROCm/rocprofiler-compute commit: 8edba713fbbf1294b412d2eb603f1af082839ba6] --- projects/rocprofiler-compute/docker/rhel8/rocm.repo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprofiler-compute/docker/rhel8/rocm.repo b/projects/rocprofiler-compute/docker/rhel8/rocm.repo index 17171d755d..8b20489780 100644 --- a/projects/rocprofiler-compute/docker/rhel8/rocm.repo +++ b/projects/rocprofiler-compute/docker/rhel8/rocm.repo @@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key [amdgpu] name=amdgpu -baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64 +baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64 enabled=1 gpgcheck=1 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key \ No newline at end of file From 2d3bc1fdb54486b4d98b7e704720032dc52df227 Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 10 Jul 2023 16:26:56 -0500 Subject: [PATCH 13/28] Fixed Units inconsistencies - Table 10: Units were output as "$normUnit" now they are instr + normUnit - Table 16: Changed to Req per $normUnit Signed-off-by: JoseSantosAMD [ROCm/rocprofiler-compute commit: 5d84d0bb63c78c386761a91547611281f0e29138] --- .../gfx906/1000_compute-unit-instruction-mix.yaml | 4 ++-- .../configs/gfx906/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx906/1800_L2_cache_per_channel.yaml | 14 +++++++------- .../configs/gfx908/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx908/1800_L2_cache_per_channel.yaml | 14 +++++++------- .../configs/gfx90a/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 14 +++++++------- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml index b72344f3b8..fd4653c23e 100644 --- 
a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml @@ -27,7 +27,7 @@ Panel Config: tips: LDS: count: AVG((SQ_INSTS_LDS / $denom)) - unit: $normUnit + unit: (instr + $normUnit) tips: VALU - MFMA: count: None # No HW module @@ -61,7 +61,7 @@ Panel Config: metric: INT-32: count: None # No perf counter - unit: $normUnit + unit: (instr + $normUnit) tips: INT-64: count: None # No perf counter diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml index 1713068d2d..1e05b3e4c0 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git 
a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml index 95bba22e89..08a9a9f76d 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: 
AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml index 4ff3fd4d49..f65309a31d 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel 
Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml index e68511e9eb..3acee57404 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 
+294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) diff --git 
a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml index 985be38030..917cb3aa0e 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index a5bf6fa259..094df5b198 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + 
TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + 
TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) From 8e8571e1bff644f7677afe07a10224da2f8ff202 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 11 Jul 2023 13:27:46 -0500 Subject: [PATCH 14/28] Update 1800_L2_cache_per_channel.yaml Capitalizing for consistency Signed-off-by: Cole Ramos [ROCm/rocprofiler-compute commit: 2469716d13b227b2f9435f2e86160a2b8851c9c2] --- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index 094df5b198..93fc2b4121 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + 
TO_INT(TCC_READ[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) From 1f86cba6ee379d22059c54cd10363318e0ecbf54 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 11 Jul 2023 13:29:07 -0500 Subject: [PATCH 15/28] Update 1800_L2_cache_per_channel.yaml Capitalizing for consistency Signed-off-by: Cole Ramos [ROCm/rocprofiler-compute commit: 6042cfb16a908ae89a7091a927cd124126b04643] --- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index 93fc2b4121..f136478472 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/projects/rocprofiler-compute/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) From ca993d11848c7f25ac1d470f14b355b86bc7ea94 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:11:38 -0500 Subject: [PATCH 16/28] Rearranging build_df func to optimize ArchConfig for rocomni plugin Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 267750c085b222748971ea96f369f1a886aa4fef] --- .../src/omniperf_analyze/utils/parser.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py index b6573566bf..025745b174 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py +++ 
b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py @@ -339,11 +339,25 @@ def gen_counter_list(formula): "CC": None, "RW": None, "GIOP": None, + "GFLOPs": None, } + built_in_counter=[ + "lds", + "grd", + "wgr", + "arch_vgpr", + "accum_vgpr", + "sgpr", + "scr", + "BeginNs", + "EndNs" + ] + + visited = False counters = [] if not isinstance(formula, str): - return counters + return visited, counters try: tree = ast.parse( formula.replace("$normUnit", "SQ_WAVES") @@ -351,15 +365,17 @@ def gen_counter_list(formula): .replace("$", "") ) for node in ast.walk(tree): - if ( - isinstance(node, ast.Name) - and node.id.rstrip("_sum").isupper() - and node.id not in function_filter - ): - counters.append(node.id.rstrip("_sum")) + if isinstance(node, ast.Name): + val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id) + if (val.isupper() and val not in function_filter): + counters.append(val) + visited = True + if val in built_in_counter: + visited = True except: pass - return counters + + return visited, counters def build_dfs(archConfigs, filter_metrics): @@ -381,9 +397,14 @@ def build_dfs(archConfigs, filter_metrics): dfs_type = {} metric_counters = {} for panel_id, panel in archConfigs.panel_configs.items(): + panel_idx = str(panel_id // 100) for data_source in panel["data source"]: for type, data_cofig in data_source.items(): if type == "metric_table": + metric_list[panel_idx] = panel["title"] + table_idx = panel_idx + "." + str(data_cofig["id"] % 100) + metric_list[table_idx] = data_cofig["title"] + headers = ["Index"] for key, tile in data_cofig["header"].items(): if key != "tips": @@ -397,12 +418,7 @@ def build_dfs(archConfigs, filter_metrics): i = 0 for key, entries in data_cofig["metric"].items(): - data_source_idx = ( - str(data_cofig["id"] // 100) - + "." - + str(data_cofig["id"] % 100) - ) - metric_idx = data_source_idx + "." + str(i) + metric_idx = table_idx + "." 
+ str(i) values = [] eqn_content = [] @@ -411,7 +427,7 @@ def build_dfs(archConfigs, filter_metrics): or (metric_idx in filter_metrics) # no filter or # metric in filter # the whole table in filter - (data_source_idx in filter_metrics) + (table_idx in filter_metrics) or # the whole IP block in filter (str(panel_id // 100) in filter_metrics) @@ -439,14 +455,19 @@ def build_dfs(archConfigs, filter_metrics): df = pd.concat([df, df_new_row]) # collect metric_list - metric_list[metric_idx] = key.replace(" ", "_") + metric_list[metric_idx] = key # generate mapping of counters and metrics filter = {} + _visited = False for formula in eqn_content: if formula is not None and formula != "None": - for k in gen_counter_list(formula): + visited, counters = gen_counter_list(formula) + if visited: + _visited = True + for k in counters: filter[k] = None - if len(filter) > 0: + + if len(filter) > 0 or _visited: metric_counters[key] = list(filter) i += 1 From 1145579dbe1ea4d841147cd9218072e8a706323e Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:13:09 -0500 Subject: [PATCH 17/28] Abstract perfmon coalesing for useage in rocomni plugin Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 80c04feb77961d17b3e062d8d8f1fa78897d318d] --- .../rocprofiler-compute/src/utils/perfagg.py | 173 ++++++++++++------ 1 file changed, 115 insertions(+), 58 deletions(-) diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 59460bc80a..8e95482c54 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -256,6 +256,96 @@ def pmc_perf_split(workload_dir): os.remove(workload_perfmon_dir + "/pmc_perf.txt") +def update_pmc_bucket( + counters, + save_file, + soc, + pmc_list=None, + stext=None, + workload_perfmon_dir=None + ): + # Verify inputs. 
+ # If save_file is True, we're being called internally, from perfmon_coalesce + # Else we're being called externally, from rocomni + detected_extermal_call = False + if save_file and (stext is None or workload_perfmon_dir is None): + raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True") + if pmc_list is None: + detected_extermal_call = True + pmc_list = dict( + [ + ("SQ", []), + ("GRBM", []), + ("TCP", []), + ("TA", []), + ("TD", []), + ("TCC", []), + ("SPI", []), + ("CPC", []), + ("CPF", []), + ("GDS", []), + ("TCC2", {}), # per-channel TCC perfmon + ] + ) + for ch in range(perfmon_config[soc]["TCC_channels"]): + pmc_list["TCC2"][str(ch)] = [] + + if "SQ_ACCUM_PREV_HIRES" in counters: + # save all level counters separately + nindex = counters.index("SQ_ACCUM_PREV_HIRES") + level_counter = counters[nindex - 1] + + if save_file: + # Save to level counter file, file name = level counter name + fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w") + fd.write(stext + "\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + + return pmc_list + + # save normal pmc counters in matching buckets + for counter in counters: + IP_block = counter.split(sep="_")[0].upper() + # SQC and SQ belong to the IP block, coalesce them + if IP_block == "SQC": + IP_block = "SQ" + + if IP_block != "TCC": + # Insert unique pmc counters into its bucket + if counter not in pmc_list[IP_block]: + pmc_list[IP_block].append(counter) + + else: + # TCC counters processing + m = re.match(r"[\s\S]+\[(\d+)\]", counter) + if m is None: + # Aggregated TCC counters + if counter not in pmc_list[IP_block]: + pmc_list[IP_block].append(counter) + + else: + # TCC channel ID + ch = m.group(1) + + # fake IP block for per channel TCC + if str(ch) in pmc_list["TCC2"]: + # append unique counter into the channel + if counter not in pmc_list["TCC2"][str(ch)]: + pmc_list["TCC2"][str(ch)].append(counter) + else: + # initial counter in 
this channel + pmc_list["TCC2"][str(ch)] = [counter] + + if detected_extermal_call: + # sort the per channel counter, so that same counter in all channels can be aligned + for ch in range(perfmon_config[soc]["TCC_channels"]): + pmc_list["TCC2"][str(ch)].sort() + return pmc_list + + def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" @@ -296,55 +386,11 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # we have found all the counters, store them in buckets counters = m.group(1).split() - if "SQ_ACCUM_PREV_HIRES" in counters: - # save all level counters separately - - nindex = counters.index("SQ_ACCUM_PREV_HIRES") - level_counter = counters[nindex - 1] - - # Save to level counter file, file name = level counter name - fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w") - fd.write(stext + "\n\n") - fd.write("gpu:\n") - fd.write("range:\n") - fd.write("kernel:\n") - fd.close() - - continue - - # save normal pmc counters in matching buckets - for counter in counters: - IP_block = counter.split(sep="_")[0].upper() - # SQC and SQ belong to the IP block, coalesce them - if IP_block == "SQC": - IP_block = "SQ" - - if IP_block != "TCC": - # Insert unique pmc counters into its bucket - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) - - else: - # TCC counters processing - m = re.match(r"[\s\S]+\[(\d+)\]", counter) - if m is None: - # Aggregated TCC counters - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) - - else: - # TCC channel ID - ch = m.group(1) - - # fake IP block for per channel TCC - if str(ch) in pmc_list["TCC2"]: - # append unique counter into the channel - if counter not in pmc_list["TCC2"][str(ch)]: - pmc_list["TCC2"][str(ch)].append(counter) - else: - # initial counter in this channel - pmc_list["TCC2"][str(ch)] = [counter] - + + # Utilitze helper function once a list of counters has be extracted + save_file = True + pmc_list = 
update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir) + # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -360,9 +406,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): return pmc_list -def perfmon_emit(pmc_list, workload_dir, soc): - workload_perfmon_dir = workload_dir + "/perfmon" - +def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None): # Calculate the minimum number of iteration to save the pmc counters # non-TCC counters pmc_cnt = [ @@ -384,7 +428,12 @@ def perfmon_emit(pmc_list, workload_dir, soc): niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt))) # Emit PMC counters into pmc config file - fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w") + if save_file: + workload_perfmon_dir = workload_dir + "/perfmon" + fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w") + else: + batches = [] + tcc2_index = 0 for iter in range(niter): @@ -414,12 +463,20 @@ def perfmon_emit(pmc_list, workload_dir, soc): # TCC aggregated counters line = line + " " + " ".join(tcc_counters) - fd.write(line + "\n") + if save_file: + fd.write(line + "\n") + else: + b = line.split() + b.remove("pmc:") + batches.append(b) - fd.write("\ngpu:\n") - fd.write("range:\n") - fd.write("kernel:\n") - fd.close() + if save_file: + fd.write("\ngpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + else: + return batches def perfmon_filter(workload_dir, perfmon_dir, args): From 91dee25ea2f54f825d41fe829fed241eceeb98a7 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:14:10 -0500 Subject: [PATCH 18/28] Comply to Python formatting Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 4d8383b4390cf8cacf225954e971f31891dea39d] --- .../src/omniperf_analyze/utils/parser.py | 8 +++--- .../rocprofiler-compute/src/utils/perfagg.py | 26 +++++++++---------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git 
a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py index 025745b174..0328d7aa84 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py @@ -342,7 +342,7 @@ def gen_counter_list(formula): "GFLOPs": None, } - built_in_counter=[ + built_in_counter = [ "lds", "grd", "wgr", @@ -351,7 +351,7 @@ def gen_counter_list(formula): "sgpr", "scr", "BeginNs", - "EndNs" + "EndNs", ] visited = False @@ -367,7 +367,7 @@ def gen_counter_list(formula): for node in ast.walk(tree): if isinstance(node, ast.Name): val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id) - if (val.isupper() and val not in function_filter): + if val.isupper() and val not in function_filter: counters.append(val) visited = True if val in built_in_counter: @@ -404,7 +404,7 @@ def build_dfs(archConfigs, filter_metrics): metric_list[panel_idx] = panel["title"] table_idx = panel_idx + "." + str(data_cofig["id"] % 100) metric_list[table_idx] = data_cofig["title"] - + headers = ["Index"] for key, tile in data_cofig["header"].items(): if key != "tips": diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 8e95482c54..04658795c8 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -257,19 +257,16 @@ def pmc_perf_split(workload_dir): def update_pmc_bucket( - counters, - save_file, - soc, - pmc_list=None, - stext=None, - workload_perfmon_dir=None - ): + counters, save_file, soc, pmc_list=None, stext=None, workload_perfmon_dir=None +): # Verify inputs. 
# If save_file is True, we're being called internally, from perfmon_coalesce # Else we're being called externally, from rocomni detected_extermal_call = False if save_file and (stext is None or workload_perfmon_dir is None): - raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True") + raise ValueError( + "stext and workload_perfmon_dir must be specified if save_file is True" + ) if pmc_list is None: detected_extermal_call = True pmc_list = dict( @@ -289,7 +286,7 @@ def update_pmc_bucket( ) for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)] = [] - + if "SQ_ACCUM_PREV_HIRES" in counters: # save all level counters separately nindex = counters.index("SQ_ACCUM_PREV_HIRES") @@ -305,7 +302,7 @@ def update_pmc_bucket( fd.close() return pmc_list - + # save normal pmc counters in matching buckets for counter in counters: IP_block = counter.split(sep="_")[0].upper() @@ -386,11 +383,13 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # we have found all the counters, store them in buckets counters = m.group(1).split() - + # Utilitze helper function once a list of counters has be extracted save_file = True - pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir) - + pmc_list = update_pmc_bucket( + counters, save_file, soc, pmc_list, stext, workload_perfmon_dir + ) + # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -434,7 +433,6 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None): else: batches = [] - tcc2_index = 0 for iter in range(niter): # Prefix From b1a181c255127df4bd621ff78f0a2c5d8d6237fe Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 12:46:05 -0500 Subject: [PATCH 19/28] Migrate to @grafana/create-plugin Signed-off-by: JoseSantosAMD [ROCm/rocprofiler-compute commit: 3137076a72fa5cf35e723d6211f7b6cec17bc3f9] --- .../grafana_plugins/svg_plugin/.prettierrc.js | 5 +- 
.../grafana_plugins/svg_plugin/package.json | 65 +++++++++++++++---- .../grafana_plugins/svg_plugin/tsconfig.json | 12 +--- 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.prettierrc.js b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.prettierrc.js index f60eb1d252..aaa5045c6e 100644 --- a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.prettierrc.js +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.prettierrc.js @@ -1,3 +1,4 @@ module.exports = { - ...require("./node_modules/@grafana/toolkit/src/config/prettier.plugin.config.json"), - }; \ No newline at end of file + // Prettier configuration provided by Grafana scaffolding + ...require("./.config/.prettierrc.js") +}; \ No newline at end of file diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/package.json b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/package.json index fb88025c1d..aa445bd25d 100644 --- a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/package.json +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/package.json @@ -3,29 +3,72 @@ "version": "1.0.0", "description": "", "scripts": { - "build": "grafana-toolkit plugin:build", - "test": "grafana-toolkit plugin:test", - "dev": "grafana-toolkit plugin:dev", - "watch": "grafana-toolkit plugin:dev --watch", - "sign": "grafana-toolkit plugin:sign", - "start": "yarn watch" + "build": "webpack -c ./.config/webpack/webpack.config.ts --env production", + "dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development", + "e2e": "yarn exec cypress install && yarn exec grafana-e2e run", + "e2e:update": "yarn exec cypress install && yarn exec grafana-e2e run --update-screenshots", + "lint": "eslint --cache --ignore-path ./.gitignore --ext .js,.jsx,.ts,.tsx .", + "lint:fix": "yarn run lint --fix", + "server": "docker-compose up --build", + "sign": "npx --yes @grafana/sign-plugin@latest", + "start": 
"yarn watch", + "test": "jest --watch --onlyChanged", + "test:ci": "jest --passWithNoTests --maxWorkers 4", + "typecheck": "tsc --noEmit" }, "author": "Audacious Software Group", "license": "MIT", "devDependencies": { - "@grafana/toolkit": "latest", + "@babel/core": "^7.21.4", + "@grafana/e2e": "9.5.3", + "@grafana/e2e-selectors": "9.5.3", + "@grafana/eslint-config": "^6.0.0", + "@grafana/tsconfig": "^1.2.0-rc1", + "@swc/core": "^1.3.62", + "@swc/helpers": "^0.5.0", + "@swc/jest": "^0.2.26", + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^12.1.4", + "@types/jest": "^29.5.0", + "@types/lodash": "^4.14.194", + "@types/node": "^18.15.11", + "copy-webpack-plugin": "^11.0.0", + "css-loader": "^6.7.3", "emotion": "10.0.27", + "eslint-webpack-plugin": "^4.0.1", + "fork-ts-checker-webpack-plugin": "^8.0.0", + "glob": "^10.2.7", + "identity-obj-proxy": "3.0.0", + "jest": "^29.5.0", + "jest-environment-jsdom": "^29.5.0", + "prettier": "^2.8.7", "react-monaco-editor": "^0.44.0", - "tslib": "^2.3.1" + "replace-in-file-webpack-plugin": "^1.0.6", + "sass": "1.63.2", + "sass-loader": "13.3.1", + "style-loader": "3.3.3", + "swc-loader": "^0.2.3", + "ts-node": "^10.9.1", + "tsconfig-paths": "^4.2.0", + "tslib": "^2.3.1", + "typescript": "4.8.4", + "webpack": "^5.86.0", + "webpack-cli": "^5.1.4", + "webpack-livereload-plugin": "^3.0.2" }, "engines": { "node": ">=14" }, "dependencies": { - "@grafana/runtime": "9.1.2", + "@emotion/css": "^11.1.3", "@grafana/data": "9.1.2", + "@grafana/runtime": "9.1.2", "@grafana/ui": "9.1.2", - "@svgdotjs/svg.js": "^3.1.1" + "@svgdotjs/svg.js": "^3.1.1", + "react": "17.0.2", + "react-dom": "17.0.2", + "tslib": "2.5.3" }, - "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project." + "_comments": "Dependencies are not included as part of Omniperf. 
It's the user's responsibility to accept any licensing implications before building the project.", + "packageManager": "yarn@1.22.19" } diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/tsconfig.json b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/tsconfig.json index 7e6657d2fe..d294745aa8 100644 --- a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/tsconfig.json +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/tsconfig.json @@ -1,11 +1,3 @@ { - "extends": "./node_modules/@grafana/toolkit/src/config/tsconfig.plugin.json", - "include": ["src", "types"], - "compilerOptions": { - "types": ["@emotion/core"], - "rootDir": "./src", - "baseUrl": "./src", - "typeRoots": ["./node_modules/@types"], - "jsx": "react" - } -} + "extends": "./.config/tsconfig.json" +} \ No newline at end of file From e5cc9b3f88f129e366509b3363ee7ab406825d27 Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 13:02:00 -0500 Subject: [PATCH 20/28] Adding config files Signed-off-by: JoseSantosAMD [ROCm/rocprofiler-compute commit: 43d492dce2bb0bdbc2f26de9b569fa3b4010dbd0] --- .../svg_plugin/.config/tsconfig.json | 26 +++ .../.config/webpack/webpack.config.ts | 201 ++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/tsconfig.json create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/tsconfig.json b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/tsconfig.json new file mode 100644 index 0000000000..64b3769074 --- /dev/null +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/tsconfig.json @@ -0,0 +1,26 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. 
⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-typescript-config + */ + { + "compilerOptions": { + "alwaysStrict": true, + "declaration": false, + "rootDir": "../src", + "baseUrl": "../src", + "typeRoots": ["../node_modules/@types"], + "resolveJsonModule": true + }, + "ts-node": { + "compilerOptions": { + "module": "commonjs", + "target": "es5", + "esModuleInterop": true + }, + "transpileOnly": true + }, + "include": ["../src", "./types"], + "extends": "@grafana/tsconfig" +} diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts new file mode 100644 index 0000000000..22cb86ca4e --- /dev/null +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts @@ -0,0 +1,201 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. 
⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-webpack-config + */ + +import CopyWebpackPlugin from 'copy-webpack-plugin'; +import ESLintPlugin from 'eslint-webpack-plugin'; +import ForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin'; +import LiveReloadPlugin from 'webpack-livereload-plugin'; +import path from 'path'; +import ReplaceInFileWebpackPlugin from 'replace-in-file-webpack-plugin'; +import { Configuration } from 'webpack'; + +import { getPackageJson, getPluginJson, hasReadme, getEntries } from './utils'; +import { SOURCE_DIR, DIST_DIR } from './constants'; + +const pluginJson = getPluginJson(); + +const config = async (env): Promise => ({ + cache: { + type: 'filesystem', + buildDependencies: { + config: [__filename], + }, + }, + + context: path.join(process.cwd(), SOURCE_DIR), + + devtool: env.production ? 'source-map' : 'eval-source-map', + + entry: await getEntries(), + + externals: [ + 'lodash', + 'jquery', + 'moment', + 'slate', + 'emotion', + '@emotion/react', + '@emotion/css', + 'prismjs', + 'slate-plain-serializer', + '@grafana/slate-react', + 'react', + 'react-dom', + 'react-redux', + 'redux', + 'rxjs', + 'react-router', + 'react-router-dom', + 'd3', + 'angular', + '@grafana/ui', + '@grafana/runtime', + '@grafana/data', + + // Mark legacy SDK imports as external if their name starts with the "grafana/" prefix + ({ request }, callback) => { + const prefix = 'grafana/'; + const hasPrefix = (request) => request.indexOf(prefix) === 0; + const stripPrefix = (request) => request.substr(prefix.length); + + if (hasPrefix(request)) { + return callback(undefined, stripPrefix(request)); + } + + callback(); + }, + ], + + mode: env.production ? 
'production' : 'development', + + module: { + rules: [ + { + exclude: /(node_modules)/, + test: /\.[tj]sx?$/, + use: { + loader: 'swc-loader', + options: { + jsc: { + baseUrl: './src', + target: 'es2015', + loose: false, + parser: { + syntax: 'typescript', + tsx: true, + decorators: false, + dynamicImport: true, + }, + }, + }, + }, + }, + { + test: /\.css$/, + use: ["style-loader", "css-loader"] + }, + { + test: /\.s[ac]ss$/, + use: ['style-loader', 'css-loader', 'sass-loader'], + }, + { + test: /\.(png|jpe?g|gif|svg)$/, + type: 'asset/resource', + generator: { + // Keep publicPath relative for host.com/grafana/ deployments + publicPath: `public/plugins/${pluginJson.id}/img/`, + outputPath: 'img/', + filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]', + }, + }, + { + test: /\.(woff|woff2|eot|ttf|otf)(\?v=\d+\.\d+\.\d+)?$/, + type: 'asset/resource', + generator: { + // Keep publicPath relative for host.com/grafana/ deployments + publicPath: `public/plugins/${pluginJson.id}/fonts/`, + outputPath: 'fonts/', + filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]', + }, + }, + ], + }, + + output: { + clean: { + keep: new RegExp(`.*?_(amd64|arm(64)?)(.exe)?`), + }, + filename: '[name].js', + library: { + type: 'amd', + }, + path: path.resolve(process.cwd(), DIST_DIR), + publicPath: '/', + }, + + plugins: [ + new CopyWebpackPlugin({ + patterns: [ + // If src/README.md exists use it; otherwise the root README + // To `compiler.options.output` + { from: hasReadme() ? 'README.md' : '../README.md', to: '.', force: true }, + { from: 'plugin.json', to: '.' }, + { from: '../LICENSE', to: '.' }, + { from: '../CHANGELOG.md', to: '.', force: true }, + { from: '**/*.json', to: '.' 
}, // TODO + { from: '**/*.svg', to: '.', noErrorOnMissing: true }, // Optional + { from: '**/*.png', to: '.', noErrorOnMissing: true }, // Optional + { from: '**/*.html', to: '.', noErrorOnMissing: true }, // Optional + { from: 'img/**/*', to: '.', noErrorOnMissing: true }, // Optional + { from: 'libs/**/*', to: '.', noErrorOnMissing: true }, // Optional + { from: 'static/**/*', to: '.', noErrorOnMissing: true }, // Optional + ], + }), + // Replace certain template-variables in the README and plugin.json + new ReplaceInFileWebpackPlugin([ + { + dir: DIST_DIR, + files: ['plugin.json', 'README.md'], + rules: [ + { + search: /\%VERSION\%/g, + replace: getPackageJson().version, + }, + { + search: /\%TODAY\%/g, + replace: new Date().toISOString().substring(0, 10), + }, + { + search: /\%PLUGIN_ID\%/g, + replace: pluginJson.id, + }, + ], + }, + ]), + new ForkTsCheckerWebpackPlugin({ + async: Boolean(env.development), + issue: { + include: [{ file: '**/*.{ts,tsx}' }], + }, + typescript: { configFile: path.join(process.cwd(), 'tsconfig.json') }, + }), + new ESLintPlugin({ + extensions: ['.ts', '.tsx'], + lintDirtyModulesOnly: Boolean(env.development), // don't lint on start, only lint changed files + }), + ...(env.development ? 
[new LiveReloadPlugin()] : []), + ], + + resolve: { + extensions: ['.js', '.jsx', '.ts', '.tsx'], + // handle resolving "rootDir" paths + modules: [path.resolve(process.cwd(), 'src'), 'node_modules'], + unsafeCache: true, + }, +}); + +export default config; From 3e6b0ab388b07be8ee53e615d27bb14a69bbe82f Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Mon, 17 Jul 2023 13:12:22 -0500 Subject: [PATCH 21/28] Filter additional ops in gen_counter_list fucn Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: fd55a698057929b2cc9bceb47ec5dac9ea941e18] --- .../src/omniperf_analyze/utils/parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py index 0328d7aa84..bff3314b2a 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/utils/parser.py @@ -340,6 +340,8 @@ def gen_counter_list(formula): "RW": None, "GIOP": None, "GFLOPs": None, + "CONCAT": None, + "MOD": None, } built_in_counter = [ @@ -362,6 +364,12 @@ def gen_counter_list(formula): tree = ast.parse( formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") + .replace( + "$numActiveCUs", + "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / GRBM_GUI_ACTIVE)), \ + 0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \ + / GRBM_GUI_ACTIVE)), 0), $maxWavesPerCU), 8)), $numCU))", + ) .replace("$", "") ) for node in ast.walk(tree): From e565946a4d484eaa88753af35db02bd0b0a1e102 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Mon, 17 Jul 2023 13:12:56 -0500 Subject: [PATCH 22/28] Enable join_prof() merge util to be called from outside Omniperf Signed-off-by: colramos-amd [ROCm/rocprofiler-compute commit: 2b0ac9b5d8c9edb94ce62a9e0480790121e3a15d] --- .../rocprofiler-compute/src/utils/perfagg.py | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 
deletions(-) diff --git a/projects/rocprofiler-compute/src/utils/perfagg.py b/projects/rocprofiler-compute/src/utils/perfagg.py index 04658795c8..1c21b1736a 100755 --- a/projects/rocprofiler-compute/src/utils/perfagg.py +++ b/projects/rocprofiler-compute/src/utils/perfagg.py @@ -95,13 +95,19 @@ def test_df_column_equality(df): # joins disparate runs less dumbly than rocprof def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Set default output directory if not specified - if out == None: - out = workload_dir + "/pmc_perf.csv" - files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") - df = None + if type(workload_dir) == str: + if out is None: + out = workload_dir + "/pmc_perf.csv" + files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") + elif type(workload_dir) == list: + files = workload_dir + else: + print("ERROR: Invalid workload_dir") + sys.exit(1) + df = None for i, file in enumerate(files): - _df = pd.read_csv(file) + _df = pd.read_csv(file) if type(workload_dir) == str else file if join_type == "kernel": key = _df.groupby("KernelName").cumcount() _df["key"] = _df.KernelName + " - " + key.astype(str) @@ -137,7 +143,6 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): - print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( @@ -146,10 +151,12 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): ) ) warnings.warn(msg) - log_file.write(msg + "\n") + if log_file: + log_file.write(msg + "\n") else: msg = "Successfully joined {} in pmc_perf.csv".format(key) - log_file.write(msg + "\n") + if log_file: + log_file.write(msg + "\n") if test_df_column_equality(_df) and verbose: print(msg) @@ -179,6 +186,8 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): 
"fbar", "sig", "obj", + # rocscope specific merged counters, keep original + "dispatch_", ] ) ] @@ -189,7 +198,15 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): [ k for k in df.keys() - if not any(check in k for check in ["DispatchNs", "CompleteNs"]) + if not any( + check in k + for check in [ + "DispatchNs", + "CompleteNs", + # rocscope specific timestamp + "HostDuration", + ] + ) ] ] #   C) sanity check the name and key @@ -216,12 +233,14 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): df["EndNs"] = endNs # finally, join the drop key df = df.drop(columns=["key"]) - # and save to file - df.to_csv(out, index=False) - # and delete old file(s) - if not verbose: - for file in files: - os.remove(file) + # save to file and delete old file(s), skip if we're being called outside of Omniperf + if type(workload_dir) == str: + df.to_csv(out, index=False) + if not verbose: + for file in files: + os.remove(file) + else: + return df def pmc_perf_split(workload_dir): @@ -262,13 +281,13 @@ def update_pmc_bucket( # Verify inputs. 
# If save_file is True, we're being called internally, from perfmon_coalesce # Else we're being called externally, from rocomni - detected_extermal_call = False + detected_external_call = False if save_file and (stext is None or workload_perfmon_dir is None): raise ValueError( "stext and workload_perfmon_dir must be specified if save_file is True" ) if pmc_list is None: - detected_extermal_call = True + detected_external_call = True pmc_list = dict( [ ("SQ", []), @@ -287,7 +306,7 @@ def update_pmc_bucket( for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)] = [] - if "SQ_ACCUM_PREV_HIRES" in counters: + if "SQ_ACCUM_PREV_HIRES" in counters and not detected_external_call: # save all level counters separately nindex = counters.index("SQ_ACCUM_PREV_HIRES") level_counter = counters[nindex - 1] @@ -336,7 +355,7 @@ def update_pmc_bucket( # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] - if detected_extermal_call: + if detected_external_call: # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From e0c4061b1cd1314eaa41f536160a21775e4eea2c Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 13:19:03 -0500 Subject: [PATCH 23/28] Adding config files Signed-off-by: JoseSantosAMD [ROCm/rocprofiler-compute commit: d7ba2acec93a2fec5438593f6fe5c67e462a617f] --- .../svg_plugin/.config/.eslintrc | 13 ++ .../svg_plugin/.config/.prettierrc.js | 16 ++ .../svg_plugin/.config/Dockerfile | 16 ++ .../svg_plugin/.config/README.md | 164 ++++++++++++++++++ .../svg_plugin/.config/jest-setup.js | 25 +++ .../svg_plugin/.config/jest.config.js | 43 +++++ .../.config/jest/mocks/react-inlinesvg.tsx | 25 +++ .../svg_plugin/.config/jest/utils.js | 31 ++++ .../svg_plugin/.config/types/custom.d.ts | 37 ++++ .../svg_plugin/.config/webpack/constants.ts | 2 + .../svg_plugin/.config/webpack/utils.ts | 40 +++++ 
.../grafana_plugins/svg_plugin/.eslintrc | 3 + .../grafana_plugins/svg_plugin/.nvmrc | 1 + .../svg_plugin/docker-compose.yaml | 15 ++ .../grafana_plugins/svg_plugin/jest-setup.js | 2 + .../grafana_plugins/svg_plugin/jest.config.js | 8 + 16 files changed, 441 insertions(+) create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.eslintrc create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.prettierrc.js create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/Dockerfile create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/README.md create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/jest-setup.js create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/jest.config.js create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/jest/utils.js create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/types/custom.d.ts create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/constants.ts create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/webpack/utils.ts create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.eslintrc create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/.nvmrc create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/docker-compose.yaml create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/jest-setup.js create mode 100644 projects/rocprofiler-compute/grafana_plugins/svg_plugin/jest.config.js diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.eslintrc b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.eslintrc new file mode 100644 index 
0000000000..3f8c381a4b --- /dev/null +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.eslintrc @@ -0,0 +1,13 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-eslint-config + */ + { + "extends": ["@grafana/eslint-config"], + "root": true, + "rules": { + "react/prop-types": "off" + } +} diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.prettierrc.js b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.prettierrc.js new file mode 100644 index 0000000000..66a76ec5bc --- /dev/null +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/.prettierrc.js @@ -0,0 +1,16 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in .config/README.md + */ + +module.exports = { + "endOfLine": "auto", + "printWidth": 120, + "trailingComma": "es5", + "semi": true, + "jsxSingleQuote": false, + "singleQuote": true, + "useTabs": false, + "tabWidth": 2 +}; \ No newline at end of file diff --git a/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/Dockerfile b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/Dockerfile new file mode 100644 index 0000000000..35d89bd1c1 --- /dev/null +++ b/projects/rocprofiler-compute/grafana_plugins/svg_plugin/.config/Dockerfile @@ -0,0 +1,16 @@ +ARG grafana_version=latest +ARG grafana_image=grafana-enterprise + +FROM grafana/${grafana_image}:${grafana_version} + +# Make it as simple as possible to access the grafana instance for development purposes +# Do NOT enable these settings in a public facing / production grafana instance +ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin" +ENV GF_AUTH_ANONYMOUS_ENABLED "true" +ENV 
GF_AUTH_BASIC_ENABLED "false" +# Set development mode so plugins can be loaded without the need to sign +ENV GF_DEFAULT_APP_MODE "development" + +# Inject livereload script into grafana index.html +USER root +RUN sed -i 's/<\/body><\/html>/