From 2b63da809362c50d60ca6f2c046e6c3010e745f8 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Fri, 24 Feb 2023 11:24:37 -0600 Subject: [PATCH 1/9] Change develop to dev in CHANGELOG Signed-off-by: Cole Ramos --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 05cd26f8db..aeeea7f0bd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,12 @@ ## How to fork from us -To keep our development fast and conflict free, we recommend you to [fork](https://github.com/AMDResearch/omniperf/fork) our repository and start your work from our `develop` branch in your private repository. +To keep our development fast and conflict free, we recommend you to [fork](https://github.com/AMDResearch/omniperf/fork) our repository and start your work from our `dev` branch in your private repository. Afterwards, git clone your repository to your local machine. But that is not it! To keep track of the original develop repository, add it as another remote. ``` git remote add mainline https://github.com/AMDResearch/omniperf.git -git checkout develop +git checkout dev ``` As always in git, start a new branch with @@ -31,7 +31,7 @@ and apply your changes there. - Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it. -- Ensure the PR is based on the `develop` branch of the Omniperf GitHub repository. +- Ensure the PR is based on the `dev` branch of the Omniperf GitHub repository. - Omniperf requires new commits to include a "Signed-off-by" token in the commit message (typically enabled via the `git commit -s` option), indicating your agreement to the projects's [Developer's Certificate of Origin](https://developercertificate.org/) and compatability with the project [LICENSE](https://github.com/AMDResearch/omniperf/blob/main/LICENSE): From fef68d7d80414c205ef98661449b7d332179fdc3 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 24 Feb 2023 11:41:50 -0600 Subject: [PATCH 2/9] Fix link for LDS in standalone GUI Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/gui_components/header.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omniperf_analyze/utils/gui_components/header.py b/src/omniperf_analyze/utils/gui_components/header.py index 3dcf34be69..dbb89982a2 100644 --- a/src/omniperf_analyze/utils/gui_components/header.py +++ b/src/omniperf_analyze/utils/gui_components/header.py @@ -104,7 +104,7 @@ def get_header(raw_pmc, input_filters, kernel_names): dbc.DropdownMenuItem("Cache", header=True), dbc.DropdownMenuItem( "Local Data Share (LDS)", - href="#local_data_sharelds", + href="#local_data_share_lds", external_link=True, ), dbc.DropdownMenuItem( From 27fa97aad95511b44b55c708a18d92d091a41906 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 27 Feb 2023 16:18:42 -0600 Subject: [PATCH 3/9] Option for different markers per kernel name Signed-off-by: coleramos425 --- src/omniperf | 2 +- src/omniperf_analyze/omniperf_analyze.py | 3 +- .../utils/gui_components/roofline.py | 29 +++++++++++++++++++ src/omniperf_analyze/utils/roofline_calc.py | 9 +++++- src/parser.py | 7 +++++ 5 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/omniperf b/src/omniperf index dd1679afe9..72839cf0a1 100755 --- a/src/omniperf +++ b/src/omniperf @@ -715,7 +715,7 @@ def main(): # Setup prerequisits for roofline roof_setup(args, my_parser, VER) # Generate roofline - roofline_only(args.path, args.device, args.sort, args.mem_level, args.verbose) + roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose) # Profile only else: diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index 368e5c50c5..0825cbe2ef 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ b/src/omniperf_analyze/omniperf_analyze.py @@ -212,7 +212,7 @@ def run_cli(args, runs): ) -def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose): +def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbose): import pandas as pd from collections import OrderedDict @@ -235,6 +235,7 @@ def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose): dev_id, # [Optional] Specify device id to collect roofline info from sort_type, # [Optional] Sort AI by top kernels or dispatches mem_level, # [Optional] Toggle particular level(s) of memory hierarchy + kernel_names, # [Optional] Toggle overlay of kernel names in plot True, # [Optional] Generate a standalone roofline analysis ) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index 8fb6e36a74..13c6787d64 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -25,6 +25,7 @@ from omniperf_analyze.utils import roofline_calc import time +import sys import numpy as np from dash import html, dash_table @@ -32,6 +33,8 @@ from dash import dcc import plotly.graph_objects as go +SYMBOLS = [0,1,2,3,4,5,13,17,18,20] + def to_int(a): if str(type(a)) == "": return np.nan @@ -120,6 +123,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_l1"][1], name="ai_l1", mode="markers", + marker_symbol=SYMBOLS, ) ) fig.add_trace( @@ -128,6 +132,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_l2"][1], name="ai_l2", mode="markers", + marker_symbol=SYMBOLS, ) ) fig.add_trace( @@ -136,6 +141,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_hbm"][1], name="ai_hbm", mode="markers", + marker_symbol=SYMBOLS, ) ) @@ -158,8 +164,13 @@ def get_roofline( dev_id=None, sort_type="kernels", mem_level="ALL", + kernel_names=False, is_standalone=False, ): + if kernel_names and (not is_standalone): + print("ERROR: --roof-only is required for --kernel-names") + sys.exit(1) + # Roofline settings fp32_details = { "path": path_to_dir, @@ -190,6 +201,18 @@ def get_roofline( ml_combo_fig = generate_plots( int8_details, ai_data, mem_level, is_standalone, verbose, fp16_fig ) + legend = go.Figure( + go.Scatter( + mode="markers", + x=[0]*10, + y=ai_data["kernelNames"], + marker_symbol=SYMBOLS, + marker_size=15, + ) + ) + legend.update_layout(title="Kernel Names and Markers", + margin=dict(b=0,r=0), xaxis_range=[-1,1], xaxis_side="top", height=400, width=1000) + legend.update_xaxes(dtick=1) if is_standalone: dev_id = "ALL" if dev_id == -1 else str(dev_id) @@ -198,12 +221,18 @@ def get_roofline( ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) + legend.write_image( + path_to_dir + "/kernelName_legend.pdf" + ) time.sleep(1) # Re-save to remove loading MathJax pop up fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id)) ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) + legend.write_image( + path_to_dir + "/kernelName_legend.pdf" + ) print("Empirical Roofline PDFs saved!") else: return html.Section( diff --git a/src/omniperf_analyze/utils/roofline_calc.py b/src/omniperf_analyze/utils/roofline_calc.py index 987b6694de..7ba4725f9e 100644 --- a/src/omniperf_analyze/utils/roofline_calc.py +++ b/src/omniperf_analyze/utils/roofline_calc.py @@ -44,6 +44,8 @@ FONT_WEIGHT = "bold" SUPPORTED_SOC = ["mi200"] +TOP_N = 10 + ################################################ # Helper funcs @@ -428,9 +430,11 @@ def plot_application(sortType, ret_df, verbose): # print("Top 5 intensities ('{}')...".format(roof_details["sort"])) intensities = {"ai_l1": [], "ai_l2": [], "ai_hbm": []} curr_perf = [] + kernelNames = [] i = 0 # Create list of top 5 intensities - while i <= 9 and i != len(myList): + while i < TOP_N and i != len(myList): + kernelNames.append(myList[i].KernelName) intensities["ai_l1"].append( myList[i].total_flops / myList[i].L1cache_data ) if myList[i].L1cache_data else intensities["ai_l1"].append(0) @@ -470,6 +474,9 @@ def plot_application(sortType, ret_df, verbose): intensityPoints[i].append(x) intensityPoints[i].append(y) + # Add an entry for kernel names + intensityPoints["kernelNames"] = kernelNames + return intensityPoints diff --git a/src/parser.py b/src/parser.py index b305b60187..061d85d438 100644 --- a/src/parser.py +++ b/src/parser.py @@ -234,6 +234,13 @@ def parse(my_parser): type=int, help="\t\t\tGPU device ID. (DEFAULT: ALL)", ) + roofline_group.add_argument( + "--kernel-names", + required=False, + default=False, + action="store_true", + help="\t\t\tInclude kernel names in roofline plot.", + ) # roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)") # roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)") # roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)") From 4503be8d1ce0716b19fdc22019e756443fbf919e Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 2 Mar 2023 12:48:37 -0600 Subject: [PATCH 4/9] Remove unused Python module (#96) Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index cddf4960a2..ace633e35a 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -27,7 +27,6 @@ import sys import astunparse import re import os -from matplotlib.pyplot import axis import pandas as pd import numpy as np from tabulate import tabulate From bdd28595f1de4e2151c73fa326baf4d11037b16d Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 6 Mar 2023 14:39:22 -0600 Subject: [PATCH 5/9] Match AI and BW colors in roofline Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/gui_components/roofline.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index 13c6787d64..4ff1debba6 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -124,6 +124,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_l1", mode="markers", marker_symbol=SYMBOLS, + marker={"color": "#00CC96"} ) ) fig.add_trace( @@ -133,6 +134,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_l2", mode="markers", marker_symbol=SYMBOLS, + marker={"color": "#EF553B"} ) ) fig.add_trace( @@ -142,6 +144,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_hbm", mode="markers", marker_symbol=SYMBOLS, + marker={"color": "#636EFA"} ) ) From 2b429bdd39649f3cd114c5838afee6ba5699e9ea Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 6 Mar 2023 14:40:20 -0600 Subject: [PATCH 6/9] Comply to Python formatting Signed-off-by: coleramos425 --- src/omniperf_analyze/omniperf_analyze.py | 2 +- .../utils/gui_components/roofline.py | 29 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index 0825cbe2ef..ebd6a92f04 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ b/src/omniperf_analyze/omniperf_analyze.py @@ -235,7 +235,7 @@ def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbo dev_id, # [Optional] Specify device id to collect roofline info from sort_type, # [Optional] Sort AI by top kernels or dispatches mem_level, # [Optional] Toggle particular level(s) of memory hierarchy - kernel_names, # [Optional] Toggle overlay of kernel names in plot + kernel_names, # [Optional] Toggle overlay of kernel names in plot True, # [Optional] Generate a standalone roofline analysis ) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index 4ff1debba6..ae39384b90 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -33,7 +33,8 @@ from dash import dcc import plotly.graph_objects as go -SYMBOLS = [0,1,2,3,4,5,13,17,18,20] +SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20] + def to_int(a): if str(type(a)) == "": @@ -124,7 +125,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_l1", mode="markers", marker_symbol=SYMBOLS, - marker={"color": "#00CC96"} + marker={"color": "#00CC96"}, ) ) fig.add_trace( @@ -134,7 +135,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_l2", mode="markers", marker_symbol=SYMBOLS, - marker={"color": "#EF553B"} + marker={"color": "#EF553B"}, ) ) fig.add_trace( @@ -144,7 +145,7 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No name="ai_hbm", mode="markers", marker_symbol=SYMBOLS, - marker={"color": "#636EFA"} + marker={"color": "#636EFA"}, ) ) @@ -207,14 +208,20 @@ def get_roofline( legend = go.Figure( go.Scatter( mode="markers", - x=[0]*10, + x=[0] * 10, y=ai_data["kernelNames"], marker_symbol=SYMBOLS, marker_size=15, ) ) - legend.update_layout(title="Kernel Names and Markers", - margin=dict(b=0,r=0), xaxis_range=[-1,1], xaxis_side="top", height=400, width=1000) + legend.update_layout( + title="Kernel Names and Markers", + margin=dict(b=0, r=0), + xaxis_range=[-1, 1], + xaxis_side="top", + height=400, + width=1000, + ) legend.update_xaxes(dtick=1) if is_standalone: @@ -224,18 +231,14 @@ def get_roofline( ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) - legend.write_image( - path_to_dir + "/kernelName_legend.pdf" - ) + legend.write_image(path_to_dir + "/kernelName_legend.pdf") time.sleep(1) # Re-save to remove loading MathJax pop up fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id)) ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) - legend.write_image( - path_to_dir + "/kernelName_legend.pdf" - ) + legend.write_image(path_to_dir + "/kernelName_legend.pdf") print("Empirical Roofline PDFs saved!") else: return html.Section( From 6ced24021bb460b288dc977623ec228c789b496c Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 6 Mar 2023 16:42:55 -0600 Subject: [PATCH 7/9] Hide symbols in regular --roof-only mode Signed-off-by: coleramos425 --- .../utils/gui_components/roofline.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index ae39384b90..0d9e5826b3 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -43,7 +43,9 @@ def to_int(a): return int(a) -def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=None): +def generate_plots( + roof_info, ai_data, mem_level, is_standalone, kernel_names, verbose, fig=None +): if fig is None: fig = go.Figure() plotMode = "lines+text" if is_standalone else "lines" @@ -124,8 +126,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_l1"][1], name="ai_l1", mode="markers", - marker_symbol=SYMBOLS, marker={"color": "#00CC96"}, + marker_symbol=SYMBOLS if kernel_names else None, ) ) fig.add_trace( @@ -134,8 +136,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_l2"][1], name="ai_l2", mode="markers", - marker_symbol=SYMBOLS, marker={"color": "#EF553B"}, + marker_symbol=SYMBOLS if kernel_names else None, ) ) fig.add_trace( @@ -144,8 +146,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No y=ai_data["ai_hbm"][1], name="ai_hbm", mode="markers", - marker_symbol=SYMBOLS, marker={"color": "#636EFA"}, + marker_symbol=SYMBOLS if kernel_names else None, ) ) @@ -200,10 +202,14 @@ def get_roofline( print(i, "->", ai_data[i]) print("\n") - fp32_fig = generate_plots(fp32_details, ai_data, mem_level, is_standalone, verbose) - fp16_fig = generate_plots(fp16_details, ai_data, mem_level, is_standalone, verbose) + fp32_fig = generate_plots( + fp32_details, ai_data, mem_level, is_standalone, kernel_names, verbose + ) + fp16_fig = generate_plots( + fp16_details, ai_data, mem_level, is_standalone, kernel_names, verbose + ) ml_combo_fig = generate_plots( - int8_details, ai_data, mem_level, is_standalone, verbose, fp16_fig + int8_details, ai_data, mem_level, is_standalone, kernel_names, verbose, fp16_fig ) legend = go.Figure( go.Scatter( @@ -231,14 +237,17 @@ def get_roofline( ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) - legend.write_image(path_to_dir + "/kernelName_legend.pdf") + if kernel_names: + # only save a legend if kernel_names option is toggled + legend.write_image(path_to_dir + "/kernelName_legend.pdf") time.sleep(1) # Re-save to remove loading MathJax pop up fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id)) ml_combo_fig.write_image( path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id) ) - legend.write_image(path_to_dir + "/kernelName_legend.pdf") + if kernel_names: + legend.write_image(path_to_dir + "/kernelName_legend.pdf") print("Empirical Roofline PDFs saved!") else: return html.Section( From f32c192abee85d7621e5ebbea91fa1ca908dc66b Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 13 Mar 2023 13:38:41 -0500 Subject: [PATCH 8/9] Fix roofline calculation for single dispatch Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/roofline_calc.py | 266 ++++++++++---------- 1 file changed, 135 insertions(+), 131 deletions(-) diff --git a/src/omniperf_analyze/utils/roofline_calc.py b/src/omniperf_analyze/utils/roofline_calc.py index 7ba4725f9e..1fbca73e02 100644 --- a/src/omniperf_analyze/utils/roofline_calc.py +++ b/src/omniperf_analyze/utils/roofline_calc.py @@ -210,17 +210,144 @@ def plot_application(sortType, ret_df, verbose): kernelName = "" myList = [] - for index, row in df.iterrows(): + at_end = False + next_kernelName = "" + + for idx in df.index: # CASE: Top kernels # Calculate + append AI data if # a) current KernelName is different than previous OR # b) We've reached the end of list - if sortType == "kernels" and ( - (row["KernelName"] != kernelName and kernelName != "") - or index == df.shape[0] - 1 - ): - if df.shape[0] - 1 == index: - calls += 1 + if(idx + 1 == df.shape[0]): + at_end = True + else: + next_kernelName = df["KernelName"][idx+1] + + kernelName = df["KernelName"][idx] + try: + total_flops += ( + ( + 64 + * ( + df["SQ_INSTS_VALU_ADD_F16"][idx] + + df["SQ_INSTS_VALU_MUL_F16"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F16"][idx]) + + df["SQ_INSTS_VALU_TRANS_F16"][idx] + ) + ) + + ( + 64 + * ( + df["SQ_INSTS_VALU_ADD_F32"][idx] + + df["SQ_INSTS_VALU_MUL_F32"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F32"][idx]) + + df["SQ_INSTS_VALU_TRANS_F32"][idx] + ) + ) + + ( + 64 + * ( + df["SQ_INSTS_VALU_ADD_F64"][idx] + + df["SQ_INSTS_VALU_MUL_F64"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F64"][idx]) + + df["SQ_INSTS_VALU_TRANS_F64"][idx] + ) + ) + + (df["SQ_INSTS_VALU_MFMA_MOPS_F16"][idx] * 512) + + (df["SQ_INSTS_VALU_MFMA_MOPS_BF16"][idx] * 512) + + (df["SQ_INSTS_VALU_MFMA_MOPS_F32"][idx] * 512) + + (df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512) + ) + except KeyError: + if verbose >= 3: + print("{}: Skipped total_flops at index {}".format(kernelName[:35], idx)) + pass + try: + valu_flops += ( + 64 + * ( + df["SQ_INSTS_VALU_ADD_F16"][idx] + + df["SQ_INSTS_VALU_MUL_F16"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F16"][idx]) + + df["SQ_INSTS_VALU_TRANS_F16"][idx] + ) + + 64 + * ( + df["SQ_INSTS_VALU_ADD_F32"][idx] + + df["SQ_INSTS_VALU_MUL_F32"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F32"][idx]) + + df["SQ_INSTS_VALU_TRANS_F32"][idx] + ) + + 64 + * ( + df["SQ_INSTS_VALU_ADD_F64"][idx] + + df["SQ_INSTS_VALU_MUL_F64"][idx] + + (2 * df["SQ_INSTS_VALU_FMA_F64"][idx]) + + df["SQ_INSTS_VALU_TRANS_F64"][idx] + ) + ) + except KeyError: + if verbose >= 3: + print("{}: Skipped valu_flops at index {}".format(kernelName[:35], idx)) + pass + + try: + mfma_flops_f16 += df["SQ_INSTS_VALU_MFMA_MOPS_F16"][idx] * 512 + mfma_flops_bf16 += df["SQ_INSTS_VALU_MFMA_MOPS_BF16"][idx] * 512 + mfma_flops_f32 += df["SQ_INSTS_VALU_MFMA_MOPS_F32"][idx] * 512 + mfma_flops_f64 += df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512 + mfma_iops_i8 += df["SQ_INSTS_VALU_MFMA_MOPS_I8"][idx] * 512 + except KeyError: + if verbose >= 3: + print("{}: Skipped mfma ops at index {}".format(kernelName[:35], idx)) + pass + + try: + lds_data += ( + (df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx]) * 4 * L2_BANKS + ) # L2_BANKS = 32 (since assuming mi200) + except KeyError: + if verbose >= 3: + print("{}: Skipped lds_data at index {}".format(kernelName[:35], idx)) + pass + + try: + L1cache_data += df["TCP_TOTAL_CACHE_ACCESSES_sum"][idx] * 64 + except KeyError: + if verbose >= 3: + print("{}: Skipped L1cache_data at index {}".format(kernelName[:35], idx)) + pass + + try: + L2cache_data += ( + df["TCP_TCC_WRITE_REQ_sum"][idx] * 64 + + df["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"][idx] * 64 + + df["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"][idx] * 64 + + df["TCP_TCC_READ_REQ_sum"][idx] * 64 + ) + except KeyError: + if verbose >= 3: + print("{}: Skipped L2cache_data at index {}".format(kernelName[:35], idx)) + pass + try: + hbm_data += ( + (df["TCC_EA_RDREQ_32B_sum"][idx] * 32) + + ((df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx]) * 64) + + (df["TCC_EA_WRREQ_64B_sum"][idx] * 64) + + ((df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx]) * 32) + ) + except KeyError: + if verbose >= 3: + print("{}: Skipped hbm_data at index {}".format(kernelName[:35], idx)) + pass + + totalDuration += df["EndNs"][idx] - df["BeginNs"][idx] + + avgDuration += df["EndNs"][idx] - df["BeginNs"][idx] + + calls += 1 + + if sortType == "kernels" and (at_end == True or (kernelName != next_kernelName)): myList.append( AI_Data( kernelName, @@ -243,7 +370,7 @@ def plot_application(sortType, ret_df, verbose): if verbose >= 2: print( "Just added {} to AI_Data at index {}. # of calls: {}".format( - kernelName, index, calls + kernelName, idx, calls ) ) total_flops = ( @@ -264,129 +391,6 @@ def plot_application(sortType, ret_df, verbose): L1cache_data ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 - kernelName = row["KernelName"] - try: - total_flops += ( - ( - 64 - * ( - row["SQ_INSTS_VALU_ADD_F16"] - + row["SQ_INSTS_VALU_MUL_F16"] - + (2 * row["SQ_INSTS_VALU_FMA_F16"]) - + row["SQ_INSTS_VALU_TRANS_F16"] - ) - ) - + ( - 64 - * ( - row["SQ_INSTS_VALU_ADD_F32"] - + row["SQ_INSTS_VALU_MUL_F32"] - + (2 * row["SQ_INSTS_VALU_FMA_F32"]) - + row["SQ_INSTS_VALU_TRANS_F32"] - ) - ) - + ( - 64 - * ( - row["SQ_INSTS_VALU_ADD_F64"] - + row["SQ_INSTS_VALU_MUL_F64"] - + (2 * row["SQ_INSTS_VALU_FMA_F64"]) - + row["SQ_INSTS_VALU_TRANS_F64"] - ) - ) - + (row["SQ_INSTS_VALU_MFMA_MOPS_F16"] * 512) - + (row["SQ_INSTS_VALU_MFMA_MOPS_BF16"] * 512) - + (row["SQ_INSTS_VALU_MFMA_MOPS_F32"] * 512) - + (row["SQ_INSTS_VALU_MFMA_MOPS_F64"] * 512) - ) - except KeyError: - if verbose >= 2: - print("Skipped total_flops at index {}".format(index)) - pass - try: - valu_flops += ( - 64 - * ( - row["SQ_INSTS_VALU_ADD_F16"] - + row["SQ_INSTS_VALU_MUL_F16"] - + (2 * row["SQ_INSTS_VALU_FMA_F16"]) - + row["SQ_INSTS_VALU_TRANS_F16"] - ) - + 64 - * ( - row["SQ_INSTS_VALU_ADD_F32"] - + row["SQ_INSTS_VALU_MUL_F32"] - + (2 * row["SQ_INSTS_VALU_FMA_F32"]) - + row["SQ_INSTS_VALU_TRANS_F32"] - ) - + 64 - * ( - row["SQ_INSTS_VALU_ADD_F64"] - + row["SQ_INSTS_VALU_MUL_F64"] - + (2 * row["SQ_INSTS_VALU_FMA_F64"]) - + row["SQ_INSTS_VALU_TRANS_F64"] - ) - ) - except KeyError: - if verbose >= 2: - print("Skipped valu_flops at index {}".format(index)) - pass - - try: - mfma_flops_f16 += row["SQ_INSTS_VALU_MFMA_MOPS_F16"] * 512 - mfma_flops_bf16 += row["SQ_INSTS_VALU_MFMA_MOPS_BF16"] * 512 - mfma_flops_f32 += row["SQ_INSTS_VALU_MFMA_MOPS_F32"] * 512 - mfma_flops_f64 += row["SQ_INSTS_VALU_MFMA_MOPS_F64"] * 512 - mfma_iops_i8 += row["SQ_INSTS_VALU_MFMA_MOPS_I8"] * 512 - except KeyError: - if verbose >= 2: - print("Skipped mfma ops at index {}".format(index)) - pass - - try: - lds_data += ( - (row["SQ_LDS_IDX_ACTIVE"] - row["SQ_LDS_BANK_CONFLICT"]) * 4 * L2_BANKS - ) # L2_BANKS = 32 (since assuming mi200) - except KeyError: - if verbose >= 2: - print("Skipped lds_data at index {}".format(index)) - pass - - try: - L1cache_data += row["TCP_TOTAL_CACHE_ACCESSES_sum"] * 64 - except KeyError: - if verbose >= 2: - print("Skipped L1cache_data at index {}".format(index)) - pass - - try: - L2cache_data += ( - row["TCP_TCC_WRITE_REQ_sum"] * 64 - + row["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"] * 64 - + row["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"] * 64 - + row["TCP_TCC_READ_REQ_sum"] * 64 - ) - except KeyError: - if verbose >= 2: - print("Skipped L2cache_data at index {}".format(index)) - pass - try: - hbm_data += ( - (row["TCC_EA_RDREQ_32B_sum"] * 32) - + ((row["TCC_EA_RDREQ_sum"] - row["TCC_EA_RDREQ_32B_sum"]) * 64) - + (row["TCC_EA_WRREQ_64B_sum"] * 64) - + ((row["TCC_EA_WRREQ_sum"] - row["TCC_EA_WRREQ_64B_sum"]) * 32) - ) - except KeyError: - if verbose >= 2: - print("Skipped hbm_data at index {}".format(index)) - pass - - totalDuration += row["EndNs"] - row["BeginNs"] - - avgDuration += row["EndNs"] - row["BeginNs"] - - calls += 1 if sortType == "dispatches": myList.append( AI_Data( From 72031c8ae63f2a4f53f252f401024bfb53e760c0 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 13 Mar 2023 13:48:54 -0500 Subject: [PATCH 9/9] Comply to Python formatting Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/roofline_calc.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/omniperf_analyze/utils/roofline_calc.py b/src/omniperf_analyze/utils/roofline_calc.py index 1fbca73e02..2750052337 100644 --- a/src/omniperf_analyze/utils/roofline_calc.py +++ b/src/omniperf_analyze/utils/roofline_calc.py @@ -218,10 +218,10 @@ def plot_application(sortType, ret_df, verbose): # Calculate + append AI data if # a) current KernelName is different than previous OR # b) We've reached the end of list - if(idx + 1 == df.shape[0]): + if idx + 1 == df.shape[0]: at_end = True else: - next_kernelName = df["KernelName"][idx+1] + next_kernelName = df["KernelName"][idx + 1] kernelName = df["KernelName"][idx] try: @@ -304,7 +304,9 @@ def plot_application(sortType, ret_df, verbose): try: lds_data += ( - (df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx]) * 4 * L2_BANKS + (df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx]) + * 4 + * L2_BANKS ) # L2_BANKS = 32 (since assuming mi200) except KeyError: if verbose >= 3: @@ -347,7 +349,7 @@ def plot_application(sortType, ret_df, verbose): calls += 1 - if sortType == "kernels" and (at_end == True or (kernelName != next_kernelName)): + if sortType == "kernels" and (at_end == True or (kernelName != next_kernelName)): myList.append( AI_Data( kernelName,