@@ -1,12 +1,12 @@
|
||||
## How to fork from us
|
||||
|
||||
To keep our development fast and conflict-free, we recommend that you [fork](https://github.com/AMDResearch/omniperf/fork) our repository and start your work from our `develop` branch in your private repository.
|
||||
To keep our development fast and conflict-free, we recommend that you [fork](https://github.com/AMDResearch/omniperf/fork) our repository and start your work from our `dev` branch in your private repository.
|
||||
|
||||
Afterwards, git clone your repository to your local machine. But that is not it! To keep track of the original develop repository, add it as another remote.
|
||||
|
||||
```
|
||||
git remote add mainline https://github.com/AMDResearch/omniperf.git
|
||||
git checkout develop
|
||||
git checkout dev
|
||||
```
|
||||
|
||||
As always in git, start a new branch with
|
||||
@@ -31,7 +31,7 @@ and apply your changes there.
|
||||
|
||||
- Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it.
|
||||
|
||||
- Ensure the PR is based on the `develop` branch of the Omniperf GitHub repository.
|
||||
- Ensure the PR is based on the `dev` branch of the Omniperf GitHub repository.
|
||||
|
||||
- Omniperf requires new commits to include a "Signed-off-by" token in the commit message (typically enabled via the `git commit -s` option), indicating your agreement to the project's [Developer's Certificate of Origin](https://developercertificate.org/) and compatibility with the project [LICENSE](https://github.com/AMDResearch/omniperf/blob/main/LICENSE):
|
||||
|
||||
|
||||
+1
-1
@@ -715,7 +715,7 @@ def main():
|
||||
# Set up prerequisites for roofline
|
||||
roof_setup(args, my_parser, VER)
|
||||
# Generate roofline
|
||||
roofline_only(args.path, args.device, args.sort, args.mem_level, args.verbose)
|
||||
roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
|
||||
|
||||
# Profile only
|
||||
else:
|
||||
|
||||
@@ -212,7 +212,7 @@ def run_cli(args, runs):
|
||||
)
|
||||
|
||||
|
||||
def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose):
|
||||
def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbose):
|
||||
import pandas as pd
|
||||
from collections import OrderedDict
|
||||
|
||||
@@ -235,6 +235,7 @@ def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose):
|
||||
dev_id, # [Optional] Specify device id to collect roofline info from
|
||||
sort_type, # [Optional] Sort AI by top kernels or dispatches
|
||||
mem_level, # [Optional] Toggle particular level(s) of memory hierarchy
|
||||
kernel_names, # [Optional] Toggle overlay of kernel names in plot
|
||||
True, # [Optional] Generate a standalone roofline analysis
|
||||
)
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
dbc.DropdownMenuItem("Cache", header=True),
|
||||
dbc.DropdownMenuItem(
|
||||
"Local Data Share (LDS)",
|
||||
href="#local_data_sharelds",
|
||||
href="#local_data_share_lds",
|
||||
external_link=True,
|
||||
),
|
||||
dbc.DropdownMenuItem(
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
from omniperf_analyze.utils import roofline_calc
|
||||
|
||||
import time
|
||||
import sys
|
||||
import numpy as np
|
||||
from dash import html, dash_table
|
||||
|
||||
@@ -32,6 +33,9 @@ from dash import dcc
|
||||
import plotly.graph_objects as go
|
||||
|
||||
|
||||
SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20]
|
||||
|
||||
|
||||
def to_int(a):
|
||||
if str(type(a)) == "<class 'NoneType'>":
|
||||
return np.nan
|
||||
@@ -39,7 +43,9 @@ def to_int(a):
|
||||
return int(a)
|
||||
|
||||
|
||||
def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=None):
|
||||
def generate_plots(
|
||||
roof_info, ai_data, mem_level, is_standalone, kernel_names, verbose, fig=None
|
||||
):
|
||||
if fig is None:
|
||||
fig = go.Figure()
|
||||
plotMode = "lines+text" if is_standalone else "lines"
|
||||
@@ -120,6 +126,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No
|
||||
y=ai_data["ai_l1"][1],
|
||||
name="ai_l1",
|
||||
mode="markers",
|
||||
marker={"color": "#00CC96"},
|
||||
marker_symbol=SYMBOLS if kernel_names else None,
|
||||
)
|
||||
)
|
||||
fig.add_trace(
|
||||
@@ -128,6 +136,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No
|
||||
y=ai_data["ai_l2"][1],
|
||||
name="ai_l2",
|
||||
mode="markers",
|
||||
marker={"color": "#EF553B"},
|
||||
marker_symbol=SYMBOLS if kernel_names else None,
|
||||
)
|
||||
)
|
||||
fig.add_trace(
|
||||
@@ -136,6 +146,8 @@ def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=No
|
||||
y=ai_data["ai_hbm"][1],
|
||||
name="ai_hbm",
|
||||
mode="markers",
|
||||
marker={"color": "#636EFA"},
|
||||
marker_symbol=SYMBOLS if kernel_names else None,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -158,8 +170,13 @@ def get_roofline(
|
||||
dev_id=None,
|
||||
sort_type="kernels",
|
||||
mem_level="ALL",
|
||||
kernel_names=False,
|
||||
is_standalone=False,
|
||||
):
|
||||
if kernel_names and (not is_standalone):
|
||||
print("ERROR: --roof-only is required for --kernel-names")
|
||||
sys.exit(1)
|
||||
|
||||
# Roofline settings
|
||||
fp32_details = {
|
||||
"path": path_to_dir,
|
||||
@@ -185,11 +202,33 @@ def get_roofline(
|
||||
print(i, "->", ai_data[i])
|
||||
print("\n")
|
||||
|
||||
fp32_fig = generate_plots(fp32_details, ai_data, mem_level, is_standalone, verbose)
|
||||
fp16_fig = generate_plots(fp16_details, ai_data, mem_level, is_standalone, verbose)
|
||||
ml_combo_fig = generate_plots(
|
||||
int8_details, ai_data, mem_level, is_standalone, verbose, fp16_fig
|
||||
fp32_fig = generate_plots(
|
||||
fp32_details, ai_data, mem_level, is_standalone, kernel_names, verbose
|
||||
)
|
||||
fp16_fig = generate_plots(
|
||||
fp16_details, ai_data, mem_level, is_standalone, kernel_names, verbose
|
||||
)
|
||||
ml_combo_fig = generate_plots(
|
||||
int8_details, ai_data, mem_level, is_standalone, kernel_names, verbose, fp16_fig
|
||||
)
|
||||
legend = go.Figure(
|
||||
go.Scatter(
|
||||
mode="markers",
|
||||
x=[0] * 10,
|
||||
y=ai_data["kernelNames"],
|
||||
marker_symbol=SYMBOLS,
|
||||
marker_size=15,
|
||||
)
|
||||
)
|
||||
legend.update_layout(
|
||||
title="Kernel Names and Markers",
|
||||
margin=dict(b=0, r=0),
|
||||
xaxis_range=[-1, 1],
|
||||
xaxis_side="top",
|
||||
height=400,
|
||||
width=1000,
|
||||
)
|
||||
legend.update_xaxes(dtick=1)
|
||||
|
||||
if is_standalone:
|
||||
dev_id = "ALL" if dev_id == -1 else str(dev_id)
|
||||
@@ -198,12 +237,17 @@ def get_roofline(
|
||||
ml_combo_fig.write_image(
|
||||
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
|
||||
)
|
||||
if kernel_names:
|
||||
# only save a legend if kernel_names option is toggled
|
||||
legend.write_image(path_to_dir + "/kernelName_legend.pdf")
|
||||
time.sleep(1)
|
||||
# Re-save to remove loading MathJax pop up
|
||||
fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id))
|
||||
ml_combo_fig.write_image(
|
||||
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
|
||||
)
|
||||
if kernel_names:
|
||||
legend.write_image(path_to_dir + "/kernelName_legend.pdf")
|
||||
print("Empirical Roofline PDFs saved!")
|
||||
else:
|
||||
return html.Section(
|
||||
|
||||
@@ -27,7 +27,6 @@ import sys
|
||||
import astunparse
|
||||
import re
|
||||
import os
|
||||
from matplotlib.pyplot import axis
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tabulate import tabulate
|
||||
|
||||
@@ -44,6 +44,8 @@ FONT_WEIGHT = "bold"
|
||||
|
||||
SUPPORTED_SOC = ["mi200"]
|
||||
|
||||
TOP_N = 10
|
||||
|
||||
|
||||
################################################
|
||||
# Helper funcs
|
||||
@@ -208,17 +210,146 @@ def plot_application(sortType, ret_df, verbose):
|
||||
kernelName = ""
|
||||
|
||||
myList = []
|
||||
for index, row in df.iterrows():
|
||||
at_end = False
|
||||
next_kernelName = ""
|
||||
|
||||
for idx in df.index:
|
||||
# CASE: Top kernels
|
||||
# Calculate + append AI data if
|
||||
# a) current KernelName is different than previous OR
|
||||
# b) We've reached the end of list
|
||||
if sortType == "kernels" and (
|
||||
(row["KernelName"] != kernelName and kernelName != "")
|
||||
or index == df.shape[0] - 1
|
||||
):
|
||||
if df.shape[0] - 1 == index:
|
||||
calls += 1
|
||||
if idx + 1 == df.shape[0]:
|
||||
at_end = True
|
||||
else:
|
||||
next_kernelName = df["KernelName"][idx + 1]
|
||||
|
||||
kernelName = df["KernelName"][idx]
|
||||
try:
|
||||
total_flops += (
|
||||
(
|
||||
64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F16"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F16"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F16"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F16"][idx]
|
||||
)
|
||||
)
|
||||
+ (
|
||||
64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F32"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F32"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F32"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F32"][idx]
|
||||
)
|
||||
)
|
||||
+ (
|
||||
64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F64"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F64"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F64"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F64"][idx]
|
||||
)
|
||||
)
|
||||
+ (df["SQ_INSTS_VALU_MFMA_MOPS_F16"][idx] * 512)
|
||||
+ (df["SQ_INSTS_VALU_MFMA_MOPS_BF16"][idx] * 512)
|
||||
+ (df["SQ_INSTS_VALU_MFMA_MOPS_F32"][idx] * 512)
|
||||
+ (df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped total_flops at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
try:
|
||||
valu_flops += (
|
||||
64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F16"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F16"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F16"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F16"][idx]
|
||||
)
|
||||
+ 64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F32"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F32"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F32"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F32"][idx]
|
||||
)
|
||||
+ 64
|
||||
* (
|
||||
df["SQ_INSTS_VALU_ADD_F64"][idx]
|
||||
+ df["SQ_INSTS_VALU_MUL_F64"][idx]
|
||||
+ (2 * df["SQ_INSTS_VALU_FMA_F64"][idx])
|
||||
+ df["SQ_INSTS_VALU_TRANS_F64"][idx]
|
||||
)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped valu_flops at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
|
||||
try:
|
||||
mfma_flops_f16 += df["SQ_INSTS_VALU_MFMA_MOPS_F16"][idx] * 512
|
||||
mfma_flops_bf16 += df["SQ_INSTS_VALU_MFMA_MOPS_BF16"][idx] * 512
|
||||
mfma_flops_f32 += df["SQ_INSTS_VALU_MFMA_MOPS_F32"][idx] * 512
|
||||
mfma_flops_f64 += df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512
|
||||
mfma_iops_i8 += df["SQ_INSTS_VALU_MFMA_MOPS_I8"][idx] * 512
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped mfma ops at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
|
||||
try:
|
||||
lds_data += (
|
||||
(df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx])
|
||||
* 4
|
||||
* L2_BANKS
|
||||
) # L2_BANKS = 32 (since assuming mi200)
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped lds_data at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
|
||||
try:
|
||||
L1cache_data += df["TCP_TOTAL_CACHE_ACCESSES_sum"][idx] * 64
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped L1cache_data at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
|
||||
try:
|
||||
L2cache_data += (
|
||||
df["TCP_TCC_WRITE_REQ_sum"][idx] * 64
|
||||
+ df["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"][idx] * 64
|
||||
+ df["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"][idx] * 64
|
||||
+ df["TCP_TCC_READ_REQ_sum"][idx] * 64
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped L2cache_data at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
try:
|
||||
hbm_data += (
|
||||
(df["TCC_EA_RDREQ_32B_sum"][idx] * 32)
|
||||
+ ((df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx]) * 64)
|
||||
+ (df["TCC_EA_WRREQ_64B_sum"][idx] * 64)
|
||||
+ ((df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx]) * 32)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 3:
|
||||
print("{}: Skipped hbm_data at index {}".format(kernelName[:35], idx))
|
||||
pass
|
||||
|
||||
totalDuration += df["EndNs"][idx] - df["BeginNs"][idx]
|
||||
|
||||
avgDuration += df["EndNs"][idx] - df["BeginNs"][idx]
|
||||
|
||||
calls += 1
|
||||
|
||||
if sortType == "kernels" and (at_end == True or (kernelName != next_kernelName)):
|
||||
myList.append(
|
||||
AI_Data(
|
||||
kernelName,
|
||||
@@ -241,7 +372,7 @@ def plot_application(sortType, ret_df, verbose):
|
||||
if verbose >= 2:
|
||||
print(
|
||||
"Just added {} to AI_Data at index {}. # of calls: {}".format(
|
||||
kernelName, index, calls
|
||||
kernelName, idx, calls
|
||||
)
|
||||
)
|
||||
total_flops = (
|
||||
@@ -262,129 +393,6 @@ def plot_application(sortType, ret_df, verbose):
|
||||
L1cache_data
|
||||
) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0
|
||||
|
||||
kernelName = row["KernelName"]
|
||||
try:
|
||||
total_flops += (
|
||||
(
|
||||
64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F16"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F16"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F16"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F16"]
|
||||
)
|
||||
)
|
||||
+ (
|
||||
64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F32"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F32"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F32"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F32"]
|
||||
)
|
||||
)
|
||||
+ (
|
||||
64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F64"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F64"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F64"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F64"]
|
||||
)
|
||||
)
|
||||
+ (row["SQ_INSTS_VALU_MFMA_MOPS_F16"] * 512)
|
||||
+ (row["SQ_INSTS_VALU_MFMA_MOPS_BF16"] * 512)
|
||||
+ (row["SQ_INSTS_VALU_MFMA_MOPS_F32"] * 512)
|
||||
+ (row["SQ_INSTS_VALU_MFMA_MOPS_F64"] * 512)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped total_flops at index {}".format(index))
|
||||
pass
|
||||
try:
|
||||
valu_flops += (
|
||||
64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F16"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F16"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F16"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F16"]
|
||||
)
|
||||
+ 64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F32"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F32"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F32"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F32"]
|
||||
)
|
||||
+ 64
|
||||
* (
|
||||
row["SQ_INSTS_VALU_ADD_F64"]
|
||||
+ row["SQ_INSTS_VALU_MUL_F64"]
|
||||
+ (2 * row["SQ_INSTS_VALU_FMA_F64"])
|
||||
+ row["SQ_INSTS_VALU_TRANS_F64"]
|
||||
)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped valu_flops at index {}".format(index))
|
||||
pass
|
||||
|
||||
try:
|
||||
mfma_flops_f16 += row["SQ_INSTS_VALU_MFMA_MOPS_F16"] * 512
|
||||
mfma_flops_bf16 += row["SQ_INSTS_VALU_MFMA_MOPS_BF16"] * 512
|
||||
mfma_flops_f32 += row["SQ_INSTS_VALU_MFMA_MOPS_F32"] * 512
|
||||
mfma_flops_f64 += row["SQ_INSTS_VALU_MFMA_MOPS_F64"] * 512
|
||||
mfma_iops_i8 += row["SQ_INSTS_VALU_MFMA_MOPS_I8"] * 512
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped mfma ops at index {}".format(index))
|
||||
pass
|
||||
|
||||
try:
|
||||
lds_data += (
|
||||
(row["SQ_LDS_IDX_ACTIVE"] - row["SQ_LDS_BANK_CONFLICT"]) * 4 * L2_BANKS
|
||||
) # L2_BANKS = 32 (since assuming mi200)
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped lds_data at index {}".format(index))
|
||||
pass
|
||||
|
||||
try:
|
||||
L1cache_data += row["TCP_TOTAL_CACHE_ACCESSES_sum"] * 64
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped L1cache_data at index {}".format(index))
|
||||
pass
|
||||
|
||||
try:
|
||||
L2cache_data += (
|
||||
row["TCP_TCC_WRITE_REQ_sum"] * 64
|
||||
+ row["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"] * 64
|
||||
+ row["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"] * 64
|
||||
+ row["TCP_TCC_READ_REQ_sum"] * 64
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped L2cache_data at index {}".format(index))
|
||||
pass
|
||||
try:
|
||||
hbm_data += (
|
||||
(row["TCC_EA_RDREQ_32B_sum"] * 32)
|
||||
+ ((row["TCC_EA_RDREQ_sum"] - row["TCC_EA_RDREQ_32B_sum"]) * 64)
|
||||
+ (row["TCC_EA_WRREQ_64B_sum"] * 64)
|
||||
+ ((row["TCC_EA_WRREQ_sum"] - row["TCC_EA_WRREQ_64B_sum"]) * 32)
|
||||
)
|
||||
except KeyError:
|
||||
if verbose >= 2:
|
||||
print("Skipped hbm_data at index {}".format(index))
|
||||
pass
|
||||
|
||||
totalDuration += row["EndNs"] - row["BeginNs"]
|
||||
|
||||
avgDuration += row["EndNs"] - row["BeginNs"]
|
||||
|
||||
calls += 1
|
||||
if sortType == "dispatches":
|
||||
myList.append(
|
||||
AI_Data(
|
||||
@@ -428,9 +436,11 @@ def plot_application(sortType, ret_df, verbose):
|
||||
# print("Top 5 intensities ('{}')...".format(roof_details["sort"]))
|
||||
intensities = {"ai_l1": [], "ai_l2": [], "ai_hbm": []}
|
||||
curr_perf = []
|
||||
kernelNames = []
|
||||
i = 0
|
||||
# Create list of the top intensities (at most 10 entries, not 5)
|
||||
while i <= 9 and i != len(myList):
|
||||
while i < TOP_N and i != len(myList):
|
||||
kernelNames.append(myList[i].KernelName)
|
||||
intensities["ai_l1"].append(
|
||||
myList[i].total_flops / myList[i].L1cache_data
|
||||
) if myList[i].L1cache_data else intensities["ai_l1"].append(0)
|
||||
@@ -470,6 +480,9 @@ def plot_application(sortType, ret_df, verbose):
|
||||
intensityPoints[i].append(x)
|
||||
intensityPoints[i].append(y)
|
||||
|
||||
# Add an entry for kernel names
|
||||
intensityPoints["kernelNames"] = kernelNames
|
||||
|
||||
return intensityPoints
|
||||
|
||||
|
||||
|
||||
@@ -234,6 +234,13 @@ def parse(my_parser):
|
||||
type=int,
|
||||
help="\t\t\tGPU device ID. (DEFAULT: ALL)",
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"--kernel-names",
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="\t\t\tInclude kernel names in roofline plot.",
|
||||
)
|
||||
# roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)")
|
||||
# roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)")
|
||||
# roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)")
|
||||
|
||||
Ссылка в новой задаче
Block a user