Merge pull request #92 from AMDResearch/dev

Release v1.0.7
This commit is contained in:
Cole Ramos
2023-02-21 16:41:46 -06:00
committed by GitHub
commit f251c55f62
79 muutettua tiedostoa jossa 16558 lisäystä ja 2109 poistoa
@@ -1,7 +1,6 @@
# This is a basic workflow to help you get started with Actions
name: analyze-workloads
name: RHEL 8
# Controls when the workflow will run
on:
@@ -19,7 +18,7 @@ jobs:
# The type of runner that the job will run on
runs-on: ubuntu-latest
container:
image: rocm/dev-ubuntu-22.04:5.3
image: colramos/target-images:rhel8
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Cancel any previous runs
@@ -27,26 +26,31 @@ jobs:
uses: styfle/cancel-workflow-action@0.11.0
- name: Install baseline OS dependencies
run: |
sudo apt-get update
sudo apt-get install -y git
sudo apt-get install -y python3-pip
sudo apt-get install -y cmake
- name: Checkout
yum -y update
yum -y install git
yum -y install python39
yum -y install cmake3
yum -y install which
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
token: ${{ secrets.GH_PAT }}
- name: Install Python prereqs
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install pyinstaller pytest pytest-cov
- name: Configure and install
python3.9 -m pip install -r requirements.txt
python3.9 -m pip install pyinstaller pytest pytest-cov mock
- name: Configure and install
run: |
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=/opt/omniperf ..
make install
- name: run ctest
- name: CTest- Analyze Commands
run: |
cd build
ctest --verbose -R test_analyze_commands
- name: CTest- Analyze Workloads
  run: |
    cd build
    # ctest's -R option takes ONE regular expression; a second bare word is
    # not part of the filter, so both test names are joined with `|`.
    ctest --verbose -R "test_analyze_workloads|test_saved_analysis"
@@ -1,6 +1,6 @@
# This is a basic workflow to help you get started with Actions
name: analyze-commands
name: Ubuntu 20.04
# Controls when the workflow will run
on:
@@ -45,7 +45,11 @@ jobs:
cd build
cmake -DCMAKE_INSTALL_PREFIX=/opt/omniperf ..
make install
- name: Run ctest
- name: CTest- Analyze Commands
run: |
cd build
ctest --verbose -R test_analyze_commands
- name: CTest- Analyze Workloads
  run: |
    cd build
    # ctest's -R option takes ONE regular expression; a second bare word is
    # not part of the filter, so both test names are joined with `|`.
    ctest --verbose -R "test_analyze_workloads|test_saved_analysis"
+14
Näytä tiedosto
@@ -1,3 +1,17 @@
Version 1.0.7 (21 Feb 2023)
* update documentation (#52, #64)
* improved detection of invalid command line arguments (#58, #76)
* enhancements to standalone roofline (#61)
* enable Omniperf on systems with X-server (#62)
* raise minimum version requirement for rocm (#64)
* enable baseline comparison in CLI analysis (#65)
* add multi-normalization to new metrics (#68, #81)
* support alternative profilers (#70)
* add MI100 configs to override rocprofiler's incomplete default (#75)
* improve error message when no GPU(s) detected (#85)
* separate CI tests by Linux distro and add status badges
Version 1.0.6 (21 Dec 2022)
* CI update: documentation now published via github action (#22)
+14 -1
Näytä tiedosto
@@ -83,7 +83,8 @@ set(pythonDeps
"tabulate"
"tqdm"
"dash_svg"
"dash_bootstrap_components")
"dash_bootstrap_components"
"kaleido")
message(STATUS "Checking for required Python package dependencies...")
set_property(GLOBAL PROPERTY pythonDepsFlag "groovy")
@@ -291,6 +292,18 @@ install(
# set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "omniperf") set(CPACK_RESOURCE_FILE_LICENSE
# "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
# License header update(s)
#
# `license` is an on-demand developer target (not part of `all`). It runs
# utils/update_license.py twice against the top-level LICENSE file:
#   1. over ${PROJECT_SOURCE_DIR}/src, restricted to the `.py` extension;
#   2. over an explicit comma-separated list of files given relative to the
#      project root.
# VERBATIM makes CMake escape every argument itself, so no shell-style quoting
# is needed (or wanted) around `.py`.
add_custom_target(
  license
  COMMAND
    "${PROJECT_SOURCE_DIR}/utils/update_license.py" --source
    "${PROJECT_SOURCE_DIR}/src" --license "${PROJECT_SOURCE_DIR}/LICENSE"
    --extension .py
  COMMAND
    "${PROJECT_SOURCE_DIR}/utils/update_license.py" --source
    "${PROJECT_SOURCE_DIR}" --license "${PROJECT_SOURCE_DIR}/LICENSE" --file
    "src/omniperf,cmake/Dockerfile,cmake/rocm_install.sh,docker/docker-entrypoint.sh,src/omniperf_analyze/convertor/mongodb/convert"
  COMMENT "Updating license headers"
  VERBATIM)
# Source tarball
set(CPACK_SOURCE_GENERATOR "TGZ")
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${FULL_VERSION_STRING})
+1 -1
Näytä tiedosto
@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+2
Näytä tiedosto
@@ -1,3 +1,5 @@
[![Ubuntu 20.04](https://github.com/AMDResearch/omniperf/actions/workflows/ubuntu-focal.yml/badge.svg)](https://github.com/AMDResearch/omniperf/actions/workflows/ubuntu-focal.yml)
[![RHEL 8](https://github.com/AMDResearch/omniperf/actions/workflows/opensuse.yml/badge.svg)](https://github.com/AMDResearch/omniperf/actions/workflows/opensuse.yml)
[![Docs](https://github.com/AMDResearch/omniperf/actions/workflows/docs.yml/badge.svg)](https://amdresearch.github.io/omniperf/)
[![DOI](https://zenodo.org/badge/561919887.svg)](https://zenodo.org/badge/latestdoi/561919887)
+10 -8
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
FROM ubuntu:20.04
-36
Näytä tiedosto
@@ -1,36 +0,0 @@
#!/usr/bin/env bash
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
set -e
SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]}))
OMNIPERF_DIR=$(realpath ${SCRIPT_DIR}/@OMNIPERF_RELATIVE_PATH@)
if [ ! -f ${OMNIPERF_DIR}/omniperf ]; then
echo -e "Error! Expected omniperf installation in ${OMNIPERF_DIR}"
exit 1
fi
eval ${OMNIPERF_DIR}/omniperf "${@}"
+1 -1
Näytä tiedosto
@@ -1,4 +1,4 @@
-- Crusher-specific additions
depends_on "cray-python"
depends_on "rocm"
prereq(atleast("rocm","5.1.0"))
prereq(atleast("rocm","5.2.0"))
+1 -1
Näytä tiedosto
@@ -1,6 +1,6 @@
-- Thera-specific additions
depends_on "python"
depends_on "rocm"
prereq(atleast("rocm","5.1.0"))
prereq(atleast("rocm","5.2.0"))
local home = os.getenv("HOME")
setenv("MPLCONFIGDIR",pathJoin(home,".matplotlib"))
+11 -9
Näytä tiedosto
@@ -1,7 +1,9 @@
#!/usr/bin/env bash
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
declare -a rocm_versions=("4.3.1" "4.5.2" "5.0.2" "5.1.3" "5.2.3")
wget https://repo.radeon.com/amdgpu-install/22.10/ubuntu/focal/amdgpu-install_22.10.50100-1_all.deb
@@ -29,4 +31,4 @@ for rocm_version in ${rocm_versions[@]}; do
echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$rocm_version ubuntu main" | tee /etc/apt/sources.list.d/rocm.list
apt update
amdgpu-install -y --usecase=rocm --rocmrelease=$rocm_version --no-dkms
done
done
File diff suppressed because one or more lines are too long
+10 -8
Näytä tiedosto
@@ -1,7 +1,9 @@
#!/bin/bash
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
pushd /var/lib/grafana/plugins/omniperfData_plugin
npm run server &
+1
Näytä tiedosto
@@ -10,3 +10,4 @@ tabulate
tqdm
dash-svg
dash-bootstrap-components
kaleido
+11 -9
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import sys
@@ -31,7 +33,7 @@ OMNIPERF_HOME = Path(__file__).resolve().parent
# OMNIPERF INFO
PROG = "omniperf"
SOC_LIST = ["mi50", "mi100", "mi200"]
SOC_LIST = ["mi50", "mi100", "mi200", "vega10"]
DISTRO_MAP = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"}
+4 -2
Näytä tiedosto
@@ -171,7 +171,7 @@ $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
├─────────┼─────────────────────────────┤
...
```
2. Choose your own customized subset of metrics with `-b` (a.k.a. `--filter-metrics`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light).
2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light).
```shell
$ omniperf analyze -p workloads/vcopy/mi200/ -b 2
--------
@@ -317,10 +317,12 @@ allowing users to view results from within a web browser.
Note that the standalone GUI analyzer publishes a web interface on port 8050 by default.
On production HPC systems where profiling jobs run
under the auspices of a resource manager, additional ssh tunneling
under the auspices of a resource manager, additional SSH tunneling
between the desired web browser host (e.g. login node or remote workstation) and compute host may be
required. Alternatively, users may find it more convenient to download
profiled workloads to perform analysis on their local system.
See [FAQ](https://amdresearch.github.io/omniperf/faq.html) for more details on SSH tunneling.
```
#### Usage
+1
Näytä tiedosto
@@ -141,6 +141,7 @@ for pref in preferences:
from recommonmark.transform import AutoStructify
# app setup hook
def setup(app):
app.add_config_value(
+23
Näytä tiedosto
@@ -30,3 +30,26 @@ Workaround:
$ export LC_ALL=C.UTF-8
$ export LANG=C.UTF-8
```
**3. How can I SSH Tunnel in MobaXterm?**
1. Open MobaXterm
2. In the top ribbon, select `Tunneling`
![Tunnel Button](images/tunnel_demo1.png)
The following pop-up will appear:
![Pop up](images/tunnel_demo2.png)
3. Press `New SSH tunnel`
![Pop up](images/tunnel_demo3.png)
4. Configure tunnel accordingly
Local clients
- Forwarded Port: [PORT]
Remote Server
- Remote Server: localhost
- Remote Port: [PORT]
SSH Server
- SSH server: Name of the server one is connecting to
- SSH login: Username to login to the server
- SSH port: 22
+3 -2
Näytä tiedosto
@@ -82,7 +82,8 @@ Modes change the fundamental behavior of the Omniperf command line tool. Dependi
Operation | Mode | Required Arguments
:--|:--|:--
Profile a workload | profile | `--name`, `-- <profile_cmd>`
Standalone roofline analysis | profile | `--name`, `--only-roof`, `-- <profile_cmd>`
Standalone roofline analysis | profile | `--name`, `--roof-only`, `-- <profile_cmd>`
Import a workload to database | database | `--import`, `--host`, `--username`, `--workload`, `--team`
Remove a workload from database | database | `--remove`, `--host`, `--username`, `--workload`, `--team`
Interact with profiling results from CLI | analyze | `--path`, `--gui`
Launch standalone GUI from CLI | analyze | `--path`, `--gui`
Interact with profiling results from CLI | analyze | `--path`
Binary file not shown.

After

Leveys:  |  Korkeus:  |  Koko: 23 KiB

Binary file not shown.

After

Leveys:  |  Korkeus:  |  Koko: 12 KiB

Binary file not shown.

After

Leveys:  |  Korkeus:  |  Koko: 29 KiB

+1 -1
Näytä tiedosto
@@ -24,7 +24,7 @@ Omniperf requires the following basic software dependencies prior to usage:
* Python (>=3.7)
* CMake (>= 3.19)
* ROCm (>= 5.1)
* ROCm (>= 5.2.0)
In addition, Omniperf leverages a number of Python packages that are
documented in the top-level `requirements.txt` file. These must be
+288 -155
Näytä tiedosto
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import sys
import os
@@ -37,7 +39,7 @@ from utils import specs
from utils.perfagg import perfmon_filter, pmc_filter
from utils import remove_workload
from utils import csv_converter # Import workload
from utils import plot_roofline # standalone roofline
from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline
from omniperf_analyze.omniperf_analyze import analyze # CLI analysis
from common import (
@@ -53,10 +55,8 @@ from common import getVersion
# Helper Functions
################################################
def run_subprocess(cmd):
    """Run ``cmd`` to completion and hand back the finished process.

    Args:
        cmd: Argument list passed unchanged to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` for the command. Callers in this
        file inspect ``result.returncode``, so the result must be returned
        rather than dropped.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (``check=True``).
    """
    return subprocess.run(cmd, check=True)
def resolve_rocprof():
# ROCPROF INFO
@@ -89,6 +89,8 @@ def get_soc():
target = "mi100"
elif mspec.GPU == "gfx90a":
target = "mi200"
elif mspec.GPU == "gfx900":
target = "vega10"
else:
print("\nInvalid SoC")
sys.exit(0)
@@ -123,7 +125,9 @@ def replace_timestamps(workload_dir):
df_pmc_perf["EndNs"] = df_stamps["EndNs"]
df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False)
else:
warnings.warn("WARNING: Incomplete profiling data detected. Unable to update timestamps.")
warnings.warn(
"WARNING: Incomplete profiling data detected. Unable to update timestamps."
)
def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
@@ -148,7 +152,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
timestamp = now.strftime("%c") + " (" + local_tzname + ")"
# host info
param = [workload_name]
param += [app_cmd]
param += ['"' + app_cmd + '"']
param += [
mspec.hostname,
mspec.cpu,
@@ -212,6 +216,48 @@ def mongo_import(args, profileAndImport):
################################################
# Roofline Helpers
################################################
def roof_setup(args, my_parser):
    """Make sure every input needed for a standalone roofline is in place.

    Works on ``args.path`` (extended in place with ``<name>/<soc>`` when it is
    still the default ``<cwd>/workloads``) and checks, in order, for
    ``roofline.csv``, ``sysinfo.csv`` and ``pmc_perf.csv``. Each missing file
    triggers the matching generator: ``mibench``, ``gen_sysinfo`` or
    ``characterize_app``.

    Args:
        args: Parsed command-line namespace; reads ``path``, ``name``,
            ``remaining`` and ``verbose``.
        my_parser: Parser object forwarded to ``throw_parse_error`` when a
            precondition is not met.
    """
    default_root = os.getcwd() + "/workloads"
    if args.path == default_root:
        args.path += "/" + args.name + "/" + str(get_soc())

    # A brand-new roofline needs its output directory created first
    if not os.path.isdir(args.path):
        os.makedirs(args.path)

    # --- roofline benchmark data ---
    print("Checking for roofline.csv in ", args.path)
    have_roofline = os.path.isfile(args.path + "/roofline.csv")
    if not have_roofline:
        if get_soc() != "mi200":
            throw_parse_error(
                my_parser, "Invalid SoC.\nRoofline only availible on MI200."
            )
        mibench(args)

    # --- system information ---
    print("Checking for sysinfo.csv in ", args.path)
    have_sysinfo = os.path.isfile(args.path + "/sysinfo.csv")
    if not have_sysinfo:
        print("sysinfo not found")
        gen_sysinfo(args.name, args.path, [], args.remaining, False)

    # --- application counter data ---
    print("Checking for pmc_perf.csv in ", args.path)
    have_app_data = os.path.isfile(args.path + "/pmc_perf.csv")
    if have_app_data:
        return

    if get_soc() != "mi200":
        throw_parse_error(
            my_parser, "Invalid SoC.\nRoofline only availible on MI200."
        )
    if args.remaining:
        # An application command was supplied, so profile it now
        characterize_app(args.path, args.remaining, args.verbose)
    else:
        throw_parse_error(
            my_parser,
            "Cannot find existing application data.\nAttempting to generate application data from -- <app_cmd>.\n-- <app_cmd> option is required to generate application data.",
        )
def detect_roofline():
@@ -297,89 +343,7 @@ def characterize_app(path, cmd, verbose):
# Workload profiling
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
print(fname)
run_prof(fname, workload_dir, perfmon_dir, app_cmd, verbose)
################################################
# Profiling Helpers
################################################
def run_prof(fname, workload_dir, perfmon_dir, cmd, verbose):
global rocprof_cmd
fbase = os.path.splitext(os.path.basename(fname))[0]
if verbose:
print("pmc file:", os.path.basename(fname))
# profile the app
run_subprocess(
[
rocprof_cmd,
"-i",
fname,
"--timestamp",
"on",
"-o",
workload_dir + "/" + fbase + ".csv",
'"' + cmd + '"',
]
)
def omniperf_profile(args, VER):
# Verify valid target
if args.target not in SOC_LIST:
parse.print_help(sys.stderr)
sys.exit(1)
# Basic Info
print(PROG, "ver: ", VER)
print("Path: ", args.path)
print("Target: ", args.target)
print("Command: ", args.remaining)
print("Kernel Selection: ", args.kernel)
print("Dispatch Selection: ", args.dispatch)
if args.ipblocks == None:
print("IP Blocks: All")
else:
print("IP Blocks: ", args.ipblocks)
# Set up directories
workload_dir = args.path + "/" + args.name + "/" + args.target
perfmon_dir = str(OMNIPERF_HOME) + "/perfmon_pub"
# Perfmon filtering
perfmon_filter(workload_dir, perfmon_dir, args)
# Workload profiling
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
# Kernel filtering (in-place replacement)
if not args.kernel == None:
run_subprocess(
[
"sed",
"-i",
"-r",
"s%^(kernel:).*%" + "kernel: " + ",".join(args.kernel) + "%g",
fname,
]
)
# Dispatch filtering (inplace replacement)
if not args.dispatch == None:
run_subprocess(
[
"sed",
"-i",
"-r",
"s%^(range:).*%" + "range: " + ",".join(args.dispatch) + "%g",
fname,
]
)
run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.verbose)
run_prof(fname, workload_dir, perfmon_dir, app_cmd, target, verbose)
# run again with timestamps
run_subprocess(
[
@@ -390,13 +354,233 @@ def omniperf_profile(args, VER):
"on",
"-o",
workload_dir + "/" + "timestamps.csv",
'"' + args.remaining + '"',
'"' + app_cmd + '"',
]
)
# Update pmc_perf.csv timestamps
replace_timestamps(workload_dir)
################################################
# Profiling Helpers
################################################
def run_rocscope(args, fname):
    """Profile the application with ``rocscope metrics`` when requested.

    Does nothing unless ``args.use_rocscope`` is ``True``. Otherwise the
    rocscope executable is located with ``which`` and invoked as
    ``rocscope metrics -p <path> -n <name> -t <fname> -- <app command...>``.

    Args:
        args: Parsed command-line namespace; reads ``use_rocscope``, ``path``,
            ``name`` and ``remaining`` (the application command as one
            whitespace-separated string).
        fname: Counter input file forwarded to rocscope via ``-t``.

    Exits the interpreter with status 1 when rocscope is not on the PATH
    (matching the other rocscope call sites in this file) or when rocscope
    itself returns a non-zero status.
    """
    # Only an explicit True enables the rocscope path
    if args.use_rocscope != True:
        return

    located = subprocess.run(
        ["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )
    if located.returncode != 0:
        print("rocscope must be in the PATH")
        sys.exit(1)

    rs_cmd = [
        located.stdout.decode("ascii").strip(),
        "metrics",
        "-p",
        args.path,
        "-n",
        args.name,
        "-t",
        fname,
        "--",
    ]
    rs_cmd.extend(args.remaining.split())
    print(rs_cmd)

    # Run without check=True and inspect the returned object directly, so a
    # failing profiler produces a clean exit instead of an exception. Output is
    # left attached to the terminal, so rocscope's own stderr stays visible.
    result = subprocess.run(rs_cmd)
    if result.returncode != 0:
        print("rocscope failed with exit code", result.returncode)
        sys.exit(1)
def run_prof(fname, workload_dir, perfmon_dir, cmd, target, verbose):
    """Run the profiler once for a single counter input file.

    The results are written to ``<workload_dir>/<basename of fname>.csv`` with
    timestamps enabled. For the ``mi100`` target the invocation additionally
    passes ``-m <perfmon_dir>/metrics.xml`` so the custom metric definitions
    are used.

    Args:
        fname: Path of the counter input file handed to the profiler via ``-i``.
        workload_dir: Directory receiving the output CSV.
        perfmon_dir: Directory containing ``metrics.xml`` (mi100 only).
        cmd: Application command string; passed as one double-quoted argument.
        target: SoC name; only ``"mi100"`` changes the invocation.
        verbose: When truthy, print the name of the counter file being run.
    """
    global rocprof_cmd

    pmc_name = os.path.basename(fname)
    out_csv = workload_dir + "/" + os.path.splitext(pmc_name)[0] + ".csv"
    if verbose:
        print("pmc file:", pmc_name)

    # profile the app (run w/ custom config files for mi100)
    metric_override = []
    if target == "mi100":
        print("RUNNING WITH CUSTOM METRICS")
        metric_override = ["-m", perfmon_dir + "/" + "metrics.xml"]

    prof_argv = [rocprof_cmd, "-i", fname]
    prof_argv += metric_override
    prof_argv += ["--timestamp", "on", "-o", out_csv, '"' + cmd + '"']
    run_subprocess(prof_argv)
def omniperf_profile(args, VER):
# Verify valid target
if args.target not in SOC_LIST:
parse.print_help(sys.stderr)
sys.exit(1)
# Verify valid name
if args.name.find(".") != -1 or args.name.find("-") != -1:
raise ValueError("'-' and '.' are not permited in workload name", args.name)
# Basic Info
print(PROG, "ver: ", VER)
print("Path: ", args.path)
print("Target: ", args.target)
print("Command: ", args.remaining)
print("Kernel Selection: ", args.kernel)
print("Dispatch Selection: ", args.dispatch)
if args.ipblocks == None:
print("IP Blocks: All", "\n")
else:
print("IP Blocks: ", args.ipblocks, "\n")
# Set up directories
workload_dir = args.path + "/" + args.name + "/" + args.target
perfmon_dir = str(OMNIPERF_HOME) + "/perfmon_pub"
# Perfmon filtering
perfmon_filter(workload_dir, perfmon_dir, args)
if not args.lucky == None and args.lucky == True:
print("You're feeling lucky - only profiling top N kernels")
# look for whether workload_dir exists - create if not
try:
os.makedirs(workload_dir, exist_ok=True)
except Exception as e:
print("Unable to create workload directory: ", workload_dir)
print(e)
sys.exit(1)
result = subprocess.run(
["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
)
if result.returncode == 0:
rs_cmd = [
result.stdout.decode("ascii").strip(),
"top10",
"-p",
args.path,
"-n",
args.name,
"--",
]
for i in args.remaining.split():
rs_cmd.append(i)
print(rs_cmd)
result = run_subprocess(
rs_cmd
) # , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
print(result.stderr.decode("ascii"))
else:
print("rocscope must be in the PATH")
sys.exit(1)
elif not args.summaries == None and args.summaries == True:
print("creating kernel summaries")
# look for whether workload_dir exists - create if not
try:
os.makedirs(workload_dir, exist_ok=True)
except Exception as e:
print("Unable to create workload directory: ", workload_dir)
print(e)
sys.exit(1)
result = subprocess.run(
["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
)
if result.returncode == 0:
rs_cmd = [
result.stdout.decode("ascii").strip(),
"summary",
"-p",
args.path,
"-n",
args.name,
"--",
]
for i in args.remaining.split():
rs_cmd.append(i)
print(rs_cmd)
result = run_subprocess(
rs_cmd
) # , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
print(result.stderr.decode("ascii"))
else:
print("rocscope must be in the PATH")
sys.exit(1)
else:
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
# Kernel filtering (in-place replacement)
if not args.kernel == None:
run_subprocess(
[
"sed",
"-i",
"-r",
"s%^(kernel:).*%" + "kernel: " + ",".join(args.kernel) + "%g",
fname,
]
)
# Dispatch filtering (inplace replacement)
if not args.dispatch == None:
run_subprocess(
[
"sed",
"-i",
"-r",
"s%^(range:).*%" + "range: " + " ".join(args.dispatch) + "%g",
fname,
]
)
if args.use_rocscope == True:
run_rocscope(args, fname)
else:
run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, args.verbose)
# run again with timestamps
run_subprocess(
[
rocprof_cmd,
# "-i", fname,
# "-m", perfmon_dir + "/" + "metrics.xml",
"--timestamp",
"on",
"-o",
workload_dir + "/" + "timestamps.csv",
'"' + args.remaining + '"',
]
)
# Update pmc_perf.csv timestamps
replace_timestamps(workload_dir)
# Generate sysinfo
gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
@@ -466,6 +650,7 @@ def main():
# PROFILE MODE
##############
if args.mode == "profile":
print("Resolving rocprof")
resolve_rocprof()
if ".." in str(args.path):
throw_parse_error(
@@ -495,62 +680,10 @@ def main():
elif args.roof_only:
print("\n--------\nRoofline only\n--------\n")
if args.path == os.getcwd() + "/workloads":
args.path += "/" + args.name + "/" + str(get_soc())
# Verify valid axes parameters
if args.axes:
if len(args.axes) != 4:
throw_parse_error(
my_parser,
"Invalid argument for --axes.\nMust contain four values formatted as: --axes xmin xmax ymin ymax",
)
if args.axes[0] > args.axes[1] or args.axes[2] > args.axes[3]:
throw_parse_error(
my_parser,
"Invalid argument for --axes.\nBreaks required conditions: (xmax > xmin && ymax > ymin)",
)
# We need to make a directory for a new roofline
if not os.path.isdir(args.path):
os.makedirs(args.path)
# does roof data exist?
print("Checking for roofline.csv in ", args.path)
roof_path = args.path + "/roofline.csv"
roofline_exists = os.path.isfile(roof_path)
if not roofline_exists:
if get_soc() != "mi200":
throw_parse_error(
my_parser, "Invalid SoC.\nRoofline only availible on MI200."
)
mibench(args)
# does sysinfo exist?
print("Checking for sysinfo.csv in ", args.path)
sysinfo_path = args.path + "/sysinfo.csv"
sysinfo_exists = os.path.isfile(sysinfo_path)
if not sysinfo_exists:
print("sysinfo not found")
gen_sysinfo(args.name, args.path, [], args.remaining, False)
# does app data exist?
print("Checking for pmc_perf.csv in ", args.path)
app_path = args.path + "/pmc_perf.csv"
app_exists = os.path.isfile(app_path)
if not app_exists:
if get_soc() != "mi200":
throw_parse_error(
my_parser, "Invalid SoC.\nRoofline only availible on MI200."
)
if not args.remaining:
throw_parse_error(
my_parser,
"Cannot find existing application data.\nAttempting to generate application data from -- <app_cmd>.\n-- <app_cmd> option is required to generate application data.",
)
else:
characterize_app(args.path, args.remaining, args.verbose)
# Set up prerequisites for roofline
roof_setup(args, my_parser)
# Generate roofline
plot_roofline.empirical_roof(args)
roofline_only(args.path, args.device, args.sort, args.mem_level, args.verbose)
# Profile only
else:
@@ -593,20 +593,6 @@ button.report:hover {
#l2_cache_per_channel a, #l2_cache_per_channel a:visited { color: #fff; }
#l2_cache_per_channel a:hover, #l2_cache_per_channel a:focus { color: #11ABB0; }
#l2_cache_per_channel .float-container {
/* border: 3px solid #fff; */
padding: 20px;
}
#l2_cache_per_channel .float-child {
width: 100%;
float: left;
padding: 20px;
/* border: 2px solid red; */
}
#l2_cache_per_channel .float-child h3 {
color: #fff;
}
/* ------------------------------------------------------------------ */
/* c. About Section
/* ------------------------------------------------------------------ */
@@ -104,28 +104,28 @@ Panel Config:
unit: Instr/wavefront
tips:
Wave Cycles:
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
unit: (Cycles + $normUnit)
tips:
Dependency Wait Cycles:
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Issue Wait Cycles:
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Active Cycles:
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Wavefront Occupancy:
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
@@ -237,81 +237,107 @@ Panel Config:
id: 1604
title: L1D - L2 Transactions
header:
metric: Metric
xfer: Xfer
mean: Mean
coherency: Coherency
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
NC - Read:
mean: None # No perf counter
xfer: Read
coherency: NC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
UC - Read:
mean: None # No perf counter
xfer: Read
coherency: UC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
CC - Read:
mean: None # No perf counter
xfer: Read
coherency: CC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
RW - Read:
mean: None # No perf counter
xfer: Read
coherency: RW
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
RW - Write:
mean: None # No perf counter
xfer: Write
coherency: RW
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
NC - Write:
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: NC
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Write:
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
CC - Write:
xfer: Write
coherency: CC
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Write:
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: UC
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Atomic:
mean: None # No perf counter
xfer: Atomic
coherency: NC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
UC - Atomic:
mean: None # No perf counter
xfer: Atomic
coherency: UC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
CC - Atomic:
mean: None # No perf counter
xfer: Atomic
coherency: CC
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
tips:
RW - Atomic:
mean: None # No perf counter
xfer: Atomic
coherency: RW
avg: None # No perf counter
min: None # No perf counter
max: None # No perf counter
unit: (Req + $normUnit)
@@ -294,6 +294,8 @@ Panel Config:
title: L2 - EA Interface Stalls
header:
metric: Metric
type: Type
transaction: Transaction
avg: Avg
min: Min
max: Max
@@ -301,42 +303,56 @@ Panel Config:
tips: Tips
metric:
Read - Remote Socket Stall:
type: Remote Socket Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - Peer GCD Stall:
type: Peer GCD Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - HBM Stall:
type: HBM Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Remote Socket Stall:
type: Remote Socket Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Peer GCD Stall:
type: Peer GCD Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - HBM Stall:
type: HBM Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Credit Starvation:
type: Credit Starvation
transaction: Write
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
@@ -10,28 +10,28 @@ Panel Config:
data source:
- metric_table:
id: 1801
title: Channel 0 -15
title: Channel 0-15
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
"0":
@@ -41,10 +41,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
!= 0) else None))
@@ -69,10 +69,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
!= 0) else None))
@@ -97,10 +97,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
!= 0) else None))
@@ -125,10 +125,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
!= 0) else None))
@@ -153,10 +153,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
!= 0) else None))
@@ -181,10 +181,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
!= 0) else None))
@@ -209,10 +209,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
!= 0) else None))
@@ -237,10 +237,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
!= 0) else None))
@@ -265,10 +265,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
!= 0) else None))
@@ -293,10 +293,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
!= 0) else None))
@@ -321,10 +321,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
!= 0) else None))
@@ -349,10 +349,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
!= 0) else None))
@@ -377,10 +377,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
!= 0) else None))
@@ -405,10 +405,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
!= 0) else None))
@@ -433,10 +433,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
!= 0) else None))
@@ -461,10 +461,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
!= 0) else None))
@@ -485,28 +485,28 @@ Panel Config:
- metric_table:
id: 1802
title: Channel 16 -31
title: Channel 16-31
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
"16":
@@ -514,10 +514,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -534,10 +534,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -554,10 +554,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -574,10 +574,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -594,10 +594,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -614,10 +614,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -634,10 +634,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -654,10 +654,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -674,10 +674,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -694,10 +694,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -714,10 +714,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -734,10 +734,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -754,10 +754,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -774,10 +774,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -794,10 +794,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -814,10 +814,10 @@ Panel Config:
req: None # No perf counter
read req: None # No perf counter
write req: None # No perf counter
atomicreq: None # No perf counter
atomic req: None # No perf counter
ea read req: None # No perf counter
ea write req: None # No perf counter
ea atomicreq: None # No perf counter
ea atomic req: None # No perf counter
ea read lat - cycles: None # No perf counter
ea write lat - cycles: None # No perf counter
ea atomic lat - cycles: None # No perf counter
@@ -104,28 +104,28 @@ Panel Config:
unit: Instr/wavefront
tips:
Wave Cycles:
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
unit: (Cycles + $normUnit)
tips:
Dependency Wait Cycles:
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Issue Wait Cycles:
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Active Cycles:
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Wavefront Occupancy:
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
@@ -237,81 +237,107 @@ Panel Config:
id: 1604
title: L1D - L2 Transactions
header:
metric: Metric
xfer: Xfer
mean: Mean
coherency: Coherency
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
NC - Read:
mean: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
xfer: Read
coherency: NC
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Read:
mean: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
xfer: Read
coherency: UC
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Read:
mean: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
xfer: Read
coherency: CC
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Read:
mean: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
xfer: Read
coherency: RW
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Write:
mean: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
xfer: Write
coherency: RW
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Write:
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: NC
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Write:
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: UC
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Write:
mean: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: CC
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Atomic:
mean: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: NC
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Atomic:
mean: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: UC
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Atomic:
mean: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: CC
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Atomic:
mean: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: RW
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
@@ -294,6 +294,8 @@ Panel Config:
title: L2 - EA Interface Stalls
header:
metric: Metric
type: Type
transaction: Transaction
avg: Avg
min: Min
max: Max
@@ -301,42 +303,56 @@ Panel Config:
tips: Tips
metric:
Read - Remote Socket Stall:
type: Remote Socket Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - Peer GCD Stall:
type: Peer GCD Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - HBM Stall:
type: HBM Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Remote Socket Stall:
type: Remote Socket Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Peer GCD Stall:
type: Peer GCD Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - HBM Stall:
type: HBM Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Credit Starvation:
type: Credit Starvation
transaction: Write
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
@@ -10,28 +10,28 @@ Panel Config:
data source:
- metric_table:
id: 1801
title: Channel 0 -15
title: Channel 0-15
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
"0":
@@ -41,10 +41,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
!= 0) else None))
@@ -69,10 +69,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
!= 0) else None))
@@ -97,10 +97,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
!= 0) else None))
@@ -125,10 +125,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
!= 0) else None))
@@ -153,10 +153,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
!= 0) else None))
@@ -181,10 +181,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
!= 0) else None))
@@ -209,10 +209,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
!= 0) else None))
@@ -237,10 +237,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
!= 0) else None))
@@ -265,10 +265,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
!= 0) else None))
@@ -293,10 +293,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
!= 0) else None))
@@ -321,10 +321,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
!= 0) else None))
@@ -349,10 +349,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
!= 0) else None))
@@ -377,10 +377,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
!= 0) else None))
@@ -405,10 +405,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
!= 0) else None))
@@ -433,10 +433,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
!= 0) else None))
@@ -461,10 +461,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
!= 0) else None))
@@ -485,28 +485,28 @@ Panel Config:
- metric_table:
id: 1802
title: Channel 16 -31
title: Channel 16-31
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
"16":
@@ -516,10 +516,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[16]) / $denom))
read req: AVG((TO_INT(TCC_READ[16]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[16]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[16]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[16]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[16] / TCC_EA_RDREQ[16]) if (TCC_EA_RDREQ[16]
!= 0) else None))
@@ -544,10 +544,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[17]) / $denom))
read req: AVG((TO_INT(TCC_READ[17]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[17]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[17]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[17]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[17] / TCC_EA_RDREQ[17]) if (TCC_EA_RDREQ[17]
!= 0) else None))
@@ -572,10 +572,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[18]) / $denom))
read req: AVG((TO_INT(TCC_READ[18]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[18]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[18]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[18]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[18] / TCC_EA_RDREQ[18]) if (TCC_EA_RDREQ[18]
!= 0) else None))
@@ -600,10 +600,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[19]) / $denom))
read req: AVG((TO_INT(TCC_READ[19]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[19]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[19]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[19]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[19] / TCC_EA_RDREQ[19]) if (TCC_EA_RDREQ[19]
!= 0) else None))
@@ -628,10 +628,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[20]) / $denom))
read req: AVG((TO_INT(TCC_READ[20]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[20]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[20]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[20]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[20] / TCC_EA_RDREQ[20]) if (TCC_EA_RDREQ[20]
!= 0) else None))
@@ -656,10 +656,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[21]) / $denom))
read req: AVG((TO_INT(TCC_READ[21]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[21]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[21]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[21]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[21] / TCC_EA_RDREQ[21]) if (TCC_EA_RDREQ[21]
!= 0) else None))
@@ -684,10 +684,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[22]) / $denom))
read req: AVG((TO_INT(TCC_READ[22]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[22]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[22]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[22]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[22] / TCC_EA_RDREQ[22]) if (TCC_EA_RDREQ[22]
!= 0) else None))
@@ -712,10 +712,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[23]) / $denom))
read req: AVG((TO_INT(TCC_READ[23]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[23]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[23]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[23]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[23] / TCC_EA_RDREQ[23]) if (TCC_EA_RDREQ[23]
!= 0) else None))
@@ -740,10 +740,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[24]) / $denom))
read req: AVG((TO_INT(TCC_READ[24]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[24]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[24]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[24]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[24] / TCC_EA_RDREQ[24]) if (TCC_EA_RDREQ[24]
!= 0) else None))
@@ -768,10 +768,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[25]) / $denom))
read req: AVG((TO_INT(TCC_READ[25]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[25]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[25]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[25]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[25] / TCC_EA_RDREQ[25]) if (TCC_EA_RDREQ[25]
!= 0) else None))
@@ -796,10 +796,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[26]) / $denom))
read req: AVG((TO_INT(TCC_READ[26]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[26]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[26]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[26]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[26] / TCC_EA_RDREQ[26]) if (TCC_EA_RDREQ[26]
!= 0) else None))
@@ -824,10 +824,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[27]) / $denom))
read req: AVG((TO_INT(TCC_READ[27]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[27]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[27]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[27]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[27] / TCC_EA_RDREQ[27]) if (TCC_EA_RDREQ[27]
!= 0) else None))
@@ -852,10 +852,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[28]) / $denom))
read req: AVG((TO_INT(TCC_READ[28]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[28]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[28]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[28]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[28] / TCC_EA_RDREQ[28]) if (TCC_EA_RDREQ[28]
!= 0) else None))
@@ -880,10 +880,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[29]) / $denom))
read req: AVG((TO_INT(TCC_READ[29]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[29]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[29]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[29]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[29] / TCC_EA_RDREQ[29]) if (TCC_EA_RDREQ[29]
!= 0) else None))
@@ -908,10 +908,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[30]) / $denom))
read req: AVG((TO_INT(TCC_READ[30]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[30]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[30]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[30]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[30] / TCC_EA_RDREQ[30]) if (TCC_EA_RDREQ[30]
!= 0) else None))
@@ -936,10 +936,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[31]) / $denom))
read req: AVG((TO_INT(TCC_READ[31]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[31]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[31]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[31]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
ea read lat - cycles:
AVG(((TCC_EA_RDREQ_LEVEL[31] / TCC_EA_RDREQ[31]) if (TCC_EA_RDREQ[31]
!= 0) else None))
@@ -104,28 +104,28 @@ Panel Config:
unit: Instr/wavefront
tips:
Wave Cycles:
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
unit: (Cycles + $normUnit)
tips:
Dependency Wait Cycles:
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Issue Wait Cycles:
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Active Cycles:
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
unit: Cycles/wave
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
Wavefront Occupancy:
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
@@ -163,17 +163,17 @@ Panel Config:
tips: Tips
metric:
MFMA-I8:
count: AVG((SQ_INSTS_VALU_MFMA_I8 / SQ_WAVES))
count: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom))
tips:
MFMA-F16:
count: AVG((SQ_INSTS_VALU_MFMA_F16 / SQ_WAVES))
count: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
tips:
MFMA-BF16:
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / SQ_WAVES))
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom))
tips:
MFMA-F32:
count: AVG((SQ_INSTS_VALU_MFMA_F32 / SQ_WAVES))
count: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom))
tips:
MFMA-F64:
count: AVG((SQ_INSTS_VALU_MFMA_F64 / SQ_WAVES))
count: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom))
tips:
@@ -237,81 +237,107 @@ Panel Config:
id: 1604
title: L1D - L2 Transactions
header:
metric: Metric
xfer: Xfer
mean: Mean
coherency: Coherency
avg: Avg
min: Min
max: Max
unit: Unit
tips: Tips
metric:
NC - Read:
mean: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
xfer: Read
coherency: NC
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Read:
mean: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
xfer: Read
coherency: UC
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Read:
mean: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
xfer: Read
coherency: CC
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Read:
mean: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
xfer: Read
coherency: RW
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Write:
mean: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
xfer: Write
coherency: RW
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Write:
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: NC
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Write:
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: UC
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Write:
mean: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
xfer: Write
coherency: CC
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
NC - Atomic:
mean: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: NC
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
UC - Atomic:
mean: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: UC
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
CC - Atomic:
mean: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: CC
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
tips:
RW - Atomic:
mean: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
xfer: Atomic
coherency: RW
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
unit: (Req + $normUnit)
@@ -294,6 +294,8 @@ Panel Config:
title: L2 - Fabric Interface Stalls
header:
metric: Metric
type: Type
transaction: Transaction
avg: Avg
min: Min
max: Max
@@ -301,42 +303,56 @@ Panel Config:
tips: Tips
metric:
Read - Remote Socket Stall:
type: Remote Socket Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - Peer GCD Stall:
type: Peer GCD Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Read - HBM Stall:
type: HBM Stall
transaction: Read
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Remote Socket Stall:
type: Remote Socket Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Peer GCD Stall:
type: Peer GCD Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - HBM Stall:
type: HBM Stall
transaction: Write
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
unit: (Req + $normUnit)
tips:
Write - Credit Starvation:
type: Credit Starvation
transaction: Write
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
@@ -10,28 +10,28 @@ Panel Config:
data source:
- metric_table:
id: 1801
title: Channel 0 -15
title: Channel 0-15
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
'0':
@@ -40,10 +40,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[0] / TCC_EA_WRREQ[0]) if (TCC_EA_WRREQ[0]
@@ -64,10 +64,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[1] / TCC_EA_WRREQ[1]) if (TCC_EA_WRREQ[1]
@@ -88,10 +88,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[2] / TCC_EA_WRREQ[2]) if (TCC_EA_WRREQ[2]
@@ -112,10 +112,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[3] / TCC_EA_WRREQ[3]) if (TCC_EA_WRREQ[3]
@@ -136,10 +136,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[4] / TCC_EA_WRREQ[4]) if (TCC_EA_WRREQ[4]
@@ -160,10 +160,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[5] / TCC_EA_WRREQ[5]) if (TCC_EA_WRREQ[5]
@@ -184,10 +184,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[6] / TCC_EA_WRREQ[6]) if (TCC_EA_WRREQ[6]
@@ -208,10 +208,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[7] / TCC_EA_WRREQ[7]) if (TCC_EA_WRREQ[7]
@@ -232,10 +232,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[8] / TCC_EA_WRREQ[8]) if (TCC_EA_WRREQ[8]
@@ -256,10 +256,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[9] / TCC_EA_WRREQ[9]) if (TCC_EA_WRREQ[9]
@@ -280,10 +280,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[10] / TCC_EA_WRREQ[10]) if (TCC_EA_WRREQ[10]
@@ -304,10 +304,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[11] / TCC_EA_WRREQ[11]) if (TCC_EA_WRREQ[11]
@@ -328,10 +328,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[12] / TCC_EA_WRREQ[12]) if (TCC_EA_WRREQ[12]
@@ -352,10 +352,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[13] / TCC_EA_WRREQ[13]) if (TCC_EA_WRREQ[13]
@@ -376,10 +376,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[14] / TCC_EA_WRREQ[14]) if (TCC_EA_WRREQ[14]
@@ -400,10 +400,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[15] / TCC_EA_WRREQ[15]) if (TCC_EA_WRREQ[15]
@@ -420,28 +420,28 @@ Panel Config:
tips:
- metric_table:
id: 1802
title: Channel 16 -31
title: Channel 16-31
columnwise: True
header:
channel: Channel
hit rate: Hit Rate
req: Req
read req: Read Req
write req: Write Req
atomicreq: AtomicReq
ea read req: EA Read Req
ea write req: EA Write Req
ea atomicreq: EA AtomicReq
ea read lat - cycles: EA Read Lat - cycles
ea write lat - cycles: EA Write Lat - cycles
ea atomic lat - cycles: EA Atomic Lat - cycles
ea read stall - io: EA Read Stall - IO
ea read stall - gmi: EA Read Stall - GMI
ea read stall - dram: EA Read Stall - DRAM
ea write stall - io: EA Write Stall - IO
ea write stall - gmi: EA Write Stall - GMI
ea write stall - dram: EA Write Stall - DRAM
ea write stall - starve: EA Write Stall - Starve
hit rate: L2 Cache Hit Rate (%)
req: Requests (Requests)
read req: L1-L2 Read (Requests)
write req: L1-L2 Write (Requests)
atomic req: L1-L2 Atomic (Requests)
ea read req: L2-EA Read (Requests)
ea write req: L2-EA Write (Requests)
ea atomic req: L2-EA Atomic (Requests)
ea read lat - cycles: L2-EA Read Latency (Cycles)
ea write lat - cycles: L2-EA Write Latency (Cycles)
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
tips: Tips
metric:
'16':
@@ -450,10 +450,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[16]) / $denom))
read req: AVG((TO_INT(TCC_READ[16]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[16]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[16]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[16]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[16] / TCC_EA_RDREQ[16]) if (TCC_EA_RDREQ[16]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[16] / TCC_EA_WRREQ[16]) if (TCC_EA_WRREQ[16]
@@ -474,10 +474,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[17]) / $denom))
read req: AVG((TO_INT(TCC_READ[17]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[17]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[17]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[17]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[17] / TCC_EA_RDREQ[17]) if (TCC_EA_RDREQ[17]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[17] / TCC_EA_WRREQ[17]) if (TCC_EA_WRREQ[17]
@@ -498,10 +498,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[18]) / $denom))
read req: AVG((TO_INT(TCC_READ[18]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[18]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[18]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[18]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[18] / TCC_EA_RDREQ[18]) if (TCC_EA_RDREQ[18]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[18] / TCC_EA_WRREQ[18]) if (TCC_EA_WRREQ[18]
@@ -522,10 +522,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[19]) / $denom))
read req: AVG((TO_INT(TCC_READ[19]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[19]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[19]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[19]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[19] / TCC_EA_RDREQ[19]) if (TCC_EA_RDREQ[19]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[19] / TCC_EA_WRREQ[19]) if (TCC_EA_WRREQ[19]
@@ -546,10 +546,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[20]) / $denom))
read req: AVG((TO_INT(TCC_READ[20]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[20]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[20]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[20]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[20] / TCC_EA_RDREQ[20]) if (TCC_EA_RDREQ[20]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[20] / TCC_EA_WRREQ[20]) if (TCC_EA_WRREQ[20]
@@ -570,10 +570,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[21]) / $denom))
read req: AVG((TO_INT(TCC_READ[21]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[21]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[21]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[21]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[21] / TCC_EA_RDREQ[21]) if (TCC_EA_RDREQ[21]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[21] / TCC_EA_WRREQ[21]) if (TCC_EA_WRREQ[21]
@@ -594,10 +594,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[22]) / $denom))
read req: AVG((TO_INT(TCC_READ[22]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[22]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[22]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[22]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[22] / TCC_EA_RDREQ[22]) if (TCC_EA_RDREQ[22]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[22] / TCC_EA_WRREQ[22]) if (TCC_EA_WRREQ[22]
@@ -618,10 +618,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[23]) / $denom))
read req: AVG((TO_INT(TCC_READ[23]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[23]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[23]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[23]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[23] / TCC_EA_RDREQ[23]) if (TCC_EA_RDREQ[23]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[23] / TCC_EA_WRREQ[23]) if (TCC_EA_WRREQ[23]
@@ -642,10 +642,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[24]) / $denom))
read req: AVG((TO_INT(TCC_READ[24]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[24]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[24]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[24]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[24] / TCC_EA_RDREQ[24]) if (TCC_EA_RDREQ[24]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[24] / TCC_EA_WRREQ[24]) if (TCC_EA_WRREQ[24]
@@ -666,10 +666,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[25]) / $denom))
read req: AVG((TO_INT(TCC_READ[25]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[25]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[25]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[25]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[25] / TCC_EA_RDREQ[25]) if (TCC_EA_RDREQ[25]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[25] / TCC_EA_WRREQ[25]) if (TCC_EA_WRREQ[25]
@@ -690,10 +690,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[26]) / $denom))
read req: AVG((TO_INT(TCC_READ[26]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[26]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[26]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[26]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[26] / TCC_EA_RDREQ[26]) if (TCC_EA_RDREQ[26]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[26] / TCC_EA_WRREQ[26]) if (TCC_EA_WRREQ[26]
@@ -714,10 +714,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[27]) / $denom))
read req: AVG((TO_INT(TCC_READ[27]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[27]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[27]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[27]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[27] / TCC_EA_RDREQ[27]) if (TCC_EA_RDREQ[27]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[27] / TCC_EA_WRREQ[27]) if (TCC_EA_WRREQ[27]
@@ -738,10 +738,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[28]) / $denom))
read req: AVG((TO_INT(TCC_READ[28]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[28]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[28]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[28]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[28] / TCC_EA_RDREQ[28]) if (TCC_EA_RDREQ[28]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[28] / TCC_EA_WRREQ[28]) if (TCC_EA_WRREQ[28]
@@ -762,10 +762,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[29]) / $denom))
read req: AVG((TO_INT(TCC_READ[29]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[29]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[29]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[29]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[29] / TCC_EA_RDREQ[29]) if (TCC_EA_RDREQ[29]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[29] / TCC_EA_WRREQ[29]) if (TCC_EA_WRREQ[29]
@@ -786,10 +786,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[30]) / $denom))
read req: AVG((TO_INT(TCC_READ[30]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[30]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[30]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[30]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[30] / TCC_EA_RDREQ[30]) if (TCC_EA_RDREQ[30]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[30] / TCC_EA_WRREQ[30]) if (TCC_EA_WRREQ[30]
@@ -810,10 +810,10 @@ Panel Config:
req: AVG((TO_INT(TCC_REQ[31]) / $denom))
read req: AVG((TO_INT(TCC_READ[31]) / $denom))
write req: AVG((TO_INT(TCC_WRITE[31]) / $denom))
atomicreq: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
atomic req: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
ea read req: AVG((TO_INT(TCC_EA_RDREQ[31]) / $denom))
ea write req: AVG((TO_INT(TCC_EA_WRREQ[31]) / $denom))
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[31] / TCC_EA_RDREQ[31]) if (TCC_EA_RDREQ[31]
!= 0) else None))
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[31] / TCC_EA_WRREQ[31]) if (TCC_EA_WRREQ[31]
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (C) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import ast
import astunparse
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,23 +12,22 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import re
import sys
if __name__ == "__main__":
with open(sys.argv[1], "r") as file:
s = file.read()
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
################################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,23 +12,22 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
################################################################################el
import re
import sys
if __name__ == "__main__":
with open(sys.argv[1], "r") as file:
s = file.read()
+43 -10
Näytä tiedosto
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
"""
Quick run:
@@ -41,6 +43,7 @@ import argparse
import os.path
from pathlib import Path
from omniperf_analyze.utils import parser, file_io
from omniperf_analyze.utils.gui_components.roofline import get_roofline
def initialize_run(args, normalization_filter=None):
@@ -143,7 +146,7 @@ def run_gui(args, runs):
num_results,
)
runs[args.path[0][0]].raw_pmc = file_io.create_df_pmc(
args.path[0][0]
args.path[0][0], args.verbose
) # create mega df
parser.load_kernel_top(runs[args.path[0][0]], args.path[0][0])
@@ -188,7 +191,9 @@ def run_cli(args, runs):
args.time_unit,
num_results,
)
runs[d[0]].raw_pmc = file_io.create_df_pmc(d[0]) # creates mega dataframe
runs[d[0]].raw_pmc = file_io.create_df_pmc(
d[0], args.verbose
) # creates mega dataframe
is_gui = False
parser.load_table_data(
runs[d[0]], d[0], is_gui, args.g, args.verbose
@@ -203,9 +208,37 @@ def run_cli(args, runs):
args.decimal,
args.time_unit,
args.cols,
args.verbose,
)
def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose):
    """Generate a standalone empirical roofline analysis for one workload.

    Reads ``pmc_perf.csv`` from ``path_to_dir`` and hands it to
    ``get_roofline`` with the standalone flag set.

    Args:
        path_to_dir: Workload directory expected to contain ``pmc_perf.csv``.
        dev_id: Device id forwarded to ``get_roofline``.
        sort_type: Sort mode forwarded to ``get_roofline``.
        mem_level: Memory level selection forwarded to ``get_roofline``.
            NOTE: when it contains ``"vL1D"`` the container is modified in
            place (the entry is removed and ``"L1"`` is appended).
        verbose: Verbosity level forwarded to ``get_roofline``.

    Exits the process with status 1 if ``pmc_perf.csv`` is missing.
    """
    import pandas as pd
    from collections import OrderedDict

    # Change vL1D to an interpretable str, if required
    if "vL1D" in mem_level:
        mem_level.remove("vL1D")
        mem_level.append("L1")

    app_path = path_to_dir + "/pmc_perf.csv"
    if not os.path.isfile(app_path):
        # Name the missing file and report failure through the exit status.
        print("Error: {} does not exist".format(app_path))
        sys.exit(1)

    t_df = OrderedDict()
    t_df["pmc_perf"] = pd.read_csv(app_path)
    get_roofline(
        path_to_dir,
        t_df,
        verbose,
        dev_id,  # [Optional] Specify device id to collect roofline info from
        sort_type,  # [Optional] Sort AI by top kernels or dispatches
        mem_level,  # [Optional] Toggle particular level(s) of memory hierarchy
        True,  # [Optional] Generate a standalone roofline analysis
    )
def analyze(args):
if args.dependency:
print("pip3 install astunparse numpy tabulate pandas pyyaml")
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import ast
import astunparse
+13 -12
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import pandas as pd
@@ -180,7 +182,7 @@ def create_df_kernel_top_stats(
grouped.to_csv(os.path.join(raw_data_dir, "pmc_kernel_top.csv"), index=False)
def create_df_pmc(raw_data_dir):
def create_df_pmc(raw_data_dir, verbose):
"""
Load all raw pmc counters and join into one df.
"""
@@ -200,8 +202,8 @@ def create_df_pmc(raw_data_dir):
coll_levels.append(f[:-4])
final_df = pd.concat(dfs, keys=coll_levels, axis=1, copy=False)
# TODO: join instead of concat!
# print("pmc_raw_data final_df ", final_df.info())
if verbose >= 2:
print("pmc_raw_data final_df ", final_df.info())
return final_df
@@ -217,7 +219,6 @@ def collect_wave_occu_per_cu(in_dir, out_dir, numSE):
for i in range(numSE):
p = Path(in_dir, "wave_occu_se" + str(i) + ".csv")
if p.exists():
tmp_df = pd.read_csv(p)
SE_idx = "SE" + str(tmp_df.loc[0, "SE"])
tmp_df.rename(
+131 -30
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,22 +10,21 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from selectors import EpollSelector
import sys
import copy
from matplotlib.axis import XAxis
import pandas as pd
from dash.dash_table import FormatTemplate
from dash.dash_table.Format import Format, Scheme, Symbol
@@ -51,19 +52,19 @@ HIDDEN_SECTIONS = ["Memory Chart Analysis", "Kernels"]
HIDDEN_COLUMNS = ["Tips", "coll_level"]
IS_DARK = True # default dark theme
# Add any elements you'd like displayed as a bar chart
barchart_elements = [
1001, # Instr mix
1002, # VALU Arith Instr mix
1101, # Compute pipe SOL
1201, # LDS SOL
1301, # Instruc cache SOL
1401, # SL1D cache SOL
1601, # VL1D cache SOL
1701, # L2 cache SOL
]
# Define different types of bar charts
barchart_elements = {
# Group table ids by chart type
"instr_mix": [1001, 1002],
"multi_bar": [1604, 1704],
"sol": [1101, 1201, 1301, 1401, 1601, 1701],
"l2_cache_per_chan": [1801, 1802],
}
##################
# HELPER FUNCTIONS
##################
def filter_df(column, df, filt):
filt_df = df
if filt != []:
@@ -71,8 +72,20 @@ def filter_df(column, df, filt):
return filt_df
def discrete_background_color_bins(df, n_bins=5, columns="all"):
def multi_bar_chart(table_id, display_df):
    """Group a table's ``Avg`` values into one dict per bar chart.

    Args:
        table_id: Table id; 1604 and 1704 are the supported values.
        display_df: DataFrame holding the table rows. Table 1604 reads the
            ``Coherency``, ``Xfer`` and ``Avg`` columns; table 1704 reads
            ``Transaction``, ``Type`` and ``Avg``.

    Returns:
        A dict mapping each group name to a ``{label: Avg}`` dict. Every
        known group is present even when no row belongs to it.

    Raises:
        ValueError: if ``table_id`` is not a supported multi-bar table.
        KeyError: if a row names a group outside the known set.
    """
    # table id -> (known groups, column naming the group, column naming the bar)
    layouts = {
        1604: (("NC", "UC", "RW", "CC"), "Coherency", "Xfer"),
        1704: (("Read", "Write"), "Transaction", "Type"),
    }
    if table_id not in layouts:
        raise ValueError(
            "Table id {} is not a supported multi-bar chart.".format(table_id)
        )

    groups, group_col, label_col = layouts[table_id]
    nested_bar = {group: {} for group in groups}
    for _, row in display_df.iterrows():
        nested_bar[row[group_col]][row[label_col]] = row["Avg"]
    return nested_bar
def discrete_background_color_bins(df, n_bins=5, columns="all"):
bounds = [i * (1.0 / n_bins) for i in range(n_bins + 1)]
if columns == "all":
if "id" in df:
@@ -129,11 +142,14 @@ def discrete_background_color_bins(df, n_bins=5, columns="all"):
return (styles, html.Div(legend, style={"padding": "5px 0 5px 0"}))
def build_bar_chart(display_df, table_config):
####################
# GRAPHICAL ELEMENTS
####################
def build_bar_chart(display_df, table_config, norm_filt):
d_figs = []
# Instr Mix bar chart
if table_config["id"] == 1001 or table_config["id"] == 1002:
if table_config["id"] in barchart_elements["instr_mix"]:
display_df["Count"] = [
x.astype(int) if x != "" else int(0) for x in display_df["Count"]
]
@@ -150,8 +166,59 @@ def build_bar_chart(display_df, table_config):
)
)
# Multi bar chart
elif table_config["id"] in barchart_elements["multi_bar"]:
display_df["Avg"] = [
x.astype(int) if x != "" else int(0) for x in display_df["Avg"]
]
df_unit = display_df["Unit"][0]
nested_bar = multi_bar_chart(table_config["id"], display_df)
# generate chart for each coherency
for group, metric in nested_bar.items():
d_figs.append(
px.bar(
title=group,
x=metric.values(),
y=metric.keys(),
labels={"x": df_unit, "y": ""},
text=metric.values(),
orientation="h",
height=200,
)
.update_xaxes(showgrid=False, rangemode="nonnegative")
.update_yaxes(showgrid=False)
.update_layout(title_x=0.5)
)
# L2 Cache per channel
elif table_config["id"] in barchart_elements["l2_cache_per_chan"]:
nested_bar = {}
channels = []
for colName, colData in display_df.items():
if colName == "Channel":
channels = list(colData.values)
else:
display_df[colName] = [
x.astype(float) if x != "" and x != None else float(0)
for x in display_df[colName]
]
nested_bar[colName] = list(display_df[colName])
for group, metric in nested_bar.items():
d_figs.append(
px.bar(
title=group[0 : group.rfind("(")],
x=channels,
y=metric,
labels={
"x": "Channel",
"y": group[group.rfind("(") + 1 : len(group) - 1].replace(
"per", norm_filt
),
},
).update_yaxes(rangemode="nonnegative")
)
# Speed-of-light bar chart
else:
elif table_config["id"] in barchart_elements["sol"]:
display_df["Value"] = [
x.astype(float) if x != "" else float(0) for x in display_df["Value"]
]
@@ -194,6 +261,13 @@ def build_bar_chart(display_df, table_config):
orientation="h",
).update_xaxes(range=[0, 110])
)
else:
print(
"ERROR: Table id {}. Cannot determine barchart type.".format(
table_config["id"]
)
)
sys.exit(-1)
# update layout for each of the charts
for fig in d_figs:
@@ -343,13 +417,13 @@ def build_layout(
def generate_from_filter(
disp_filt, kernel_filter, gcd_filter, norm_filt, div_children
):
if verbose <= 1:
if verbose >= 1:
print("normalization is ", norm_filt)
base_data = initialize_run(args, norm_filt) # Re-initalize everything
panel_configs = copy.deepcopy(archConfigs.panel_configs)
# Generate original raw df
base_data[base_run].raw_pmc = file_io.create_df_pmc(path_to_dir)
base_data[base_run].raw_pmc = file_io.create_df_pmc(path_to_dir, verbose)
if verbose >= 1:
print("disp-filter is ", disp_filt)
print("kernel-filter is ", kernel_filter)
@@ -432,12 +506,39 @@ def build_layout(
# Determine chart type:
# a) Barchart
if table_config["id"] in barchart_elements:
d_figs = build_bar_chart(display_df, table_config)
for fig in d_figs:
if table_config["id"] in [
x for i in barchart_elements.values() for x in i
]:
d_figs = build_bar_chart(display_df, table_config, norm_filt)
# Smaller formatting if barchart yields several graphs
if (
len(d_figs) > 2
and not table_config["id"]
in barchart_elements["l2_cache_per_chan"]
):
temp_obj = []
for fig in d_figs:
temp_obj.append(
html.Div(
className="float-child",
children=[
dcc.Graph(
figure=fig, style={"margin": "2%"}
)
],
)
)
content.append(
dcc.Graph(figure=fig, style={"margin": "2%"})
html.Div(
className="float-container", children=temp_obj
)
)
# Normal formatting if <= 2 graphs (or for L2 cache per-channel charts)
else:
for fig in d_figs:
content.append(
dcc.Graph(figure=fig, style={"margin": "2%"})
)
# B) Tablechart
else:
d_figs = build_table_chart(
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,26 +10,26 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from dash import html, dash_table, dcc
from dash import html, dcc
import dash_bootstrap_components as dbc
from matplotlib.style import available
from omniperf_analyze.utils import schema
avail_normalizations = ["per_wave", "per_cycle", "per_second", "per_kernel"]
# List all the unique column values for desired column in df, 'target_col'
def list_unique(orig_list, is_numeric):
list_set = set(orig_list)
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import sys
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,20 +10,21 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from omniperf_analyze.utils import roofline_calc
import time
import numpy as np
from dash import html, dash_table
@@ -36,91 +39,75 @@ def to_int(a):
return int(a)
def generate_plots(roof_info, ai_data, verbose, fig=None):
def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=None):
if fig is None:
fig = go.Figure()
line_data = roofline_calc.empirical_roof(roof_info)
plotMode = "lines+text" if is_standalone else "lines"
line_data = roofline_calc.empirical_roof(roof_info, mem_level, verbose)
print("Line data:\n", line_data)
#######################
# Plot BW Lines
#######################
fig.add_trace(
go.Scatter(
x=line_data["hbm"][0],
y=line_data["hbm"][1],
name="HBM-{}".format(roof_info["dtype"]),
mode="lines",
hovertemplate="<b>%{text}</b>",
text=[
"{} GB/s".format(to_int(line_data["hbm"][2])),
"{} GFLOP/s".format(to_int(line_data["hbm"][2])),
],
if mem_level == "ALL":
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
else:
cacheHierarchy = mem_level
for cacheLevel in cacheHierarchy:
fig.add_trace(
go.Scatter(
x=line_data[cacheLevel.lower()][0],
y=line_data[cacheLevel.lower()][1],
name="{}-{}".format(cacheLevel, roof_info["dtype"]),
mode=plotMode,
hovertemplate="<b>%{text}</b>",
text=[
"{} GB/s".format(to_int(line_data[cacheLevel.lower()][2])),
None
if is_standalone
else "{} GB/s".format(to_int(line_data[cacheLevel.lower()][2])),
],
textposition="top right",
)
)
)
fig.add_trace(
go.Scatter(
x=line_data["l2"][0],
y=line_data["l2"][1],
name="L2-{}".format(roof_info["dtype"]),
mode="lines",
hovertemplate="<b>%{text}</b>",
text=[
"{} GB/s".format(to_int(line_data["l2"][2])),
"{} GFLOP/s".format(to_int(line_data["l2"][2])),
],
)
)
fig.add_trace(
go.Scatter(
x=line_data["l1"][0],
y=line_data["l1"][1],
name="L1-{}".format(roof_info["dtype"]),
mode="lines",
hovertemplate="<b>%{text}</b>",
text=[
"{} GB/s".format(to_int(line_data["l1"][2])),
"{} GFLOP/s".format(to_int(line_data["l1"][2])),
],
)
)
fig.add_trace(
go.Scatter(
x=line_data["lds"][0],
y=line_data["lds"][1],
name="LDS-{}".format(roof_info["dtype"]),
mode="lines",
hovertemplate="<b>%{text}</b>",
text=[
"{} GB/s".format(to_int(line_data["lds"][2])),
"{} GFLOP/s".format(to_int(line_data["lds"][2])),
],
)
)
if roof_info["dtype"] != "FP16" and roof_info["dtype"] != "I8":
fig.add_trace(
go.Scatter(
x=line_data["valu"][0],
y=line_data["valu"][1],
name="Peak VALU-{}".format(roof_info["dtype"]),
mode="lines",
mode=plotMode,
hovertemplate="<b>%{text}</b>",
text=[
"{} GFLOP/s".format(to_int(line_data["valu"][2])),
None
if is_standalone
else "{} GFLOP/s".format(to_int(line_data["valu"][2])),
"{} GFLOP/s".format(to_int(line_data["valu"][2])),
],
textposition="top left",
)
)
if roof_info["dtype"] == "FP16":
pos = "bottom left"
else:
pos = "top left"
fig.add_trace(
go.Scatter(
x=line_data["mfma"][0],
y=line_data["mfma"][1],
name="Peak MFMA-{}".format(roof_info["dtype"]),
mode="lines",
mode=plotMode,
hovertemplate="<b>%{text}</b>",
text=[
"{} GFLOP/s".format(to_int(line_data["mfma"][2])),
None
if is_standalone
else "{} GFLOP/s".format(to_int(line_data["mfma"][2])),
"{} GFLOP/s".format(to_int(line_data["mfma"][2])),
],
textposition=pos,
)
)
#######################
@@ -164,56 +151,86 @@ def generate_plots(roof_info, ai_data, verbose, fig=None):
return fig
def get_roofline(path_to_dir, ret_df, verbose):
def get_roofline(
path_to_dir,
ret_df,
verbose,
dev_id=None,
sort_type="kernels",
mem_level="ALL",
is_standalone=False,
):
# Roofline settings
fp32_details = {
"path": path_to_dir,
"sort": "kernels",
"sort": sort_type,
"device": 0,
"dtype": "FP32",
}
fp16_details = {
"path": path_to_dir,
"sort": "kernels",
"sort": sort_type,
"device": 0,
"dtype": "FP16",
}
int8_details = {"path": path_to_dir, "sort": "kernels", "device": 0, "dtype": "I8"}
int8_details = {"path": path_to_dir, "sort": sort_type, "device": 0, "dtype": "I8"}
# Generate roofline plots
print("Path: ", path_to_dir)
ai_data = roofline_calc.plot_application("kernels", ret_df, verbose)
ai_data = roofline_calc.plot_application(sort_type, ret_df, verbose)
if verbose >= 1:
# print AI data for each mem level
print("AI at each mem level")
for i in ai_data:
print(i, "->", ai_data[i])
print("\n")
fp32_fig = generate_plots(fp32_details, ai_data, verbose)
fp16_fig = generate_plots(fp16_details, ai_data, verbose)
ml_combo_fig = generate_plots(int8_details, ai_data, verbose, fp16_fig)
return html.Section(
id="roofline",
children=[
html.Div(
className="float-container",
children=[
html.Div(
className="float-child",
children=[
html.H3(children="Empirical Roofline Analysis (FP32/FP64)"),
dcc.Graph(figure=fp32_fig),
],
),
html.Div(
className="float-child",
children=[
html.H3(children="Empirical Roofline Analysis (FP16/INT8)"),
dcc.Graph(figure=ml_combo_fig),
],
),
],
)
],
fp32_fig = generate_plots(fp32_details, ai_data, mem_level, is_standalone, verbose)
fp16_fig = generate_plots(fp16_details, ai_data, mem_level, is_standalone, verbose)
ml_combo_fig = generate_plots(
int8_details, ai_data, mem_level, is_standalone, verbose, fp16_fig
)
if is_standalone:
dev_id = "ALL" if dev_id == -1 else str(dev_id)
fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id))
ml_combo_fig.write_image(
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
)
time.sleep(1)
# Re-save to remove loading MathJax pop up
fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id))
ml_combo_fig.write_image(
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
)
print("Empirical Roofline PDFs saved!")
else:
return html.Section(
id="roofline",
children=[
html.Div(
className="float-container",
children=[
html.Div(
className="float-child",
children=[
html.H3(
children="Empirical Roofline Analysis (FP32/FP64)"
),
dcc.Graph(figure=fp32_fig),
],
),
html.Div(
className="float-child",
children=[
html.H3(
children="Empirical Roofline Analysis (FP16/INT8)"
),
dcc.Graph(figure=ml_combo_fig),
],
),
],
)
],
)
+11 -12
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import ast
import sys
@@ -346,7 +348,6 @@ def build_dfs(archConfigs, filter_metrics):
i = 0
for key, entries in data_cofig["metric"].items():
data_source_idx = (
str(data_cofig["id"] // 100)
+ "."
@@ -365,7 +366,6 @@ def build_dfs(archConfigs, filter_metrics):
# the whole IP block in filter
(str(panel_id // 100) in filter_metrics)
):
values.append(metric_idx)
values.append(key)
for k, v in entries.items():
@@ -401,7 +401,6 @@ def build_dfs(archConfigs, filter_metrics):
or (data_source_idx == "0") # no filter
or (data_source_idx in filter_metrics)
):
if (
"columnwise" in data_cofig
and data_cofig["columnwise"] == True
@@ -651,7 +650,7 @@ def apply_filters(workload, is_gui, debug):
# NB: support ignoring the 1st n dispatched execution by '> n'
# The better way may be parsing python slice string
for d in workload.filter_dispatch_ids:
if int(d) > len(ret_df) - 2: # subtract 2 bc of the two header rows
if int(d) >= len(ret_df): # subtract 2 bc of the two header rows
print("{} is an invalid dispatch id.".format(d))
sys.exit(1)
if ">" in workload.filter_dispatch_ids[0]:
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,31 +10,19 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from linecache import cache
import subprocess
from operator import sub
import os
import sys
from pathlib import Path
import numpy
import matplotlib.pyplot as plt
from matplotlib.pyplot import get, text
from math import log, pi, sqrt
import pandas as pd
import pylab
from dataclasses import dataclass
import csv
@@ -54,6 +44,7 @@ FONT_WEIGHT = "bold"
SUPPORTED_SOC = ["mi200"]
################################################
# Helper funcs
################################################
@@ -68,6 +59,7 @@ class AI_Data:
mfma_flops_bf16: float
mfma_flops_f32: float
mfma_flops_f64: float
mfma_iops_i8: float
lds_data: float
L1cache_data: float
L2cache_data: float
@@ -100,11 +92,14 @@ def get_color(catagory):
# -------------------------------------------------------------------------------------
# Plot BW at each cache level
# -------------------------------------------------------------------------------------
def plot_roof(roof_details, roof_data):
def plot_roof(roof_details, roof_data, mem_level, verbose):
# TODO: This is where filtering by memory level will need to occur for standalone
graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []}
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
if mem_level == "ALL":
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
else:
cacheHierarchy = mem_level
x1 = y1 = x2 = y2 = -1
x1_mfma = y1_mfma = x2_mfma = y2_mfma = -1
@@ -116,7 +111,8 @@ def plot_roof(roof_details, roof_data):
)
for i in range(0, len(cacheHierarchy)):
# Plot BW line
# print("Current cache level is ", cacheHierarchy[i])
if verbose >= 3:
print("Current cache level is ", cacheHierarchy[i])
curr_bw = cacheHierarchy[i] + "Bw"
peakBw = float(roof_data[curr_bw][roof_details["device"]])
@@ -142,8 +138,9 @@ def plot_roof(roof_details, roof_data):
y2_mfma = peakMFMA
# These are the points to use:
# print("x = [{}, {}]".format(x1,x2_mfma))
# print("y = [{}, {}]".format(y1, y2_mfma))
if verbose >= 3:
print("x = [{}, {}]".format(x1, x2_mfma))
print("y = [{}, {}]".format(y1, y2_mfma))
graphPoints[cacheHierarchy[i].lower()].append([x1, x2_mfma])
graphPoints[cacheHierarchy[i].lower()].append([y1, y2_mfma])
@@ -159,7 +156,8 @@ def plot_roof(roof_details, roof_data):
if x2 < x0:
x0 = x2
# print("FMA ROOF [{}, {}], [{},{}]".format(x0, XMAX, peakOps, peakOps))
if verbose >= 3:
print("FMA ROOF [{}, {}], [{},{}]".format(x0, XMAX, peakOps, peakOps))
graphPoints["valu"].append([x0, XMAX])
graphPoints["valu"].append([peakOps, peakOps])
graphPoints["valu"].append(peakOps)
@@ -172,7 +170,8 @@ def plot_roof(roof_details, roof_data):
if x2_mfma < x0_mfma:
x0_mfma = x2_mfma
# print("MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA))
if verbose >= 3:
print("MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA))
graphPoints["mfma"].append([x0_mfma, XMAX])
graphPoints["mfma"].append([peakMFMA, peakMFMA])
graphPoints["mfma"].append(peakMFMA)
@@ -185,7 +184,6 @@ def plot_roof(roof_details, roof_data):
# -------------------------------------------------------------------------------------
# Calculate relevant metrics for ai calculation
def plot_application(sortType, ret_df, verbose):
df = ret_df["pmc_perf"]
# Sort by top kernels or top dispatches?
df = df.sort_values(by=["KernelName"])
@@ -231,6 +229,7 @@ def plot_application(sortType, ret_df, verbose):
mfma_flops_bf16 / calls,
mfma_flops_f32 / calls,
mfma_flops_f64 / calls,
mfma_iops_i8 / calls,
lds_data / calls,
L1cache_data / calls,
L2cache_data / calls,
@@ -474,11 +473,7 @@ def plot_application(sortType, ret_df, verbose):
return intensityPoints
def empirical_roof(roof_info):
if roof_info["sort"] != "kernels" and roof_info["sort"] != "dispatches":
sys.exit("Invalid sort. Must be either 'kernels' or 'dispatches'")
def empirical_roof(roof_info, mem_level, verbose):
roofPath = roof_info["path"] + "/roofline.csv"
# -----------------------------------------------------
# Initialize roofline data dictionary from roofline.csv
@@ -517,7 +512,7 @@ def empirical_roof(roof_info):
# ------------------
# Generate Roofline
# ------------------
results = plot_roof(roof_info, roof_data)
results = plot_roof(roof_info, roof_data, mem_level, verbose)
# for key in results:
# print(key, "->", results[key])
+28 -27
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
#
# Define all common data storage classes,
@@ -33,7 +35,6 @@ from collections import OrderedDict
@dataclass
class ArchConfig:
# [id: panel_config] pairs
panel_configs: OrderedDict = field(default=dict)
@@ -84,24 +85,24 @@ supported_field = [
"Alias",
# Special keywords for L2 channel
"Channel",
"Hit Rate",
"Req",
"Read Req",
"Write Req",
"AtomicReq",
"EA Read Req",
"EA Write Req",
"EA AtomicReq",
"EA Read Lat - cycles",
"EA Write Lat - cycles",
"EA Atomic Lat - cycles",
"EA Read Stall - IO",
"EA Read Stall - GMI",
"EA Read Stall - DRAM",
"EA Write Stall - IO",
"EA Write Stall - GMI",
"EA Write Stall - DRAM",
"EA Write Stall - Starve",
"L2 Cache Hit Rate (%)",
"Requests (Requests)",
"L1-L2 Read (Requests)",
"L1-L2 Write (Requests)",
"L1-L2 Atomic (Requests)",
"L2-EA Read (Requests)",
"L2-EA Write (Requests)",
"L2-EA Atomic (Requests)",
"L2-EA Read Latency (Cycles)",
"L2-EA Write Latency (Cycles)",
"L2-EA Atomic Latency (Cycles)",
"L2-EA Read Stall - IO (Cycles per)",
"L2-EA Read Stall - GMI (Cycles per)",
"L2-EA Read Stall - DRAM (Cycles per)",
"L2-EA Write Stall - IO (Cycles per)",
"L2-EA Write Stall - GMI (Cycles per)",
"L2-EA Write Stall - DRAM (Cycles per)",
"L2-EA Write Stall - Starve (Cycles per)",
]
# The prefix of raw pmc_perf.csv
+27 -16
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import pandas as pd
from tabulate import tabulate
@@ -26,6 +28,7 @@ from tabulate import tabulate
from omniperf_analyze.utils import schema, parser
hidden_columns = ["Tips", "coll_level"]
hidden_sections = [1900, 2000]
def string_multiple_lines(source, width, max_rows):
@@ -44,19 +47,20 @@ def string_multiple_lines(source, width, max_rows):
return "\n".join(lines)
def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols, verbose):
"""
Show all panels with their data in plain text mode.
"""
comparable_columns = parser.build_comparable_columns(time_unit)
for panel_id, panel in archConfigs.panel_configs.items():
# Skip panels that don't support baseline comparison
if panel_id in hidden_sections:
continue
ss = "" # store content of all data_source from one pannel
for data_source in panel["data source"]:
for type, table_config in data_source.items():
# take the 1st run as baseline
base_run, base_data = next(iter(runs.items()))
base_df = base_data.dfs[table_config["id"]]
@@ -72,11 +76,9 @@ def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
)
or (type == "raw_csv_table")
):
if header in hidden_columns:
pass
elif header not in comparable_columns:
if (
type == "raw_csv_table"
and table_config["source"] == "pmc_kernel_top.csv"
@@ -102,18 +104,27 @@ def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
):
if run != base_run:
# calc percentage over the baseline
base_df[header] = [
float(x) if x != "" else float(0)
for x in base_df[header]
]
cur_df[header] = [
float(x) if x != "" else float(0)
for x in cur_df[header]
]
t_df = (
pd.concat(
[
base_df[header].astype("double"),
cur_df[header].astype("double"),
base_df[header],
cur_df[header],
],
axis=1,
)
.pct_change(axis="columns")
.iloc[:, 1]
)
# print("---------", header, t_df)
if verbose >= 2:
print("---------", header, t_df)
# show value + percentage
# TODO: better alignment
+69 -27
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,20 +10,22 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import argparse
import subprocess
from common import (
OMNIPERF_HOME,
PROG,
@@ -31,7 +35,6 @@ from common import getVersion, getVersionDisplay
def parse(my_parser):
# versioning info
vData = getVersion()
versionString = getVersionDisplay(vData["version"], vData["sha"], vData["mode"])
@@ -116,6 +119,49 @@ def parse(my_parser):
default=None,
help="\t\t\tKernel filtering.",
)
result = subprocess.run(
["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
)
if result.returncode == 0:
profile_group.add_argument(
"-l",
"--i-feel-lucky",
required=False,
default=False,
action="store_true",
dest="lucky",
help="\t\t\tProfile only the most time consuming kernels.",
)
profile_group.add_argument(
"-r",
"--use-rocscope",
required=False,
default=False,
action="store_true",
dest="use_rocscope",
help="\t\t\tUse rocscope for profiling",
)
profile_group.add_argument(
"-s",
"--kernel-summaries",
required=False,
default=False,
action="store_true",
dest="summaries",
help="\t\t\tCreate kernel summaries.",
)
else:
profile_group.add_argument(
"--i-feel-lucky", default=False, dest="lucky", help=argparse.SUPPRESS
)
profile_group.add_argument(
"--use-rocscope", default=False, dest="use_rocscope", help=argparse.SUPPRESS
)
profile_group.add_argument(
"--kernel-summaries", default=False, dest="summaries", help=argparse.SUPPRESS
)
profile_group.add_argument(
"-b",
"--ipblocks",
@@ -166,6 +212,7 @@ def parse(my_parser):
metavar="",
type=str,
default="kernels",
choices=["kernels", "dispatches"],
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
)
roofline_group.add_argument(
@@ -174,19 +221,11 @@ def parse(my_parser):
required=False,
choices=["HBM", "L2", "vL1D", "LDS"],
metavar="",
nargs="+",
type=str,
default="ALL",
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
)
roofline_group.add_argument(
"--axes",
default=None,
type=float,
required=False,
nargs="+",
metavar="",
help="\t\t\tDesired axis values for graph. As follows:\n\t\t\t xmin xmax ymin ymax",
)
roofline_group.add_argument(
"--device",
metavar="",
@@ -348,7 +387,9 @@ def parse(my_parser):
help="\t\tSpecify the output file.",
)
analyze_group.add_argument(
"--list-kernels", action="store_true", help="\t\tList kernels."
"--list-kernels",
action="store_true",
help="\t\tList kernels. Top 10 kernels sorted by duration (descending order).",
)
analyze_group.add_argument(
"--list-metrics",
@@ -358,35 +399,36 @@ def parse(my_parser):
)
analyze_group.add_argument(
"-b",
"--filter-metrics",
"--metric",
dest="filter_metrics",
metavar="",
nargs="+",
help="\t\tSpecify IP block/metric Ids from --list-metrics.",
help="\t\tSpecify IP block/metric id(s) from --list-metrics for filtering.",
)
analyze_group.add_argument(
"-k",
"--filter-kernels",
"--kernel",
metavar="",
type=int,
dest="gpu_kernel",
nargs="+",
action="append",
help="\t\tSpecify kernel id from --list-kernels.",
help="\t\tSpecify kernel id(s) from --list-kernels for filtering.",
)
analyze_group.add_argument(
"--filter-dispatch-ids",
"--dispatch",
dest="gpu_dispatch_id",
metavar="",
nargs="+",
action="append",
help="\t\tSpecify dispatch IDs.",
help="\t\tSpecify dispatch id(s) for filtering.",
)
analyze_group.add_argument(
"--filter-gpu-ids",
"--gpu-id",
dest="gpu_id",
metavar="",
nargs="+",
help="\t\tSpecify GPU IDs.",
help="\t\tSpecify GPU id(s) for filtering.",
)
analyze_group.add_argument(
"-n",
+737
Näytä tiedosto
@@ -0,0 +1,737 @@
<gfx908>
# CPC counters
<metric
name="CPC_ME1_BUSY_FOR_PACKET_DECODE" block=CPC event=13 descr="Me1 busy for packet decode."
></metric>
<metric
name="CPC_UTCL1_STALL_ON_TRANSLATION" block=CPC event=24 descr="One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."
></metric>
<metric
name="CPC_CPC_STAT_BUSY" block=CPC event=25 descr="CPC Busy."
></metric>
<metric
name="CPC_CPC_STAT_IDLE" block=CPC event=26 descr="CPC Idle."
></metric>
<metric
name="CPC_CPC_STAT_STALL" block=CPC event=27 descr="CPC Stalled."
></metric>
<metric
name="CPC_CPC_TCIU_BUSY" block=CPC event=28 descr="CPC TCIU interface Busy."
></metric>
<metric
name="CPC_CPC_TCIU_IDLE" block=CPC event=29 descr="CPC TCIU interface Idle."
></metric>
<metric
name="CPC_CPC_UTCL2IU_BUSY" block=CPC event=30 descr="CPC UTCL2 interface Busy."
></metric>
<metric
name="CPC_CPC_UTCL2IU_IDLE" block=CPC event=31 descr="CPC UTCL2 interface Idle."
></metric>
<metric
name="CPC_CPC_UTCL2IU_STALL" block=CPC event=32 descr="CPC UTCL2 interface Stalled waiting on Free, Tags or Translation."
></metric>
<metric
name="CPC_ME1_DC0_SPI_BUSY" block=CPC event=33 descr="CPC Me1 Processor Busy."
></metric>
<metric
name="CPF_CMP_UTCL1_STALL_ON_TRANSLATION" block=CPF event=20 descr="One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING response."
></metric>
<metric
name="CPF_CPF_STAT_BUSY" block=CPF event=23 descr="CPF Busy."
></metric>
<metric
name="CPF_CPF_STAT_IDLE" block=CPF event=24 descr="CPF Idle."
></metric>
<metric
name="CPF_CPF_STAT_STALL" block=CPF event=25 descr="CPF Stalled."
></metric>
<metric
name="CPF_CPF_TCIU_BUSY" block=CPF event=26 descr="CPF TCIU interface Busy."
></metric>
<metric
name="CPF_CPF_TCIU_IDLE" block=CPF event=27 descr="CPF TCIU interface Idle."
></metric>
<metric
name="CPF_CPF_TCIU_STALL" block=CPF event=28 descr="CPF TCIU interface Stalled waiting on Free, Tags."
></metric>
# GRBM counters
<metric
name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"
></metric>
<metric
name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"
></metric>
<metric
name="GRBM_CP_BUSY" block=GRBM event=3 descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."
></metric>
<metric
name="GRBM_SPI_BUSY" block=GRBM event=11 descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."
></metric>
<metric
name="GRBM_TA_BUSY" block=GRBM event=13 descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."
></metric>
<metric
name="GRBM_TC_BUSY" block=GRBM event=28 descr="Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."
></metric>
<metric
name="GRBM_CPC_BUSY" block=GRBM event=30 descr="The Command Processor Compute (CPC) is busy."
></metric>
<metric
name="GRBM_CPF_BUSY" block=GRBM event=31 descr="The Command Processor Fetchers (CPF) is busy."
></metric>
<metric
name="GRBM_UTCL2_BUSY" block=GRBM event=34 descr="The Unified Translation Cache Level-2 (UTCL2) block is busy."
></metric>
<metric
name="GRBM_EA_BUSY" block=GRBM event=35 descr="The Efficiency Arbiter (EA) block is busy."
></metric>
# SPI counters
<metric
name="SPI_CSN_WINDOW_VALID" block=SPI event=47 descr="Clock count enabled by perfcounter_start event. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_BUSY" block=SPI event=48 descr="Number of clocks with outstanding waves (SPI or SH). Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_NUM_THREADGROUPS" block=SPI event=49 descr="Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_CSN_WAVE" block=SPI event=52 descr="Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_RA_REQ_NO_ALLOC" block=SPI event=79 descr="Arb cycles with requests but no allocation. Source is RA0"
></metric>
<metric
name="SPI_RA_REQ_NO_ALLOC_CSN" block=SPI event=85 descr="Arb cycles with CSn req and no CSn alloc. Source is RA0"
></metric>
<metric
name="SPI_RA_RES_STALL_CSN" block=SPI event=91 descr="Arb cycles with CSn req and no CSn fits. Source is RA0"
></metric>
<metric
name="SPI_RA_TMP_STALL_CSN" block=SPI event=97 descr="Cycles where csn wants to req but does not fit in temp space."
></metric>
<metric
name="SPI_RA_WAVE_SIMD_FULL_CSN" block=SPI event=103 descr="Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_VGPR_SIMD_FULL_CSN" block=SPI event=109 descr="Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_SGPR_SIMD_FULL_CSN" block=SPI event=115 descr="Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_LDS_CU_FULL_CSN" block=SPI event=120 descr="Sum of CU where LDS can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_BAR_CU_FULL_CSN" block=SPI event=123 descr="Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_BULKY_CU_FULL_CSN" block=SPI event=125 descr="Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"
></metric>
<metric
name="SPI_RA_TGLIM_CU_FULL_CSN" block=SPI event=127 descr="Cycles where csn wants to req but all CU are at tg_limit"
></metric>
<metric
name="SPI_RA_WVLIM_STALL_CSN" block=SPI event=133 descr="Number of clocks csn is stalled due to WAVE LIMIT."
></metric>
<metric
name="SPI_SWC_CSC_WR" block=SPI event=189 descr="Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
<metric
name="SPI_VWC_CSC_WR" block=SPI event=195 descr="Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
></metric>
# SQ counters
<metric
name="SQ_ACCUM_PREV" block=SQ event=1 descr="For counter N, increment by the value of counter N-1. Only accumulates once every 4 cycles."
></metric>
<metric
name="SQ_CYCLES" block=SQ event=2 descr="Clock cycles. (nondeterministic, per-simd, global)"
></metric>
<metric
name="SQ_BUSY_CYCLES" block=SQ event=3 descr="Clock cycles while SQ is reporting that it is busy. (nondeterministic, per-simd, global)"
></metric>
<metric
name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_LEVEL_WAVES" block=SQ event=5 descr="Track the number of waves. Set ACCUM_PREV for the next counter to use this. (level, per-simd, global)"
></metric>
<metric
name="SQ_WAVES_EQ_64" block=SQ event=6 descr="Count number of waves with exactly 64 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_64" block=SQ event=7 descr="Count number of waves with <64 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_48" block=SQ event=8 descr="Count number of waves with <48 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_32" block=SQ event=9 descr="Count number of waves with <32 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_LT_16" block=SQ event=10 descr="Count number of waves with <16 active threads sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_BUSY_CU_CYCLES" block=SQ event=13 descr="Count quad-cycles each CU is busy. (nondeterministic, per-simd)"
></metric>
<metric
name="SQ_ITEMS" block=SQ event=14 descr="Number of valid items per wave. (per-simd, global)"
></metric>
<metric
name="SQ_INSTS" block=SQ event=25 descr="Number of instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_MFMA" block=SQ event=27 descr="Number of MFMA instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM_WR" block=SQ event=28 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM_RD" block=SQ event=29 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VMEM" block=SQ event=30 descr="Number of VMEM instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SALU" block=SQ event=31 descr="Number of SALU instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SMEM" block=SQ event=32 descr="Number of SMEM instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_FLAT" block=SQ event=33 descr="Number of FLAT instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=34 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_LDS" block=SQ event=35 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_GDS" block=SQ event=36 descr="Number of GDS instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_EXP_GDS" block=SQ event=38 descr="Number of EXP and GDS instructions issued, excluding skipped export instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_BRANCH" block=SQ event=39 descr="Number of Branch instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_SENDMSG" block=SQ event=40 descr="Number of Sendmsg instructions issued. (per-simd, emulated)"
></metric>
<metric
name="SQ_INSTS_VSKIPPED" block=SQ event=41 descr="Number of vector instructions skipped. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_LEVEL_VMEM" block=SQ event=42 descr="Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by INSTS_VMEM for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_INST_LEVEL_SMEM" block=SQ event=43 descr="Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average latency per smem request. Falls slightly short of total request latency because some fetches are divided into two requests that may finish at different times and this counter collects the average latency of the two. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_INST_LEVEL_LDS" block=SQ event=44 descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
></metric>
<metric
name="SQ_WAVE_CYCLES" block=SQ event=47 descr="Number of wave-cycles spent by waves in the CUs (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_ANY" block=SQ event=58 descr="Number of wave-cycles spent waiting for anything (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_INST_ANY" block=SQ event=61 descr="Number of wave-cycles spent waiting for any instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_WAIT_INST_LDS" block=SQ event=64 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
></metric>
<metric
name="SQ_ACTIVE_INST_ANY" block=SQ event=69 descr="Number of cycles each wave is working on an instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_VMEM" block=SQ event=70 descr="Number of cycles the SQ instruction arbiter is working on a VMEM instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_LDS" block=SQ event=71 descr="Number of cycles the SQ instruction arbiter is working on a LDS instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_VALU" block=SQ event=72 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_SCA" block=SQ event=73 descr="Number of cycles the SQ instruction arbiter is working on a SALU or SMEM instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_EXP_GDS" block=SQ event=74 descr="Number of cycles the SQ instruction arbiter is working on an EXPORT or GDS instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_MISC" block=SQ event=75 descr="Number of cycles the SQ instruction arbiter is working on a BRANCH or SENDMSG instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACTIVE_INST_FLAT" block=SQ event=76 descr="Number of cycles the SQ instruction arbiter is working on a FLAT instruction. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_VMEM_WR" block=SQ event=77 descr="Number of cycles needed to send addr and cmd data for VMEM write instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_VMEM_RD" block=SQ event=78 descr="Number of cycles needed to send addr and cmd data for VMEM read instructions. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_SMEM" block=SQ event=84 descr="Number of cycles needed to execute scalar memory reads. (per-simd, emulated)"
></metric>
<metric
name="SQ_INST_CYCLES_SALU" block=SQ event=85 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"
></metric>
<metric
name="SQ_THREAD_CYCLES_VALU" block=SQ event=86 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"
></metric>
<metric
name="SQ_IFETCH" block=SQ event=88 descr="Number of instruction fetch requests from cache. (per-simd, emulated)"
></metric>
<metric
name="SQ_IFETCH_LEVEL" block=SQ event=89 descr="Number of instruction fetch requests from cache. (per-simd, level)"
></metric>
<metric
name="SQ_LDS_BANK_CONFLICT" block=SQ event=94 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"
></metric>
<metric
name="SQ_LDS_ADDR_CONFLICT" block=SQ event=95 descr="Number of cycles LDS is stalled by address conflicts. (emulated, nondeterministic)"
></metric>
<metric
name="SQ_LDS_UNALIGNED_STALL" block=SQ event=96 descr="Number of cycles LDS is stalled processing flat unaligned load/store ops. (emulated)"
></metric>
<metric
name="SQ_LDS_MEM_VIOLATIONS" block=SQ event=97 descr="Number of threads that have a memory violation in the LDS. (emulated)"
></metric>
<metric
name="SQ_LDS_ATOMIC_RETURN" block=SQ event=98 descr="Number of atomic return cycles in LDS. (per-simd, emulated)"
></metric>
<metric
name="SQ_LDS_IDX_ACTIVE" block=SQ event=99 descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. (per-simd, emulated)"
></metric>
<metric
name="SQ_ACCUM_PREV_HIRES" block=SQ event=158 descr="For counter N, increment by the value of counter N-1."
></metric>
<metric
name="SQ_WAVES_RESTORED" block=SQ event=159 descr="Count number of context-restored waves sent to SQs. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_WAVES_SAVED" block=SQ event=160 descr="Count number of context-saved waves. (per-simd, emulated, global)"
></metric>
<metric
name="SQ_INSTS_SMEM_NORM" block=SQ event=161 descr="Number of SMEM instructions issued normalized to match smem_level (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). (per-simd, emulated)"
></metric>
<metric
name="SQC_DCACHE_INPUT_VALID_READYB" block=SQ event=260 descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"
></metric>
<metric
name="SQC_TC_REQ" block=SQ event=262 descr="Total number of TC requests that were issued by instruction and constant caches. (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_INST_REQ" block=SQ event=263 descr="Number of instruction requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_READ_REQ" block=SQ event=264 descr="Number of data read requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_WRITE_REQ" block=SQ event=265 descr="Number of data write requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_DATA_ATOMIC_REQ" block=SQ event=266 descr="Number of data atomic requests to the TC (No-Masking, nondeterministic)"
></metric>
<metric
name="SQC_TC_STALL" block=SQ event=267 descr="Valid request stalled TC request interface (no-credits). (No-Masking, nondeterministic, unwindowed)"
></metric>
<metric
name="SQC_ICACHE_REQ" block=SQ event=270 descr="Number of requests. (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_ICACHE_HITS" block=SQ event=271 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_ICACHE_MISSES" block=SQ event=272 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_ICACHE_MISSES_DUPLICATE" block=SQ event=273 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_REQ" block=SQ event=290 descr="Number of requests (post-bank-serialization). (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_DCACHE_HITS" block=SQ event=291 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_MISSES" block=SQ event=292 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_MISSES_DUPLICATE" block=SQ event=293 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
></metric>
<metric
name="SQC_DCACHE_ATOMIC" block=SQ event=298 descr="Number of atomic requests. (per-SQ, per-Bank)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_1" block=SQ event=323 descr="Number of constant cache 1 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_2" block=SQ event=324 descr="Number of constant cache 2 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_4" block=SQ event=325 descr="Number of constant cache 4 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_8" block=SQ event=326 descr="Number of constant cache 8 dw read requests. (per-SQ)"
></metric>
<metric
name="SQC_DCACHE_REQ_READ_16" block=SQ event=327 descr="Number of constant cache 16 dw read requests. (per-SQ)"
></metric>
# TA counters
<metric
name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_TOTAL_WAVEFRONTS" block=TA event=32 descr="Total number of wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_WAVEFRONTS" block=TA event=44 descr="Number of buffer wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_READ_WAVEFRONTS" block=TA event=45 descr="Number of buffer read wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_WRITE_WAVEFRONTS" block=TA event=46 descr="Number of buffer write wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_ATOMIC_WAVEFRONTS" block=TA event=47 descr="Number of buffer atomic wavefronts processed by TA."
></metric>
<metric
name="TA_BUFFER_TOTAL_CYCLES" block=TA event=49 descr="Number of buffer cycles issued to TC."
></metric>
<metric
name="TA_BUFFER_COALESCED_READ_CYCLES" block=TA event=52 descr="Number of buffer coalesced read cycles issued to TC."
></metric>
<metric
name="TA_BUFFER_COALESCED_WRITE_CYCLES" block=TA event=53 descr="Number of buffer coalesced write cycles issued to TC."
></metric>
<metric
name="TA_ADDR_STALLED_BY_TC_CYCLES" block=TA event=54 descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_ADDR_STALLED_BY_TD_CYCLES" block=TA event=55 descr="Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_DATA_STALLED_BY_TC_CYCLES" block=TA event=56 descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TA_FLAT_WAVEFRONTS" block=TA event=100 descr="Number of flat opcode wavefronts processed by the TA."
></metric>
<metric
name="TA_FLAT_READ_WAVEFRONTS" block=TA event=101 descr="Number of flat opcode reads processed by the TA."
></metric>
<metric
name="TA_FLAT_WRITE_WAVEFRONTS" block=TA event=102 descr="Number of flat opcode writes processed by the TA."
></metric>
<metric
name="TA_FLAT_ATOMIC_WAVEFRONTS" block=TA event=103 descr="Number of flat opcode atomics processed by the TA."
></metric>
# TCA counters
<metric
name="TCA_CYCLE" block=TCA event=1 descr="Number of cycles. Not windowable."
></metric>
<metric
name="TCA_BUSY" block=TCA event=2 descr="Number of cycles we have a request pending. Not windowable."
></metric>
# TCC counters
<metric
name="TCC_CYCLE" block=TCC event=1 descr="Number of cycles. Not windowable."
></metric>
<metric
name="TCC_BUSY" block=TCC event=2 descr="Number of cycles we have a request pending. Not windowable."
></metric>
<metric
name="TCC_REQ" block=TCC event=3 descr="Number of requests of all types. This is measured at the tag block. This may be more than the number of requests arriving at the TCC, but it is a good indication of the total amount of work that needs to be performed."
></metric>
<metric
name="TCC_STREAMING_REQ" block=TCC event=4 descr="Number of streaming requests. This is measured at the tag block."
></metric>
<metric
name="TCC_NC_REQ" block=TCC event=5 descr="The number of noncoherently cached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_UC_REQ" block=TCC event=6 descr="The number of uncached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_CC_REQ" block=TCC event=7 descr="The number of coherently cached requests. This is measured at the tag block."
></metric>
<metric
name="TCC_RW_REQ" block=TCC event=8 descr="The number of RW requests. This is measured at the tag block."
></metric>
<metric
name="TCC_PROBE" block=TCC event=9 descr="Number of probe requests. Not windowable."
></metric>
<metric
name="TCC_PROBE_ALL" block=TCC event=10 descr="Number of external probe requests with EA_TCC_preq_all == 1. Not windowable."
></metric>
<metric
name="TCC_READ" block=TCC event=12 descr="Number of read requests. Compressed reads are included in this, but metadata reads are not included."
></metric>
<metric
name="TCC_WRITE" block=TCC event=13 descr="Number of write requests."
></metric>
<metric
name="TCC_ATOMIC" block=TCC event=14 descr="Number of atomic requests of all types."
></metric>
<metric
name="TCC_HIT" block=TCC event=17 descr="Number of cache hits."
></metric>
<metric
name="TCC_MISS" block=TCC event=19 descr="Number of cache misses. UC reads count as misses."
></metric>
<metric
name="TCC_WRITEBACK" block=TCC event=22 descr="Number of lines written back to main memory. This includes writebacks of dirty lines and uncached write/atomic requests."
></metric>
<metric
name="TCC_EA_WRREQ" block=TCC event=26 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."
></metric>
<metric
name="TCC_EA_WRREQ_64B" block=TCC event=27 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."
></metric>
<metric
name="TCC_EA_WR_UNCACHED_32B" block=TCC event=29 descr="Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached traffic. Note that CC mtypes can produce uncached requests, and those are included in this. A 64-byte request will be counted as 2"
></metric>
<metric
name="TCC_EA_WRREQ_STALL" block=TCC event=30 descr="Number of cycles a write request was stalled."
></metric>
<metric
name="TCC_EA_WRREQ_IO_CREDIT_STALL" block=TCC event=31 descr="Number of cycles an EA write request was stalled because the interface was out of IO credits."
></metric>
<metric
name="TCC_EA_WRREQ_GMI_CREDIT_STALL" block=TCC event=32 descr="Number of cycles an EA write request was stalled because the interface was out of GMI credits."
></metric>
<metric
name="TCC_EA_WRREQ_DRAM_CREDIT_STALL" block=TCC event=33 descr="Number of cycles an EA write request was stalled because the interface was out of DRAM credits."
></metric>
<metric
name="TCC_TOO_MANY_EA_WRREQS_STALL" block=TCC event=34 descr="Number of cycles the TCC could not send an EA write request because it already reached its maximum number of pending EA write requests."
></metric>
<metric
name="TCC_EA_WRREQ_LEVEL" block=TCC event=35 descr="The sum of the number of EA write requests in flight. This is primarily meant for measuring average EA write latency. Average write latency = TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."
></metric>
<metric
name="TCC_EA_ATOMIC" block=TCC event=36 descr="Number of transactions going over the TC_EA_wrreq interface that are actually atomic requests."
></metric>
<metric
name="TCC_EA_ATOMIC_LEVEL" block=TCC event=37 descr="The sum of the number of EA atomics in flight. This is primarily meant for measuring average EA atomic latency. Average atomic latency = TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."
></metric>
<metric
name="TCC_EA_RDREQ" block=TCC event=38 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"
></metric>
<metric
name="TCC_EA_RDREQ_32B" block=TCC event=39 descr="Number of 32-byte TCC/EA read requests"
></metric>
<metric
name="TCC_EA_RD_UNCACHED_32B" block=TCC event=40 descr="Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted as 2"
></metric>
<metric
name="TCC_EA_RDREQ_IO_CREDIT_STALL" block=TCC event=41 descr="Number of cycles there was a stall because the read request interface was out of IO credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_GMI_CREDIT_STALL" block=TCC event=42 descr="Number of cycles there was a stall because the read request interface was out of GMI credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_DRAM_CREDIT_STALL" block=TCC event=43 descr="Number of cycles there was a stall because the read request interface was out of DRAM credits. Stalls occur regardless of whether a read needed to be performed or not."
></metric>
<metric
name="TCC_EA_RDREQ_LEVEL" block=TCC event=44 descr="The sum of the number of TCC/EA read requests in flight. This is primarily meant for measuring average EA read latency. Average read latency = TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."
></metric>
<metric
name="TCC_TAG_STALL" block=TCC event=45 descr="Number of cycles the normal request pipeline in the tag was stalled for any reason. Normally, stalls of this nature are measured exactly from one point in the pipeline, but that is not the case for this counter. Probes can stall the pipeline at a variety of places, and there is no single point that can reasonably measure the total stalls accurately."
></metric>
<metric
name="TCC_NORMAL_WRITEBACK" block=TCC event=68 descr="Number of writebacks due to requests that are not writeback requests."
></metric>
<metric
name="TCC_ALL_TC_OP_WB_WRITEBACK" block=TCC event=73 descr="Number of writebacks due to all TC_OP writeback requests."
></metric>
<metric
name="TCC_NORMAL_EVICT" block=TCC event=74 descr="Number of evictions due to requests that are not invalidate or probe requests."
></metric>
<metric
name="TCC_ALL_TC_OP_INV_EVICT" block=TCC event=80 descr="Number of evictions due to all TC_OP invalidate requests."
></metric>
<metric
name="TCC_EA_RDREQ_DRAM" block=TCC event=102 descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."
></metric>
<metric
name="TCC_EA_WRREQ_DRAM" block=TCC event=103 descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC)."
></metric>
<metric
name="TCC_CLIENT184_REQ" block=TCC event=312 descr=""
></metric>
<metric
name="TCC_CLIENT185_REQ" block=TCC event=313 descr=""
></metric>
<metric
name="TCC_CLIENT186_REQ" block=TCC event=314 descr=""
></metric>
<metric
name="TCC_CLIENT187_REQ" block=TCC event=315 descr=""
></metric>
<metric
name="TCC_CLIENT188_REQ" block=TCC event=316 descr=""
></metric>
<metric
name="TCC_CLIENT189_REQ" block=TCC event=317 descr=""
></metric>
<metric
name="TCC_CLIENT190_REQ" block=TCC event=318 descr=""
></metric>
<metric
name="TCC_CLIENT191_REQ" block=TCC event=319 descr=""
></metric>
# TCP counters
<metric
name="TCP_GATE_EN1" block=TCP event=0 descr="TCP interface clocks are turned on. Not Windowed."
></metric>
<metric
name="TCP_GATE_EN2" block=TCP event=1 descr="TCP core clocks are turned on. Not Windowed."
></metric>
<metric
name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=6 descr="TCP stalls TA data interface. Not Windowed."
></metric>
<metric
name="TCP_TD_TCP_STALL_CYCLES" block=TCP event=7 descr="TD stalls TCP"
></metric>
<metric
name="TCP_TCR_TCP_STALL_CYCLES" block=TCP event=8 descr="TCR stalls TCP_TCR_req interface"
></metric>
<metric
name="TCP_READ_TAGCONFLICT_STALL_CYCLES" block=TCP event=11 descr="Tagram conflict stall on a read"
></metric>
<metric
name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES" block=TCP event=12 descr="Tagram conflict stall on a write"
></metric>
<metric
name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES" block=TCP event=13 descr="Tagram conflict stall on an atomic"
></metric>
<metric
name="TCP_PENDING_STALL_CYCLES" block=TCP event=22 descr="Stall due to data pending from L2"
></metric>
<metric
name="TCP_TA_TCP_STATE_READ" block=TCP event=27 descr="Number of state reads"
></metric>
<metric
name="TCP_VOLATILE" block=TCP event=28 descr="Total number of L1 volatile pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_ACCESSES" block=TCP event=29 descr="Total number of pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"
></metric>
<metric
name="TCP_TOTAL_READ" block=TCP event=30 descr="Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"
></metric>
<metric
name="TCP_TOTAL_WRITE" block=TCP event=32 descr="Total number of local write pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"
></metric>
<metric
name="TCP_TOTAL_ATOMIC_WITH_RET" block=TCP event=38 descr="Total number of atomic with return pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_ATOMIC_WITHOUT_RET" block=TCP event=39 descr="Total number of atomic without return pixels/buffers from TA"
></metric>
<metric
name="TCP_TOTAL_WRITEBACK_INVALIDATES" block=TCP event=45 descr="Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."
></metric>
<metric
name="TCP_UTCL1_REQUEST" block=TCP event=47 descr="Total CLIENT_UTCL1 NORMAL requests"
></metric>
<metric
name="TCP_UTCL1_TRANSLATION_MISS" block=TCP event=48 descr="Total utcl1 translation misses"
></metric>
<metric
name="TCP_UTCL1_TRANSLATION_HIT" block=TCP event=49 descr="Total utcl1 translation hits"
></metric>
<metric
name="TCP_UTCL1_PERMISSION_MISS" block=TCP event=50 descr="Total utcl1 permission misses"
></metric>
<metric
name="TCP_TOTAL_CACHE_ACCESSES" block=TCP event=60 descr="Count of total cache line (tag) accesses (includes hits and misses)."
></metric>
<metric
name="TCP_TCP_LATENCY" block=TCP event=65 descr="Total TCP wave latency (from first clock of wave entering to first clock of wave leaving), divide by TA_TCP_STATE_READ to get the average wave latency"
></metric>
<metric
name="TCP_TCC_READ_REQ_LATENCY" block=TCP event=66 descr="Total TCP->TCC request latency for reads and atomics with return. Not Windowed."
></metric>
<metric
name="TCP_TCC_WRITE_REQ_LATENCY" block=TCP event=67 descr="Total TCP->TCC request latency for writes and atomics without return. Not Windowed."
></metric>
<metric
name="TCP_TCC_READ_REQ" block=TCP event=69 descr="Total read requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_WRITE_REQ" block=TCP event=70 descr="Total write requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_ATOMIC_WITH_RET_REQ" block=TCP event=71 descr="Total atomic with return requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ" block=TCP event=72 descr="Total atomic without return requests from TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_READ_REQ" block=TCP event=75 descr="Total read requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_WRITE_REQ" block=TCP event=76 descr="Total write requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_NC_ATOMIC_REQ" block=TCP event=77 descr="Total atomic requests with NC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_READ_REQ" block=TCP event=78 descr="Total read requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_WRITE_REQ" block=TCP event=79 descr="Total write requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_UC_ATOMIC_REQ" block=TCP event=80 descr="Total atomic requests with UC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_READ_REQ" block=TCP event=81 descr="Total read requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_WRITE_REQ" block=TCP event=82 descr="Total write requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_CC_ATOMIC_REQ" block=TCP event=83 descr="Total atomic requests with CC mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_READ_REQ" block=TCP event=85 descr="Total read requests with RW mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_WRITE_REQ" block=TCP event=86 descr="Total write requests with RW mtype from this TCP to all TCCs"
></metric>
<metric
name="TCP_TCC_RW_ATOMIC_REQ" block=TCP event=87 descr="Total atomic requests with RW mtype from this TCP to all TCCs"
></metric>
# TD counters
<metric
name="TD_TD_BUSY" block=TD event=1 descr="TD is processing or waiting for data. Perf_Windowing not supported for this counter."
></metric>
<metric
name="TD_TC_STALL" block=TD event=15 descr="TD is stalled waiting for TC data."
></metric>
<metric
name="TD_RESERVED_18" block=TD event=18 descr="RESERVED_18"
></metric>
<metric
name="TD_LOAD_WAVEFRONT" block=TD event=25 descr="Count the wavefronts with opcode = load, include atomics and store."
></metric>
<metric
name="TD_ATOMIC_WAVEFRONT" block=TD event=26 descr="Count the wavefronts with opcode = atomic."
></metric>
<metric
name="TD_STORE_WAVEFRONT" block=TD event=27 descr="Count the wavefronts with opcode = store."
></metric>
<metric
name="TD_COALESCABLE_WAVEFRONT" block=TD event=32 descr="Count wavefronts that TA finds coalescable."
></metric>
</gfx908>
+163
Näytä tiedosto
@@ -0,0 +1,163 @@
#include "gfx908_metrics.xml"
<gfx9_expr>
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) descr="TA block is busy. Average over TA instances."></metric>
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) descr="TA block is busy. Max over TA instances."></metric>
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) descr="TA block is busy. Min over TA instances."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="Number of flat opcode reads processed by the TA. Sum over TA instances."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="Number of flat opcode writes processed by the TA. Sum over TA instances."></metric>
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,16) descr="TCC_BUSY avr over all memory channels."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,16) descr="TCC_REQ sum over all memory channels."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,16) descr="Number of cache hits. Sum over TCC instances."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,16) descr="Number of cache misses. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,16) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,16) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,16) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,16) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,16) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
<metric name="WRITE_REQ_32B" expr=TCC_EA_WRREQ_64B_sum*2+(TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
#xlu - TA
<metric name="TA_TA_BUSY_sum" expr=sum(TA_TA_BUSY,16) descr="."></metric>
<metric name="TA_TOTAL_WAVEFRONTS_sum" expr=sum(TA_TOTAL_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_ADDR_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TC_CYCLES,16) descr="."></metric>
<metric name="TA_ADDR_STALLED_BY_TD_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TD_CYCLES,16) descr="."></metric>
<metric name="TA_DATA_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_DATA_STALLED_BY_TC_CYCLES,16) descr="."></metric>
<metric name="TA_FLAT_WAVEFRONTS_sum" expr=sum(TA_FLAT_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_FLAT_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_FLAT_ATOMIC_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_READ_WAVEFRONTS_sum" expr=sum(TA_BUFFER_READ_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_WRITE_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WRITE_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_BUFFER_ATOMIC_WAVEFRONTS,16) descr="."></metric>
<metric name="TA_BUFFER_TOTAL_CYCLES_sum" expr=sum(TA_BUFFER_TOTAL_CYCLES,16) descr="."></metric>
<metric name="TA_BUFFER_COALESCED_READ_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_READ_CYCLES,16) descr="."></metric>
<metric name="TA_BUFFER_COALESCED_WRITE_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_WRITE_CYCLES,16) descr="."></metric>
#xlu -TD
<metric name="TD_TD_BUSY_sum" expr=sum(TD_TD_BUSY,16) descr="."></metric>
<metric name="TD_TC_STALL_sum" expr=sum(TD_TC_STALL,16) descr="."></metric>
<metric name="TD_LOAD_WAVEFRONT_sum" expr=sum(TD_LOAD_WAVEFRONT,16) descr="."></metric>
<metric name="TD_ATOMIC_WAVEFRONT_sum" expr=sum(TD_ATOMIC_WAVEFRONT,16) descr="."></metric>
<metric name="TD_STORE_WAVEFRONT_sum" expr=sum(TD_STORE_WAVEFRONT,16) descr="."></metric>
<metric name="TD_COALESCABLE_WAVEFRONT_sum" expr=sum(TD_COALESCABLE_WAVEFRONT,16) descr="."></metric>
#xlu -TCP
<metric name="TCP_GATE_EN1_sum" expr=sum(TCP_GATE_EN1,16) descr="."></metric>
<metric name="TCP_GATE_EN2_sum" expr=sum(TCP_GATE_EN2,16) descr="."></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_sum" expr=sum(TCP_TCP_TA_DATA_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_TD_TCP_STALL_CYCLES_sum" expr=sum(TCP_TD_TCP_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_TCR_TCP_STALL_CYCLES_sum" expr=sum(TCP_TCR_TCP_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_READ_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_READ_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_WRITE_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_PENDING_STALL_CYCLES_sum" expr=sum(TCP_PENDING_STALL_CYCLES,16) descr="."></metric>
<metric name="TCP_VOLATILE_sum" expr=sum(TCP_VOLATILE,16) descr="."></metric>
<metric name="TCP_TOTAL_ACCESSES_sum" expr=sum(TCP_TOTAL_ACCESSES,16) descr="."></metric>
<metric name="TCP_TOTAL_READ_sum" expr=sum(TCP_TOTAL_READ,16) descr="."></metric>
<metric name="TCP_TOTAL_WRITE_sum" expr=sum(TCP_TOTAL_WRITE,16) descr="."></metric>
<metric name="TCP_TOTAL_ATOMIC_WITH_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITH_RET,16) descr="."></metric>
<metric name="TCP_TOTAL_ATOMIC_WITHOUT_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITHOUT_RET,16) descr="."></metric>
<metric name="TCP_TOTAL_WRITEBACK_INVALIDATES_sum" expr=sum(TCP_TOTAL_WRITEBACK_INVALIDATES,16) descr="."></metric>
<metric name="TCP_UTCL1_REQUEST_sum" expr=sum(TCP_UTCL1_REQUEST,16) descr="."></metric>
<metric name="TCP_UTCL1_TRANSLATION_MISS_sum" expr=sum(TCP_UTCL1_TRANSLATION_MISS,16) descr="."></metric>
<metric name="TCP_UTCL1_TRANSLATION_HIT_sum" expr=sum(TCP_UTCL1_TRANSLATION_HIT,16) descr="."></metric>
<metric name="TCP_UTCL1_PERMISSION_MISS_sum" expr=sum(TCP_UTCL1_PERMISSION_MISS,16) descr="."></metric>
<metric name="TCP_TOTAL_CACHE_ACCESSES_sum" expr=sum(TCP_TOTAL_CACHE_ACCESSES,16) descr="."></metric>
<metric name="TCP_TCP_LATENCY_sum" expr=sum(TCP_TCP_LATENCY,16) descr="."></metric>
<metric name="TCP_TA_TCP_STATE_READ_sum" expr=sum(TCP_TA_TCP_STATE_READ,16) descr="."></metric>
<metric name="TCP_TCC_READ_REQ_LATENCY_sum" expr=sum(TCP_TCC_READ_REQ_LATENCY,16) descr="."></metric>
<metric name="TCP_TCC_WRITE_REQ_LATENCY_sum" expr=sum(TCP_TCC_WRITE_REQ_LATENCY,16) descr="."></metric>
<metric name="TCP_TCC_READ_REQ_sum" expr=sum(TCP_TCC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_WRITE_REQ_sum" expr=sum(TCP_TCC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_ATOMIC_WITH_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITH_RET_REQ,16) descr="."></metric>
<metric name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITHOUT_RET_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_READ_REQ_sum" expr=sum(TCP_TCC_NC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_WRITE_REQ_sum" expr=sum(TCP_TCC_NC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_NC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_NC_ATOMIC_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_READ_REQ_sum" expr=sum(TCP_TCC_UC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_WRITE_REQ_sum" expr=sum(TCP_TCC_UC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_UC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_UC_ATOMIC_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_READ_REQ_sum" expr=sum(TCP_TCC_CC_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_WRITE_REQ_sum" expr=sum(TCP_TCC_CC_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_CC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_CC_ATOMIC_REQ,16) descr="."></metric>
</gfx9_expr>
<gfx908_expr base="gfx9_expr">
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,32) descr="TCC_BUSY avr over all memory channels."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="TCC_REQ sum over all memory channels."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="Number of cache hits. Sum over TCC instances."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="Number of cache misses. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,32) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
#xlu - TCP
<metric name="TCP_TCC_RW_READ_REQ_sum" expr=sum(TCP_TCC_RW_READ_REQ,16) descr="."></metric>
<metric name="TCP_TCC_RW_WRITE_REQ_sum" expr=sum(TCP_TCC_RW_WRITE_REQ,16) descr="."></metric>
<metric name="TCP_TCC_RW_ATOMIC_REQ_sum" expr=sum(TCP_TCC_RW_ATOMIC_REQ,16) descr="."></metric>
#xlu - TCC
<metric name="TCC_CYCLE_sum" expr=sum(TCC_CYCLE,32) descr="."></metric>
<metric name="TCC_BUSY_sum" expr=sum(TCC_BUSY,32) descr="."></metric>
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="."></metric>
<metric name="TCC_STREAMING_REQ_sum" expr=sum(TCC_STREAMING_REQ,32) descr="."></metric>
<metric name="TCC_NC_REQ_sum" expr=sum(TCC_NC_REQ,32) descr="."></metric>
<metric name="TCC_UC_REQ_sum" expr=sum(TCC_UC_REQ,32) descr="."></metric>
<metric name="TCC_CC_REQ_sum" expr=sum(TCC_CC_REQ,32) descr="."></metric>
<metric name="TCC_RW_REQ_sum" expr=sum(TCC_RW_REQ,32) descr="."></metric>
<metric name="TCC_PROBE_sum" expr=sum(TCC_PROBE,32) descr="."></metric>
<metric name="TCC_PROBE_ALL_sum" expr=sum(TCC_PROBE_ALL,32) descr="."></metric>
<metric name="TCC_READ_sum" expr=sum(TCC_READ,32) descr="."></metric>
<metric name="TCC_WRITE_sum" expr=sum(TCC_WRITE,32) descr="."></metric>
<metric name="TCC_ATOMIC_sum" expr=sum(TCC_ATOMIC,32) descr="."></metric>
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="."></metric>
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="."></metric>
<metric name="TCC_TAG_STALL_sum" expr=sum(TCC_TAG_STALL,32) descr="."></metric>
<metric name="TCC_WRITEBACK_sum" expr=sum(TCC_WRITEBACK,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="."></metric>
<metric name="TCC_EA_WR_UNCACHED_32B_sum" expr=sum(TCC_EA_WR_UNCACHED_32B,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_STALL_sum" expr=sum(TCC_EA_WRREQ_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_IO_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_GMI_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_TOO_MANY_EA_WRREQS_STALL_sum" expr=sum(TCC_TOO_MANY_EA_WRREQS_STALL,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_LEVEL_sum" expr=sum(TCC_EA_WRREQ_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_LEVEL_sum" expr=sum(TCC_EA_RDREQ_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_ATOMIC_sum" expr=sum(TCC_EA_ATOMIC,32) descr="."></metric>
<metric name="TCC_EA_ATOMIC_LEVEL_sum" expr=sum(TCC_EA_ATOMIC_LEVEL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="."></metric>
<metric name="TCC_EA_RD_UNCACHED_32B_sum" expr=sum(TCC_EA_RD_UNCACHED_32B,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_IO_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_GMI_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
<metric name="TCC_NORMAL_WRITEBACK_sum" expr=sum(TCC_NORMAL_WRITEBACK,32) descr="."></metric>
<metric name="TCC_ALL_TC_OP_WB_WRITEBACK_sum" expr=sum(TCC_ALL_TC_OP_WB_WRITEBACK,32) descr="."></metric>
<metric name="TCC_NORMAL_EVICT_sum" expr=sum(TCC_NORMAL_EVICT,32) descr="."></metric>
<metric name="TCC_ALL_TC_OP_INV_EVICT_sum" expr=sum(TCC_ALL_TC_OP_INV_EVICT,32) descr="."></metric>
<metric name="TCC_EA_RDREQ_DRAM_sum" expr=sum(TCC_EA_RDREQ_DRAM,32) descr="."></metric>
<metric name="TCC_EA_WRREQ_DRAM_sum" expr=sum(TCC_EA_WRREQ_DRAM,32) descr="."></metric>
</gfx908_expr>
<gfx9 base="gfx9_expr"></gfx9>
<gfx908 base="gfx908_expr"> </gfx908>
@@ -0,0 +1,15 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
pmc: CPC_CPC_STAT_BUSY CPC_CPC_STAT_IDLE
pmc: CPC_CPC_TCIU_BUSY CPC_CPC_TCIU_IDLE
pmc: CPC_CPC_STAT_STALL CPC_UTCL1_STALL_ON_TRANSLATION
pmc: CPC_CPC_UTCL2IU_BUSY CPC_CPC_UTCL2IU_IDLE
pmc: CPC_CPC_UTCL2IU_STALL CPC_ME1_BUSY_FOR_PACKET_DECODE
pmc: CPC_ME1_DC0_SPI_BUSY
range:
gpu:
kernel:
@@ -0,0 +1,13 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
pmc: CPF_CPF_STAT_BUSY CPF_CPF_STAT_STALL
pmc: CPF_CPF_TCIU_BUSY CPF_CPF_TCIU_STALL
pmc: CPF_CPF_STAT_IDLE CPF_CPF_TCIU_IDLE
pmc: CPF_CMP_UTCL1_STALL_ON_TRANSLATION
range:
gpu:
kernel:
@@ -0,0 +1,12 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES GRBM_SPI_BUSY
pmc: SPI_CSN_WINDOW_VALID SPI_CSN_BUSY SPI_CSN_NUM_THREADGROUPS SPI_CSN_WAVE SPI_RA_REQ_NO_ALLOC SPI_RA_REQ_NO_ALLOC_CSN
pmc: SPI_RA_RES_STALL_CSN SPI_RA_TMP_STALL_CSN SPI_RA_WAVE_SIMD_FULL_CSN SPI_RA_VGPR_SIMD_FULL_CSN SPI_RA_SGPR_SIMD_FULL_CSN SPI_RA_LDS_CU_FULL_CSN
pmc: SPI_RA_BAR_CU_FULL_CSN SPI_RA_TGLIM_CU_FULL_CSN SPI_RA_WVLIM_STALL_CSN SPI_SWC_CSC_WR SPI_VWC_CSC_WR SPI_RA_BULKY_CU_FULL_CSN
range:
gpu:
kernel:
@@ -0,0 +1,31 @@
#SQ
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_INSTS_VMEM_WR SQ_INSTS_VMEM_RD SQ_INSTS_VMEM SQ_INSTS_SALU SQ_INSTS_VSKIPPED
pmc: SQ_INSTS_SMEM SQ_INSTS_FLAT SQ_INSTS_LDS SQ_INSTS_GDS SQ_INSTS_EXP_GDS SQ_INSTS_BRANCH SQ_INSTS_SENDMSG SQ_INSTS
pmc: SQ_WAVE_CYCLES SQ_WAIT_ANY SQ_WAIT_INST_ANY SQ_ACTIVE_INST_ANY SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_INSTS_VALU
pmc: SQ_ACTIVE_INST_VMEM SQ_ACTIVE_INST_LDS SQ_ACTIVE_INST_VALU SQ_ACTIVE_INST_SCA SQ_ACTIVE_INST_EXP_GDS SQ_ACTIVE_INST_MISC SQ_ACTIVE_INST_FLAT SQ_INST_CYCLES_VMEM_WR
pmc: SQ_INST_CYCLES_VMEM_RD SQ_INST_CYCLES_SMEM SQ_INST_CYCLES_SALU SQ_THREAD_CYCLES_VALU SQ_IFETCH SQ_LDS_BANK_CONFLICT SQ_LDS_ADDR_CONFLICT SQ_LDS_UNALIGNED_STALL
pmc: SQ_WAVES SQ_WAVES_EQ_64 SQ_WAVES_LT_64 SQ_WAVES_LT_48 SQ_WAVES_LT_32 SQ_WAVES_LT_16 SQ_ITEMS SQ_INSTS_VSKIPPED
pmc: SQ_LDS_MEM_VIOLATIONS SQ_LDS_ATOMIC_RETURN SQ_LDS_IDX_ACTIVE SQ_WAVES_RESTORED SQ_WAVES_SAVED SQ_INSTS_SMEM_NORM
#SQ:MI200
#pmc: SQ_INSTS_MFMA SQ_INSTS_VALU_MFMA_I8 SQ_INSTS_VALU_MFMA_F16 SQ_INSTS_VALU_MFMA_BF16 SQ_INSTS_VALU_MFMA_F32 SQ_INSTS_VALU_MFMA_F64 SQ_VALU_MFMA_BUSY_CYCLES
#pmc: SQ_INSTS_FLAT_LDS_ONLY SQ_INSTS_VALU_MFMA_MOPS_I8 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64
#SQC
pmc: SQC_TC_INST_REQ SQC_TC_DATA_READ_REQ SQC_TC_DATA_WRITE_REQ SQC_TC_DATA_ATOMIC_REQ SQC_TC_STALL SQC_TC_REQ SQC_DCACHE_REQ_READ_16
pmc: SQC_ICACHE_REQ SQC_ICACHE_HITS SQC_ICACHE_MISSES SQC_ICACHE_MISSES_DUPLICATE SQC_DCACHE_INPUT_VALID_READYB SQC_DCACHE_ATOMIC SQC_DCACHE_REQ_READ_8
pmc: SQC_DCACHE_REQ SQC_DCACHE_HITS SQC_DCACHE_MISSES SQC_DCACHE_MISSES_DUPLICATE SQC_DCACHE_REQ_READ_1 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ_READ_4
########################################
# Filtering
########################################
range:
gpu:
kernel:
@@ -0,0 +1,12 @@
#################################################
# VMEM latency
#################################################
pmc: SQ_INSTS_VMEM SQ_INST_LEVEL_VMEM SQ_ACCUM_PREV_HIRES
range:
gpu:
kernel:
@@ -0,0 +1,13 @@
#################################################
# SMEM latency
#################################################
pmc: SQ_INSTS_SMEM SQ_INST_LEVEL_SMEM SQ_ACCUM_PREV_HIRES
range:
gpu:
kernel:
@@ -0,0 +1,11 @@
#################################################
# ifetch latency
#################################################
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_WAVES SQ_IFETCH SQ_IFETCH_LEVEL SQ_ACCUM_PREV_HIRES
range:
gpu:
kernel:
@@ -0,0 +1,15 @@
#################################################
# LDS latency
#################################################
pmc: SQ_INSTS_LDS SQ_INST_LEVEL_LDS SQ_ACCUM_PREV_HIRES
range:
gpu:
kernel:
@@ -0,0 +1,7 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE CPC_ME1_BUSY_FOR_PACKET_DECODE SQ_CYCLES SQ_WAVES SQ_WAVE_CYCLES SQ_BUSY_CYCLES SQ_LEVEL_WAVES SQ_ACCUM_PREV_HIRES
gpu:
range:
kernel:
@@ -0,0 +1,12 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
pmc: SQC_TC_INST_REQ SQC_TC_DATA_READ_REQ SQC_TC_DATA_WRITE_REQ SQC_TC_DATA_ATOMIC_REQ SQC_TC_STALL SQC_TC_REQ SQC_DCACHE_REQ_READ_16
pmc: SQC_ICACHE_REQ SQC_ICACHE_HITS SQC_ICACHE_MISSES SQC_ICACHE_MISSES_DUPLICATE SQC_DCACHE_INPUT_VALID_READYB SQC_DCACHE_ATOMIC SQC_DCACHE_REQ_READ_8
pmc: SQC_DCACHE_REQ SQC_DCACHE_HITS SQC_DCACHE_MISSES SQC_DCACHE_MISSES_DUPLICATE SQC_DCACHE_REQ_READ_1 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ_READ_4
range:
gpu:
kernel:
@@ -0,0 +1,37 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
pmc: TA_TA_BUSY_sum TA_SH_FIFO_BUSY_sum
pmc: TA_SH_FIFO_CMD_BUSY_sum TA_SH_FIFO_ADDR_BUSY_sum
pmc: TA_SH_FIFO_DATA_BUSY_sum TA_SH_FIFO_DATA_SFIFO_BUSY_sum
pmc: TA_SH_FIFO_DATA_TFIFO_BUSY_sum TA_SQ_TA_CMD_CYCLES_sum
pmc: TA_SP_TA_ADDR_CYCLES_sum TA_SP_TA_DATA_CYCLES_sum
# Starvation
pmc: TA_SH_FIFO_ADDR_STARVED_WHILE_BUSY_CYCLES_sum TA_SH_FIFO_CMD_STARVED_WHILE_BUSY_CYCLES_sum
pmc: TA_SH_FIFO_DATA_STARVED_WHILE_BUSY_CYCLES_sum TA_TA_SH_FIFO_STARVED_sum
# buffer access
pmc: TA_BUFFER_WAVEFRONTS_sum TA_BUFFER_READ_WAVEFRONTS_sum
pmc: TA_BUFFER_WRITE_WAVEFRONTS_sum TA_BUFFER_ATOMIC_WAVEFRONTS_sum
pmc: TA_BUFFER_TOTAL_CYCLES_sum TA_BUFFER_COALESCABLE_WAVEFRONTS_sum
pmc: TA_BUFFER_COALESCED_READ_CYCLES_sum TA_BUFFER_COALESCED_WRITE_CYCLES_sum
# stalls
pmc: TA_ADDR_STALLED_BY_TC_CYCLES_sum TA_TOTAL_WAVEFRONTS_sum
pmc: TA_ADDR_STALLED_BY_TD_CYCLES_sum TA_DATA_STALLED_BY_TC_CYCLES_sum
# flat accesses
pmc: TA_FLAT_WAVEFRONTS_sum TA_FLAT_READ_WAVEFRONTS_sum
pmc: TA_FLAT_WRITE_WAVEFRONTS_sum TA_FLAT_ATOMIC_WAVEFRONTS_sum
pmc: TA_FLAT_COALESCEABLE_WAVEFRONTS_sum
range:
gpu:
kernel:
@@ -0,0 +1,26 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
# MI50: no TCC_RW_REQ
pmc: TCC_CYCLE[0] TCC_HIT[0] TCC_MISS[0] TCC_CYCLE[1] TCC_HIT[1] TCC_MISS[1] TCC_CYCLE[2] TCC_HIT[2] TCC_MISS[2] TCC_CYCLE[3] TCC_HIT[3] TCC_MISS[3] TCC_CYCLE[4] TCC_HIT[4] TCC_MISS[4] TCC_CYCLE[5] TCC_HIT[5] TCC_MISS[5] TCC_CYCLE[6] TCC_HIT[6] TCC_MISS[6] TCC_CYCLE[7] TCC_HIT[7] TCC_MISS[7] TCC_CYCLE[8] TCC_HIT[8] TCC_MISS[8] TCC_CYCLE[9] TCC_HIT[9] TCC_MISS[9] TCC_CYCLE[10] TCC_HIT[10] TCC_MISS[10] TCC_CYCLE[11] TCC_HIT[11] TCC_MISS[11] TCC_CYCLE[12] TCC_HIT[12] TCC_MISS[12] TCC_CYCLE[13] TCC_HIT[13] TCC_MISS[13] TCC_CYCLE[14] TCC_HIT[14] TCC_MISS[14] TCC_CYCLE[15] TCC_HIT[15] TCC_MISS[15]
pmc: TCC_REQ[0] TCC_READ[0] TCC_WRITE[0] TCC_ATOMIC[0] TCC_REQ[1] TCC_READ[1] TCC_WRITE[1] TCC_ATOMIC[1] TCC_REQ[2] TCC_READ[2] TCC_WRITE[2] TCC_ATOMIC[2] TCC_REQ[3] TCC_READ[3] TCC_WRITE[3] TCC_ATOMIC[3] TCC_REQ[4] TCC_READ[4] TCC_WRITE[4] TCC_ATOMIC[4] TCC_REQ[5] TCC_READ[5] TCC_WRITE[5] TCC_ATOMIC[5] TCC_REQ[6] TCC_READ[6] TCC_WRITE[6] TCC_ATOMIC[6] TCC_REQ[7] TCC_READ[7] TCC_WRITE[7] TCC_ATOMIC[7] TCC_REQ[8] TCC_READ[8] TCC_WRITE[8] TCC_ATOMIC[8] TCC_REQ[9] TCC_READ[9] TCC_WRITE[9] TCC_ATOMIC[9] TCC_REQ[10] TCC_READ[10] TCC_WRITE[10] TCC_ATOMIC[10] TCC_REQ[11] TCC_READ[11] TCC_WRITE[11] TCC_ATOMIC[11] TCC_REQ[12] TCC_READ[12] TCC_WRITE[12] TCC_ATOMIC[12] TCC_REQ[13] TCC_READ[13] TCC_WRITE[13] TCC_ATOMIC[13] TCC_REQ[14] TCC_READ[14] TCC_WRITE[14] TCC_ATOMIC[14] TCC_REQ[15] TCC_READ[15] TCC_WRITE[15] TCC_ATOMIC[15]
pmc: TCC_EA_RDREQ[0] TCC_EA_RDREQ_32B[0] TCC_EA_WRREQ[0] TCC_EA_WRREQ_64B[0] TCC_EA_RDREQ[1] TCC_EA_RDREQ_32B[1] TCC_EA_WRREQ[1] TCC_EA_WRREQ_64B[1] TCC_EA_RDREQ[2] TCC_EA_RDREQ_32B[2] TCC_EA_WRREQ[2] TCC_EA_WRREQ_64B[2] TCC_EA_RDREQ[3] TCC_EA_RDREQ_32B[3] TCC_EA_WRREQ[3] TCC_EA_WRREQ_64B[3] TCC_EA_RDREQ[4] TCC_EA_RDREQ_32B[4] TCC_EA_WRREQ[4] TCC_EA_WRREQ_64B[4] TCC_EA_RDREQ[5] TCC_EA_RDREQ_32B[5] TCC_EA_WRREQ[5] TCC_EA_WRREQ_64B[5] TCC_EA_RDREQ[6] TCC_EA_RDREQ_32B[6] TCC_EA_WRREQ[6] TCC_EA_WRREQ_64B[6] TCC_EA_RDREQ[7] TCC_EA_RDREQ_32B[7] TCC_EA_WRREQ[7] TCC_EA_WRREQ_64B[7] TCC_EA_RDREQ[8] TCC_EA_RDREQ_32B[8] TCC_EA_WRREQ[8] TCC_EA_WRREQ_64B[8] TCC_EA_RDREQ[9] TCC_EA_RDREQ_32B[9] TCC_EA_WRREQ[9] TCC_EA_WRREQ_64B[9] TCC_EA_RDREQ[10] TCC_EA_RDREQ_32B[10] TCC_EA_WRREQ[10] TCC_EA_WRREQ_64B[10] TCC_EA_RDREQ[11] TCC_EA_RDREQ_32B[11] TCC_EA_WRREQ[11] TCC_EA_WRREQ_64B[11] TCC_EA_RDREQ[12] TCC_EA_RDREQ_32B[12] TCC_EA_WRREQ[12] TCC_EA_WRREQ_64B[12] TCC_EA_RDREQ[13] TCC_EA_RDREQ_32B[13] TCC_EA_WRREQ[13] TCC_EA_WRREQ_64B[13] TCC_EA_RDREQ[14] TCC_EA_RDREQ_32B[14] TCC_EA_WRREQ[14] TCC_EA_WRREQ_64B[14] TCC_EA_RDREQ[15] TCC_EA_RDREQ_32B[15] TCC_EA_WRREQ[15] TCC_EA_WRREQ_64B[15]
pmc: TCC_EA_ATOMIC[0] TCC_EA_RDREQ_LEVEL[0] TCC_EA_WRREQ_LEVEL[0] TCC_EA_ATOMIC_LEVEL[0] TCC_EA_ATOMIC[1] TCC_EA_RDREQ_LEVEL[1] TCC_EA_WRREQ_LEVEL[1] TCC_EA_ATOMIC_LEVEL[1] TCC_EA_ATOMIC[2] TCC_EA_RDREQ_LEVEL[2] TCC_EA_WRREQ_LEVEL[2] TCC_EA_ATOMIC_LEVEL[2] TCC_EA_ATOMIC[3] TCC_EA_RDREQ_LEVEL[3] TCC_EA_WRREQ_LEVEL[3] TCC_EA_ATOMIC_LEVEL[3] TCC_EA_ATOMIC[4] TCC_EA_RDREQ_LEVEL[4] TCC_EA_WRREQ_LEVEL[4] TCC_EA_ATOMIC_LEVEL[4] TCC_EA_ATOMIC[5] TCC_EA_RDREQ_LEVEL[5] TCC_EA_WRREQ_LEVEL[5] TCC_EA_ATOMIC_LEVEL[5] TCC_EA_ATOMIC[6] TCC_EA_RDREQ_LEVEL[6] TCC_EA_WRREQ_LEVEL[6] TCC_EA_ATOMIC_LEVEL[6] TCC_EA_ATOMIC[7] TCC_EA_RDREQ_LEVEL[7] TCC_EA_WRREQ_LEVEL[7] TCC_EA_ATOMIC_LEVEL[7] TCC_EA_ATOMIC[8] TCC_EA_RDREQ_LEVEL[8] TCC_EA_WRREQ_LEVEL[8] TCC_EA_ATOMIC_LEVEL[8] TCC_EA_ATOMIC[9] TCC_EA_RDREQ_LEVEL[9] TCC_EA_WRREQ_LEVEL[9] TCC_EA_ATOMIC_LEVEL[9] TCC_EA_ATOMIC[10] TCC_EA_RDREQ_LEVEL[10] TCC_EA_WRREQ_LEVEL[10] TCC_EA_ATOMIC_LEVEL[10] TCC_EA_ATOMIC[11] TCC_EA_RDREQ_LEVEL[11] TCC_EA_WRREQ_LEVEL[11] TCC_EA_ATOMIC_LEVEL[11] TCC_EA_ATOMIC[12] TCC_EA_RDREQ_LEVEL[12] TCC_EA_WRREQ_LEVEL[12] TCC_EA_ATOMIC_LEVEL[12] TCC_EA_ATOMIC[13] TCC_EA_RDREQ_LEVEL[13] TCC_EA_WRREQ_LEVEL[13] TCC_EA_ATOMIC_LEVEL[13] TCC_EA_ATOMIC[14] TCC_EA_RDREQ_LEVEL[14] TCC_EA_WRREQ_LEVEL[14] TCC_EA_ATOMIC_LEVEL[14] TCC_EA_ATOMIC[15] TCC_EA_RDREQ_LEVEL[15] TCC_EA_WRREQ_LEVEL[15] TCC_EA_ATOMIC_LEVEL[15]
pmc: TCC_EA_RDREQ_IO_CREDIT_STALL[0] TCC_EA_RDREQ_GMI_CREDIT_STALL[0] TCC_EA_RDREQ_DRAM_CREDIT_STALL[0] TCC_EA_RDREQ_IO_CREDIT_STALL[1] TCC_EA_RDREQ_GMI_CREDIT_STALL[1] TCC_EA_RDREQ_DRAM_CREDIT_STALL[1] TCC_EA_RDREQ_IO_CREDIT_STALL[2] TCC_EA_RDREQ_GMI_CREDIT_STALL[2] TCC_EA_RDREQ_DRAM_CREDIT_STALL[2] TCC_EA_RDREQ_IO_CREDIT_STALL[3] TCC_EA_RDREQ_GMI_CREDIT_STALL[3] TCC_EA_RDREQ_DRAM_CREDIT_STALL[3] TCC_EA_RDREQ_IO_CREDIT_STALL[4] TCC_EA_RDREQ_GMI_CREDIT_STALL[4] TCC_EA_RDREQ_DRAM_CREDIT_STALL[4] TCC_EA_RDREQ_IO_CREDIT_STALL[5] TCC_EA_RDREQ_GMI_CREDIT_STALL[5] TCC_EA_RDREQ_DRAM_CREDIT_STALL[5] TCC_EA_RDREQ_IO_CREDIT_STALL[6] TCC_EA_RDREQ_GMI_CREDIT_STALL[6] TCC_EA_RDREQ_DRAM_CREDIT_STALL[6] TCC_EA_RDREQ_IO_CREDIT_STALL[7] TCC_EA_RDREQ_GMI_CREDIT_STALL[7] TCC_EA_RDREQ_DRAM_CREDIT_STALL[7] TCC_EA_RDREQ_IO_CREDIT_STALL[8] TCC_EA_RDREQ_GMI_CREDIT_STALL[8] TCC_EA_RDREQ_DRAM_CREDIT_STALL[8] TCC_EA_RDREQ_IO_CREDIT_STALL[9] TCC_EA_RDREQ_GMI_CREDIT_STALL[9] TCC_EA_RDREQ_DRAM_CREDIT_STALL[9] TCC_EA_RDREQ_IO_CREDIT_STALL[10] TCC_EA_RDREQ_GMI_CREDIT_STALL[10] TCC_EA_RDREQ_DRAM_CREDIT_STALL[10] TCC_EA_RDREQ_IO_CREDIT_STALL[11] TCC_EA_RDREQ_GMI_CREDIT_STALL[11] TCC_EA_RDREQ_DRAM_CREDIT_STALL[11] TCC_EA_RDREQ_IO_CREDIT_STALL[12] TCC_EA_RDREQ_GMI_CREDIT_STALL[12] TCC_EA_RDREQ_DRAM_CREDIT_STALL[12] TCC_EA_RDREQ_IO_CREDIT_STALL[13] TCC_EA_RDREQ_GMI_CREDIT_STALL[13] TCC_EA_RDREQ_DRAM_CREDIT_STALL[13] TCC_EA_RDREQ_IO_CREDIT_STALL[14] TCC_EA_RDREQ_GMI_CREDIT_STALL[14] TCC_EA_RDREQ_DRAM_CREDIT_STALL[14] TCC_EA_RDREQ_IO_CREDIT_STALL[15] TCC_EA_RDREQ_GMI_CREDIT_STALL[15] TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]
pmc: TCC_EA_WRREQ_IO_CREDIT_STALL[0] TCC_EA_WRREQ_GMI_CREDIT_STALL[0] TCC_EA_WRREQ_DRAM_CREDIT_STALL[0] TCC_TOO_MANY_EA_WRREQS_STALL[0] TCC_EA_WRREQ_IO_CREDIT_STALL[1] TCC_EA_WRREQ_GMI_CREDIT_STALL[1] TCC_EA_WRREQ_DRAM_CREDIT_STALL[1] TCC_TOO_MANY_EA_WRREQS_STALL[1] TCC_EA_WRREQ_IO_CREDIT_STALL[2] TCC_EA_WRREQ_GMI_CREDIT_STALL[2] TCC_EA_WRREQ_DRAM_CREDIT_STALL[2] TCC_TOO_MANY_EA_WRREQS_STALL[2] TCC_EA_WRREQ_IO_CREDIT_STALL[3] TCC_EA_WRREQ_GMI_CREDIT_STALL[3] TCC_EA_WRREQ_DRAM_CREDIT_STALL[3] TCC_TOO_MANY_EA_WRREQS_STALL[3] TCC_EA_WRREQ_IO_CREDIT_STALL[4] TCC_EA_WRREQ_GMI_CREDIT_STALL[4] TCC_EA_WRREQ_DRAM_CREDIT_STALL[4] TCC_TOO_MANY_EA_WRREQS_STALL[4] TCC_EA_WRREQ_IO_CREDIT_STALL[5] TCC_EA_WRREQ_GMI_CREDIT_STALL[5] TCC_EA_WRREQ_DRAM_CREDIT_STALL[5] TCC_TOO_MANY_EA_WRREQS_STALL[5] TCC_EA_WRREQ_IO_CREDIT_STALL[6] TCC_EA_WRREQ_GMI_CREDIT_STALL[6] TCC_EA_WRREQ_DRAM_CREDIT_STALL[6] TCC_TOO_MANY_EA_WRREQS_STALL[6] TCC_EA_WRREQ_IO_CREDIT_STALL[7] TCC_EA_WRREQ_GMI_CREDIT_STALL[7] TCC_EA_WRREQ_DRAM_CREDIT_STALL[7] TCC_TOO_MANY_EA_WRREQS_STALL[7] TCC_EA_WRREQ_IO_CREDIT_STALL[8] TCC_EA_WRREQ_GMI_CREDIT_STALL[8] TCC_EA_WRREQ_DRAM_CREDIT_STALL[8] TCC_TOO_MANY_EA_WRREQS_STALL[8] TCC_EA_WRREQ_IO_CREDIT_STALL[9] TCC_EA_WRREQ_GMI_CREDIT_STALL[9] TCC_EA_WRREQ_DRAM_CREDIT_STALL[9] TCC_TOO_MANY_EA_WRREQS_STALL[9] TCC_EA_WRREQ_IO_CREDIT_STALL[10] TCC_EA_WRREQ_GMI_CREDIT_STALL[10] TCC_EA_WRREQ_DRAM_CREDIT_STALL[10] TCC_TOO_MANY_EA_WRREQS_STALL[10] TCC_EA_WRREQ_IO_CREDIT_STALL[11] TCC_EA_WRREQ_GMI_CREDIT_STALL[11] TCC_EA_WRREQ_DRAM_CREDIT_STALL[11] TCC_TOO_MANY_EA_WRREQS_STALL[11] TCC_EA_WRREQ_IO_CREDIT_STALL[12] TCC_EA_WRREQ_GMI_CREDIT_STALL[12] TCC_EA_WRREQ_DRAM_CREDIT_STALL[12] TCC_TOO_MANY_EA_WRREQS_STALL[12] TCC_EA_WRREQ_IO_CREDIT_STALL[13] TCC_EA_WRREQ_GMI_CREDIT_STALL[13] TCC_EA_WRREQ_DRAM_CREDIT_STALL[13] TCC_TOO_MANY_EA_WRREQS_STALL[13] TCC_EA_WRREQ_IO_CREDIT_STALL[14] TCC_EA_WRREQ_GMI_CREDIT_STALL[14] TCC_EA_WRREQ_DRAM_CREDIT_STALL[14] TCC_TOO_MANY_EA_WRREQS_STALL[14] TCC_EA_WRREQ_IO_CREDIT_STALL[15] TCC_EA_WRREQ_GMI_CREDIT_STALL[15] TCC_EA_WRREQ_DRAM_CREDIT_STALL[15] TCC_TOO_MANY_EA_WRREQS_STALL[15]
gpu:
kernel:
range:
@@ -0,0 +1,19 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
pmc: TCC_CYCLE_sum TCC_BUSY_sum TCC_PROBE_sum TCC_PROBE_ALL_sum
pmc: TCC_NC_REQ_sum TCC_UC_REQ_sum TCC_CC_REQ_sum
pmc: TCC_REQ_sum TCC_STREAMING_REQ_sum TCC_HIT_sum TCC_MISS_sum
pmc: TCC_READ_sum TCC_WRITE_sum TCC_ATOMIC_sum TCC_WRITEBACK_sum
pmc: TCC_EA_WRREQ_sum TCC_EA_WRREQ_64B_sum TCC_EA_WR_UNCACHED_32B_sum
pmc: TCC_EA_WRREQ_STALL_sum TCC_EA_WRREQ_IO_CREDIT_STALL_sum TCC_EA_WRREQ_GMI_CREDIT_STALL_sum TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum
pmc: TCC_TOO_MANY_EA_WRREQS_STALL_sum TCC_EA_ATOMIC_sum TCC_EA_RDREQ_sum TCC_EA_RDREQ_32B_sum
pmc: TCC_EA_RD_UNCACHED_32B_sum TCC_EA_RDREQ_IO_CREDIT_STALL_sum TCC_EA_RDREQ_GMI_CREDIT_STALL_sum TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum
pmc: TCC_NORMAL_WRITEBACK_sum TCC_ALL_TC_OP_WB_WRITEBACK_sum TCC_NORMAL_EVICT_sum
pmc: TCC_ALL_TC_OP_INV_EVICT_sum TCC_EA_RDREQ_DRAM_sum TCC_EA_WRREQ_DRAM_sum
pmc: TCC_EA_RDREQ_LEVEL_sum TCC_EA_WRREQ_LEVEL_sum TCC_EA_ATOMIC_LEVEL_sum
gpu:
kernel:
range:
@@ -0,0 +1,18 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
pmc: TCP_GATE_EN1_sum TCP_GATE_EN2_sum TCP_TD_TCP_STALL_CYCLES_sum TCP_TCR_TCP_STALL_CYCLES_sum
pmc: TCP_READ_TAGCONFLICT_STALL_CYCLES_sum TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum TCP_TA_TCP_STATE_READ_sum
pmc: TCP_VOLATILE_sum TCP_TOTAL_ACCESSES_sum TCP_TOTAL_READ_sum TCP_TOTAL_WRITE_sum
pmc: TCP_TOTAL_ATOMIC_WITH_RET_sum TCP_TOTAL_ATOMIC_WITHOUT_RET_sum TCP_TOTAL_WRITEBACK_INVALIDATES_sum TCP_TOTAL_CACHE_ACCESSES_sum
pmc: TCP_UTCL1_TRANSLATION_MISS_sum TCP_UTCL1_TRANSLATION_HIT_sum TCP_UTCL1_PERMISSION_MISS_sum TCP_UTCL1_REQUEST_sum
pmc: TCP_TCP_LATENCY_sum TCP_TCC_READ_REQ_LATENCY_sum TCP_TCC_WRITE_REQ_LATENCY_sum TCP_TCC_READ_REQ_sum
pmc: TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum TCP_TCC_NC_READ_REQ_sum
pmc: TCP_TCC_NC_WRITE_REQ_sum TCP_TCC_NC_ATOMIC_REQ_sum TCP_TCC_UC_READ_REQ_sum TCP_TCC_UC_WRITE_REQ_sum
pmc: TCP_TCC_UC_ATOMIC_REQ_sum TCP_TCC_CC_READ_REQ_sum TCP_TCC_CC_WRITE_REQ_sum TCP_TCC_CC_ATOMIC_REQ_sum
pmc: TCP_PENDING_STALL_CYCLES_sum
#pmc: TCA_CYCLE_sum TCA_BUSY_sum
gpu:
kernel:
range:
@@ -0,0 +1,12 @@
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
pmc: TD_TD_BUSY_sum TD_TC_STALL_sum
pmc: TD_COALESCABLE_WAVEFRONT_sum TD_LOAD_WAVEFRONT_sum
pmc: TD_ATOMIC_WAVEFRONT_sum TD_STORE_WAVEFRONT_sum
gpu:
range:
kernel:
+15 -15
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import argparse
import collections
@@ -37,7 +39,6 @@ MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout
def kernel_name_shortener(df, cache, level):
if level >= 5:
return df
@@ -50,7 +51,6 @@ def kernel_name_shortener(df, cache, level):
if columnName == "KernelName" or columnName == "Name":
# loop through all indices
for index in df.index:
original_name = df.loc[index, columnName]
if original_name in cache:
continue
@@ -142,7 +142,7 @@ def parse(args, profileAndExport):
print("Unable to parse SoC or workload name from sysinfo.csv")
sys.exit(1)
db = "omniperf_" + args.team + "_" + name + "_" + soc
db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
if Extractionlvl >= 5:
print("KernelName shortening disabled")
@@ -224,27 +224,27 @@ def convert_folder(connectionInfo, Extractionlvl):
df_saved_file = t2.to_csv(newfilepath + file)
cmd = (
'mongoimport --quiet --uri mongodb://{}:{}@{}:{} --authenticationDatabase "admin" --file {} -d {} -c {} --drop --type csv --headerline'
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
).format(
connectionInfo["username"],
connectionInfo["password"],
connectionInfo["host"],
connectionInfo["port"],
newfilepath + file,
connectionInfo["db"],
newfilepath + file,
fileName,
)
os.system(cmd)
else:
cmd = (
'mongoimport --quiet --uri mongodb://{}:{}@{}:{} --authenticationDatabase "admin" --file {} -d {} -c {} --drop --type csv --headerline'
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
).format(
connectionInfo["username"],
connectionInfo["password"],
connectionInfo["host"],
connectionInfo["port"],
connectionInfo["workload"] + "/" + file,
connectionInfo["db"],
connectionInfo["workload"] + "/" + file,
fileName,
)
os.system(cmd)
+396 -402
Näytä tiedosto
@@ -1,402 +1,396 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
from PyQt5.QtWidgets import (
QMainWindow,
QApplication,
QTreeView,
QTableWidget,
QTableWidgetItem,
)
from PyQt5.QtWidgets import (
QHBoxLayout,
QWidget,
QAction,
QFileDialog,
QAbstractItemView,
qApp,
)
from PyQt5.QtGui import QStandardItemModel, QStandardItem
from lxml import html
import sys
# class view(QWidget):
class mainWindow(QMainWindow):
    """Main window of the GFX Perfmon Builder.

    The left pane is a tree of metrics loaded from a GFX metrics XML file
    (SoC -> IP block -> metric). The right pane is a table with one column
    per IP block; every table row becomes one ``pmc:`` line on export and each
    cell holds at most ``perfmon_config[block]`` newline-separated counters.
    Checking or unchecking a metric in the tree adds it to, or removes it
    from, the table.
    """

    def __init__(self):
        # Zero-argument super() starts the MRO lookup at this class, so
        # QMainWindow.__init__ is the one that runs.  The previous
        # super(QMainWindow, self) started the lookup *after* QMainWindow.
        super().__init__()

        ###############################################################################
        # SOC Parameters
        ##############################################################################
        # Per IP block max number of simultaneous counters
        # GFX IP Blocks
        self.perfmon_config = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
        }

        # GFX Architectures
        self.soc_arch_list = ["gfx906", "gfx908", "gfx90a"]

        ###############################################################################
        # Window layout Design
        ##############################################################################
        self.block_list = []  # IP block names seen in the loaded XML
        self.nodes_dict = {}  # tree lookup: key -> QStandardItem
        self.tree = QTreeView(self)
        self.table = QTableWidget()

        # Tree setup
        self.tree.header().setDefaultSectionSize(180)
        self.model = QStandardItemModel()
        self.model.setHorizontalHeaderLabels(["Metric", "Block", "Event", "Definition"])
        self.tree.setModel(self.model)
        self.tree.setEditTriggers(QAbstractItemView.NoEditTriggers)

        # Set up click processing
        self.tree.clicked.connect(self.pmc_select)

        # Table setup: one column per IP block, in perfmon_config order
        tableHeader = list(self.perfmon_config.keys())
        self.table.setColumnCount(len(tableHeader))
        self.table.setHorizontalHeaderLabels(tableHeader)
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.table.showGrid()
        self.setWindowTitle("GFX Perfmon Builder")

        # layout: lhs: metrics; rhs: selected perfmon
        # The layout is created parentless and installed on the central
        # widget.  QMainWindow already owns a layout, so QHBoxLayout(self)
        # only produced a "which already has a layout" warning from Qt.
        layout = QHBoxLayout()
        layout.addWidget(self.tree)
        layout.addWidget(self.table)
        widget = QWidget()
        widget.setLayout(layout)
        self.setCentralWidget(widget)

        # Add Status
        self.statusBar()

        ###############################################################################
        # Window Menu Design
        ##############################################################################
        # Setup file menu
        menuBar = self.menuBar()
        menuBar.setNativeMenuBar(False)

        openAction = QAction("&Open", self)
        openAction.setShortcut("Ctrl+O")
        openAction.setStatusTip("Open GFX Metrics file")
        openAction.triggered.connect(self.openGFXDialog)

        saveAction = QAction("&Save", self)
        saveAction.setShortcut("Ctrl+S")
        saveAction.setStatusTip("Save to PMC file")
        saveAction.triggered.connect(self.exportGFXDialog)

        exitAction = QAction("&Exit", self)
        exitAction.setShortcut("Ctrl+Q")
        exitAction.setStatusTip("Exit")
        exitAction.triggered.connect(self.close)

        # Populate the File menu
        fileMenu = menuBar.addMenu("&File")
        fileMenu.addActions([openAction, saveAction])
        fileMenu.addSeparator()
        fileMenu.addActions([exitAction])

    def openGFXDialog(self):
        """Ask for a metrics XML file and load it into the tree."""
        # NOTE(review): the original built a QFileDialog.Options value with
        # DontUseNativeDialog but never passed it to the dialog; the dead
        # local is dropped here and the dialog behaves exactly as before.
        fileName, _ = QFileDialog.getOpenFileName(
            self, "Open GFX Metrics", "", "XML Files (*.xml)", "XML(*.xml)"
        )
        # Parse the xml (an empty name means the dialog was cancelled)
        if fileName:
            xmlparsed = html.parse(fileName)
            self.importData(xmlparsed)

    def exportGFXDialog(self):
        """Ask for a destination file and write the selected counters to it."""
        fileName, _ = QFileDialog.getSaveFileName(
            self, "Export PMC Counters", "", "Text File (*.txt)", "Text File(*.txt)"
        )
        # Write the counter file (an empty name means the dialog was cancelled)
        if fileName:
            self.exportPMCCounters(fileName)

    def exportPMCCounters(self, fileName):
        """Write the table to ``fileName``, one ``pmc:`` line per table row.

        The cells of a row are flattened left to right; the newline-separated
        counters inside a cell are joined with spaces.  The trailing
        ``gpu:`` / ``dispatch:`` / ``kernel:`` lines are written empty.
        """
        total_IP_blocks = len(self.perfmon_config)
        # "with" guarantees the file is closed even if a write fails.
        with open(fileName, "w") as f:
            for row in range(self.table.rowCount()):
                pmc_str = "pmc: "
                for col in range(total_IP_blocks):
                    cell = self.table.item(row, col)
                    if cell is not None:
                        pmc_str = pmc_str + " ".join(cell.text().split("\n")) + " "
                f.write(pmc_str + "\n")

            # Add standard lines
            f.write("\n\n")
            f.write("gpu: \n")
            f.write("dispatch: \n")
            f.write("kernel: \n")
        return

    def pmc_metric_selected(self, metric_name, col):
        """Return True if ``metric_name`` is listed in any cell of column ``col``."""
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is not None and metric_name in entry.text().split(sep="\n"):
                return True
        return False

    def _row_is_empty(self, row):
        """Return True if no cell of table row ``row`` holds any text."""
        for cindex in range(len(self.perfmon_config)):
            cell = self.table.item(row, cindex)
            if cell is not None and cell.text():
                return False
        return True

    def pmc_remove_metric(self, metric_name, IP_block):
        """Remove ``metric_name`` from the table column of ``IP_block``.

        Does nothing when the block has no column or the metric is not
        currently selected.  The remaining counters of the column are
        re-packed from row 0 so that no cell exceeds the per-block limit.
        """
        # Map SQC to SQ, since they share the same Perfmon block
        if IP_block == "SQC":
            IP_block = "SQ"

        # no action if it is for a ghost IP!
        if IP_block not in self.perfmon_config:
            return

        # This is the column we need to add/remove perfmon counters
        col = list(self.perfmon_config.keys()).index(IP_block)
        if not self.pmc_metric_selected(metric_name, col):
            return

        # Collect every counter of the column and clear its cells; they are
        # re-allocated below.
        pmc_list = []
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is not None:
                pmc_list = pmc_list + entry.text().split(sep="\n")
                self.table.takeItem(row, col)

        # allowed PMC counters per batch
        max_pmc_num = self.perfmon_config[IP_block]

        # remove this metric, re-segment the list and refill the column
        pmc_list.remove(metric_name)
        for row in range((len(pmc_list) + max_pmc_num - 1) // max_pmc_num):
            start_index = row * max_pmc_num
            pmc_str = "\n".join(pmc_list[start_index : start_index + max_pmc_num])
            self.table.setItem(row, col, QTableWidgetItem(pmc_str))

        # Drop trailing rows that ended up empty.  This also runs when the
        # column became empty: the original returned early in that case and
        # left an all-empty row behind, which was exported as a bare "pmc: ".
        while self.table.rowCount() > 0 and self._row_is_empty(
            self.table.rowCount() - 1
        ):
            self.table.removeRow(self.table.rowCount() - 1)

    def pmc_add_metric(self, metric_name, IP_block):
        """Add ``metric_name`` to the table column of ``IP_block``.

        Does nothing when the block has no column or the metric is already
        selected.  The metric goes into the first cell of the column that is
        empty or still below the per-block limit; if every row is full a new
        row is inserted.
        """
        # Map SQC to SQ, since they share the same Perfmon block
        if IP_block == "SQC":
            IP_block = "SQ"
        if IP_block not in self.perfmon_config:
            return

        # This is the column we need to add/remove perfmon counters
        col = list(self.perfmon_config.keys()).index(IP_block)

        # check if the metric already exists
        if self.pmc_metric_selected(metric_name, col):
            return

        # metric is not in a bucket yet, add it!
        if self.table.rowCount() == 0:
            # starting from scratch!
            self.table.insertRow(0)
            self.table.setItem(0, col, QTableWidgetItem(metric_name))
            return

        # find the row to insert: the first empty or non-full cell, otherwise
        # the loop falls through with ``row`` on the last row
        limit = self.perfmon_config[IP_block]
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is None:
                break
            if len(entry.text().split(sep="\n")) < limit:
                break

        entry = self.table.item(row, col)
        if entry is None:
            # put it into the empty cell
            self.table.setItem(row, col, QTableWidgetItem(metric_name))
            return

        pmc_list = entry.text().split(sep="\n")
        if len(pmc_list) < limit:
            # we have not hit the per-IP HW counter limit yet, add it to this row
            pmc_list.append(metric_name)
            pmc_str = "\n".join(pmc_list)
            self.table.setItem(row, col, QTableWidgetItem(pmc_str))
            self.table.resizeRowsToContents()
        else:
            # Start a new row
            row = row + 1
            self.table.insertRow(row)
            self.table.setItem(row, col, QTableWidgetItem(metric_name))

    def pmc_select(self, item):
        """Slot for ``QTreeView.clicked``: sync the table with a metric's check box.

        ``item`` is the clicked model index.  Only checkable items (metric
        nodes) are processed; the IP block is the metric name's prefix up to
        the first underscore.
        """
        # Resolve the clicked item itself rather than looking it up by name:
        # the same metric name can occur under several SoC nodes, and a name
        # lookup would read the check state of whichever was loaded last.
        node = self.model.itemFromIndex(item)
        if node is None or not node.isCheckable():
            return

        # only proper metrics check/uncheck is processed here.
        metric_name = node.text()
        IP_block = metric_name.split(sep="_")[0]
        state = node.checkState()
        if state == 0:  # Qt.Unchecked
            # unselect the metric in the table if it is currently selected
            self.pmc_remove_metric(metric_name, IP_block)
        elif state == 2:  # Qt.Checked
            self.pmc_add_metric(metric_name, IP_block)

    # Function to populate the treeview from a parsed metrics XML
    def importData(self, xmlparsed, root=None):
        """Rebuild the metric tree from ``xmlparsed``.

        ``xmlparsed`` is the tree returned by ``lxml.html.parse``.  Elements
        whose tag is in ``soc_arch_list`` become top-level SoC nodes; each
        ``metric`` element directly below one is added under a per-SoC node
        for its ``block`` attribute, as a checkable row showing its ``name``,
        ``block``, ``event`` and ``descr`` attributes.
        """
        self.model.setRowCount(0)
        # Clearing the model destroyed every item cached so far; forget them
        # so a second File > Open does not touch deleted items.
        self.block_list = []
        self.nodes_dict = {}
        if root is None:
            root = self.model.invisibleRootItem()

        # iter() replaces the deprecated getiterator().
        for x in xmlparsed.iter():
            # Add SoC node to Root
            if x.tag in self.soc_arch_list:
                soc_node = QStandardItem(x.tag)
                root.appendRow([soc_node])
                self.nodes_dict[x.tag] = soc_node

            # check all metrics in an SoC family
            x_parent = x.getparent()
            if (
                x.tag == "metric"
                and x_parent is not None
                and x_parent.tag in self.soc_arch_list
            ):
                soc = x_parent.tag
                block = x.attrib["block"]
                # Block nodes are tracked per SoC.  Keyed by block name alone,
                # a block already seen under one SoC was never created for the
                # next SoC and its metrics landed in the first SoC's subtree.
                block_key = (soc, block)
                if block_key not in self.nodes_dict:
                    if block not in self.block_list:
                        self.block_list.append(block)
                    block_node = QStandardItem(block)
                    self.nodes_dict[soc].appendRow(
                        [
                            block_node,
                            QStandardItem(""),
                            QStandardItem(""),
                            QStandardItem(""),
                        ]
                    )
                    # record the tree node for the block
                    self.nodes_dict[block_key] = block_node

                # Add metric node to the Block node
                metric_name = QStandardItem(x.attrib["name"])
                metric_name.setCheckable(True)
                self.nodes_dict[block_key].appendRow(
                    [
                        metric_name,
                        QStandardItem(block),
                        QStandardItem(x.attrib["event"]),
                        QStandardItem(x.attrib["descr"]),
                    ]
                )
                # Name-keyed entry kept for lookups by metric name; when a
                # name occurs under several SoCs the last one loaded wins.
                self.nodes_dict[x.attrib["name"]] = metric_name
if __name__ == "__main__":
    # Create the Qt application and switch it to the "Fusion" style.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle("Fusion")

    # Build the main window; metrics are loaded afterwards via File > Open.
    main_window = mainWindow()
    main_window.setGeometry(300, 100, 600, 300)
    main_window.show()

    # Run the event loop and use its return code as the process exit status.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
from PyQt5.QtWidgets import (
QMainWindow,
QApplication,
QTreeView,
QTableWidget,
QTableWidgetItem,
)
from PyQt5.QtWidgets import (
QHBoxLayout,
QWidget,
QAction,
QFileDialog,
QAbstractItemView,
qApp,
)
from PyQt5.QtGui import QStandardItemModel, QStandardItem
from lxml import html
import sys
# class view(QWidget):
class mainWindow(QMainWindow):
    """Main window of the GFX perfmon builder.

    The left-hand tree lists metrics loaded from a metrics XML file, grouped
    by SoC architecture and IP block. Checking a metric places it in the
    right-hand table, which has one column per IP block; every table row is
    one collection pass and a cell never holds more counters than
    ``perfmon_config`` allows for its block. The table can be exported as a
    text file of ``pmc:`` lines.
    """

    def __init__(self):
        """Build the widgets, the layout and the File menu."""
        # Zero-argument super() runs QMainWindow.__init__; the previous
        # super(QMainWindow, self) form started the lookup *after* QMainWindow.
        super().__init__()

        ###############################################################################
        # SOC Parameters
        ##############################################################################
        # Per IP block max number of simultaneous counters
        # GFX IP Blocks
        self.perfmon_config = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
        }
        # GFX Architectures
        self.soc_arch_list = ["gfx906", "gfx908", "gfx90a"]

        ###############################################################################
        # Window layout Design
        ##############################################################################
        self.block_list = []  # IP block names that already have a tree node
        self.nodes_dict = {}  # tree label (SoC / block / metric name) -> QStandardItem
        self.tree = QTreeView(self)
        self.table = QTableWidget()

        # Tree setup
        self.tree.header().setDefaultSectionSize(180)
        self.model = QStandardItemModel()
        self.model.setHorizontalHeaderLabels(["Metric", "Block", "Event", "Definition"])
        self.tree.setModel(self.model)
        self.tree.setEditTriggers(QAbstractItemView.NoEditTriggers)
        # Set up click processing
        self.tree.clicked.connect(self.pmc_select)

        # Table setup: one column per IP block, in perfmon_config order.
        tableHeader = list(self.perfmon_config.keys())
        self.table.setColumnCount(len(tableHeader))
        self.table.setHorizontalHeaderLabels(tableHeader)
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        # showGrid() only reports the current state; setShowGrid() is the setter.
        self.table.setShowGrid(True)
        self.setWindowTitle("GFX Perfmon Builder")

        # layout: lhs: metrics; rhs: selected perfmon
        # The layout is created without a parent because a QMainWindow already
        # owns its own layout; this one belongs to the central widget.
        layout = QHBoxLayout()
        layout.addWidget(self.tree)
        layout.addWidget(self.table)
        widget = QWidget()
        widget.setLayout(layout)
        self.setCentralWidget(widget)

        # Add Status
        self.statusBar()

        ###############################################################################
        # Window Menu Design
        ##############################################################################
        # Setup file menu
        menuBar = self.menuBar()
        menuBar.setNativeMenuBar(False)

        openAction = QAction("&Open", self)
        openAction.setShortcut("Ctrl+O")
        openAction.setStatusTip("Open GFX Metrics file")
        openAction.triggered.connect(self.openGFXDialog)

        saveAction = QAction("&Save", self)
        saveAction.setShortcut("Ctrl+S")
        saveAction.setStatusTip("Save to PMC file")
        saveAction.triggered.connect(self.exportGFXDialog)

        exitAction = QAction("&Exit", self)
        exitAction.setShortcut("Ctrl+Q")
        exitAction.setStatusTip("Exit")
        exitAction.triggered.connect(self.close)

        fileMenu = menuBar.addMenu("&File")
        fileMenu.addActions([openAction, saveAction])
        fileMenu.addSeparator()
        fileMenu.addActions([exitAction])

    def openGFXDialog(self):
        """Ask for a metrics XML file and load it into the tree."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        # The options were previously built but never handed to the dialog.
        fileName, _ = QFileDialog.getOpenFileName(
            self, "Open GFX Metrics", "", "XML Files (*.xml)", options=options
        )
        # Parse the xml (an empty name means the dialog was cancelled)
        if fileName:
            xmlparsed = html.parse(fileName)
            self.importData(xmlparsed)

    def exportGFXDialog(self):
        """Ask for an output file and write the selected counters to it."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        fileName, _ = QFileDialog.getSaveFileName(
            self, "Export PMC Counters", "", "Text File (*.txt)", options=options
        )
        if fileName:
            self.exportPMCCounters(fileName)

    def exportPMCCounters(self, fileName):
        """Write one ``pmc:`` line per table row to ``fileName``.

        Each line lists the counters of every non-empty cell in that row,
        space separated. Empty ``gpu:``, ``dispatch:`` and ``kernel:`` lines
        are appended after two blank lines.
        """
        total_IP_blocks = len(self.perfmon_config)
        # The context manager closes the file even when a write fails.
        with open(fileName, "w") as f:
            for row in range(self.table.rowCount()):
                pmc_str = "pmc: "
                for col in range(total_IP_blocks):
                    cell = self.table.item(row, col)
                    if cell is not None:
                        # A cell stores its counters one per line.
                        pmc_str += " ".join(cell.text().split("\n")) + " "
                f.write(pmc_str + "\n")
            # Add standard lines
            f.write("\n\n")
            f.write("gpu: \n")
            f.write("dispatch: \n")
            f.write("kernel: \n")

    def pmc_metric_selected(self, metric_name, col):
        """Return True when ``metric_name`` is listed in any row of column ``col``."""
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is not None and metric_name in entry.text().split(sep="\n"):
                return True
        return False

    def _row_is_empty(self, row):
        """Return True when no cell of ``row`` holds any text."""
        for cindex in range(len(self.perfmon_config)):
            cell = self.table.item(row, cindex)
            if cell is not None and cell.text():
                return False
        return True

    def pmc_remove_metric(self, metric_name, IP_block):
        """Remove ``metric_name`` from the column of ``IP_block`` if it is there.

        The remaining counters of that column are repacked from the first row
        downwards, and rows left without any content are dropped.
        """
        # Map SQC to SQ, since they share the same Perfmon block
        if IP_block == "SQC":
            IP_block = "SQ"
        # no action if it is for a ghost IP!
        if IP_block not in self.perfmon_config:
            return
        # This is the column we need to add/remove perfmon counters
        col = list(self.perfmon_config.keys()).index(IP_block)
        if not self.pmc_metric_selected(metric_name, col):
            return

        # Collect every counter of the column and clear its cells; they are
        # refilled below.
        pmc_list = []
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is not None:
                pmc_list.extend(entry.text().split(sep="\n"))
                self.table.takeItem(row, col)

        # allowed PMC counters per batch
        max_pmc_num = self.perfmon_config[IP_block]
        # remove this metric, re-segment the list and refill the column
        pmc_list.remove(metric_name)
        for row in range((len(pmc_list) + max_pmc_num - 1) // max_pmc_num):
            start_index = row * max_pmc_num
            pmc_str = "\n".join(pmc_list[start_index : start_index + max_pmc_num])
            self.table.setItem(row, col, QTableWidgetItem(pmc_str))

        # Drop trailing rows that no longer hold anything. This also runs when
        # the column became empty; previously that case returned early and
        # could leave a blank row, which was then exported as a bare "pmc: ".
        while self.table.rowCount() > 0 and self._row_is_empty(
            self.table.rowCount() - 1
        ):
            self.table.removeRow(self.table.rowCount() - 1)
        self.table.resizeRowsToContents()

    def pmc_add_metric(self, metric_name, IP_block):
        """Add ``metric_name`` to the column of ``IP_block`` if not there yet.

        The metric goes into the first row whose cell is empty or still below
        the per-block limit; when every row is full a new row is appended.
        """
        # Map SQC to SQ, since they share the same Perfmon block
        if IP_block == "SQC":
            IP_block = "SQ"
        if IP_block not in self.perfmon_config:
            return
        # This is the column we need to add/remove perfmon counters
        col = list(self.perfmon_config.keys()).index(IP_block)
        # check if the metric already exists
        if self.pmc_metric_selected(metric_name, col):
            return

        max_pmc_num = self.perfmon_config[IP_block]
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is None:
                # put it into the empty cell
                self.table.setItem(row, col, QTableWidgetItem(metric_name))
                break
            pmc_list = entry.text().split(sep="\n")
            if len(pmc_list) < max_pmc_num:
                # the per-IP HW counter limit is not reached yet: append here
                pmc_list.append(metric_name)
                self.table.setItem(row, col, QTableWidgetItem("\n".join(pmc_list)))
                break
        else:
            # Table is empty or every row of this column is full: new row.
            row = self.table.rowCount()
            self.table.insertRow(row)
            self.table.setItem(row, col, QTableWidgetItem(metric_name))
        self.table.resizeRowsToContents()

    def pmc_select(self, item):
        """Handle a click in the tree: sync the table with the metric's checkbox.

        ``item`` is the clicked model index. Clicks on anything that is not a
        checkable metric node are ignored.
        """
        metric_name = item.data()
        node = self.nodes_dict.get(metric_name)
        if node is None or not node.isCheckable():
            return
        # only proper metrics check/uncheck is processed here.
        # The IP block is taken from the metric name prefix (text before "_").
        IP_block = metric_name.split(sep="_")[0]
        state = node.checkState()
        if state == 0:  # 0 is Qt's unchecked state
            # unselect the metric in the table if it is currently selected
            self.pmc_remove_metric(metric_name, IP_block)
        elif state == 2:  # 2 is Qt's fully-checked state
            self.pmc_add_metric(metric_name, IP_block)

    # Function to populate the treeview from a parsed metrics file
    def importData(self, xmlparsed, root=None):
        """Rebuild the metric tree from a parsed metrics document.

        ``xmlparsed`` is the tree returned by ``lxml.html.parse``. Elements
        whose tag is in ``soc_arch_list`` become top-level nodes; each
        ``metric`` child of such an element becomes a checkable row under a
        node named after its ``block`` attribute. ``root`` defaults to the
        model's invisible root item.
        """
        self.model.setRowCount(0)
        # The rows removed above invalidate every cached item, so the lookup
        # tables must be reset as well; otherwise a second load would attach
        # metrics to deleted items.
        self.block_list.clear()
        self.nodes_dict.clear()
        if root is None:
            root = self.model.invisibleRootItem()

        # iter() replaces the deprecated getiterator().
        for x in xmlparsed.iter():
            # Add SoC node to Root
            if x.tag in self.soc_arch_list:
                parent = root
                parent.appendRow([QStandardItem(x.tag)])
                self.nodes_dict[x.tag] = parent.child(parent.rowCount() - 1)

            # check all metrics in an SoC family
            if x.tag == "metric" and x.getparent().tag in self.soc_arch_list:
                block = x.attrib["block"]
                # NOTE(review): block and metric nodes are keyed by name only,
                # so equally named entries of different SoCs share one node.
                # Confirm whether that is intended.
                if block not in self.block_list:
                    # New IP block (e.g., SQ) detected, create a new hierarchy
                    self.block_list.append(block)
                    parent = self.nodes_dict[x.getparent().tag]  # the SoC node
                    parent.appendRow(
                        [
                            QStandardItem(block),
                            QStandardItem(""),
                            QStandardItem(""),
                            QStandardItem(""),
                        ]
                    )
                    # record the tree node for the block
                    self.nodes_dict[block] = parent.child(parent.rowCount() - 1)

                # Add metric node to the Block node
                parent = self.nodes_dict[block]
                metric_name = QStandardItem(x.attrib["name"])
                metric_name.setCheckable(True)
                parent.appendRow(
                    [
                        metric_name,
                        QStandardItem(block),
                        QStandardItem(x.attrib["event"]),
                        QStandardItem(x.attrib["descr"]),
                    ]
                )
                self.nodes_dict[x.attrib["name"]] = parent.child(parent.rowCount() - 1)
if __name__ == "__main__":
    # Script entry point: build the Qt application, show the perfmon builder
    # window, and hand control to the Qt event loop.
    application = QApplication(sys.argv)
    application.setStyle("Fusion")

    # The window starts empty; metrics are loaded later through File > Open.
    main_window = mainWindow()
    main_window.setGeometry(300, 100, 600, 300)
    main_window.show()

    # Run the event loop and propagate its return code to the shell.
    exit_code = application.exec_()
    sys.exit(exit_code)
+23 -13
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import sys, os, pathlib, shutil, subprocess, argparse, glob, re
import numpy as np
@@ -29,6 +31,19 @@ prog = "omniperf"
# Per IP block max number of simultaneous counters
# GFX IP Blocks
perfmon_config = {
"vega10": {
"SQ": 8,
"TA": 2,
"TD": 2,
"TCP": 4,
"TCC": 4,
"CPC": 2,
"CPF": 2,
"SPI": 2,
"GRBM": 2,
"GDS": 4,
"TCC_channels": 16,
},
"mi50": {
"SQ": 8,
"TA": 2,
@@ -72,7 +87,6 @@ perfmon_config = {
def perfmon_coalesce(pmc_files_list, workload_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
# match pattern for pmc counters
@@ -97,7 +111,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
# Extract all PMC counters and store in separate buckets
for fname in pmc_files_list:
lines = open(fname, "r").read().splitlines()
for line in lines:
@@ -170,7 +183,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
def perfmon_emit(pmc_list, workload_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
# Calculate the minimum number of iteration to save the pmc counters
@@ -233,7 +245,6 @@ def perfmon_emit(pmc_list, workload_dir, soc):
def perfmon_filter(workload_dir, perfmon_dir, args):
workload_perfmon_dir = workload_dir + "/perfmon"
soc = args.target
@@ -275,7 +286,6 @@ def perfmon_filter(workload_dir, perfmon_dir, args):
def pmc_filter(workload_dir, perfmon_dir, soc):
workload_perfmon_dir = workload_dir + "/perfmon"
if not os.path.isdir(workload_perfmon_dir):
-661
Näytä tiedosto
@@ -1,661 +0,0 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
from linecache import cache
import subprocess
from operator import sub
import os
import sys
from pathlib import Path
import numpy
import matplotlib.pyplot as plt
from matplotlib.pyplot import get, text
from math import log, pi, sqrt
import pandas as pd
import pylab
from dataclasses import dataclass
import csv
################################################
# Global vars
################################################
# Filename prefix of the roofline plot saved by empirical_roof().
IMGNAME = "empirRoof"
# Multiplier applied to LDS activity in plot_application(); default assuming mi200.
L2_BANKS = 32
# Arithmetic-intensity range (x axis) over which plot_roof() draws the roof lines.
XMIN = 0.01
XMAX = 1000
# Text styling returned by get_font() for titles, axis labels and annotations.
FONT_SIZE = 16
FONT_COLOR = "black"
FONT_WEIGHT = "bold"
# Targets accepted by empirical_roof(); any other SoC makes it exit.
SUPPORTED_SOC = ["mi200"]
################################################
# Helper funcs
################################################
@dataclass
class AI_Data:
    """Aggregated counters for one plotted entry (a kernel or a dispatch).

    Instances are built by plot_application(); the field order is part of the
    interface because the class may be constructed positionally.
    """

    # Identification
    KernelName: str  # kernel name as read from the "KernelName" column
    numCalls: float  # number of dispatches folded into this entry

    # Floating-point work
    total_flops: float  # VALU plus MFMA operations
    valu_flops: float  # VALU operations only
    mfma_flops_f16: float  # MFMA operations, F16
    mfma_flops_bf16: float  # MFMA operations, BF16
    mfma_flops_f32: float  # MFMA operations, F32
    mfma_flops_f64: float  # MFMA operations, F64

    # Data moved at each memory level
    lds_data: float
    L1cache_data: float
    L2cache_data: float
    hbm_data: float

    # Timing, from the EndNs - BeginNs difference of the dispatches
    totalDuration: float  # summed duration
    avgDuration: float  # duration per call
def get_font():
    """Return a fresh dict of text keyword arguments shared by all plot labels.

    A new dict is built on every call, so callers may modify the result
    (e.g. enlarge "size" for the title) without affecting later calls.
    """
    font = dict(size=FONT_SIZE, color=FONT_COLOR, weight=FONT_WEIGHT)
    font["family"] = "serif"
    return font
def get_color(catagory):
    """Return the scatter colour used for an arithmetic-intensity series.

    Args:
        catagory: one of "curr_ai_l1", "curr_ai_l2" or "curr_ai_hbm".

    Raises:
        RuntimeError: for any other value.
    """
    series_colors = (
        ("curr_ai_l1", "green"),
        ("curr_ai_l2", "blue"),
        ("curr_ai_hbm", "red"),
    )
    for series, color in series_colors:
        if catagory == series:
            return color
    raise RuntimeError("Invalid catagory passed to get_color()")
# -------------------------------------------------------------------------------------
# Plot BW at each cache level
# -------------------------------------------------------------------------------------
def plot_roof(inputs, roof_data):
    """Draw the bandwidth and compute roofs on the current matplotlib figure.

    Args:
        inputs: dict with at least
            "mem"  -- "ALL" or a single level name ("HBM", "L2", "L1", "LDS");
            "axes" -- falsy, or a sequence whose first two items are the
                      user-chosen x limits (used for label placement).
        roof_data: dict of column name -> sequence; element 0 of the
            "<level>Bw", "FP32Flops", "FP64Flops" and "MFMAF<bits>Flops"
            columns is read and converted to float.

    Returns:
        The data type the roofs were drawn for: "FP32" when its peak exceeds
        the FP64 peak, otherwise "FP64".
    """
    if inputs["mem"] == "ALL":
        cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
    else:
        cacheHierarchy = [inputs["mem"]]

    targ_dtype = (
        "FP32"
        if float(roof_data["FP32Flops"][0]) > float(roof_data["FP64Flops"][0])
        else "FP64"
    )
    print("Dtype: ", targ_dtype)
    print(inputs["mem"])

    target_precision = targ_dtype[2:]
    peakOps = float(roof_data[targ_dtype + "Flops"][0])
    # The MFMA peak does not depend on the memory level, so read it once.
    peakMFMA = float(roof_data["MFMAF{}Flops".format(target_precision)][0])

    # -------------------------------------------------------------------------------------
    # Plot BW at each cache level
    # -------------------------------------------------------------------------------------
    for level in cacheHierarchy:
        peakBw = float(roof_data[level + "Bw"][0])

        # Bandwidth line: from the left edge up to where it meets the VALU roof.
        x1 = float(XMIN)
        y1 = x1 * peakBw
        x2 = peakOps / peakBw
        y2 = peakOps
        plt.plot([x1, x2], [y1, y2], color="magenta")

        # Extension of the same line up to the MFMA roof
        # (NOTE: Assuming MI200 soc)
        x2_mfma = peakMFMA / peakBw
        plt.plot([x2, x2_mfma], [peakOps, peakMFMA], color="blue")

        # Tilt of the label, from the slope of the line in log space.
        dx = abs(log(x2) - log(x1))
        dy = abs(log(y2) - log(y1))
        angle = (180.0 / pi) * numpy.arctan(dy / dx)

        # L1 is presented to the user as "vL1D" in both label variants.
        level_name = "vL1D" if level.upper() == "L1" else level.upper()
        bw_label = "{} {} GB/s".format(int(peakBw), level_name)

        if not inputs["axes"]:
            # Default axes: centre the label on the line. The midpoint in log
            # space is the geometric mean of the end points.
            text(
                sqrt(x1 * x2),
                sqrt(y1 * y2),
                bw_label,
                rotation=angle,
                rotation_mode="anchor",
                **get_font(),
            )
        elif inputs["axes"][0] < x2:
            # Zoomed axes: label the upper end of the line, but only when that
            # point is not left of the chosen lower x limit.
            text(
                x2,
                y2,
                bw_label,
                rotation=angle,
                rotation_mode="anchor",
                **get_font(),
            )

    # -------------------------------------------------------------------------------------
    # Plot computing roof
    # -------------------------------------------------------------------------------------
    # cacheHierarchy is never empty, so x2 / x2_mfma are always set here; they
    # hold the values of the last memory level drawn.
    label_x = XMAX if not inputs["axes"] else inputs["axes"][1]

    # Plot FMA roof
    x0 = min(XMAX, x2)
    plt.plot([x0, XMAX], [peakOps, peakOps], color="magenta")
    text(
        label_x,
        peakOps - 4000,  # fixed vertical offset below the roof line
        "{} VALU GFLOP/sec".format(int(peakOps)),
        horizontalalignment="right",
        **get_font(),
    )

    # Plot MFMA roof
    x0_mfma = min(XMAX, x2_mfma)
    plt.plot([x0_mfma, XMAX], [peakMFMA, peakMFMA], color="blue")
    text(
        label_x,
        peakMFMA + 1000,  # fixed vertical offset above the roof line
        "{} MFMA GFLOP/sec".format(int(peakMFMA)),
        horizontalalignment="right",
        **get_font(),
    )

    return targ_dtype
# -------------------------------------------------------------------------------------
# Overlay application performance
# -------------------------------------------------------------------------------------
# Calculate the metrics relevant to the arithmetic intensity (AI) calculation
def _new_accumulator():
    """Return zeroed running totals for one kernel / dispatch."""
    return {
        "calls": 0.0,
        "total_flops": 0.0,
        "valu_flops": 0.0,
        "mfma_flops_f16": 0.0,
        "mfma_flops_bf16": 0.0,
        "mfma_flops_f32": 0.0,
        "mfma_flops_f64": 0.0,
        "mfma_iops_i8": 0.0,  # accumulated, but AI_Data has no field for it
        "lds_data": 0.0,
        "L1cache_data": 0.0,
        "L2cache_data": 0.0,
        "hbm_data": 0.0,
        "duration": 0.0,
    }


def _valu_flops_of(row):
    """VALU operations of one CSV row; raises KeyError if a counter column is missing."""
    flops = 0
    for precision in ("F16", "F32", "F64"):
        flops += 64 * (
            row["SQ_INSTS_VALU_ADD_" + precision]
            + row["SQ_INSTS_VALU_MUL_" + precision]
            + (2 * row["SQ_INSTS_VALU_FMA_" + precision])
            + row["SQ_INSTS_VALU_TRANS_" + precision]
        )
    return flops


def _accumulate_row(acc, row, index, verbose):
    """Add the counters of one CSV row to ``acc``.

    Every group of counters is best effort: when one of its columns is missing
    the group is skipped for this row (reported when ``verbose >= 2``).
    """

    def skipped(what):
        if verbose >= 2:
            print("Skipped {} at index {}".format(what, index))

    try:
        acc["total_flops"] += (
            _valu_flops_of(row)
            + (row["SQ_INSTS_VALU_MFMA_MOPS_F16"] * 512)
            + (row["SQ_INSTS_VALU_MFMA_MOPS_BF16"] * 512)
            + (row["SQ_INSTS_VALU_MFMA_MOPS_F32"] * 512)
            + (row["SQ_INSTS_VALU_MFMA_MOPS_F64"] * 512)
        )
    except KeyError:
        skipped("total_flops")

    try:
        acc["valu_flops"] += _valu_flops_of(row)
    except KeyError:
        skipped("valu_flops")

    try:
        # Added one by one, so counters before a missing column still count.
        for key, column in (
            ("mfma_flops_f16", "SQ_INSTS_VALU_MFMA_MOPS_F16"),
            ("mfma_flops_bf16", "SQ_INSTS_VALU_MFMA_MOPS_BF16"),
            ("mfma_flops_f32", "SQ_INSTS_VALU_MFMA_MOPS_F32"),
            ("mfma_flops_f64", "SQ_INSTS_VALU_MFMA_MOPS_F64"),
            ("mfma_iops_i8", "SQ_INSTS_VALU_MFMA_MOPS_I8"),
        ):
            acc[key] += row[column] * 512
    except KeyError:
        skipped("mfma ops")

    try:
        acc["lds_data"] += (
            (row["SQ_LDS_IDX_ACTIVE"] - row["SQ_LDS_BANK_CONFLICT"]) * 4 * L2_BANKS
        )  # L2_BANKS = 32 (since assuming mi200)
    except KeyError:
        skipped("lds_data")

    try:
        acc["L1cache_data"] += row["TCP_TOTAL_CACHE_ACCESSES_sum"] * 64
    except KeyError:
        skipped("L1cache_data")

    try:
        acc["L2cache_data"] += (
            row["TCP_TCC_WRITE_REQ_sum"] * 64
            + row["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"] * 64
            + row["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"] * 64
            + row["TCP_TCC_READ_REQ_sum"] * 64
        )
    except KeyError:
        skipped("L2cache_data")

    try:
        acc["hbm_data"] += (
            (row["TCC_EA_RDREQ_32B_sum"] * 32)
            + ((row["TCC_EA_RDREQ_sum"] - row["TCC_EA_RDREQ_32B_sum"]) * 64)
            + (row["TCC_EA_WRREQ_64B_sum"] * 64)
            + ((row["TCC_EA_WRREQ_sum"] - row["TCC_EA_WRREQ_64B_sum"]) * 32)
        )
    except KeyError:
        skipped("hbm_data")

    acc["duration"] += row["EndNs"] - row["BeginNs"]
    acc["calls"] += 1


def _to_ai_data(kernel_name, acc, per_call):
    """Convert running totals into an AI_Data record.

    With ``per_call`` the counters and the average duration are divided by the
    number of calls; the total duration is always the plain sum.
    """
    scale = acc["calls"] if per_call else 1.0
    # Keyword arguments keep this call aligned with the AI_Data fields.
    return AI_Data(
        KernelName=kernel_name,
        numCalls=acc["calls"],
        total_flops=acc["total_flops"] / scale,
        valu_flops=acc["valu_flops"] / scale,
        mfma_flops_f16=acc["mfma_flops_f16"] / scale,
        mfma_flops_bf16=acc["mfma_flops_bf16"] / scale,
        mfma_flops_f32=acc["mfma_flops_f32"] / scale,
        mfma_flops_f64=acc["mfma_flops_f64"] / scale,
        lds_data=acc["lds_data"] / scale,
        L1cache_data=acc["L1cache_data"] / scale,
        L2cache_data=acc["L2cache_data"] / scale,
        hbm_data=acc["hbm_data"] / scale,
        totalDuration=acc["duration"],
        avgDuration=acc["duration"] / scale,
    )


def plot_application(inputs, verbose):
    """Overlay the application's measured performance on the current plot.

    Reads ``<inputs["path"]>/pmc_perf.csv``, aggregates its rows either per
    kernel name (``inputs["sort"] == "kernels"``, values averaged per call) or
    per dispatch (``"dispatches"``, one entry per row), and scatter-plots the
    ten entries with the largest total duration: arithmetic intensity at L1,
    L2 and HBM on x versus achieved performance on y.

    Args:
        inputs: dict providing "path" and "sort".
        verbose: verbosity level; at 2 or above, progress and skipped counter
            groups are printed.
    """
    df = pd.read_csv(inputs["path"] + "/pmc_perf.csv")
    # Sorting by name makes all dispatches of one kernel contiguous.
    df = df.sort_values(by=["KernelName"])
    df = df.reset_index(drop=True)

    per_kernel = inputs["sort"] == "kernels"
    per_dispatch = inputs["sort"] == "dispatches"

    myList = []
    acc = _new_accumulator()
    kernelName = ""
    for index, row in df.iterrows():
        # CASE: Top kernels -- a new name closes the group collected so far.
        if per_kernel and acc["calls"] and row["KernelName"] != kernelName:
            myList.append(_to_ai_data(kernelName, acc, per_call=True))
            if verbose >= 2:
                print(
                    "Just added {} to AI_Data at index {}. # of calls: {}".format(
                        kernelName, index, acc["calls"]
                    )
                )
            acc = _new_accumulator()

        kernelName = row["KernelName"]
        _accumulate_row(acc, row, index, verbose)

        # CASE: Top dispatches -- every row is its own entry.
        if per_dispatch:
            myList.append(_to_ai_data(kernelName, acc, per_call=False))
            acc = _new_accumulator()

    # Close the last kernel group only after its final row has been counted.
    if per_kernel and acc["calls"]:
        myList.append(_to_ai_data(kernelName, acc, per_call=True))
        if verbose >= 2:
            print(
                "Just added {} to AI_Data at index {}. # of calls: {}".format(
                    kernelName, df.shape[0] - 1, acc["calls"]
                )
            )

    myList.sort(key=lambda x: x.totalDuration, reverse=True)

    print("Top 10 intensities ('{}')...".format(inputs["sort"]))

    # Arithmetic intensity (flops per unit of data) and performance (flops per
    # unit of duration) of the top 10 entries; 0 when the denominator is 0.
    intensities = {"curr_ai_l1": [], "curr_ai_l2": [], "curr_ai_hbm": []}
    curr_perf = []
    for entry in myList[:10]:
        for series, data_moved in (
            ("curr_ai_l1", entry.L1cache_data),
            ("curr_ai_l2", entry.L2cache_data),
            ("curr_ai_hbm", entry.hbm_data),
        ):
            intensities[series].append(
                entry.total_flops / data_moved if data_moved else 0
            )
        curr_perf.append(
            entry.total_flops / entry.avgDuration if entry.avgDuration else 0
        )

    print(intensities)

    # One scatter series per memory level.
    plotted_spots = []
    labels = []
    for series, values in intensities.items():
        myScatter = plt.scatter(
            list(values),
            curr_perf[: len(values)],
            c=get_color(series),
            marker="o",
        )
        plotted_spots.append(myScatter)
        labels.append(series)

    try:
        pylab.legend(
            plotted_spots,
            labels,
            prop={"size": (FONT_SIZE - 2)},
            bbox_to_anchor=(1.04, 1),
            loc="upper left",
            title="Top {}".format(inputs["sort"]),
            title_fontsize=FONT_SIZE,
        )
    except Exception as e:
        # Report the failure and fall back to a plain legend without the
        # extended options.
        sys.stderr.write(f"{e}\n")
        pylab.legend(
            plotted_spots,
            labels,
            prop={"size": (FONT_SIZE - 2)},
        )
def empirical_roof(args):
    """Generate the empirical roofline PDF for one workload.

    Validates the command-line options, loads the measured peaks from
    ``<path>/roofline.csv``, draws the roofs (plot_roof) and the application
    points (plot_application), and saves the figure into ``args.path``.

    Args:
        args: parsed command-line namespace; the attributes read here are
            target, sort, mem_level, device, path, remaining, axes, verbose.

    Exits the process (sys.exit) on an invalid sort mode, an invalid memory
    level, or an unsupported SoC.
    """
    soc = args.target

    sort_mode = args.sort.lower()
    mem_level = args.mem_level.upper()

    if sort_mode not in ("kernels", "dispatches"):
        sys.exit("Invalid sort. Must be either 'kernels' or 'dispatches'")

    if mem_level not in ("HBM", "VL1D", "L2", "LDS", "ALL"):
        # 'ALL' is accepted above, so it is listed here as well.
        sys.exit(
            "Invalid mem-level. Must be one of these options: 'LDS', 'L2', 'vL1D', 'HBM', or 'ALL'"
        )
    # The roofline data names this level "L1".
    if mem_level == "VL1D":
        mem_level = "L1"

    inputs = {
        "path": args.path,
        "cmd": args.remaining,
        "sort": sort_mode,
        "mem": mem_level,
        "axes": args.axes,
        "device": int(args.device),
    }

    if soc not in SUPPORTED_SOC:
        sys.exit("SoC not yet supported for Roofline Analysis")

    # Basic Info
    print("Path: ", inputs["path"])
    print("Target: ", soc)
    print("Memory Level: ", inputs["mem"])

    roofPath = inputs["path"] + "/roofline.csv"

    # -----------------------------------------------------
    # Initialize roofline data dictionary from roofline.csv
    # -----------------------------------------------------
    # Maps each header name to the list of its values, one per data row.
    roof_data = {}
    headers = []
    header_seen = False
    with open(roofPath, "r") as csvfile:
        for row in csv.reader(csvfile, delimiter=","):
            if not row:
                # Blank line: nothing to read, and pop(0) below would fail.
                continue
            row.pop(0)  # remove devID
            if not header_seen:
                header_seen = True
                headers = row
                for name in headers:
                    roof_data[name] = []
            else:
                for position, key in enumerate(headers):
                    roof_data[key].append(row[position])

    # Initialize plot
    f = plt.figure(figsize=(1600 / 100, 1200 / 100), dpi=100)
    f.add_subplot(111)

    _title_font = get_font()
    _title_font["size"] += 8
    plt.title("Empirical Roofline", **_title_font)
    plt.xlabel("Arithmetic Intensity (FLOP/Byte)", **get_font())
    plt.ylabel("Performance (GFLOP/sec)", **get_font())
    plt.grid(True, which="major", ls="--", lw=1)
    plt.grid(True, which="minor", ls="--", lw=0.5)
    plt.yscale("log")
    plt.xscale("log")

    # Adjust axes if instructed: [xmin, xmax, ymin, ymax]
    if inputs["axes"]:
        plt.xlim(inputs["axes"][0], inputs["axes"][1])
        plt.ylim(inputs["axes"][2], inputs["axes"][3])

    # ------------------
    # Generate Roofline
    # ------------------
    dtype = plot_roof(inputs, roof_data)  # Also returns chosen dtype
    plot_application(inputs, args.verbose)

    filename = IMGNAME + "_gpu-" + str(inputs["device"]) + "_{}".format(dtype) + ".pdf"
    full_path = os.path.abspath(inputs["path"])
    path_to_output = full_path + "/" + filename

    print('Saving plot: "{}"...'.format(filename))
    plt.savefig(path_to_output, bbox_inches="tight", format="pdf")
    print('File saved to: "{}"'.format(path_to_output))
    plt.close()
+11 -8
Näytä tiedosto
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,22 +12,23 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import argparse
import getpass
from pymongo import MongoClient
# Verify target directory and setup connection
def remove_workload(args):
# parser = argparse.ArgumentParser(description='Remove a workload from an Omniperf Instance')
+10 -8
Näytä tiedosto
@@ -1,5 +1,7 @@
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,17 +10,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import pathlib
import sys
+14 -13
Näytä tiedosto
@@ -1,7 +1,9 @@
"""Get host/gpu specs."""
################################################################################
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -10,17 +12,17 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import os
import re
@@ -32,7 +34,7 @@ from dataclasses import dataclass
from pathlib import Path as path
from textwrap import dedent
gpu_list = {"gfx906", "gfx908", "gfx90a"}
gpu_list = {"gfx906", "gfx908", "gfx90a", "gfx900"}
@dataclass
@@ -84,7 +86,6 @@ class MachineSpecs:
def gpuinfo():
rocminfo = run(["rocminfo"]).split("\n")
for idx1, linetext in enumerate(rocminfo):
@@ -97,7 +98,6 @@ def gpuinfo():
L1, L2 = "", ""
for idx2, linetext in enumerate(rocminfo[idx1 + 1 :]):
key = search(r"^\s*L1:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
L1 = key
@@ -148,6 +148,9 @@ def gpuinfo():
def run(cmd):
    """Execute ``cmd`` and return everything it wrote to stdout as text.

    Args:
        cmd: Argument list handed to ``subprocess.run``; ``cmd[0]`` is the
            program name.

    Returns:
        The captured standard output decoded as ASCII. Standard error is
        captured as well but discarded.

    The process is terminated with exit status 1 when ``rocm-smi`` returns
    code 8, which this tool reports as "no GPU detected".
    """
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Only rocm-smi gets special treatment; any other command's exit status
    # is ignored and its output is returned regardless.
    smi_without_gpu = cmd[0] == "rocm-smi" and completed.returncode == 8
    if smi_without_gpu:
        print("ERROR: No GPU detected. Unable to load rocm-smi")
        sys.exit(1)

    captured = completed.stdout
    return captured.decode("ascii")
@@ -159,7 +162,6 @@ def search(pattern, string):
def get_machine_specs(devicenum):
cpuinfo = path("/proc/cpuinfo").read_text()
meminfo = path("/proc/meminfo").read_text()
version = path("/proc/version").read_text()
@@ -180,7 +182,6 @@ def get_machine_specs(devicenum):
for itr in version_loc:
_path = os.path.join(os.getenv("ROCM_PATH", "/opt/rocm"), ".info", itr)
if os.path.exists(_path):
print(_path)
rocm_ver = path(_path).read_text()
rocmFound = True
break
+8 -8
Näytä tiedosto
@@ -145,7 +145,7 @@ def test_filter_dispatch_ids_mi100():
"analyze",
"--path",
"tests/workloads/mixbench/mi100",
"--filter-dispatch-ids",
"--dispatch",
"0",
],
):
@@ -162,7 +162,7 @@ def test_filter_dispatch_ids_inv_mi100():
"analyze",
"--path",
"tests/workloads/mixbench/mi100",
"--filter-dispatch-ids",
"--dispatch",
"99",
],
):
@@ -179,7 +179,7 @@ def test_filter_gpu_ids_mi100():
"analyze",
"--path",
"tests/workloads/mixbench/mi100",
"--filter-gpu-ids",
"--gpu-id",
"0",
],
):
@@ -196,7 +196,7 @@ def test_filter_gpu_ids_inv_mi100():
"analyze",
"--path",
"tests/workloads/mixbench/mi100",
"--filter-gpu-ids",
"--gpu-id",
"99",
],
):
@@ -490,7 +490,7 @@ def test_filter_dispatch_ids_mi200():
"analyze",
"--path",
"tests/workloads/mixbench/mi200",
"--filter-dispatch-ids",
"--dispatch",
"0",
],
):
@@ -507,7 +507,7 @@ def test_filter_dispatch_ids_inv_mi200():
"analyze",
"--path",
"tests/workloads/mixbench/mi200",
"--filter-dispatch-ids",
"--dispatch",
"99",
],
):
@@ -524,7 +524,7 @@ def test_filter_gpu_ids_mi200():
"analyze",
"--path",
"tests/workloads/mixbench/mi200",
"--filter-gpu-ids",
"--gpu-id",
"0",
],
):
@@ -541,7 +541,7 @@ def test_filter_gpu_ids_inv_mi200():
"analyze",
"--path",
"tests/workloads/mixbench/mi200",
"--filter-gpu-ids",
"--gpu-id",
"99",
],
):
+130
Näytä tiedosto
@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -------------------------------------------------------------------------------
# Support script for license header management.
# -------------------------------------------------------------------------------
import argparse
import logging
import glob
import os
import sys
import re
import filecmp
import shutil
# Regular expressions (applied with re.search) that mark the first and last
# lines of a managed license header; "$" anchors the marker to the end of the
# line.
begDelim = "######bl$"
endDelim = "######el$"
# Upper bound on how many lines after a beginning delimiter are searched for
# the matching ending delimiter before the header is declared malformed.
maxHeaderLines = 200
def cacheLicenseFile(infile, comment="#"):
    """Read a license file and return it as a block of comment lines.

    Every line of ``infile`` is prefixed with ``comment``. Lines that contain
    non-whitespace text get a single space between the prefix and the text;
    blank lines get the bare prefix only.

    Args:
        infile: Path to the plain-text license file.
        comment: Comment leader to put in front of each line (default ``#``).

    Returns:
        The commented license text as one string. Every line, including the
        last, is newline-terminated so that whatever is written after the
        block starts on a line of its own.

    Logs an error and exits the process with status 1 if ``infile`` is not an
    existing regular file.
    """
    if not os.path.isfile(infile):
        logging.error("Unable to access license file - >%s" % infile)
        sys.exit(1)

    commented = []
    with open(infile, "r") as file_in:
        for line in file_in:
            separator = " " if line.strip() != "" else ""
            # Only the final line of a file can lack a terminator; add one so
            # the end delimiter is never fused onto the last license line.
            terminator = "" if line.endswith("\n") else "\n"
            commented.append(comment + separator + line + terminator)
    return "".join(commented)
# Command-line interface: the license file and the source tree are mandatory,
# and exactly one file-selection mode (--extension or --files) must be given.
parser = argparse.ArgumentParser()
parser.add_argument("--license", help="License File", required=True)
parser.add_argument("--source", help="Source directory", required=True)
parser.add_argument("--dryrun", action="store_true", help="enable dryrun mode")
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--extension", help="file extension to parse")
group.add_argument("--files", help="specific file(s) to parse")

logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
args = parser.parse_args()

srcDir = args.source
# An empty option value is treated the same as an absent one.
fileExtension = args.extension if args.extension else None
# --files takes a comma-separated list of paths relative to the source tree.
specificFiles = args.files.split(",") if args.files else None

print("")
logging.info("Source directory = %s", srcDir)
if fileExtension:
    logging.info("File extension = %s", fileExtension)
if specificFiles:
    logging.info("Specific files = %s", specificFiles)

# Load the license text once, already formatted as comment lines.
license = cacheLicenseFile(args.license)
# Scan files in provided source directory and refresh every delimited license
# header found in the selected files.

# Full paths selected via --files, built once instead of once per scanned file.
if specificFiles:
    wantedPaths = {os.path.join(srcDir, name) for name in specificFiles}
else:
    wantedPaths = None

for filename in glob.iglob(srcDir + "/**", recursive=True):
    # skip directories
    if os.path.isdir(filename):
        continue

    # File matching options:
    # (1) filter non-matching extensions
    if fileExtension and not filename.endswith(fileExtension):
        continue

    # or, (2) filter for specific filename
    if wantedPaths is not None and filename not in wantedPaths:
        continue

    logging.debug("Examining %s for license..." % filename)

    # The updated copy is staged in a hidden sibling file so the original is
    # only replaced once the rewrite has completed.
    baseName = os.path.basename(filename)
    dirName = os.path.dirname(filename)
    tmpFile = os.path.join(dirName, "." + baseName + ".tmp")

    # Update license header contents if delimiters are found
    try:
        with open(filename, "r") as file_in, open(tmpFile, "w") as file_out:
            # One shared iterator serves both the outer scan and the inner
            # search for the end delimiter.
            lines = iter(file_in)
            for line in lines:
                if not re.search(begDelim, line):
                    file_out.write(line)
                    continue

                logging.debug("Found beginning delimiter")
                file_out.write(line)
                file_out.write(license)

                # Discard the old header body: skip ahead to the ending
                # delimiter, giving up after maxHeaderLines lines.
                foundEnd = False
                for _ in range(maxHeaderLines):
                    line = next(lines, "")
                    if re.search(endDelim, line):
                        logging.debug("Found ending delimiter")
                        file_out.write(line)
                        foundEnd = True
                        break

                if not foundEnd:
                    logging.error("Unable to find end of delimited header")
                    sys.exit(1)
    except BaseException:
        # Never leave a partially written staging file behind, whether the
        # failure is the sys.exit above or an I/O/decoding error.
        if os.path.exists(tmpFile):
            os.unlink(tmpFile)
        raise

    # Check if file changed and update
    if filecmp.cmp(filename, tmpFile, shallow=False):
        os.unlink(tmpFile)
        continue

    logging.info("%s changed" % filename)
    if args.dryrun:
        # Dry run only reports the change; remove the staged copy instead of
        # leaving hidden .tmp files in the source tree.
        os.unlink(tmpFile)
    else:
        # Carry over permissions and timestamps, then swap the file in.
        shutil.copystat(filename, tmpFile)
        os.replace(tmpFile, filename)