@@ -1,7 +1,6 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: analyze-workloads
|
||||
|
||||
name: RHEL 8
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
@@ -19,7 +18,7 @@ jobs:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: rocm/dev-ubuntu-22.04:5.3
|
||||
image: colramos/target-images:rhel8
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Cancel any previous runs
|
||||
@@ -27,26 +26,31 @@ jobs:
|
||||
uses: styfle/cancel-workflow-action@0.11.0
|
||||
- name: Install baseline OS dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git
|
||||
sudo apt-get install -y python3-pip
|
||||
sudo apt-get install -y cmake
|
||||
- name: Checkout
|
||||
yum -y update
|
||||
yum -y install git
|
||||
yum -y install python39
|
||||
yum -y install cmake3
|
||||
yum -y install which
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: recursive
|
||||
token: ${{ secrets.GH_PAT }}
|
||||
- name: Install Python prereqs
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install pyinstaller pytest pytest-cov
|
||||
- name: Configure and install
|
||||
python3.9 -m pip install -r requirements.txt
|
||||
python3.9 -m pip install pyinstaller pytest pytest-cov mock
|
||||
- name: Configure and install
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/opt/omniperf ..
|
||||
make install
|
||||
- name: run ctest
|
||||
- name: CTest- Analyze Commands
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_commands
|
||||
- name: CTest- Analyze Workloads
  run: |
    cd build
    # -R accepts a single regular expression; join both test names with '|'
    # so that test_saved_analysis is part of the filter as well.
    ctest --verbose -R "test_analyze_workloads|test_saved_analysis"
|
||||
@@ -1,6 +1,6 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: analyze-commands
|
||||
name: Ubuntu 20.04
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
@@ -45,7 +45,11 @@ jobs:
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/opt/omniperf ..
|
||||
make install
|
||||
- name: Run ctest
|
||||
- name: CTest- Analyze Commands
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_commands
|
||||
- name: CTest- Analyze Workloads
  run: |
    cd build
    # -R accepts a single regular expression; join both test names with '|'
    # so that test_saved_analysis is part of the filter as well.
    ctest --verbose -R "test_analyze_workloads|test_saved_analysis"
|
||||
@@ -1,3 +1,17 @@
|
||||
Version 1.0.7 (21 Feb 2023)
|
||||
|
||||
* update documentation (#52, #64)
|
||||
* improved detection of invalid command line arguments (#58, #76)
|
||||
* enhancements to standalone roofline (#61)
|
||||
* enable Omniperf on systems with X-server (#62)
|
||||
* raise minimum version requirement for rocm (#64)
|
||||
* enable baseline comparison in CLI analysis (#65)
|
||||
* add multi-normalization to new metrics (#68, #81)
|
||||
* support alternative profilers (#70)
|
||||
* add MI100 configs to override rocprofiler's incomplete default (#75)
|
||||
* improve error message when no GPU(s) detected (#85)
|
||||
* separate CI tests by Linux distro and add status badges
|
||||
|
||||
Version 1.0.6 (21 Dec 2022)
|
||||
|
||||
* CI update: documentation now published via github action (#22)
|
||||
|
||||
+14
-1
@@ -83,7 +83,8 @@ set(pythonDeps
|
||||
"tabulate"
|
||||
"tqdm"
|
||||
"dash_svg"
|
||||
"dash_bootstrap_components")
|
||||
"dash_bootstrap_components"
|
||||
"kaleido")
|
||||
|
||||
message(STATUS "Checking for required Python package dependencies...")
|
||||
set_property(GLOBAL PROPERTY pythonDepsFlag "groovy")
|
||||
@@ -291,6 +292,18 @@ install(
|
||||
# set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "omniperf") set(CPACK_RESOURCE_FILE_LICENSE
|
||||
# "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
|
||||
# License header update(s)
#
# `license` is a utility target that is only run when requested explicitly
# (it is not attached to ALL). It invokes utils/update_license.py twice:
#   1. over ${PROJECT_SOURCE_DIR}/src, selecting files by the .py extension;
#   2. over ${PROJECT_SOURCE_DIR}, for an explicit comma-separated file list.
# VERBATIM makes CMake escape every argument itself, so the extension is
# written without shell quotes and reaches the script as plain `.py`.
add_custom_target(
    license
    COMMAND
        ${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR}/src
        --license ${PROJECT_SOURCE_DIR}/LICENSE --extension .py
    COMMAND
        ${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR}
        --license ${PROJECT_SOURCE_DIR}/LICENSE --file
        "src/omniperf,cmake/Dockerfile,cmake/rocm_install.sh,docker/docker-entrypoint.sh,src/omniperf_analyze/convertor/mongodb/convert"
    VERBATIM)
|
||||
|
||||
# Source tarball
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${FULL_VERSION_STRING})
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
[](https://github.com/AMDResearch/omniperf/actions/workflows/ubuntu-focal.yml)
|
||||
[](https://github.com/AMDResearch/omniperf/actions/workflows/opensuse.yml)
|
||||
[](https://amdresearch.github.io/omniperf/)
|
||||
[](https://zenodo.org/badge/latestdoi/561919887)
|
||||
|
||||
|
||||
+10
-8
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
FROM ubuntu:20.04
|
||||
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]}))
|
||||
|
||||
OMNIPERF_DIR=$(realpath ${SCRIPT_DIR}/@OMNIPERF_RELATIVE_PATH@)
|
||||
|
||||
if [ ! -f ${OMNIPERF_DIR}/omniperf ]; then
|
||||
echo -e "Error! Expected omniperf installation in ${OMNIPERF_DIR}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
eval ${OMNIPERF_DIR}/omniperf "${@}"
|
||||
@@ -1,4 +1,4 @@
|
||||
-- Crusher-specific additions
|
||||
depends_on "cray-python"
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","5.1.0"))
|
||||
prereq(atleast("rocm","5.2.0"))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
-- Thera-specific additions
|
||||
depends_on "python"
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","5.1.0"))
|
||||
prereq(atleast("rocm","5.2.0"))
|
||||
local home = os.getenv("HOME")
|
||||
setenv("MPLCONFIGDIR",pathJoin(home,".matplotlib"))
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
declare -a rocm_versions=("4.3.1" "4.5.2" "5.0.2" "5.1.3" "5.2.3")
|
||||
wget https://repo.radeon.com/amdgpu-install/22.10/ubuntu/focal/amdgpu-install_22.10.50100-1_all.deb
|
||||
@@ -29,4 +31,4 @@ for rocm_version in ${rocm_versions[@]}; do
|
||||
echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$rocm_version ubuntu main" | tee /etc/apt/sources.list.d/rocm.list
|
||||
apt update
|
||||
amdgpu-install -y --usecase=rocm --rocmrelease=$rocm_version --no-dkms
|
||||
done
|
||||
done
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,7 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
pushd /var/lib/grafana/plugins/omniperfData_plugin
|
||||
npm run server &
|
||||
|
||||
@@ -10,3 +10,4 @@ tabulate
|
||||
tqdm
|
||||
dash-svg
|
||||
dash-bootstrap-components
|
||||
kaleido
|
||||
|
||||
+11
-9
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import sys
|
||||
@@ -31,7 +33,7 @@ OMNIPERF_HOME = Path(__file__).resolve().parent
|
||||
|
||||
# OMNIPERF INFO
|
||||
PROG = "omniperf"
|
||||
SOC_LIST = ["mi50", "mi100", "mi200"]
|
||||
SOC_LIST = ["mi50", "mi100", "mi200", "vega10"]
|
||||
DISTRO_MAP = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"}
|
||||
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
|
||||
├─────────┼─────────────────────────────┤
|
||||
...
|
||||
```
|
||||
2. Choose your own customized subset of metrics with `-b` (a.k.a. `--filter-metrics`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light).
|
||||
2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light).
|
||||
```shell
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ -b 2
|
||||
--------
|
||||
@@ -317,10 +317,12 @@ allowing users to view results from within a web browser.
|
||||
|
||||
Note that the standalone GUI analyzer publishes a web interface on port 8050 by default.
|
||||
On production HPC systems where profiling jobs run
|
||||
under the auspices of a resource manager, additional ssh tunneling
|
||||
under the auspices of a resource manager, additional SSH tunneling
|
||||
between the desired web browser host (e.g. login node or remote workstation) and compute host may be
|
||||
required. Alternatively, users may find it more convenient to download
|
||||
profiled workloads to perform analysis on their local system.
|
||||
|
||||
See [FAQ](https://amdresearch.github.io/omniperf/faq.html) for more details on SSH tunneling.
|
||||
```
|
||||
|
||||
#### Usage
|
||||
|
||||
@@ -141,6 +141,7 @@ for pref in preferences:
|
||||
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
|
||||
# app setup hook
|
||||
def setup(app):
|
||||
app.add_config_value(
|
||||
|
||||
@@ -30,3 +30,26 @@ Workaround:
|
||||
$ export LC_ALL=C.UTF-8
|
||||
$ export LANG=C.UTF-8
|
||||
```
|
||||
|
||||
**3. How can I set up an SSH tunnel in MobaXterm?**
|
||||
|
||||
1. Open MobaXterm
|
||||
2. In the top ribbon, select `Tunneling`
|
||||

|
||||
This pop-up will appear:
|
||||

|
||||
3. Press `New SSH tunnel`
|
||||

|
||||
4. Configure the tunnel as follows:
|
||||
|
||||
Local clients
|
||||
- Forwarded Port: [PORT]
|
||||
|
||||
Remote Server
|
||||
- Remote Server: localhost
|
||||
- Remote Port: [PORT]
|
||||
|
||||
SSH Server
|
||||
- SSH server: Name of the server you are connecting to
|
||||
- SSH login: Username used to log in to the server
|
||||
- SSH port: 22
|
||||
@@ -82,7 +82,8 @@ Modes change the fundamental behavior of the Omniperf command line tool. Dependi
|
||||
Operation | Mode | Required Arguments
|
||||
:--|:--|:--
|
||||
Profile a workload | profile | `--name`, `-- <profile_cmd>`
|
||||
Standalone roofline analysis | profile | `--name`, `--only-roof`, `-- <profile_cmd>`
|
||||
Standalone roofline analysis | profile | `--name`, `--roof-only`, `-- <profile_cmd>`
|
||||
Import a workload to database | database | `--import`, `--host`, `--username`, `--workload`, `--team`
|
||||
Remove a workload from database | database | `--remove`, `--host`, `--username`, `--workload`, `--team`
|
||||
Interact with profiling results from CLI | analyze | `--path`, `--gui`
|
||||
Launch standalone GUI from CLI | analyze | `--path`, `--gui`
|
||||
Interact with profiling results from CLI | analyze | `--path`
|
||||
Binary file not shown.
|
After Leveys: | Korkeus: | Koko: 23 KiB |
Binary file not shown.
|
After Leveys: | Korkeus: | Koko: 12 KiB |
Binary file not shown.
|
After Leveys: | Korkeus: | Koko: 29 KiB |
@@ -24,7 +24,7 @@ Omniperf requires the following basic software dependencies prior to usage:
|
||||
|
||||
* Python (>= 3.7)
|
||||
* CMake (>= 3.19)
|
||||
* ROCm (>= 5.1)
|
||||
* ROCm (>= 5.2.0)
|
||||
|
||||
In addition, Omniperf leverages a number of Python packages that are
|
||||
documented in the top-level `requirements.txt` file. These must be
|
||||
|
||||
+288
-155
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import sys
|
||||
import os
|
||||
@@ -37,7 +39,7 @@ from utils import specs
|
||||
from utils.perfagg import perfmon_filter, pmc_filter
|
||||
from utils import remove_workload
|
||||
from utils import csv_converter # Import workload
|
||||
from utils import plot_roofline # standalone roofline
|
||||
from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline
|
||||
from omniperf_analyze.omniperf_analyze import analyze # CLI analysis
|
||||
|
||||
from common import (
|
||||
@@ -53,10 +55,8 @@ from common import getVersion
|
||||
# Helper Functions
|
||||
################################################
|
||||
def run_subprocess(cmd):
|
||||
subprocess.run(
|
||||
cmd,
|
||||
check=True
|
||||
)
|
||||
subprocess.run(cmd, check=True)
|
||||
|
||||
|
||||
def resolve_rocprof():
|
||||
# ROCPROF INFO
|
||||
@@ -89,6 +89,8 @@ def get_soc():
|
||||
target = "mi100"
|
||||
elif mspec.GPU == "gfx90a":
|
||||
target = "mi200"
|
||||
elif mspec.GPU == "gfx900":
|
||||
target = "vega10"
|
||||
else:
|
||||
print("\nInvalid SoC")
|
||||
sys.exit(0)
|
||||
@@ -123,7 +125,9 @@ def replace_timestamps(workload_dir):
|
||||
df_pmc_perf["EndNs"] = df_stamps["EndNs"]
|
||||
df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False)
|
||||
else:
|
||||
warnings.warn("WARNING: Incomplete profiling data detected. Unable to update timestamps.")
|
||||
warnings.warn(
|
||||
"WARNING: Incomplete profiling data detected. Unable to update timestamps."
|
||||
)
|
||||
|
||||
|
||||
def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
|
||||
@@ -148,7 +152,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
|
||||
timestamp = now.strftime("%c") + " (" + local_tzname + ")"
|
||||
# host info
|
||||
param = [workload_name]
|
||||
param += [app_cmd]
|
||||
param += ['"' + app_cmd + '"']
|
||||
param += [
|
||||
mspec.hostname,
|
||||
mspec.cpu,
|
||||
@@ -212,6 +216,48 @@ def mongo_import(args, profileAndImport):
|
||||
################################################
|
||||
# Roofline Helpers
|
||||
################################################
|
||||
def roof_setup(args, my_parser):
    """Make sure a workload directory holds everything standalone roofline needs.

    Works on ``args.path`` (extended in place with ``<name>/<soc>`` when it is
    still the default ``<cwd>/workloads``) and, in this order, checks for
    ``roofline.csv``, ``sysinfo.csv`` and ``pmc_perf.csv``, generating each one
    that is missing.

    Args:
        args: parsed command-line namespace; reads ``path``, ``name``,
            ``remaining`` and ``verbose`` and may rewrite ``path``.
        my_parser: parser object handed to ``throw_parse_error`` when the
            detected SoC is not "mi200" or no application command was given.
    """
    soc_message = "Invalid SoC.\nRoofline only availible on MI200."

    # Default location: descend into the per-workload, per-SoC directory.
    default_root = os.getcwd() + "/workloads"
    if args.path == default_root:
        args.path += "/" + args.name + "/" + str(get_soc())

    # A brand-new roofline needs its directory created first.
    if not os.path.isdir(args.path):
        os.makedirs(args.path)

    # --- empirical roofline data ---
    print("Checking for roofline.csv in ", args.path)
    if not os.path.isfile(args.path + "/roofline.csv"):
        if get_soc() != "mi200":
            throw_parse_error(my_parser, soc_message)
        mibench(args)

    # --- system info ---
    # NOTE(review): args.remaining is forwarded here before the emptiness check
    # further down - confirm gen_sysinfo copes with a missing command.
    print("Checking for sysinfo.csv in ", args.path)
    if not os.path.isfile(args.path + "/sysinfo.csv"):
        print("sysinfo not found")
        gen_sysinfo(args.name, args.path, [], args.remaining, False)

    # --- application counters ---
    print("Checking for pmc_perf.csv in ", args.path)
    if os.path.isfile(args.path + "/pmc_perf.csv"):
        return

    if get_soc() != "mi200":
        throw_parse_error(my_parser, soc_message)

    if args.remaining:
        characterize_app(args.path, args.remaining, args.verbose)
    else:
        throw_parse_error(
            my_parser,
            "Cannot find existing application data.\nAttempting to generate application data from -- <app_cmd>.\n-- <app_cmd> option is required to generate application data.",
        )
|
||||
|
||||
|
||||
def detect_roofline():
|
||||
@@ -297,89 +343,7 @@ def characterize_app(path, cmd, verbose):
|
||||
# Workload profiling
|
||||
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
|
||||
print(fname)
|
||||
run_prof(fname, workload_dir, perfmon_dir, app_cmd, verbose)
|
||||
|
||||
|
||||
################################################
|
||||
# Profiling Helpers
|
||||
################################################
|
||||
|
||||
|
||||
def run_prof(fname, workload_dir, perfmon_dir, cmd, verbose):
|
||||
global rocprof_cmd
|
||||
|
||||
fbase = os.path.splitext(os.path.basename(fname))[0]
|
||||
|
||||
if verbose:
|
||||
print("pmc file:", os.path.basename(fname))
|
||||
|
||||
# profile the app
|
||||
run_subprocess(
|
||||
[
|
||||
rocprof_cmd,
|
||||
"-i",
|
||||
fname,
|
||||
"--timestamp",
|
||||
"on",
|
||||
"-o",
|
||||
workload_dir + "/" + fbase + ".csv",
|
||||
'"' + cmd + '"',
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def omniperf_profile(args, VER):
|
||||
# Verify valid target
|
||||
if args.target not in SOC_LIST:
|
||||
parse.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Basic Info
|
||||
print(PROG, "ver: ", VER)
|
||||
print("Path: ", args.path)
|
||||
print("Target: ", args.target)
|
||||
print("Command: ", args.remaining)
|
||||
print("Kernel Selection: ", args.kernel)
|
||||
print("Dispatch Selection: ", args.dispatch)
|
||||
if args.ipblocks == None:
|
||||
print("IP Blocks: All")
|
||||
else:
|
||||
print("IP Blocks: ", args.ipblocks)
|
||||
|
||||
# Set up directories
|
||||
workload_dir = args.path + "/" + args.name + "/" + args.target
|
||||
perfmon_dir = str(OMNIPERF_HOME) + "/perfmon_pub"
|
||||
|
||||
# Perfmon filtering
|
||||
perfmon_filter(workload_dir, perfmon_dir, args)
|
||||
|
||||
# Workload profiling
|
||||
for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
|
||||
# Kernel filtering (in-place replacement)
|
||||
if not args.kernel == None:
|
||||
run_subprocess(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(kernel:).*%" + "kernel: " + ",".join(args.kernel) + "%g",
|
||||
fname,
|
||||
]
|
||||
)
|
||||
|
||||
# Dispatch filtering (inplace replacement)
|
||||
if not args.dispatch == None:
|
||||
run_subprocess(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(range:).*%" + "range: " + ",".join(args.dispatch) + "%g",
|
||||
fname,
|
||||
]
|
||||
)
|
||||
run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.verbose)
|
||||
|
||||
run_prof(fname, workload_dir, perfmon_dir, app_cmd, target, verbose)
|
||||
# run again with timestamps
|
||||
run_subprocess(
|
||||
[
|
||||
@@ -390,13 +354,233 @@ def omniperf_profile(args, VER):
|
||||
"on",
|
||||
"-o",
|
||||
workload_dir + "/" + "timestamps.csv",
|
||||
'"' + args.remaining + '"',
|
||||
'"' + app_cmd + '"',
|
||||
]
|
||||
)
|
||||
|
||||
# Update pmc_perf.csv timestamps
|
||||
replace_timestamps(workload_dir)
|
||||
|
||||
|
||||
################################################
|
||||
# Profiling Helpers
|
||||
################################################
|
||||
|
||||
|
||||
def run_rocscope(args, fname):
    """Collect metrics with rocscope when the user asked for it.

    Does nothing unless ``args.use_rocscope`` is True and ``which rocscope``
    finds an executable. Otherwise runs
    ``<rocscope> metrics -p <args.path> -n <args.name> -t <fname> -- <cmd...>``
    where ``<cmd...>`` is ``args.remaining`` split on whitespace.

    Args:
        args: parsed command-line namespace; reads ``use_rocscope``, ``path``,
            ``name`` and ``remaining``.
        fname: value passed to rocscope's ``-t`` option.

    Exits the process with status 1 if the rocscope command fails.
    """
    if args.use_rocscope == True:
        lookup = subprocess.run(
            ["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
        )
        if lookup.returncode == 0:
            rs_cmd = [
                lookup.stdout.decode("ascii").strip(),
                "metrics",
                "-p",
                args.path,
                "-n",
                args.name,
                "-t",
                fname,
                "--",
            ]
            rs_cmd.extend(args.remaining.split())
            print(rs_cmd)
            # run_subprocess() returns nothing and runs with check=True, so a
            # failure arrives as CalledProcessError rather than as a return
            # code; inspecting a result object here would itself raise.
            try:
                run_subprocess(rs_cmd)
            except subprocess.CalledProcessError as err:
                print(err)
                sys.exit(1)
|
||||
|
||||
|
||||
def run_prof(fname, workload_dir, perfmon_dir, cmd, target, verbose):
    """Run one rocprof pass for a single perfmon input file.

    The output is written to ``<workload_dir>/<stem of fname>.csv``. When
    ``target`` is "mi100" the call additionally passes
    ``-m <perfmon_dir>/metrics.xml`` so rocprof uses the custom metric
    definitions.

    Args:
        fname: path of the counter input file passed via ``-i``.
        workload_dir: directory that receives the CSV output.
        perfmon_dir: directory expected to contain ``metrics.xml``.
        cmd: application command line; passed as one argument wrapped in
            literal double quotes.
        target: SoC name; only "mi100" changes the invocation.
        verbose: when truthy, print the name of the input file first.
    """
    global rocprof_cmd

    pmc_file = os.path.basename(fname)
    stem = os.path.splitext(pmc_file)[0]

    if verbose:
        print("pmc file:", pmc_file)

    # Optional segment: custom metric definitions, used on mi100 only.
    metric_args = []
    if target == "mi100":
        print("RUNNING WITH CUSTOM METRICS")
        metric_args = ["-m", perfmon_dir + "/" + "metrics.xml"]

    run_subprocess(
        [rocprof_cmd, "-i", fname]
        + metric_args
        + [
            "--timestamp",
            "on",
            "-o",
            workload_dir + "/" + stem + ".csv",
            '"' + cmd + '"',
        ]
    )
|
||||
|
||||
|
||||
def _run_rocscope_mode(args, workload_dir, subcommand):
    """Run ``rocscope <subcommand>`` on the application (rocscope-only modes).

    Creates ``workload_dir`` if needed, then runs
    ``rocscope <subcommand> -p <args.path> -n <args.name> -- <app command>``.
    Exits with status 1 when the directory cannot be created or rocscope is
    not on ``PATH``. A failing rocscope run is reported but does not exit.
    """
    # Local import so no external `which` executable is required.
    import shutil

    # look for whether workload_dir exists - create if not
    try:
        os.makedirs(workload_dir, exist_ok=True)
    except Exception as e:
        print("Unable to create workload directory: ", workload_dir)
        print(e)
        sys.exit(1)

    rocscope = shutil.which("rocscope")
    if rocscope is None:
        print("rocscope must be in the PATH")
        sys.exit(1)

    rs_cmd = [rocscope, subcommand, "-p", args.path, "-n", args.name, "--"]
    rs_cmd.extend(args.remaining.split())
    print(rs_cmd)

    result = run_subprocess(rs_cmd)
    if result.returncode != 0:
        # stderr is None when the child's output was not captured.
        err = result.stderr
        if err:
            if isinstance(err, bytes):
                err = err.decode("ascii", errors="replace")
            print(err)


def _rewrite_perfmon_field(fname, field, value):
    """Replace the ``<field>:`` line of a perfmon input file in place via sed."""
    run_subprocess(
        [
            "sed",
            "-i",
            "-r",
            "s%^(" + field + ":).*%" + field + ": " + value + "%g",
            fname,
        ]
    )


def omniperf_profile(args, VER):
    """Entry point for profile mode.

    Validates the target and workload name, prints a summary of the request,
    filters the perfmon inputs, and then does one of:

    * ``args.lucky``: run ``rocscope top10`` only;
    * ``args.summaries``: run ``rocscope summary`` only;
    * otherwise: profile the application once per
      ``<workload_dir>/perfmon/*.txt`` file (through rocscope or rocprof),
      run one extra timestamp-only rocprof pass, patch the timestamps and
      write system info.

    Args:
        args: Parsed command-line namespace. Reads ``target``, ``name``,
            ``path``, ``remaining``, ``kernel``, ``dispatch``, ``ipblocks``,
            ``lucky``, ``summaries``, ``use_rocscope``, ``verbose`` and
            ``no_roof``.
        VER: Version value printed in the banner.

    Raises:
        ValueError: If ``args.name`` contains ``'-'`` or ``'.'``.

    Exits:
        ``sys.exit(1)`` on an unknown target, and from the rocscope modes as
        described on ``_run_rocscope_mode``.
    """
    # Verify valid target
    if args.target not in SOC_LIST:
        # NOTE(review): `parse` is not defined in this function; presumably a
        # module-level parser object -- confirm it exists.
        parse.print_help(sys.stderr)
        sys.exit(1)

    # Verify valid name
    if "." in args.name or "-" in args.name:
        raise ValueError("'-' and '.' are not permited in workload name", args.name)

    # Basic Info
    print(PROG, "ver: ", VER)
    print("Path: ", args.path)
    print("Target: ", args.target)
    print("Command: ", args.remaining)
    print("Kernel Selection: ", args.kernel)
    print("Dispatch Selection: ", args.dispatch)

    if args.ipblocks is None:
        print("IP Blocks: All", "\n")
    else:
        print("IP Blocks: ", args.ipblocks, "\n")

    # Set up directories
    workload_dir = args.path + "/" + args.name + "/" + args.target
    perfmon_dir = str(OMNIPERF_HOME) + "/perfmon_pub"

    # Perfmon filtering
    perfmon_filter(workload_dir, perfmon_dir, args)

    # `x == True` already excludes None, so the extra None test is dropped.
    if args.lucky == True:
        print("You're feeling lucky - only profiling top N kernels")
        _run_rocscope_mode(args, workload_dir, "top10")
    elif args.summaries == True:
        print("creating kernel summaries")
        _run_rocscope_mode(args, workload_dir, "summary")
    else:
        for fname in glob.glob(workload_dir + "/perfmon/*.txt"):
            # Kernel filtering (in-place replacement)
            if args.kernel is not None:
                _rewrite_perfmon_field(fname, "kernel", ",".join(args.kernel))

            # Dispatch filtering (in-place replacement)
            if args.dispatch is not None:
                _rewrite_perfmon_field(fname, "range", " ".join(args.dispatch))

            if args.use_rocscope == True:
                run_rocscope(args, fname)
            else:
                run_prof(
                    fname,
                    workload_dir,
                    perfmon_dir,
                    args.remaining,
                    args.target,
                    args.verbose,
                )

        # NOTE(review): the nesting of the three steps below was reconstructed
        # from a source whose indentation was lost; confirm they belong to the
        # default profiling path only.

        # run again with timestamps (no counter input file is passed)
        run_subprocess(
            [
                rocprof_cmd,
                "--timestamp",
                "on",
                "-o",
                workload_dir + "/" + "timestamps.csv",
                '"' + args.remaining + '"',
            ]
        )

        # Update pmc_perf.csv timestamps
        replace_timestamps(workload_dir)

        # Generate sysinfo
        gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
|
||||
|
||||
@@ -466,6 +650,7 @@ def main():
|
||||
# PROFILE MODE
|
||||
##############
|
||||
if args.mode == "profile":
|
||||
print("Resolving rocprof")
|
||||
resolve_rocprof()
|
||||
if ".." in str(args.path):
|
||||
throw_parse_error(
|
||||
@@ -495,62 +680,10 @@ def main():
|
||||
|
||||
elif args.roof_only:
|
||||
print("\n--------\nRoofline only\n--------\n")
|
||||
if args.path == os.getcwd() + "/workloads":
|
||||
args.path += "/" + args.name + "/" + str(get_soc())
|
||||
# Verify valid axes parameters
|
||||
if args.axes:
|
||||
if len(args.axes) != 4:
|
||||
throw_parse_error(
|
||||
my_parser,
|
||||
"Invalid argument for --axes.\nMust contain four values formatted as: --axes xmin xmax ymin ymax",
|
||||
)
|
||||
|
||||
if args.axes[0] > args.axes[1] or args.axes[2] > args.axes[3]:
|
||||
throw_parse_error(
|
||||
my_parser,
|
||||
"Invalid argument for --axes.\nBreaks required conditions: (xmax > xmin && ymax > ymin)",
|
||||
)
|
||||
|
||||
# We need to make a directory for a new roofline
|
||||
if not os.path.isdir(args.path):
|
||||
os.makedirs(args.path)
|
||||
# does roof data exist?
|
||||
print("Checking for roofline.csv in ", args.path)
|
||||
roof_path = args.path + "/roofline.csv"
|
||||
roofline_exists = os.path.isfile(roof_path)
|
||||
if not roofline_exists:
|
||||
if get_soc() != "mi200":
|
||||
throw_parse_error(
|
||||
my_parser, "Invalid SoC.\nRoofline only availible on MI200."
|
||||
)
|
||||
mibench(args)
|
||||
|
||||
# does sysinfo exist?
|
||||
print("Checking for sysinfo.csv in ", args.path)
|
||||
sysinfo_path = args.path + "/sysinfo.csv"
|
||||
sysinfo_exists = os.path.isfile(sysinfo_path)
|
||||
if not sysinfo_exists:
|
||||
print("sysinfo not found")
|
||||
gen_sysinfo(args.name, args.path, [], args.remaining, False)
|
||||
|
||||
# does app data exist?
|
||||
print("Checking for pmc_perf.csv in ", args.path)
|
||||
app_path = args.path + "/pmc_perf.csv"
|
||||
app_exists = os.path.isfile(app_path)
|
||||
if not app_exists:
|
||||
if get_soc() != "mi200":
|
||||
throw_parse_error(
|
||||
my_parser, "Invalid SoC.\nRoofline only availible on MI200."
|
||||
)
|
||||
if not args.remaining:
|
||||
throw_parse_error(
|
||||
my_parser,
|
||||
"Cannot find existing application data.\nAttempting to generate application data from -- <app_cmd>.\n-- <app_cmd> option is required to generate application data.",
|
||||
)
|
||||
else:
|
||||
characterize_app(args.path, args.remaining, args.verbose)
|
||||
# Set up prerequisites for roofline
|
||||
roof_setup(args, my_parser)
|
||||
# Generate roofline
|
||||
plot_roofline.empirical_roof(args)
|
||||
roofline_only(args.path, args.device, args.sort, args.mem_level, args.verbose)
|
||||
|
||||
# Profile only
|
||||
else:
|
||||
|
||||
@@ -593,20 +593,6 @@ button.report:hover {
|
||||
#l2_cache_per_channel a, #l2_cache_per_channel a:visited { color: #fff; }
|
||||
#l2_cache_per_channel a:hover, #l2_cache_per_channel a:focus { color: #11ABB0; }
|
||||
|
||||
#l2_cache_per_channel .float-container {
|
||||
/* border: 3px solid #fff; */
|
||||
padding: 20px;
|
||||
}
|
||||
#l2_cache_per_channel .float-child {
|
||||
width: 100%;
|
||||
float: left;
|
||||
padding: 20px;
|
||||
/* border: 2px solid red; */
|
||||
}
|
||||
#l2_cache_per_channel .float-child h3 {
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* c. About Section
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
@@ -104,28 +104,28 @@ Panel Config:
|
||||
unit: Instr/wavefront
|
||||
tips:
|
||||
Wave Cycles:
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Dependency Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Issue Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Active Cycles:
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Wavefront Occupancy:
|
||||
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
|
||||
@@ -237,81 +237,107 @@ Panel Config:
|
||||
id: 1604
|
||||
title: L1D - L2 Transactions
|
||||
header:
|
||||
metric: Metric
|
||||
xfer: Xfer
|
||||
mean: Mean
|
||||
coherency: Coherency
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
NC - Read:
|
||||
mean: None # No perf counter
|
||||
xfer: Read
|
||||
coherency: NC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Read:
|
||||
mean: None # No perf counter
|
||||
xfer: Read
|
||||
coherency: UC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Read:
|
||||
mean: None # No perf counter
|
||||
xfer: Read
|
||||
coherency: CC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Read:
|
||||
mean: None # No perf counter
|
||||
xfer: Read
|
||||
coherency: RW
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Write:
|
||||
mean: None # No perf counter
|
||||
xfer: Write
|
||||
coherency: RW
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Write:
|
||||
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Write:
|
||||
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
CC - Write:
|
||||
xfer: Write
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Write:
|
||||
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Atomic:
|
||||
mean: None # No perf counter
|
||||
xfer: Atomic
|
||||
coherency: NC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Atomic:
|
||||
mean: None # No perf counter
|
||||
xfer: Atomic
|
||||
coherency: UC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Atomic:
|
||||
mean: None # No perf counter
|
||||
xfer: Atomic
|
||||
coherency: CC
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Atomic:
|
||||
mean: None # No perf counter
|
||||
xfer: Atomic
|
||||
coherency: RW
|
||||
avg: None # No perf counter
|
||||
min: None # No perf counter
|
||||
max: None # No perf counter
|
||||
unit: (Req + $normUnit)
|
||||
|
||||
@@ -294,6 +294,8 @@ Panel Config:
|
||||
title: L2 - EA Interface Stalls
|
||||
header:
|
||||
metric: Metric
|
||||
type: Type
|
||||
transaction: Transaction
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
@@ -301,42 +303,56 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Read - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
|
||||
@@ -10,28 +10,28 @@ Panel Config:
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1801
|
||||
title: Channel 0 -15
|
||||
title: Channel 0-15
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
"0":
|
||||
@@ -41,10 +41,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
|
||||
!= 0) else None))
|
||||
@@ -69,10 +69,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
|
||||
!= 0) else None))
|
||||
@@ -97,10 +97,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
|
||||
!= 0) else None))
|
||||
@@ -125,10 +125,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
|
||||
!= 0) else None))
|
||||
@@ -153,10 +153,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
|
||||
!= 0) else None))
|
||||
@@ -181,10 +181,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
|
||||
!= 0) else None))
|
||||
@@ -209,10 +209,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
|
||||
!= 0) else None))
|
||||
@@ -237,10 +237,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
|
||||
!= 0) else None))
|
||||
@@ -265,10 +265,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
|
||||
!= 0) else None))
|
||||
@@ -293,10 +293,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
|
||||
!= 0) else None))
|
||||
@@ -321,10 +321,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
|
||||
!= 0) else None))
|
||||
@@ -349,10 +349,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
|
||||
!= 0) else None))
|
||||
@@ -377,10 +377,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
|
||||
!= 0) else None))
|
||||
@@ -405,10 +405,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
|
||||
!= 0) else None))
|
||||
@@ -433,10 +433,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
|
||||
!= 0) else None))
|
||||
@@ -461,10 +461,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
|
||||
!= 0) else None))
|
||||
@@ -485,28 +485,28 @@ Panel Config:
|
||||
|
||||
- metric_table:
|
||||
id: 1802
|
||||
title: Channel 16 -31
|
||||
title: Channel 16-31
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
"16":
|
||||
@@ -514,10 +514,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -534,10 +534,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -554,10 +554,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -574,10 +574,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -594,10 +594,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -614,10 +614,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -634,10 +634,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -654,10 +654,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -674,10 +674,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -694,10 +694,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -714,10 +714,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -734,10 +734,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -754,10 +754,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -774,10 +774,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -794,10 +794,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
@@ -814,10 +814,10 @@ Panel Config:
|
||||
req: None # No perf counter
|
||||
read req: None # No perf counter
|
||||
write req: None # No perf counter
|
||||
atomicreq: None # No perf counter
|
||||
atomic req: None # No perf counter
|
||||
ea read req: None # No perf counter
|
||||
ea write req: None # No perf counter
|
||||
ea atomicreq: None # No perf counter
|
||||
ea atomic req: None # No perf counter
|
||||
ea read lat - cycles: None # No perf counter
|
||||
ea write lat - cycles: None # No perf counter
|
||||
ea atomic lat - cycles: None # No perf counter
|
||||
|
||||
@@ -104,28 +104,28 @@ Panel Config:
|
||||
unit: Instr/wavefront
|
||||
tips:
|
||||
Wave Cycles:
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Dependency Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Issue Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Active Cycles:
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Wavefront Occupancy:
|
||||
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
|
||||
@@ -237,81 +237,107 @@ Panel Config:
|
||||
id: 1604
|
||||
title: L1D - L2 Transactions
|
||||
header:
|
||||
metric: Metric
|
||||
xfer: Xfer
|
||||
mean: Mean
|
||||
coherency: Coherency
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
NC - Read:
|
||||
mean: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Read:
|
||||
mean: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Read:
|
||||
mean: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Read:
|
||||
mean: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Write:
|
||||
mean: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Write:
|
||||
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Write:
|
||||
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Write:
|
||||
mean: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Atomic:
|
||||
mean: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Atomic:
|
||||
mean: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Atomic:
|
||||
mean: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Atomic:
|
||||
mean: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
|
||||
@@ -294,6 +294,8 @@ Panel Config:
|
||||
title: L2 - EA Interface Stalls
|
||||
header:
|
||||
metric: Metric
|
||||
type: Type
|
||||
transaction: Transaction
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
@@ -301,42 +303,56 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Read - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
|
||||
@@ -10,28 +10,28 @@ Panel Config:
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1801
|
||||
title: Channel 0 -15
|
||||
title: Channel 0-15
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
"0":
|
||||
@@ -41,10 +41,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
|
||||
!= 0) else None))
|
||||
@@ -69,10 +69,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
|
||||
!= 0) else None))
|
||||
@@ -97,10 +97,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
|
||||
!= 0) else None))
|
||||
@@ -125,10 +125,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
|
||||
!= 0) else None))
|
||||
@@ -153,10 +153,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
|
||||
!= 0) else None))
|
||||
@@ -181,10 +181,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
|
||||
!= 0) else None))
|
||||
@@ -209,10 +209,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
|
||||
!= 0) else None))
|
||||
@@ -237,10 +237,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
|
||||
!= 0) else None))
|
||||
@@ -265,10 +265,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
|
||||
!= 0) else None))
|
||||
@@ -293,10 +293,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
|
||||
!= 0) else None))
|
||||
@@ -321,10 +321,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
|
||||
!= 0) else None))
|
||||
@@ -349,10 +349,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
|
||||
!= 0) else None))
|
||||
@@ -377,10 +377,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
|
||||
!= 0) else None))
|
||||
@@ -405,10 +405,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
|
||||
!= 0) else None))
|
||||
@@ -433,10 +433,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
|
||||
!= 0) else None))
|
||||
@@ -461,10 +461,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
|
||||
!= 0) else None))
|
||||
@@ -485,28 +485,28 @@ Panel Config:
|
||||
|
||||
- metric_table:
|
||||
id: 1802
|
||||
title: Channel 16 -31
|
||||
title: Channel 16-31
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
"16":
|
||||
@@ -516,10 +516,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[16]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[16]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[16]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[16]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[16]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[16] / TCC_EA_RDREQ[16]) if (TCC_EA_RDREQ[16]
|
||||
!= 0) else None))
|
||||
@@ -544,10 +544,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[17]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[17]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[17]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[17]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[17]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[17] / TCC_EA_RDREQ[17]) if (TCC_EA_RDREQ[17]
|
||||
!= 0) else None))
|
||||
@@ -572,10 +572,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[18]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[18]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[18]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[18]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[18]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[18] / TCC_EA_RDREQ[18]) if (TCC_EA_RDREQ[18]
|
||||
!= 0) else None))
|
||||
@@ -600,10 +600,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[19]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[19]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[19]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[19]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[19]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[19] / TCC_EA_RDREQ[19]) if (TCC_EA_RDREQ[19]
|
||||
!= 0) else None))
|
||||
@@ -628,10 +628,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[20]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[20]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[20]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[20]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[20]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[20] / TCC_EA_RDREQ[20]) if (TCC_EA_RDREQ[20]
|
||||
!= 0) else None))
|
||||
@@ -656,10 +656,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[21]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[21]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[21]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[21]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[21]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[21] / TCC_EA_RDREQ[21]) if (TCC_EA_RDREQ[21]
|
||||
!= 0) else None))
|
||||
@@ -684,10 +684,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[22]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[22]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[22]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[22]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[22]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[22] / TCC_EA_RDREQ[22]) if (TCC_EA_RDREQ[22]
|
||||
!= 0) else None))
|
||||
@@ -712,10 +712,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[23]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[23]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[23]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[23]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[23]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[23] / TCC_EA_RDREQ[23]) if (TCC_EA_RDREQ[23]
|
||||
!= 0) else None))
|
||||
@@ -740,10 +740,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[24]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[24]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[24]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[24]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[24]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[24] / TCC_EA_RDREQ[24]) if (TCC_EA_RDREQ[24]
|
||||
!= 0) else None))
|
||||
@@ -768,10 +768,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[25]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[25]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[25]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[25]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[25]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[25] / TCC_EA_RDREQ[25]) if (TCC_EA_RDREQ[25]
|
||||
!= 0) else None))
|
||||
@@ -796,10 +796,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[26]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[26]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[26]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[26]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[26]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[26] / TCC_EA_RDREQ[26]) if (TCC_EA_RDREQ[26]
|
||||
!= 0) else None))
|
||||
@@ -824,10 +824,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[27]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[27]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[27]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[27]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[27]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[27] / TCC_EA_RDREQ[27]) if (TCC_EA_RDREQ[27]
|
||||
!= 0) else None))
|
||||
@@ -852,10 +852,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[28]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[28]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[28]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[28]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[28]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[28] / TCC_EA_RDREQ[28]) if (TCC_EA_RDREQ[28]
|
||||
!= 0) else None))
|
||||
@@ -880,10 +880,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[29]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[29]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[29]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[29]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[29]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[29] / TCC_EA_RDREQ[29]) if (TCC_EA_RDREQ[29]
|
||||
!= 0) else None))
|
||||
@@ -908,10 +908,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[30]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[30]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[30]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[30]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[30]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[30] / TCC_EA_RDREQ[30]) if (TCC_EA_RDREQ[30]
|
||||
!= 0) else None))
|
||||
@@ -936,10 +936,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[31]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[31]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[31]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[31]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[31]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
|
||||
ea read lat - cycles:
|
||||
AVG(((TCC_EA_RDREQ_LEVEL[31] / TCC_EA_RDREQ[31]) if (TCC_EA_RDREQ[31]
|
||||
!= 0) else None))
|
||||
|
||||
@@ -104,28 +104,28 @@ Panel Config:
|
||||
unit: Instr/wavefront
|
||||
tips:
|
||||
Wave Cycles:
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Dependency Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Issue Wait Cycles:
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Active Cycles:
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / SQ_WAVES))
|
||||
unit: Cycles/wave
|
||||
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
|
||||
unit: (Cycles + $normUnit)
|
||||
tips:
|
||||
Wavefront Occupancy:
|
||||
avg: AVG((SQ_ACCUM_PREV_HIRES / GRBM_GUI_ACTIVE))
|
||||
|
||||
@@ -163,17 +163,17 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
MFMA-I8:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_I8 / SQ_WAVES))
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_I8 / $denom))
|
||||
tips:
|
||||
MFMA-F16:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F16 / SQ_WAVES))
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F16 / $denom))
|
||||
tips:
|
||||
MFMA-BF16:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / SQ_WAVES))
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_BF16 / $denom))
|
||||
tips:
|
||||
MFMA-F32:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F32 / SQ_WAVES))
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F32 / $denom))
|
||||
tips:
|
||||
MFMA-F64:
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F64 / SQ_WAVES))
|
||||
count: AVG((SQ_INSTS_VALU_MFMA_F64 / $denom))
|
||||
tips:
|
||||
|
||||
@@ -237,81 +237,107 @@ Panel Config:
|
||||
id: 1604
|
||||
title: L1D - L2 Transactions
|
||||
header:
|
||||
metric: Metric
|
||||
xfer: Xfer
|
||||
mean: Mean
|
||||
coherency: Coherency
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
unit: Unit
|
||||
tips: Tips
|
||||
metric:
|
||||
NC - Read:
|
||||
mean: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Read:
|
||||
mean: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Read:
|
||||
mean: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Read:
|
||||
mean: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
xfer: Read
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_READ_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Write:
|
||||
mean: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Write:
|
||||
mean: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Write:
|
||||
mean: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Write:
|
||||
mean: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
xfer: Write
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_WRITE_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
NC - Atomic:
|
||||
mean: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: NC
|
||||
avg: AVG((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_NC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
UC - Atomic:
|
||||
mean: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: UC
|
||||
avg: AVG((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_UC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
CC - Atomic:
|
||||
mean: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: CC
|
||||
avg: AVG((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_CC_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
RW - Atomic:
|
||||
mean: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
xfer: Atomic
|
||||
coherency: RW
|
||||
avg: AVG((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
min: MIN((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
max: MAX((TCP_TCC_RW_ATOMIC_REQ_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
|
||||
@@ -294,6 +294,8 @@ Panel Config:
|
||||
title: L2 - Fabric Interface Stalls
|
||||
header:
|
||||
metric: Metric
|
||||
type: Type
|
||||
transaction: Transaction
|
||||
avg: Avg
|
||||
min: Min
|
||||
max: Max
|
||||
@@ -301,42 +303,56 @@ Panel Config:
|
||||
tips: Tips
|
||||
metric:
|
||||
Read - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Read - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Read
|
||||
avg: AVG((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Remote Socket Stall:
|
||||
type: Remote Socket Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_IO_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Peer GCD Stall:
|
||||
type: Peer GCD Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_GMI_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - HBM Stall:
|
||||
type: HBM Stall
|
||||
transaction: Write
|
||||
avg: AVG((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
min: MIN((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
max: MAX((TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum / $denom))
|
||||
unit: (Req + $normUnit)
|
||||
tips:
|
||||
Write - Credit Starvation:
|
||||
type: Credit Starvation
|
||||
transaction: Write
|
||||
avg: AVG((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
min: MIN((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
max: MAX((TCC_TOO_MANY_EA_WRREQS_STALL_sum / $denom))
|
||||
|
||||
@@ -10,28 +10,28 @@ Panel Config:
|
||||
data source:
|
||||
- metric_table:
|
||||
id: 1801
|
||||
title: Channel 0 -15
|
||||
title: Channel 0-15
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
'0':
|
||||
@@ -40,10 +40,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[0]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[0]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[0]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[0]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[0]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[0]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[0]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[0] / TCC_EA_RDREQ[0]) if (TCC_EA_RDREQ[0]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[0] / TCC_EA_WRREQ[0]) if (TCC_EA_WRREQ[0]
|
||||
@@ -64,10 +64,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[1]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[1]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[1]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[1]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[1]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[1]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[1]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[1] / TCC_EA_RDREQ[1]) if (TCC_EA_RDREQ[1]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[1] / TCC_EA_WRREQ[1]) if (TCC_EA_WRREQ[1]
|
||||
@@ -88,10 +88,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[2]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[2]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[2]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[2]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[2]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[2]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[2]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[2] / TCC_EA_RDREQ[2]) if (TCC_EA_RDREQ[2]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[2] / TCC_EA_WRREQ[2]) if (TCC_EA_WRREQ[2]
|
||||
@@ -112,10 +112,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[3]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[3]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[3]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[3]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[3]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[3]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[3]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[3] / TCC_EA_RDREQ[3]) if (TCC_EA_RDREQ[3]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[3] / TCC_EA_WRREQ[3]) if (TCC_EA_WRREQ[3]
|
||||
@@ -136,10 +136,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[4]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[4]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[4]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[4]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[4]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[4]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[4]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[4] / TCC_EA_RDREQ[4]) if (TCC_EA_RDREQ[4]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[4] / TCC_EA_WRREQ[4]) if (TCC_EA_WRREQ[4]
|
||||
@@ -160,10 +160,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[5]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[5]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[5]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[5]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[5]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[5]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[5]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[5] / TCC_EA_RDREQ[5]) if (TCC_EA_RDREQ[5]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[5] / TCC_EA_WRREQ[5]) if (TCC_EA_WRREQ[5]
|
||||
@@ -184,10 +184,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[6]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[6]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[6]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[6]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[6]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[6]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[6]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[6] / TCC_EA_RDREQ[6]) if (TCC_EA_RDREQ[6]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[6] / TCC_EA_WRREQ[6]) if (TCC_EA_WRREQ[6]
|
||||
@@ -208,10 +208,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[7]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[7]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[7]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[7]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[7]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[7]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[7]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[7] / TCC_EA_RDREQ[7]) if (TCC_EA_RDREQ[7]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[7] / TCC_EA_WRREQ[7]) if (TCC_EA_WRREQ[7]
|
||||
@@ -232,10 +232,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[8]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[8]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[8]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[8]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[8]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[8]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[8]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[8] / TCC_EA_RDREQ[8]) if (TCC_EA_RDREQ[8]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[8] / TCC_EA_WRREQ[8]) if (TCC_EA_WRREQ[8]
|
||||
@@ -256,10 +256,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[9]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[9]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[9]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[9]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[9]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[9]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[9]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[9] / TCC_EA_RDREQ[9]) if (TCC_EA_RDREQ[9]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[9] / TCC_EA_WRREQ[9]) if (TCC_EA_WRREQ[9]
|
||||
@@ -280,10 +280,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[10]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[10]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[10]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[10]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[10]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[10]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[10]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[10] / TCC_EA_RDREQ[10]) if (TCC_EA_RDREQ[10]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[10] / TCC_EA_WRREQ[10]) if (TCC_EA_WRREQ[10]
|
||||
@@ -304,10 +304,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[11]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[11]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[11]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[11]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[11]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[11]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[11]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[11] / TCC_EA_RDREQ[11]) if (TCC_EA_RDREQ[11]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[11] / TCC_EA_WRREQ[11]) if (TCC_EA_WRREQ[11]
|
||||
@@ -328,10 +328,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[12]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[12]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[12]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[12]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[12]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[12]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[12]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[12] / TCC_EA_RDREQ[12]) if (TCC_EA_RDREQ[12]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[12] / TCC_EA_WRREQ[12]) if (TCC_EA_WRREQ[12]
|
||||
@@ -352,10 +352,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[13]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[13]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[13]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[13]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[13]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[13]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[13]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[13] / TCC_EA_RDREQ[13]) if (TCC_EA_RDREQ[13]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[13] / TCC_EA_WRREQ[13]) if (TCC_EA_WRREQ[13]
|
||||
@@ -376,10 +376,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[14]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[14]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[14]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[14]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[14]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[14]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[14]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[14] / TCC_EA_RDREQ[14]) if (TCC_EA_RDREQ[14]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[14] / TCC_EA_WRREQ[14]) if (TCC_EA_WRREQ[14]
|
||||
@@ -400,10 +400,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[15]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[15]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[15]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[15]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[15]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[15]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[15]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[15] / TCC_EA_RDREQ[15]) if (TCC_EA_RDREQ[15]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[15] / TCC_EA_WRREQ[15]) if (TCC_EA_WRREQ[15]
|
||||
@@ -420,28 +420,28 @@ Panel Config:
|
||||
tips:
|
||||
- metric_table:
|
||||
id: 1802
|
||||
title: Channel 16 -31
|
||||
title: Channel 16-31
|
||||
columnwise: True
|
||||
header:
|
||||
channel: Channel
|
||||
hit rate: Hit Rate
|
||||
req: Req
|
||||
read req: Read Req
|
||||
write req: Write Req
|
||||
atomicreq: AtomicReq
|
||||
ea read req: EA Read Req
|
||||
ea write req: EA Write Req
|
||||
ea atomicreq: EA AtomicReq
|
||||
ea read lat - cycles: EA Read Lat - cycles
|
||||
ea write lat - cycles: EA Write Lat - cycles
|
||||
ea atomic lat - cycles: EA Atomic Lat - cycles
|
||||
ea read stall - io: EA Read Stall - IO
|
||||
ea read stall - gmi: EA Read Stall - GMI
|
||||
ea read stall - dram: EA Read Stall - DRAM
|
||||
ea write stall - io: EA Write Stall - IO
|
||||
ea write stall - gmi: EA Write Stall - GMI
|
||||
ea write stall - dram: EA Write Stall - DRAM
|
||||
ea write stall - starve: EA Write Stall - Starve
|
||||
hit rate: L2 Cache Hit Rate (%)
|
||||
req: Requests (Requests)
|
||||
read req: L1-L2 Read (Requests)
|
||||
write req: L1-L2 Write (Requests)
|
||||
atomic req: L1-L2 Atomic (Requests)
|
||||
ea read req: L2-EA Read (Requests)
|
||||
ea write req: L2-EA Write (Requests)
|
||||
ea atomic req: L2-EA Atomic (Requests)
|
||||
ea read lat - cycles: L2-EA Read Latency (Cycles)
|
||||
ea write lat - cycles: L2-EA Write Latency (Cycles)
|
||||
ea atomic lat - cycles: L2-EA Atomic Latency (Cycles)
|
||||
ea read stall - io: L2-EA Read Stall - IO (Cycles per)
|
||||
ea read stall - gmi: L2-EA Read Stall - GMI (Cycles per)
|
||||
ea read stall - dram: L2-EA Read Stall - DRAM (Cycles per)
|
||||
ea write stall - io: L2-EA Write Stall - IO (Cycles per)
|
||||
ea write stall - gmi: L2-EA Write Stall - GMI (Cycles per)
|
||||
ea write stall - dram: L2-EA Write Stall - DRAM (Cycles per)
|
||||
ea write stall - starve: L2-EA Write Stall - Starve (Cycles per)
|
||||
tips: Tips
|
||||
metric:
|
||||
'16':
|
||||
@@ -450,10 +450,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[16]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[16]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[16]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[16]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[16]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[16]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[16]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[16] / TCC_EA_RDREQ[16]) if (TCC_EA_RDREQ[16]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[16] / TCC_EA_WRREQ[16]) if (TCC_EA_WRREQ[16]
|
||||
@@ -474,10 +474,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[17]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[17]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[17]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[17]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[17]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[17]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[17]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[17] / TCC_EA_RDREQ[17]) if (TCC_EA_RDREQ[17]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[17] / TCC_EA_WRREQ[17]) if (TCC_EA_WRREQ[17]
|
||||
@@ -498,10 +498,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[18]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[18]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[18]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[18]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[18]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[18]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[18]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[18] / TCC_EA_RDREQ[18]) if (TCC_EA_RDREQ[18]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[18] / TCC_EA_WRREQ[18]) if (TCC_EA_WRREQ[18]
|
||||
@@ -522,10 +522,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[19]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[19]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[19]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[19]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[19]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[19]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[19]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[19] / TCC_EA_RDREQ[19]) if (TCC_EA_RDREQ[19]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[19] / TCC_EA_WRREQ[19]) if (TCC_EA_WRREQ[19]
|
||||
@@ -546,10 +546,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[20]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[20]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[20]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[20]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[20]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[20]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[20]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[20] / TCC_EA_RDREQ[20]) if (TCC_EA_RDREQ[20]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[20] / TCC_EA_WRREQ[20]) if (TCC_EA_WRREQ[20]
|
||||
@@ -570,10 +570,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[21]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[21]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[21]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[21]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[21]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[21]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[21]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[21] / TCC_EA_RDREQ[21]) if (TCC_EA_RDREQ[21]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[21] / TCC_EA_WRREQ[21]) if (TCC_EA_WRREQ[21]
|
||||
@@ -594,10 +594,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[22]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[22]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[22]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[22]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[22]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[22]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[22]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[22] / TCC_EA_RDREQ[22]) if (TCC_EA_RDREQ[22]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[22] / TCC_EA_WRREQ[22]) if (TCC_EA_WRREQ[22]
|
||||
@@ -618,10 +618,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[23]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[23]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[23]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[23]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[23]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[23]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[23]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[23] / TCC_EA_RDREQ[23]) if (TCC_EA_RDREQ[23]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[23] / TCC_EA_WRREQ[23]) if (TCC_EA_WRREQ[23]
|
||||
@@ -642,10 +642,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[24]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[24]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[24]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[24]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[24]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[24]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[24]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[24] / TCC_EA_RDREQ[24]) if (TCC_EA_RDREQ[24]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[24] / TCC_EA_WRREQ[24]) if (TCC_EA_WRREQ[24]
|
||||
@@ -666,10 +666,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[25]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[25]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[25]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[25]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[25]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[25]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[25]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[25] / TCC_EA_RDREQ[25]) if (TCC_EA_RDREQ[25]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[25] / TCC_EA_WRREQ[25]) if (TCC_EA_WRREQ[25]
|
||||
@@ -690,10 +690,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[26]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[26]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[26]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[26]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[26]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[26]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[26]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[26] / TCC_EA_RDREQ[26]) if (TCC_EA_RDREQ[26]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[26] / TCC_EA_WRREQ[26]) if (TCC_EA_WRREQ[26]
|
||||
@@ -714,10 +714,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[27]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[27]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[27]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[27]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[27]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[27]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[27]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[27] / TCC_EA_RDREQ[27]) if (TCC_EA_RDREQ[27]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[27] / TCC_EA_WRREQ[27]) if (TCC_EA_WRREQ[27]
|
||||
@@ -738,10 +738,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[28]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[28]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[28]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[28]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[28]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[28]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[28]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[28] / TCC_EA_RDREQ[28]) if (TCC_EA_RDREQ[28]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[28] / TCC_EA_WRREQ[28]) if (TCC_EA_WRREQ[28]
|
||||
@@ -762,10 +762,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[29]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[29]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[29]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[29]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[29]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[29]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[29]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[29] / TCC_EA_RDREQ[29]) if (TCC_EA_RDREQ[29]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[29] / TCC_EA_WRREQ[29]) if (TCC_EA_WRREQ[29]
|
||||
@@ -786,10 +786,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[30]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[30]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[30]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[30]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[30]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[30]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[30]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[30] / TCC_EA_RDREQ[30]) if (TCC_EA_RDREQ[30]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[30] / TCC_EA_WRREQ[30]) if (TCC_EA_WRREQ[30]
|
||||
@@ -810,10 +810,10 @@ Panel Config:
|
||||
req: AVG((TO_INT(TCC_REQ[31]) / $denom))
|
||||
read req: AVG((TO_INT(TCC_READ[31]) / $denom))
|
||||
write req: AVG((TO_INT(TCC_WRITE[31]) / $denom))
|
||||
atomicreq: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
|
||||
atomic req: AVG((TO_INT(TCC_ATOMIC[31]) / $denom))
|
||||
ea read req: AVG((TO_INT(TCC_EA_RDREQ[31]) / $denom))
|
||||
ea write req: AVG((TO_INT(TCC_EA_WRREQ[31]) / $denom))
|
||||
ea atomicreq: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
|
||||
ea atomic req: AVG((TO_INT(TCC_EA_ATOMIC[31]) / $denom))
|
||||
ea read lat - cycles: AVG(((TCC_EA_RDREQ_LEVEL[31] / TCC_EA_RDREQ[31]) if (TCC_EA_RDREQ[31]
|
||||
!= 0) else None))
|
||||
ea write lat - cycles: AVG(((TCC_EA_WRREQ_LEVEL[31] / TCC_EA_WRREQ[31]) if (TCC_EA_WRREQ[31]
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (C) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import ast
|
||||
import astunparse
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,23 +12,22 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
with open(sys.argv[1], "r") as file:
|
||||
s = file.read()
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
################################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,23 +12,22 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
################################################################################el
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
with open(sys.argv[1], "r") as file:
|
||||
s = file.read()
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
"""
|
||||
Quick run:
|
||||
@@ -41,6 +43,7 @@ import argparse
|
||||
import os.path
|
||||
from pathlib import Path
|
||||
from omniperf_analyze.utils import parser, file_io
|
||||
from omniperf_analyze.utils.gui_components.roofline import get_roofline
|
||||
|
||||
|
||||
def initialize_run(args, normalization_filter=None):
|
||||
@@ -143,7 +146,7 @@ def run_gui(args, runs):
|
||||
num_results,
|
||||
)
|
||||
runs[args.path[0][0]].raw_pmc = file_io.create_df_pmc(
|
||||
args.path[0][0]
|
||||
args.path[0][0], args.verbose
|
||||
) # create mega df
|
||||
parser.load_kernel_top(runs[args.path[0][0]], args.path[0][0])
|
||||
|
||||
@@ -188,7 +191,9 @@ def run_cli(args, runs):
|
||||
args.time_unit,
|
||||
num_results,
|
||||
)
|
||||
runs[d[0]].raw_pmc = file_io.create_df_pmc(d[0]) # creates mega dataframe
|
||||
runs[d[0]].raw_pmc = file_io.create_df_pmc(
|
||||
d[0], args.verbose
|
||||
) # creates mega dataframe
|
||||
is_gui = False
|
||||
parser.load_table_data(
|
||||
runs[d[0]], d[0], is_gui, args.g, args.verbose
|
||||
@@ -203,9 +208,37 @@ def run_cli(args, runs):
|
||||
args.decimal,
|
||||
args.time_unit,
|
||||
args.cols,
|
||||
args.verbose,
|
||||
)
|
||||
|
||||
|
||||
def roofline_only(path_to_dir, dev_id, sort_type, mem_level, verbose):
    """Generate a standalone roofline analysis from a profiled workload.

    Reads ``pmc_perf.csv`` from ``path_to_dir`` and hands it to
    ``get_roofline`` with the standalone flag set.

    Args:
        path_to_dir: Directory expected to contain ``pmc_perf.csv``.
        dev_id: Device id to collect roofline info from (forwarded as-is).
        sort_type: Sort AI by top kernels or dispatches (forwarded as-is).
        mem_level: List of memory hierarchy levels to toggle. NOTE: this
            list is modified in place -- a "vL1D" entry is removed and
            "L1" is appended in its place.
        verbose: Verbosity value forwarded to ``get_roofline``.

    Exits the process (status 0, as before) after printing an error when
    ``pmc_perf.csv`` is not present in ``path_to_dir``.
    """
    # Local import: guarantees `sys` is bound for the error path below,
    # mirroring the function-scope imports this function already uses.
    import sys

    # Change vL1D to an interpretable str, if required
    if "vL1D" in mem_level:
        mem_level.remove("vL1D")
        mem_level.append("L1")

    app_path = os.path.join(path_to_dir, "pmc_perf.csv")
    if not os.path.isfile(app_path):
        # The original message never filled in its "{}" placeholder, so the
        # user could not tell which file was missing.
        print("Error: {} does not exist".format(app_path))
        sys.exit(0)

    # Heavy imports are deferred until we know there is data to load.
    import pandas as pd
    from collections import OrderedDict

    t_df = OrderedDict()
    t_df["pmc_perf"] = pd.read_csv(app_path)
    get_roofline(
        path_to_dir,
        t_df,
        verbose,
        dev_id,  # [Optional] Specify device id to collect roofline info from
        sort_type,  # [Optional] Sort AI by top kernels or dispatches
        mem_level,  # [Optional] Toggle particular level(s) of memory hierarchy
        True,  # [Optional] Generate a standalone roofline analysis
    )
|
||||
|
||||
|
||||
def analyze(args):
|
||||
if args.dependency:
|
||||
print("pip3 install astunparse numpy tabulate pandas pyyaml")
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import ast
|
||||
import astunparse
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
@@ -180,7 +182,7 @@ def create_df_kernel_top_stats(
|
||||
grouped.to_csv(os.path.join(raw_data_dir, "pmc_kernel_top.csv"), index=False)
|
||||
|
||||
|
||||
def create_df_pmc(raw_data_dir):
|
||||
def create_df_pmc(raw_data_dir, verbose):
|
||||
"""
|
||||
Load all raw pmc counters and join into one df.
|
||||
"""
|
||||
@@ -200,8 +202,8 @@ def create_df_pmc(raw_data_dir):
|
||||
coll_levels.append(f[:-4])
|
||||
final_df = pd.concat(dfs, keys=coll_levels, axis=1, copy=False)
|
||||
# TODO: join instead of concat!
|
||||
|
||||
# print("pmc_raw_data final_df ", final_df.info())
|
||||
if verbose >= 2:
|
||||
print("pmc_raw_data final_df ", final_df.info())
|
||||
return final_df
|
||||
|
||||
|
||||
@@ -217,7 +219,6 @@ def collect_wave_occu_per_cu(in_dir, out_dir, numSE):
|
||||
for i in range(numSE):
|
||||
p = Path(in_dir, "wave_occu_se" + str(i) + ".csv")
|
||||
if p.exists():
|
||||
|
||||
tmp_df = pd.read_csv(p)
|
||||
SE_idx = "SE" + str(tmp_df.loc[0, "SE"])
|
||||
tmp_df.rename(
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,22 +10,21 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
from selectors import EpollSelector
|
||||
import sys
|
||||
import copy
|
||||
from matplotlib.axis import XAxis
|
||||
import pandas as pd
|
||||
from dash.dash_table import FormatTemplate
|
||||
from dash.dash_table.Format import Format, Scheme, Symbol
|
||||
@@ -51,19 +52,19 @@ HIDDEN_SECTIONS = ["Memory Chart Analysis", "Kernels"]
|
||||
HIDDEN_COLUMNS = ["Tips", "coll_level"]
|
||||
IS_DARK = True # default dark theme
|
||||
|
||||
# Add any elements you'd like displayed as a bar chart
|
||||
barchart_elements = [
|
||||
1001, # Instr mix
|
||||
1002, # VALU Arith Instr mix
|
||||
1101, # Compute pipe SOL
|
||||
1201, # LDS SOL
|
||||
1301, # Instruc cache SOL
|
||||
1401, # SL1D cache SOL
|
||||
1601, # VL1D cache SOL
|
||||
1701, # L2 cache SOL
|
||||
]
|
||||
# Define different types of bar charts
|
||||
barchart_elements = {
|
||||
# Group table ids by chart type
|
||||
"instr_mix": [1001, 1002],
|
||||
"multi_bar": [1604, 1704],
|
||||
"sol": [1101, 1201, 1301, 1401, 1601, 1701],
|
||||
"l2_cache_per_chan": [1801, 1802],
|
||||
}
|
||||
|
||||
|
||||
##################
|
||||
# HELPER FUNCTIONS
|
||||
##################
|
||||
def filter_df(column, df, filt):
|
||||
filt_df = df
|
||||
if filt != []:
|
||||
@@ -71,8 +72,20 @@ def filter_df(column, df, filt):
|
||||
return filt_df
|
||||
|
||||
|
||||
def discrete_background_color_bins(df, n_bins=5, columns="all"):
|
||||
def multi_bar_chart(table_id, display_df):
    """
    Group the "Avg" values of a multi-bar table into one dict per chart.

    Args:
        table_id: Table identifier; 1604 and 1704 are the supported ids.
        display_df: DataFrame holding the table rows. Table 1604 is read
            through its "Coherency", "Xfer" and "Avg" columns; table 1704
            through its "Transaction", "Type" and "Avg" columns.

    Returns:
        dict mapping each group name to a dict of {bar label: Avg value}.
        Every known group of the table is present, even when no row uses it.

    Raises:
        ValueError: if table_id is not a supported multi-bar table.
        KeyError: if a row names a group outside the known set for the table.
    """
    # table id -> (known group names, group column, bar label column)
    layouts = {
        1604: (("NC", "UC", "RW", "CC"), "Coherency", "Xfer"),
        1704: (("Read", "Write"), "Transaction", "Type"),
    }
    if table_id not in layouts:
        # Without this guard the function reached its return statement with
        # the result name unbound and failed with an UnboundLocalError.
        raise ValueError(
            "Table id {}. Cannot build a multi bar chart.".format(table_id)
        )

    groups, group_col, label_col = layouts[table_id]
    nested_bar = {group: {} for group in groups}
    for _, row in display_df.iterrows():
        nested_bar[row[group_col]][row[label_col]] = row["Avg"]

    return nested_bar
|
||||
|
||||
|
||||
def discrete_background_color_bins(df, n_bins=5, columns="all"):
|
||||
bounds = [i * (1.0 / n_bins) for i in range(n_bins + 1)]
|
||||
if columns == "all":
|
||||
if "id" in df:
|
||||
@@ -129,11 +142,14 @@ def discrete_background_color_bins(df, n_bins=5, columns="all"):
|
||||
return (styles, html.Div(legend, style={"padding": "5px 0 5px 0"}))
|
||||
|
||||
|
||||
def build_bar_chart(display_df, table_config):
|
||||
####################
|
||||
# GRAPHICAL ELEMENTS
|
||||
####################
|
||||
def build_bar_chart(display_df, table_config, norm_filt):
|
||||
d_figs = []
|
||||
|
||||
# Instr Mix bar chart
|
||||
if table_config["id"] == 1001 or table_config["id"] == 1002:
|
||||
if table_config["id"] in barchart_elements["instr_mix"]:
|
||||
display_df["Count"] = [
|
||||
x.astype(int) if x != "" else int(0) for x in display_df["Count"]
|
||||
]
|
||||
@@ -150,8 +166,59 @@ def build_bar_chart(display_df, table_config):
|
||||
)
|
||||
)
|
||||
|
||||
# Multi bar chart
|
||||
elif table_config["id"] in barchart_elements["multi_bar"]:
|
||||
display_df["Avg"] = [
|
||||
x.astype(int) if x != "" else int(0) for x in display_df["Avg"]
|
||||
]
|
||||
df_unit = display_df["Unit"][0]
|
||||
nested_bar = multi_bar_chart(table_config["id"], display_df)
|
||||
# generate chart for each coherency
|
||||
for group, metric in nested_bar.items():
|
||||
d_figs.append(
|
||||
px.bar(
|
||||
title=group,
|
||||
x=metric.values(),
|
||||
y=metric.keys(),
|
||||
labels={"x": df_unit, "y": ""},
|
||||
text=metric.values(),
|
||||
orientation="h",
|
||||
height=200,
|
||||
)
|
||||
.update_xaxes(showgrid=False, rangemode="nonnegative")
|
||||
.update_yaxes(showgrid=False)
|
||||
.update_layout(title_x=0.5)
|
||||
)
|
||||
# L2 Cache per channel
|
||||
elif table_config["id"] in barchart_elements["l2_cache_per_chan"]:
|
||||
nested_bar = {}
|
||||
channels = []
|
||||
for colName, colData in display_df.items():
|
||||
if colName == "Channel":
|
||||
channels = list(colData.values)
|
||||
else:
|
||||
display_df[colName] = [
|
||||
x.astype(float) if x != "" and x != None else float(0)
|
||||
for x in display_df[colName]
|
||||
]
|
||||
nested_bar[colName] = list(display_df[colName])
|
||||
for group, metric in nested_bar.items():
|
||||
d_figs.append(
|
||||
px.bar(
|
||||
title=group[0 : group.rfind("(")],
|
||||
x=channels,
|
||||
y=metric,
|
||||
labels={
|
||||
"x": "Channel",
|
||||
"y": group[group.rfind("(") + 1 : len(group) - 1].replace(
|
||||
"per", norm_filt
|
||||
),
|
||||
},
|
||||
).update_yaxes(rangemode="nonnegative")
|
||||
)
|
||||
|
||||
# Speed-of-light bar chart
|
||||
else:
|
||||
elif table_config["id"] in barchart_elements["sol"]:
|
||||
display_df["Value"] = [
|
||||
x.astype(float) if x != "" else float(0) for x in display_df["Value"]
|
||||
]
|
||||
@@ -194,6 +261,13 @@ def build_bar_chart(display_df, table_config):
|
||||
orientation="h",
|
||||
).update_xaxes(range=[0, 110])
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"ERROR: Table id {}. Cannot determine barchart type.".format(
|
||||
table_config["id"]
|
||||
)
|
||||
)
|
||||
sys.exit(-1)
|
||||
|
||||
# update layout for each of the charts
|
||||
for fig in d_figs:
|
||||
@@ -343,13 +417,13 @@ def build_layout(
|
||||
def generate_from_filter(
|
||||
disp_filt, kernel_filter, gcd_filter, norm_filt, div_children
|
||||
):
|
||||
if verbose <= 1:
|
||||
if verbose >= 1:
|
||||
print("normalization is ", norm_filt)
|
||||
|
||||
base_data = initialize_run(args, norm_filt) # Re-initalize everything
|
||||
panel_configs = copy.deepcopy(archConfigs.panel_configs)
|
||||
# Generate original raw df
|
||||
base_data[base_run].raw_pmc = file_io.create_df_pmc(path_to_dir)
|
||||
base_data[base_run].raw_pmc = file_io.create_df_pmc(path_to_dir, verbose)
|
||||
if verbose >= 1:
|
||||
print("disp-filter is ", disp_filt)
|
||||
print("kernel-filter is ", kernel_filter)
|
||||
@@ -432,12 +506,39 @@ def build_layout(
|
||||
|
||||
# Determine chart type:
|
||||
# a) Barchart
|
||||
if table_config["id"] in barchart_elements:
|
||||
d_figs = build_bar_chart(display_df, table_config)
|
||||
for fig in d_figs:
|
||||
if table_config["id"] in [
|
||||
x for i in barchart_elements.values() for x in i
|
||||
]:
|
||||
d_figs = build_bar_chart(display_df, table_config, norm_filt)
|
||||
# Smaller formatting if barchart yields several graphs
|
||||
if (
|
||||
len(d_figs) > 2
|
||||
and not table_config["id"]
|
||||
in barchart_elements["l2_cache_per_chan"]
|
||||
):
|
||||
temp_obj = []
|
||||
for fig in d_figs:
|
||||
temp_obj.append(
|
||||
html.Div(
|
||||
className="float-child",
|
||||
children=[
|
||||
dcc.Graph(
|
||||
figure=fig, style={"margin": "2%"}
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
content.append(
|
||||
dcc.Graph(figure=fig, style={"margin": "2%"})
|
||||
html.Div(
|
||||
className="float-container", children=temp_obj
|
||||
)
|
||||
)
|
||||
# Normal formatting for 2 or fewer graphs, or for L2 cache per-channel charts
|
||||
else:
|
||||
for fig in d_figs:
|
||||
content.append(
|
||||
dcc.Graph(figure=fig, style={"margin": "2%"})
|
||||
)
|
||||
# B) Tablechart
|
||||
else:
|
||||
d_figs = build_table_chart(
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,26 +10,26 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
from dash import html, dash_table, dcc
|
||||
from dash import html, dcc
|
||||
import dash_bootstrap_components as dbc
|
||||
from matplotlib.style import available
|
||||
|
||||
from omniperf_analyze.utils import schema
|
||||
|
||||
avail_normalizations = ["per_wave", "per_cycle", "per_second", "per_kernel"]
|
||||
|
||||
|
||||
# List all the unique column values for desired column in df, 'target_col'
|
||||
def list_unique(orig_list, is_numeric):
|
||||
list_set = set(orig_list)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,20 +10,21 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
from omniperf_analyze.utils import roofline_calc
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
from dash import html, dash_table
|
||||
|
||||
@@ -36,91 +39,75 @@ def to_int(a):
|
||||
return int(a)
|
||||
|
||||
|
||||
def generate_plots(roof_info, ai_data, verbose, fig=None):
|
||||
def generate_plots(roof_info, ai_data, mem_level, is_standalone, verbose, fig=None):
|
||||
if fig is None:
|
||||
fig = go.Figure()
|
||||
line_data = roofline_calc.empirical_roof(roof_info)
|
||||
plotMode = "lines+text" if is_standalone else "lines"
|
||||
line_data = roofline_calc.empirical_roof(roof_info, mem_level, verbose)
|
||||
print("Line data:\n", line_data)
|
||||
|
||||
#######################
|
||||
# Plot BW Lines
|
||||
#######################
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["hbm"][0],
|
||||
y=line_data["hbm"][1],
|
||||
name="HBM-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GB/s".format(to_int(line_data["hbm"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["hbm"][2])),
|
||||
],
|
||||
if mem_level == "ALL":
|
||||
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
|
||||
else:
|
||||
cacheHierarchy = mem_level
|
||||
|
||||
for cacheLevel in cacheHierarchy:
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data[cacheLevel.lower()][0],
|
||||
y=line_data[cacheLevel.lower()][1],
|
||||
name="{}-{}".format(cacheLevel, roof_info["dtype"]),
|
||||
mode=plotMode,
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GB/s".format(to_int(line_data[cacheLevel.lower()][2])),
|
||||
None
|
||||
if is_standalone
|
||||
else "{} GB/s".format(to_int(line_data[cacheLevel.lower()][2])),
|
||||
],
|
||||
textposition="top right",
|
||||
)
|
||||
)
|
||||
)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["l2"][0],
|
||||
y=line_data["l2"][1],
|
||||
name="L2-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GB/s".format(to_int(line_data["l2"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["l2"][2])),
|
||||
],
|
||||
)
|
||||
)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["l1"][0],
|
||||
y=line_data["l1"][1],
|
||||
name="L1-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GB/s".format(to_int(line_data["l1"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["l1"][2])),
|
||||
],
|
||||
)
|
||||
)
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["lds"][0],
|
||||
y=line_data["lds"][1],
|
||||
name="LDS-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GB/s".format(to_int(line_data["lds"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["lds"][2])),
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
if roof_info["dtype"] != "FP16" and roof_info["dtype"] != "I8":
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["valu"][0],
|
||||
y=line_data["valu"][1],
|
||||
name="Peak VALU-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
mode=plotMode,
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GFLOP/s".format(to_int(line_data["valu"][2])),
|
||||
None
|
||||
if is_standalone
|
||||
else "{} GFLOP/s".format(to_int(line_data["valu"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["valu"][2])),
|
||||
],
|
||||
textposition="top left",
|
||||
)
|
||||
)
|
||||
|
||||
if roof_info["dtype"] == "FP16":
|
||||
pos = "bottom left"
|
||||
else:
|
||||
pos = "top left"
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=line_data["mfma"][0],
|
||||
y=line_data["mfma"][1],
|
||||
name="Peak MFMA-{}".format(roof_info["dtype"]),
|
||||
mode="lines",
|
||||
mode=plotMode,
|
||||
hovertemplate="<b>%{text}</b>",
|
||||
text=[
|
||||
"{} GFLOP/s".format(to_int(line_data["mfma"][2])),
|
||||
None
|
||||
if is_standalone
|
||||
else "{} GFLOP/s".format(to_int(line_data["mfma"][2])),
|
||||
"{} GFLOP/s".format(to_int(line_data["mfma"][2])),
|
||||
],
|
||||
textposition=pos,
|
||||
)
|
||||
)
|
||||
#######################
|
||||
@@ -164,56 +151,86 @@ def generate_plots(roof_info, ai_data, verbose, fig=None):
|
||||
return fig
|
||||
|
||||
|
||||
def get_roofline(path_to_dir, ret_df, verbose):
|
||||
def get_roofline(
|
||||
path_to_dir,
|
||||
ret_df,
|
||||
verbose,
|
||||
dev_id=None,
|
||||
sort_type="kernels",
|
||||
mem_level="ALL",
|
||||
is_standalone=False,
|
||||
):
|
||||
# Roofline settings
|
||||
fp32_details = {
|
||||
"path": path_to_dir,
|
||||
"sort": "kernels",
|
||||
"sort": sort_type,
|
||||
"device": 0,
|
||||
"dtype": "FP32",
|
||||
}
|
||||
fp16_details = {
|
||||
"path": path_to_dir,
|
||||
"sort": "kernels",
|
||||
"sort": sort_type,
|
||||
"device": 0,
|
||||
"dtype": "FP16",
|
||||
}
|
||||
int8_details = {"path": path_to_dir, "sort": "kernels", "device": 0, "dtype": "I8"}
|
||||
int8_details = {"path": path_to_dir, "sort": sort_type, "device": 0, "dtype": "I8"}
|
||||
|
||||
# Generate roofline plots
|
||||
print("Path: ", path_to_dir)
|
||||
ai_data = roofline_calc.plot_application("kernels", ret_df, verbose)
|
||||
ai_data = roofline_calc.plot_application(sort_type, ret_df, verbose)
|
||||
if verbose >= 1:
|
||||
# print AI data for each mem level
|
||||
print("AI at each mem level")
|
||||
for i in ai_data:
|
||||
print(i, "->", ai_data[i])
|
||||
print("\n")
|
||||
|
||||
fp32_fig = generate_plots(fp32_details, ai_data, verbose)
|
||||
fp16_fig = generate_plots(fp16_details, ai_data, verbose)
|
||||
ml_combo_fig = generate_plots(int8_details, ai_data, verbose, fp16_fig)
|
||||
|
||||
return html.Section(
|
||||
id="roofline",
|
||||
children=[
|
||||
html.Div(
|
||||
className="float-container",
|
||||
children=[
|
||||
html.Div(
|
||||
className="float-child",
|
||||
children=[
|
||||
html.H3(children="Empirical Roofline Analysis (FP32/FP64)"),
|
||||
dcc.Graph(figure=fp32_fig),
|
||||
],
|
||||
),
|
||||
html.Div(
|
||||
className="float-child",
|
||||
children=[
|
||||
html.H3(children="Empirical Roofline Analysis (FP16/INT8)"),
|
||||
dcc.Graph(figure=ml_combo_fig),
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
],
|
||||
fp32_fig = generate_plots(fp32_details, ai_data, mem_level, is_standalone, verbose)
|
||||
fp16_fig = generate_plots(fp16_details, ai_data, mem_level, is_standalone, verbose)
|
||||
ml_combo_fig = generate_plots(
|
||||
int8_details, ai_data, mem_level, is_standalone, verbose, fp16_fig
|
||||
)
|
||||
|
||||
if is_standalone:
|
||||
dev_id = "ALL" if dev_id == -1 else str(dev_id)
|
||||
|
||||
fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id))
|
||||
ml_combo_fig.write_image(
|
||||
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
|
||||
)
|
||||
time.sleep(1)
|
||||
# Re-save to remove loading MathJax pop up
|
||||
fp32_fig.write_image(path_to_dir + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id))
|
||||
ml_combo_fig.write_image(
|
||||
path_to_dir + "/empirRoof_gpu-{}_fp8_fp16.pdf".format(dev_id)
|
||||
)
|
||||
print("Empirical Roofline PDFs saved!")
|
||||
else:
|
||||
return html.Section(
|
||||
id="roofline",
|
||||
children=[
|
||||
html.Div(
|
||||
className="float-container",
|
||||
children=[
|
||||
html.Div(
|
||||
className="float-child",
|
||||
children=[
|
||||
html.H3(
|
||||
children="Empirical Roofline Analysis (FP32/FP64)"
|
||||
),
|
||||
dcc.Graph(figure=fp32_fig),
|
||||
],
|
||||
),
|
||||
html.Div(
|
||||
className="float-child",
|
||||
children=[
|
||||
html.H3(
|
||||
children="Empirical Roofline Analysis (FP16/INT8)"
|
||||
),
|
||||
dcc.Graph(figure=ml_combo_fig),
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import ast
|
||||
import sys
|
||||
@@ -346,7 +348,6 @@ def build_dfs(archConfigs, filter_metrics):
|
||||
|
||||
i = 0
|
||||
for key, entries in data_cofig["metric"].items():
|
||||
|
||||
data_source_idx = (
|
||||
str(data_cofig["id"] // 100)
|
||||
+ "."
|
||||
@@ -365,7 +366,6 @@ def build_dfs(archConfigs, filter_metrics):
|
||||
# the whole IP block in filter
|
||||
(str(panel_id // 100) in filter_metrics)
|
||||
):
|
||||
|
||||
values.append(metric_idx)
|
||||
values.append(key)
|
||||
for k, v in entries.items():
|
||||
@@ -401,7 +401,6 @@ def build_dfs(archConfigs, filter_metrics):
|
||||
or (data_source_idx == "0") # no filter
|
||||
or (data_source_idx in filter_metrics)
|
||||
):
|
||||
|
||||
if (
|
||||
"columnwise" in data_cofig
|
||||
and data_cofig["columnwise"] == True
|
||||
@@ -651,7 +650,7 @@ def apply_filters(workload, is_gui, debug):
|
||||
# NB: support ignoring the 1st n dispatched execution by '> n'
|
||||
# The better way may be parsing python slice string
|
||||
for d in workload.filter_dispatch_ids:
|
||||
if int(d) > len(ret_df) - 2: # subtract 2 bc of the two header rows
|
||||
if int(d) >= len(ret_df): # subtract 2 bc of the two header rows
|
||||
print("{} is an invalid dispatch id.".format(d))
|
||||
sys.exit(1)
|
||||
if ">" in workload.filter_dispatch_ids[0]:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,31 +10,19 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
from linecache import cache
|
||||
import subprocess
|
||||
from operator import sub
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.pyplot import get, text
|
||||
from math import log, pi, sqrt
|
||||
import pandas as pd
|
||||
import pylab
|
||||
|
||||
from dataclasses import dataclass
|
||||
import csv
|
||||
@@ -54,6 +44,7 @@ FONT_WEIGHT = "bold"
|
||||
|
||||
SUPPORTED_SOC = ["mi200"]
|
||||
|
||||
|
||||
################################################
|
||||
# Helper funcs
|
||||
################################################
|
||||
@@ -68,6 +59,7 @@ class AI_Data:
|
||||
mfma_flops_bf16: float
|
||||
mfma_flops_f32: float
|
||||
mfma_flops_f64: float
|
||||
mfma_iops_i8: float
|
||||
lds_data: float
|
||||
L1cache_data: float
|
||||
L2cache_data: float
|
||||
@@ -100,11 +92,14 @@ def get_color(catagory):
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Plot BW at each cache level
|
||||
# -------------------------------------------------------------------------------------
|
||||
def plot_roof(roof_details, roof_data):
|
||||
|
||||
def plot_roof(roof_details, roof_data, mem_level, verbose):
|
||||
# TODO: This is where filtering by memory level will need to occur for standalone
|
||||
graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []}
|
||||
|
||||
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
|
||||
if mem_level == "ALL":
|
||||
cacheHierarchy = ["HBM", "L2", "L1", "LDS"]
|
||||
else:
|
||||
cacheHierarchy = mem_level
|
||||
|
||||
x1 = y1 = x2 = y2 = -1
|
||||
x1_mfma = y1_mfma = x2_mfma = y2_mfma = -1
|
||||
@@ -116,7 +111,8 @@ def plot_roof(roof_details, roof_data):
|
||||
)
|
||||
for i in range(0, len(cacheHierarchy)):
|
||||
# Plot BW line
|
||||
# print("Current cache level is ", cacheHierarchy[i])
|
||||
if verbose >= 3:
|
||||
print("Current cache level is ", cacheHierarchy[i])
|
||||
curr_bw = cacheHierarchy[i] + "Bw"
|
||||
peakBw = float(roof_data[curr_bw][roof_details["device"]])
|
||||
|
||||
@@ -142,8 +138,9 @@ def plot_roof(roof_details, roof_data):
|
||||
y2_mfma = peakMFMA
|
||||
|
||||
# These are the points to use:
|
||||
# print("x = [{}, {}]".format(x1,x2_mfma))
|
||||
# print("y = [{}, {}]".format(y1, y2_mfma))
|
||||
if verbose >= 3:
|
||||
print("x = [{}, {}]".format(x1, x2_mfma))
|
||||
print("y = [{}, {}]".format(y1, y2_mfma))
|
||||
|
||||
graphPoints[cacheHierarchy[i].lower()].append([x1, x2_mfma])
|
||||
graphPoints[cacheHierarchy[i].lower()].append([y1, y2_mfma])
|
||||
@@ -159,7 +156,8 @@ def plot_roof(roof_details, roof_data):
|
||||
if x2 < x0:
|
||||
x0 = x2
|
||||
|
||||
# print("FMA ROOF [{}, {}], [{},{}]".format(x0, XMAX, peakOps, peakOps))
|
||||
if verbose >= 3:
|
||||
print("FMA ROOF [{}, {}], [{},{}]".format(x0, XMAX, peakOps, peakOps))
|
||||
graphPoints["valu"].append([x0, XMAX])
|
||||
graphPoints["valu"].append([peakOps, peakOps])
|
||||
graphPoints["valu"].append(peakOps)
|
||||
@@ -172,7 +170,8 @@ def plot_roof(roof_details, roof_data):
|
||||
if x2_mfma < x0_mfma:
|
||||
x0_mfma = x2_mfma
|
||||
|
||||
# print("MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA))
|
||||
if verbose >= 3:
|
||||
print("MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA))
|
||||
graphPoints["mfma"].append([x0_mfma, XMAX])
|
||||
graphPoints["mfma"].append([peakMFMA, peakMFMA])
|
||||
graphPoints["mfma"].append(peakMFMA)
|
||||
@@ -185,7 +184,6 @@ def plot_roof(roof_details, roof_data):
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Calculate relevant metrics for ai calculation
|
||||
def plot_application(sortType, ret_df, verbose):
|
||||
|
||||
df = ret_df["pmc_perf"]
|
||||
# Sort by top kernels or top dispatches?
|
||||
df = df.sort_values(by=["KernelName"])
|
||||
@@ -231,6 +229,7 @@ def plot_application(sortType, ret_df, verbose):
|
||||
mfma_flops_bf16 / calls,
|
||||
mfma_flops_f32 / calls,
|
||||
mfma_flops_f64 / calls,
|
||||
mfma_iops_i8 / calls,
|
||||
lds_data / calls,
|
||||
L1cache_data / calls,
|
||||
L2cache_data / calls,
|
||||
@@ -474,11 +473,7 @@ def plot_application(sortType, ret_df, verbose):
|
||||
return intensityPoints
|
||||
|
||||
|
||||
def empirical_roof(roof_info):
|
||||
|
||||
if roof_info["sort"] != "kernels" and roof_info["sort"] != "dispatches":
|
||||
sys.exit("Invalid sort. Must be either 'kernels' or 'dispatches'")
|
||||
|
||||
def empirical_roof(roof_info, mem_level, verbose):
|
||||
roofPath = roof_info["path"] + "/roofline.csv"
|
||||
# -----------------------------------------------------
|
||||
# Initialize roofline data dictionary from roofline.csv
|
||||
@@ -517,7 +512,7 @@ def empirical_roof(roof_info):
|
||||
# ------------------
|
||||
# Generate Roofline
|
||||
# ------------------
|
||||
results = plot_roof(roof_info, roof_data)
|
||||
results = plot_roof(roof_info, roof_data, mem_level, verbose)
|
||||
# for key in results:
|
||||
# print(key, "->", results[key])
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
#
|
||||
# Define all common data storage classes,
|
||||
@@ -33,7 +35,6 @@ from collections import OrderedDict
|
||||
|
||||
@dataclass
|
||||
class ArchConfig:
|
||||
|
||||
# [id: panel_config] pairs
|
||||
panel_configs: OrderedDict = field(default=dict)
|
||||
|
||||
@@ -84,24 +85,24 @@ supported_field = [
|
||||
"Alias",
|
||||
# Special keywords for L2 channel
|
||||
"Channel",
|
||||
"Hit Rate",
|
||||
"Req",
|
||||
"Read Req",
|
||||
"Write Req",
|
||||
"AtomicReq",
|
||||
"EA Read Req",
|
||||
"EA Write Req",
|
||||
"EA AtomicReq",
|
||||
"EA Read Lat - cycles",
|
||||
"EA Write Lat - cycles",
|
||||
"EA Atomic Lat - cycles",
|
||||
"EA Read Stall - IO",
|
||||
"EA Read Stall - GMI",
|
||||
"EA Read Stall - DRAM",
|
||||
"EA Write Stall - IO",
|
||||
"EA Write Stall - GMI",
|
||||
"EA Write Stall - DRAM",
|
||||
"EA Write Stall - Starve",
|
||||
"L2 Cache Hit Rate (%)",
|
||||
"Requests (Requests)",
|
||||
"L1-L2 Read (Requests)",
|
||||
"L1-L2 Write (Requests)",
|
||||
"L1-L2 Atomic (Requests)",
|
||||
"L2-EA Read (Requests)",
|
||||
"L2-EA Write (Requests)",
|
||||
"L2-EA Atomic (Requests)",
|
||||
"L2-EA Read Latency (Cycles)",
|
||||
"L2-EA Write Latency (Cycles)",
|
||||
"L2-EA Atomic Latency (Cycles)",
|
||||
"L2-EA Read Stall - IO (Cycles per)",
|
||||
"L2-EA Read Stall - GMI (Cycles per)",
|
||||
"L2-EA Read Stall - DRAM (Cycles per)",
|
||||
"L2-EA Write Stall - IO (Cycles per)",
|
||||
"L2-EA Write Stall - GMI (Cycles per)",
|
||||
"L2-EA Write Stall - DRAM (Cycles per)",
|
||||
"L2-EA Write Stall - Starve (Cycles per)",
|
||||
]
|
||||
|
||||
# The prefix of raw pmc_perf.csv
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import pandas as pd
|
||||
from tabulate import tabulate
|
||||
@@ -26,6 +28,7 @@ from tabulate import tabulate
|
||||
from omniperf_analyze.utils import schema, parser
|
||||
|
||||
hidden_columns = ["Tips", "coll_level"]
|
||||
hidden_sections = [1900, 2000]
|
||||
|
||||
|
||||
def string_multiple_lines(source, width, max_rows):
|
||||
@@ -44,19 +47,20 @@ def string_multiple_lines(source, width, max_rows):
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
|
||||
def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols, verbose):
|
||||
"""
|
||||
Show all panels with their data in plain text mode.
|
||||
"""
|
||||
comparable_columns = parser.build_comparable_columns(time_unit)
|
||||
|
||||
for panel_id, panel in archConfigs.panel_configs.items():
|
||||
|
||||
# Skip panels that don't support baseline comparison
|
||||
if panel_id in hidden_sections:
|
||||
continue
|
||||
ss = "" # store content of all data_source from one pannel
|
||||
|
||||
for data_source in panel["data source"]:
|
||||
for type, table_config in data_source.items():
|
||||
|
||||
# take the 1st run as baseline
|
||||
base_run, base_data = next(iter(runs.items()))
|
||||
base_df = base_data.dfs[table_config["id"]]
|
||||
@@ -72,11 +76,9 @@ def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
|
||||
)
|
||||
or (type == "raw_csv_table")
|
||||
):
|
||||
|
||||
if header in hidden_columns:
|
||||
pass
|
||||
elif header not in comparable_columns:
|
||||
|
||||
if (
|
||||
type == "raw_csv_table"
|
||||
and table_config["source"] == "pmc_kernel_top.csv"
|
||||
@@ -102,18 +104,27 @@ def show_all(runs, archConfigs, output, decimal, time_unit, selected_cols):
|
||||
):
|
||||
if run != base_run:
|
||||
# calc percentage over the baseline
|
||||
base_df[header] = [
|
||||
float(x) if x != "" else float(0)
|
||||
for x in base_df[header]
|
||||
]
|
||||
cur_df[header] = [
|
||||
float(x) if x != "" else float(0)
|
||||
for x in cur_df[header]
|
||||
]
|
||||
t_df = (
|
||||
pd.concat(
|
||||
[
|
||||
base_df[header].astype("double"),
|
||||
cur_df[header].astype("double"),
|
||||
base_df[header],
|
||||
cur_df[header],
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
.pct_change(axis="columns")
|
||||
.iloc[:, 1]
|
||||
)
|
||||
# print("---------", header, t_df)
|
||||
if verbose >= 2:
|
||||
print("---------", header, t_df)
|
||||
|
||||
# show value + percentage
|
||||
# TODO: better alignment
|
||||
|
||||
+69
-27
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,20 +10,22 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
from common import (
|
||||
OMNIPERF_HOME,
|
||||
PROG,
|
||||
@@ -31,7 +35,6 @@ from common import getVersion, getVersionDisplay
|
||||
|
||||
|
||||
def parse(my_parser):
|
||||
|
||||
# versioning info
|
||||
vData = getVersion()
|
||||
versionString = getVersionDisplay(vData["version"], vData["sha"], vData["mode"])
|
||||
@@ -116,6 +119,49 @@ def parse(my_parser):
|
||||
default=None,
|
||||
help="\t\t\tKernel filtering.",
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
["which", "rocscope"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
|
||||
)
|
||||
if result.returncode == 0:
|
||||
profile_group.add_argument(
|
||||
"-l",
|
||||
"--i-feel-lucky",
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
dest="lucky",
|
||||
help="\t\t\tProfile only the most time consuming kernels.",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"-r",
|
||||
"--use-rocscope",
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
dest="use_rocscope",
|
||||
help="\t\t\tUse rocscope for profiling",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"-s",
|
||||
"--kernel-summaries",
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
dest="summaries",
|
||||
help="\t\t\tCreate kernel summaries.",
|
||||
)
|
||||
else:
|
||||
profile_group.add_argument(
|
||||
"--i-feel-lucky", default=False, dest="lucky", help=argparse.SUPPRESS
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--use-rocscope", default=False, dest="use_rocscope", help=argparse.SUPPRESS
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--kernel-summaries", default=False, dest="summaries", help=argparse.SUPPRESS
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
"-b",
|
||||
"--ipblocks",
|
||||
@@ -166,6 +212,7 @@ def parse(my_parser):
|
||||
metavar="",
|
||||
type=str,
|
||||
default="kernels",
|
||||
choices=["kernels", "dispatches"],
|
||||
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
@@ -174,19 +221,11 @@ def parse(my_parser):
|
||||
required=False,
|
||||
choices=["HBM", "L2", "vL1D", "LDS"],
|
||||
metavar="",
|
||||
nargs="+",
|
||||
type=str,
|
||||
default="ALL",
|
||||
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"--axes",
|
||||
default=None,
|
||||
type=float,
|
||||
required=False,
|
||||
nargs="+",
|
||||
metavar="",
|
||||
help="\t\t\tDesired axis values for graph. As follows:\n\t\t\t xmin xmax ymin ymax",
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"--device",
|
||||
metavar="",
|
||||
@@ -348,7 +387,9 @@ def parse(my_parser):
|
||||
help="\t\tSpecify the output file.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"--list-kernels", action="store_true", help="\t\tList kernels."
|
||||
"--list-kernels",
|
||||
action="store_true",
|
||||
help="\t\tList kernels. Top 10 kernels sorted by duration (descending order).",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"--list-metrics",
|
||||
@@ -358,35 +399,36 @@ def parse(my_parser):
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"-b",
|
||||
"--filter-metrics",
|
||||
"--metric",
|
||||
dest="filter_metrics",
|
||||
metavar="",
|
||||
nargs="+",
|
||||
help="\t\tSpecify IP block/metric Ids from --list-metrics.",
|
||||
help="\t\tSpecify IP block/metric id(s) from --list-metrics for filtering.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"-k",
|
||||
"--filter-kernels",
|
||||
"--kernel",
|
||||
metavar="",
|
||||
type=int,
|
||||
dest="gpu_kernel",
|
||||
nargs="+",
|
||||
action="append",
|
||||
help="\t\tSpecify kernel id from --list-kernels.",
|
||||
help="\t\tSpecify kernel id(s) from --list-kernels for filtering.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"--filter-dispatch-ids",
|
||||
"--dispatch",
|
||||
dest="gpu_dispatch_id",
|
||||
metavar="",
|
||||
nargs="+",
|
||||
action="append",
|
||||
help="\t\tSpecify dispatch IDs.",
|
||||
help="\t\tSpecify dispatch id(s) for filtering.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"--filter-gpu-ids",
|
||||
"--gpu-id",
|
||||
dest="gpu_id",
|
||||
metavar="",
|
||||
nargs="+",
|
||||
help="\t\tSpecify GPU IDs.",
|
||||
help="\t\tSpecify GPU id(s) for filtering.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"-n",
|
||||
|
||||
@@ -0,0 +1,737 @@
|
||||
<gfx908>
|
||||
# CPC counters
|
||||
<metric
|
||||
name="CPC_ME1_BUSY_FOR_PACKET_DECODE" block=CPC event=13 descr="Me1 busy for packet decode."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_UTCL1_STALL_ON_TRANSLATION" block=CPC event=24 descr="One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_STAT_BUSY" block=CPC event=25 descr="CPC Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_STAT_IDLE" block=CPC event=26 descr="CPC Idle."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_STAT_STALL" block=CPC event=27 descr="CPC Stalled."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_TCIU_BUSY" block=CPC event=28 descr="CPC TCIU interface Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_TCIU_IDLE" block=CPC event=29 descr="CPC TCIU interface Idle."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_UTCL2IU_BUSY" block=CPC event=30 descr="CPC UTCL2 interface Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_UTCL2IU_IDLE" block=CPC event=31 descr="CPC UTCL2 interface Idle."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_CPC_UTCL2IU_STALL" block=CPC event=32 descr="CPC UTCL2 interface Stalled waiting on Free, Tags or Translation."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPC_ME1_DC0_SPI_BUSY" block=CPC event=33 descr="CPC Me1 Processor Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CMP_UTCL1_STALL_ON_TRANSLATION" block=CPF event=20 descr="One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING response."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_STAT_BUSY" block=CPF event=23 descr="CPF Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_STAT_IDLE" block=CPF event=24 descr="CPF Idle."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_STAT_STALL" block=CPF event=25 descr="CPF Stalled."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_TCIU_BUSY" block=CPF event=26 descr="CPF TCIU interface Busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_TCIU_IDLE" block=CPF event=27 descr="CPF TCIU interface Idle."
|
||||
></metric>
|
||||
<metric
|
||||
name="CPF_CPF_TCIU_STALL" block=CPF event=28 descr="CPF TCIU interface Stalled waiting on Free, Tags."
|
||||
></metric>
|
||||
# GRBM counters
|
||||
<metric
|
||||
name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_CP_BUSY" block=GRBM event=3 descr="Any of the Command Processor (CPG/CPC/CPF) blocks are busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_SPI_BUSY" block=GRBM event=11 descr="Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_TA_BUSY" block=GRBM event=13 descr="Any of the Texture Pipes (TA) are busy in the shader engine(s)."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_TC_BUSY" block=GRBM event=28 descr="Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_CPC_BUSY" block=GRBM event=30 descr="The Command Processor Compute (CPC) is busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_CPF_BUSY" block=GRBM event=31 descr="The Command Processor Fetchers (CPF) is busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_UTCL2_BUSY" block=GRBM event=34 descr="The Unified Translation Cache Level-2 (UTCL2) block is busy."
|
||||
></metric>
|
||||
<metric
|
||||
name="GRBM_EA_BUSY" block=GRBM event=35 descr="The Efficiency Arbiter (EA) block is busy."
|
||||
></metric>
|
||||
# SPI counters
|
||||
<metric
|
||||
name="SPI_CSN_WINDOW_VALID" block=SPI event=47 descr="Clock count enabled by perfcounter_start event. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_CSN_BUSY" block=SPI event=48 descr="Number of clocks with outstanding waves (SPI or SH). Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_CSN_NUM_THREADGROUPS" block=SPI event=49 descr="Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_CSN_WAVE" block=SPI event=52 descr="Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_REQ_NO_ALLOC" block=SPI event=79 descr="Arb cycles with requests but no allocation. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_REQ_NO_ALLOC_CSN" block=SPI event=85 descr="Arb cycles with CSn req and no CSn alloc. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_RES_STALL_CSN" block=SPI event=91 descr="Arb cycles with CSn req and no CSn fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_TMP_STALL_CSN" block=SPI event=97 descr="Cycles where csn wants to req but does not fit in temp space."
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_WAVE_SIMD_FULL_CSN" block=SPI event=103 descr="Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_VGPR_SIMD_FULL_CSN" block=SPI event=109 descr="Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_SGPR_SIMD_FULL_CSN" block=SPI event=115 descr="Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_LDS_CU_FULL_CSN" block=SPI event=120 descr="Sum of CU where LDS can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_BAR_CU_FULL_CSN" block=SPI event=123 descr="Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_BULKY_CU_FULL_CSN" block=SPI event=125 descr="Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_TGLIM_CU_FULL_CSN" block=SPI event=127 descr="Cycles where csn wants to req but all CU are at tg_limit"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_RA_WVLIM_STALL_CSN" block=SPI event=133 descr="Number of clocks csn is stalled due to WAVE LIMIT."
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_SWC_CSC_WR" block=SPI event=189 descr="Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
<metric
|
||||
name="SPI_VWC_CSC_WR" block=SPI event=195 descr="Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"
|
||||
></metric>
|
||||
# SQ counters
|
||||
<metric
|
||||
name="SQ_ACCUM_PREV" block=SQ event=1 descr="For counter N, increment by the value of counter N-1. Only accumulates once every 4 cycles."
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_CYCLES" block=SQ event=2 descr="Clock cycles. (nondeterministic, per-simd, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_BUSY_CYCLES" block=SQ event=3 descr="Clock cycles while SQ is reporting that it is busy. (nondeterministic, per-simd, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LEVEL_WAVES" block=SQ event=5 descr="Track the number of waves. Set ACCUM_PREV for the next counter to use this. (level, per-simd, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_EQ_64" block=SQ event=6 descr="Count number of waves with exactly 64 active threads sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_LT_64" block=SQ event=7 descr="Count number of waves with <64 active threads sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_LT_48" block=SQ event=8 descr="Count number of waves with <48 active threads sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_LT_32" block=SQ event=9 descr="Count number of waves sent <32 active threads sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_LT_16" block=SQ event=10 descr="Count number of waves sent <16 active threads sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_BUSY_CU_CYCLES" block=SQ event=13 descr="Count quad-cycles each CU is busy. (nondeterministic, per-simd)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ITEMS" block=SQ event=14 descr="Number of valid items per wave. (per-simd, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS" block=SQ event=25 descr="Number of instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_VALU" block=SQ event=26 descr="Number of VALU instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_MFMA" block=SQ event=27 descr="Number of MFMA instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_VMEM_WR" block=SQ event=28 descr="Number of VMEM write instructions issued (including FLAT). (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_VMEM_RD" block=SQ event=29 descr="Number of VMEM read instructions issued (including FLAT). (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_VMEM" block=SQ event=30 descr="Number of VMEM instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_SALU" block=SQ event=31 descr="Number of SALU instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_SMEM" block=SQ event=32 descr="Number of SMEM instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_FLAT" block=SQ event=33 descr="Number of FLAT instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_FLAT_LDS_ONLY" block=SQ event=34 descr="Number of FLAT instructions issued that read/wrote only from/to LDS (only works if EARLY_TA_DONE is enabled). (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_LDS" block=SQ event=35 descr="Number of LDS instructions issued (including FLAT). (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_GDS" block=SQ event=36 descr="Number of GDS instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_EXP_GDS" block=SQ event=38 descr="Number of EXP and GDS instructions issued, excluding skipped export instructions. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_BRANCH" block=SQ event=39 descr="Number of Branch instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_SENDMSG" block=SQ event=40 descr="Number of Sendmsg instructions issued. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_VSKIPPED" block=SQ event=41 descr="Number of vector instructions skipped. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_LEVEL_VMEM" block=SQ event=42 descr="Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by INSTS_VMEM for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_LEVEL_SMEM" block=SQ event=43 descr="Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average latency per smem request. Falls slightly short of total request latency because some fetches are divided into two requests that may finish at different times and this counter collects the average latency of the two. (per-simd, level, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_LEVEL_LDS" block=SQ event=44 descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. (per-simd, level, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVE_CYCLES" block=SQ event=47 descr="Number of wave-cycles spent by waves in the CUs (per-simd, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAIT_ANY" block=SQ event=58 descr="Number of wave-cycles spent waiting for anything (per-simd, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAIT_INST_ANY" block=SQ event=61 descr="Number of wave-cycles spent waiting for any instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAIT_INST_LDS" block=SQ event=64 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_ANY" block=SQ event=69 descr="Number of cycles each wave is working on an instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_VMEM" block=SQ event=70 descr="Number of cycles the SQ instruction arbiter is working on a VMEM instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_LDS" block=SQ event=71 descr="Number of cycles the SQ instruction arbiter is working on a LDS instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_VALU" block=SQ event=72 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_SCA" block=SQ event=73 descr="Number of cycles the SQ instruction arbiter is working on a SALU or SMEM instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_EXP_GDS" block=SQ event=74 descr="Number of cycles the SQ instruction arbiter is working on an EXPORT or GDS instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_MISC" block=SQ event=75 descr="Number of cycles the SQ instruction aribter is working on a BRANCH or SENDMSG instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACTIVE_INST_FLAT" block=SQ event=76 descr="Number of cycles the SQ instruction arbiter is working on a FLAT instruction. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_CYCLES_VMEM_WR" block=SQ event=77 descr="Number of cycles needed to send addr and cmd data for VMEM write instructions. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_CYCLES_VMEM_RD" block=SQ event=78 descr="Number of cycles needed to send addr and cmd data for VMEM read instructions. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_CYCLES_SMEM" block=SQ event=84 descr="Number of cycles needed to execute scalar memory reads. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INST_CYCLES_SALU" block=SQ event=85 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_THREAD_CYCLES_VALU" block=SQ event=86 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_IFETCH" block=SQ event=88 descr="Number of instruction fetch requests from cache. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_IFETCH_LEVEL" block=SQ event=89 descr="Number of instruction fetch requests from cache. (per-simd, level)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_BANK_CONFLICT" block=SQ event=94 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_ADDR_CONFLICT" block=SQ event=95 descr="Number of cycles LDS is stalled by address conflicts. (emulated,nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_UNALIGNED_STALL" block=SQ event=96 descr="Number of cycles LDS is stalled processing flat unaligned load/store ops. (emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_MEM_VIOLATIONS" block=SQ event=97 descr="Number of threads that have a memory violation in the LDS. (emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_ATOMIC_RETURN" block=SQ event=98 descr="Number of atomic return cycles in LDS. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_LDS_IDX_ACTIVE" block=SQ event=99 descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_ACCUM_PREV_HIRES" block=SQ event=158 descr="For counter N, increment by the value of counter N-1."
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_RESTORED" block=SQ event=159 descr="Count number of context-restored waves sent to SQs. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_WAVES_SAVED" block=SQ event=160 descr="Count number of context-saved waves. (per-simd, emulated, global)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQ_INSTS_SMEM_NORM" block=SQ event=161 descr="Number of SMEM instructions issued normalized to match smem_level (*2 load/store; *2 atomic; *2 memtime; *4 wb/inv). (per-simd, emulated)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_INPUT_VALID_READYB" block=SQ event=260 descr="Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_REQ" block=SQ event=262 descr="Total number of TC requests that were issued by instruction and constant caches. (No-Masking, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_INST_REQ" block=SQ event=263 descr="Number of instruction requests to the TC (No-Masking, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_DATA_READ_REQ" block=SQ event=264 descr="Number of data read requests to the TC (No-Masking, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_DATA_WRITE_REQ" block=SQ event=265 descr="Number of data write requests to the TC (No-Masking, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_DATA_ATOMIC_REQ" block=SQ event=266 descr="Number of data atomic requests to the TC (No-Masking, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_TC_STALL" block=SQ event=267 descr="Valid request stalled TC request interface (no-credits). (No-Masking, nondeterministic, unwindowed)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_ICACHE_REQ" block=SQ event=270 descr="Number of requests. (per-SQ, per-Bank)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_ICACHE_HITS" block=SQ event=271 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_ICACHE_MISSES" block=SQ event=272 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_ICACHE_MISSES_DUPLICATE" block=SQ event=273 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ" block=SQ event=290 descr="Number of requests (post-bank-serialization). (per-SQ, per-Bank)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_HITS" block=SQ event=291 descr="Number of cache hits. (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_MISSES" block=SQ event=292 descr="Number of cache misses, includes uncached requests. (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_MISSES_DUPLICATE" block=SQ event=293 descr="Number of misses that were duplicates (access to a non-resident, miss pending CL). (per-SQ, per-Bank, nondeterministic)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_ATOMIC" block=SQ event=298 descr="Number of atomic requests. (per-SQ, per-Bank)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ_READ_1" block=SQ event=323 descr="Number of constant cache 1 dw read requests. (per-SQ)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ_READ_2" block=SQ event=324 descr="Number of constant cache 2 dw read requests. (per-SQ)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ_READ_4" block=SQ event=325 descr="Number of constant cache 4 dw read requests. (per-SQ)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ_READ_8" block=SQ event=326 descr="Number of constant cache 8 dw read requests. (per-SQ)"
|
||||
></metric>
|
||||
<metric
|
||||
name="SQC_DCACHE_REQ_READ_16" block=SQ event=327 descr="Number of constant cache 16 dw read requests. (per-SQ)"
|
||||
></metric>
|
||||
# TA counters
|
||||
<metric
|
||||
name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_TOTAL_WAVEFRONTS" block=TA event=32 descr="Total number of wavefronts processed by TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_WAVEFRONTS" block=TA event=44 descr="Number of buffer wavefronts processed by TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_READ_WAVEFRONTS" block=TA event=45 descr="Number of buffer read wavefronts processed by TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_WRITE_WAVEFRONTS" block=TA event=46 descr="Number of buffer write wavefronts processed by TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_ATOMIC_WAVEFRONTS" block=TA event=47 descr="Number of buffer atomic wavefronts processed by TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_TOTAL_CYCLES" block=TA event=49 descr="Number of buffer cycles issued to TC."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_COALESCED_READ_CYCLES" block=TA event=52 descr="Number of buffer coalesced read cycles issued to TC."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_BUFFER_COALESCED_WRITE_CYCLES" block=TA event=53 descr="Number of buffer coalesced write cycles issued to TC."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_ADDR_STALLED_BY_TC_CYCLES" block=TA event=54 descr="Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_ADDR_STALLED_BY_TD_CYCLES" block=TA event=55 descr="Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_DATA_STALLED_BY_TC_CYCLES" block=TA event=56 descr="Number of cycles data path stalled by TC. Perf_Windowing not supported for this counter."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_FLAT_WAVEFRONTS" block=TA event=100 descr="Number of flat opcode wavefronts processed by the TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_FLAT_READ_WAVEFRONTS" block=TA event=101 descr="Number of flat opcode reads processed by the TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_FLAT_WRITE_WAVEFRONTS" block=TA event=102 descr="Number of flat opcode writes processed by the TA."
|
||||
></metric>
|
||||
<metric
|
||||
name="TA_FLAT_ATOMIC_WAVEFRONTS" block=TA event=103 descr="Number of flat opcode atomics processed by the TA."
|
||||
></metric>
|
||||
# TCA counters
|
||||
<metric
|
||||
name="TCA_CYCLE" block=TCA event=1 descr="Number of cycles. Not windowable."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCA_BUSY" block=TCA event=2 descr="Number of cycles we have a request pending. Not windowable."
|
||||
></metric>
|
||||
# TCC counters
|
||||
<metric
|
||||
name="TCC_CYCLE" block=TCC event=1 descr="Number of cycles. Not windowable."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_BUSY" block=TCC event=2 descr="Number of cycles we have a request pending. Not windowable."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_REQ" block=TCC event=3 descr="Number of requests of all types. This is measured at the tag block. This may be more than the number of requests arriving at the TCC, but it is a good indication of the total amount of work that needs to be performed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_STREAMING_REQ" block=TCC event=4 descr="Number of streaming requests. This is measured at the tag block."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_NC_REQ" block=TCC event=5 descr="The number of noncoherently cached requests. This is measured at the tag block."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_UC_REQ" block=TCC event=6 descr="The number of uncached requests. This is measured at the tag block."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CC_REQ" block=TCC event=7 descr="The number of coherently cached requests. This is measured at the tag block."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_RW_REQ" block=TCC event=8 descr="The number of RW requests. This is measured at the tag block."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_PROBE" block=TCC event=9 descr="Number of probe requests. Not windowable."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_PROBE_ALL" block=TCC event=10 descr="Number of external probe requests with EA_TCC_preq_all== 1. Not windowable."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_READ" block=TCC event=12 descr="Number of read requests. Compressed reads are included in this, but metadata reads are not included."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_WRITE" block=TCC event=13 descr="Number of write requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_ATOMIC" block=TCC event=14 descr="Number of atomic requests of all types."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_HIT" block=TCC event=17 descr="Number of cache hits."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_MISS" block=TCC event=19 descr="Number of cache misses. UC reads count as misses."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_WRITEBACK" block=TCC event=22 descr="Number of lines written back to main memory. This includes writebacks of dirty lines and uncached write/atomic requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ" block=TCC event=26 descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_64B" block=TCC event=27 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WR_UNCACHED_32B" block=TCC event=29 descr="Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached traffic. Note that CC mtypes can produce uncached requests, and those are included in this. A 64-byte request will be counted as 2"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_STALL" block=TCC event=30 descr="Number of cycles a write request was stalled."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_IO_CREDIT_STALL" block=TCC event=31 descr="Number of cycles an EA write request was stalled because the interface was out of IO credits."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_GMI_CREDIT_STALL" block=TCC event=32 descr="Number of cycles an EA write request was stalled because the interface was out of GMI credits."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_DRAM_CREDIT_STALL" block=TCC event=33 descr="Number of cycles an EA write request was stalled because the interface was out of DRAM credits."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_TOO_MANY_EA_WRREQS_STALL" block=TCC event=34 descr="Number of cycles the TCC could not send an EA write request because it already reached its maximum number of pending EA write requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_LEVEL" block=TCC event=35 descr="The sum of the number of EA write requests in flight. This is primarily meant for measuring average EA write latency. Average write latency = TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_ATOMIC" block=TCC event=36 descr="Number of transactions going over the TC_EA_wrreq interface that are actually atomic requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_ATOMIC_LEVEL" block=TCC event=37 descr="The sum of the number of EA atomics in flight. This is primarily meant for measuring average EA atomic latency. Average atomic latency = TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ" block=TCC event=38 descr="Number of TCC/EA read requests (either 32-byte or 64-byte)"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_32B" block=TCC event=39 descr="Number of 32-byte TCC/EA read requests"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RD_UNCACHED_32B" block=TCC event=40 descr="Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted as 2"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_IO_CREDIT_STALL" block=TCC event=41 descr="Number of cycles there was a stall because the read request interface was out of IO credits. Stalls occur regardless of whether a read needed to be performed or not."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_GMI_CREDIT_STALL" block=TCC event=42 descr="Number of cycles there was a stall because the read request interface was out of GMI credits. Stalls occur regardless of whether a read needed to be performed or not."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_DRAM_CREDIT_STALL" block=TCC event=43 descr="Number of cycles there was a stall because the read request interface was out of DRAM credits. Stalls occur regardless of whether a read needed to be performed or not."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_LEVEL" block=TCC event=44 descr="The sum of the number of TCC/EA read requests in flight. This is primarily meant for measuring average EA read latency. Average read latency = TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_TAG_STALL" block=TCC event=45 descr="Number of cycles the normal request pipeline in the tag was stalled for any reason. Normally, stalls of this nature are measured exactly from one point in the pipeline, but that is not the case for this counter. Probes can stall the pipeline at a variety of places, and there is no single point that can reasonably measure the total stalls accurately."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_NORMAL_WRITEBACK" block=TCC event=68 descr="Number of writebacks due to requests that are not writeback requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_ALL_TC_OP_WB_WRITEBACK" block=TCC event=73 descr="Number of writebacks due to all TC_OP writeback requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_NORMAL_EVICT" block=TCC event=74 descr="Number of evictions due to requests that are not invalidate or probe requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_ALL_TC_OP_INV_EVICT" block=TCC event=80 descr="Number of evictions due to all TC_OP invalidate requests."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_RDREQ_DRAM" block=TCC event=102 descr="Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_EA_WRREQ_DRAM" block=TCC event=103 descr="Number of TCC/EA write requests (either 32-byte or 64-byte) destined for DRAM (MC)."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT184_REQ" block=TCC event=312 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT185_REQ" block=TCC event=313 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT186_REQ" block=TCC event=314 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT187_REQ" block=TCC event=315 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT188_REQ" block=TCC event=316 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT189_REQ" block=TCC event=317 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT190_REQ" block=TCC event=318 descr=""
|
||||
></metric>
|
||||
<metric
|
||||
name="TCC_CLIENT191_REQ" block=TCC event=319 descr=""
|
||||
></metric>
|
||||
# TCP counters
|
||||
<metric
|
||||
name="TCP_GATE_EN1" block=TCP event=0 descr="TCP interface clocks are turned on. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_GATE_EN2" block=TCP event=1 descr="TCP core clocks are turned on. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCP_TA_DATA_STALL_CYCLES" block=TCP event=6 descr="TCP stalls TA data interface. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TD_TCP_STALL_CYCLES" block=TCP event=7 descr="TD stalls TCP"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCR_TCP_STALL_CYCLES" block=TCP event=8 descr="TCR stalls TCP_TCR_req interface"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_READ_TAGCONFLICT_STALL_CYCLES" block=TCP event=11 descr="Tagram conflict stall on a read"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES" block=TCP event=12 descr="Tagram conflict stall on a write"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES" block=TCP event=13 descr="Tagram conflict stall on an atomic"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_PENDING_STALL_CYCLES" block=TCP event=22 descr="Stall due to data pending from L2"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TA_TCP_STATE_READ" block=TCP event=27 descr="Number of state reads"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_VOLATILE" block=TCP event=28 descr="Total number of L1 volatile pixels/buffers from TA"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_ACCESSES" block=TCP event=29 descr="Total number of pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_READ" block=TCP event=30 descr="Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_WRITE" block=TCP event=32 descr="Total number of local write pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_ATOMIC_WITH_RET" block=TCP event=38 descr="Total number of atomic with return pixels/buffers from TA"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_ATOMIC_WITHOUT_RET" block=TCP event=39 descr="Total number of atomic without return pixels/buffers from TA"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_WRITEBACK_INVALIDATES" block=TCP event=45 descr="Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_UTCL1_REQUEST" block=TCP event=47 descr="Total CLIENT_UTCL1 NORMAL requests"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_UTCL1_TRANSLATION_MISS" block=TCP event=48 descr="Total utcl1 translation misses"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_UTCL1_TRANSLATION_HIT" block=TCP event=49 descr="Total utcl1 translation hits"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_UTCL1_PERMISSION_MISS" block=TCP event=50 descr="Total utcl1 permission misses"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TOTAL_CACHE_ACCESSES" block=TCP event=60 descr="Count of total cache line (tag) accesses (includes hits and misses)."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCP_LATENCY" block=TCP event=65 descr="Total TCP wave latency (from first clock of wave entering to first clock of wave leaving), divide by TA_TCP_STATE_READ to get the average wave latency"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_READ_REQ_LATENCY" block=TCP event=66 descr="Total TCP->TCC request latency for reads and atomics with return. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_WRITE_REQ_LATENCY" block=TCP event=67 descr="Total TCP->TCC request latency for writes and atomics without return. Not Windowed."
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_READ_REQ" block=TCP event=69 descr="Total read requests from TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_WRITE_REQ" block=TCP event=70 descr="Total write requests from TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_ATOMIC_WITH_RET_REQ" block=TCP event=71 descr="Total atomic with return requests from TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ" block=TCP event=72 descr="Total atomic without return requests from TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_NC_READ_REQ" block=TCP event=75 descr="Total read requests with NC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_NC_WRITE_REQ" block=TCP event=76 descr="Total write requests with NC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_NC_ATOMIC_REQ" block=TCP event=77 descr="Total atomic requests with NC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_UC_READ_REQ" block=TCP event=78 descr="Total read requests with UC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_UC_WRITE_REQ" block=TCP event=79 descr="Total write requests with UC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_UC_ATOMIC_REQ" block=TCP event=80 descr="Total atomic requests with UC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_CC_READ_REQ" block=TCP event=81 descr="Total read requests with CC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_CC_WRITE_REQ" block=TCP event=82 descr="Total write requests with CC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_CC_ATOMIC_REQ" block=TCP event=83 descr="Total atomic requests with CC mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_RW_READ_REQ" block=TCP event=85 descr="Total read requests with RW mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_RW_WRITE_REQ" block=TCP event=86 descr="Total write requests with RW mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
<metric
|
||||
name="TCP_TCC_RW_ATOMIC_REQ" block=TCP event=87 descr="Total atomic requests with RW mtype from this TCP to all TCCs"
|
||||
></metric>
|
||||
# TD counters
|
||||
<metric
|
||||
name="TD_TD_BUSY" block=TD event=1 descr="TD is processing or waiting for data. Perf_Windowing not supported for this counter."
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_TC_STALL" block=TD event=15 descr="TD is stalled waiting for TC data."
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_RESERVED_18" block=TD event=18 descr="RESERVED_18"
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_LOAD_WAVEFRONT" block=TD event=25 descr="Count the wavefronts with opcode = load, include atomics and store."
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_ATOMIC_WAVEFRONT" block=TD event=26 descr="Count the wavefronts with opcode = atomic."
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_STORE_WAVEFRONT" block=TD event=27 descr="Count the wavefronts with opcode = store."
|
||||
></metric>
|
||||
<metric
|
||||
name="TD_COALESCABLE_WAVEFRONT" block=TD event=32 descr="Count wavefronts that TA finds coalescable."
|
||||
></metric>
|
||||
</gfx908>
|
||||
@@ -0,0 +1,163 @@
|
||||
#include "gfx908_metrics.xml"
|
||||
|
||||
<gfx9_expr>
|
||||
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) descr="TA block is busy. Average over TA instances."></metric>
|
||||
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) descr="TA block is busy. Max over TA instances."></metric>
|
||||
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) descr="TA block is busy. Min over TA instances."></metric>
|
||||
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="Number of flat opcode reads processed by the TA. Sum over TA instances."></metric>
|
||||
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="Number of flat opcode writes processed by the TA. Sum over TA instances."></metric>
|
||||
|
||||
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,16) descr="TCC_BUSY avr over all memory channels."></metric>
|
||||
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,16) descr="TCC_REQ sum over all memory channels."></metric>
|
||||
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,16) descr="Number of cache hits. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,16) descr="Number of cache misses. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,16) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,16) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,16) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,16) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,16) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
|
||||
|
||||
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
|
||||
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 descr="The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
|
||||
<metric name="WRITE_REQ_32B" expr=TCC_EA_WRREQ_64B_sum*2+(TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum) descr="The total number of 32-byte effective memory writes."></metric>
|
||||
|
||||
|
||||
|
||||
#xlu - TA
|
||||
<metric name="TA_TA_BUSY_sum" expr=sum(TA_TA_BUSY,16) descr="."></metric>
|
||||
|
||||
<metric name="TA_TOTAL_WAVEFRONTS_sum" expr=sum(TA_TOTAL_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_ADDR_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TC_CYCLES,16) descr="."></metric>
|
||||
<metric name="TA_ADDR_STALLED_BY_TD_CYCLES_sum" expr=sum(TA_ADDR_STALLED_BY_TD_CYCLES,16) descr="."></metric>
|
||||
<metric name="TA_DATA_STALLED_BY_TC_CYCLES_sum" expr=sum(TA_DATA_STALLED_BY_TC_CYCLES,16) descr="."></metric>
|
||||
|
||||
|
||||
|
||||
<metric name="TA_FLAT_WAVEFRONTS_sum" expr=sum(TA_FLAT_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_FLAT_READ_WAVEFRONTS_sum" expr=sum(TA_FLAT_READ_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_FLAT_WRITE_WAVEFRONTS_sum" expr=sum(TA_FLAT_WRITE_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_FLAT_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_FLAT_ATOMIC_WAVEFRONTS,16) descr="."></metric>
|
||||
|
||||
<metric name="TA_BUFFER_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_BUFFER_READ_WAVEFRONTS_sum" expr=sum(TA_BUFFER_READ_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_BUFFER_WRITE_WAVEFRONTS_sum" expr=sum(TA_BUFFER_WRITE_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_BUFFER_ATOMIC_WAVEFRONTS_sum" expr=sum(TA_BUFFER_ATOMIC_WAVEFRONTS,16) descr="."></metric>
|
||||
<metric name="TA_BUFFER_TOTAL_CYCLES_sum" expr=sum(TA_BUFFER_TOTAL_CYCLES,16) descr="."></metric>
|
||||
|
||||
<metric name="TA_BUFFER_COALESCED_READ_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_READ_CYCLES,16) descr="."></metric>
|
||||
<metric name="TA_BUFFER_COALESCED_WRITE_CYCLES_sum" expr=sum(TA_BUFFER_COALESCED_WRITE_CYCLES,16) descr="."></metric>
|
||||
|
||||
#xlu -TD
|
||||
<metric name="TD_TD_BUSY_sum" expr=sum(TD_TD_BUSY,16) descr="."></metric>
|
||||
<metric name="TD_TC_STALL_sum" expr=sum(TD_TC_STALL,16) descr="."></metric>
|
||||
<metric name="TD_LOAD_WAVEFRONT_sum" expr=sum(TD_LOAD_WAVEFRONT,16) descr="."></metric>
|
||||
<metric name="TD_ATOMIC_WAVEFRONT_sum" expr=sum(TD_ATOMIC_WAVEFRONT,16) descr="."></metric>
|
||||
<metric name="TD_STORE_WAVEFRONT_sum" expr=sum(TD_STORE_WAVEFRONT,16) descr="."></metric>
|
||||
|
||||
<metric name="TD_COALESCABLE_WAVEFRONT_sum" expr=sum(TD_COALESCABLE_WAVEFRONT,16) descr="."></metric>
|
||||
|
||||
#xlu -TCP
|
||||
<metric name="TCP_GATE_EN1_sum" expr=sum(TCP_GATE_EN1,16) descr="."></metric>
|
||||
<metric name="TCP_GATE_EN2_sum" expr=sum(TCP_GATE_EN2,16) descr="."></metric>
|
||||
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_sum" expr=sum(TCP_TCP_TA_DATA_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_TD_TCP_STALL_CYCLES_sum" expr=sum(TCP_TD_TCP_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_TCR_TCP_STALL_CYCLES_sum" expr=sum(TCP_TCR_TCP_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_READ_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_READ_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_WRITE_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum" expr=sum(TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_PENDING_STALL_CYCLES_sum" expr=sum(TCP_PENDING_STALL_CYCLES,16) descr="."></metric>
|
||||
<metric name="TCP_VOLATILE_sum" expr=sum(TCP_VOLATILE,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_ACCESSES_sum" expr=sum(TCP_TOTAL_ACCESSES,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_READ_sum" expr=sum(TCP_TOTAL_READ,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_WRITE_sum" expr=sum(TCP_TOTAL_WRITE,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_ATOMIC_WITH_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITH_RET,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_ATOMIC_WITHOUT_RET_sum" expr=sum(TCP_TOTAL_ATOMIC_WITHOUT_RET,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_WRITEBACK_INVALIDATES_sum" expr=sum(TCP_TOTAL_WRITEBACK_INVALIDATES,16) descr="."></metric>
|
||||
<metric name="TCP_UTCL1_REQUEST_sum" expr=sum(TCP_UTCL1_REQUEST,16) descr="."></metric>
|
||||
<metric name="TCP_UTCL1_TRANSLATION_MISS_sum" expr=sum(TCP_UTCL1_TRANSLATION_MISS,16) descr="."></metric>
|
||||
<metric name="TCP_UTCL1_TRANSLATION_HIT_sum" expr=sum(TCP_UTCL1_TRANSLATION_HIT,16) descr="."></metric>
|
||||
<metric name="TCP_UTCL1_PERMISSION_MISS_sum" expr=sum(TCP_UTCL1_PERMISSION_MISS,16) descr="."></metric>
|
||||
<metric name="TCP_TOTAL_CACHE_ACCESSES_sum" expr=sum(TCP_TOTAL_CACHE_ACCESSES,16) descr="."></metric>
|
||||
<metric name="TCP_TCP_LATENCY_sum" expr=sum(TCP_TCP_LATENCY,16) descr="."></metric>
|
||||
<metric name="TCP_TA_TCP_STATE_READ_sum" expr=sum(TCP_TA_TCP_STATE_READ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_READ_REQ_LATENCY_sum" expr=sum(TCP_TCC_READ_REQ_LATENCY,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_WRITE_REQ_LATENCY_sum" expr=sum(TCP_TCC_WRITE_REQ_LATENCY,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_READ_REQ_sum" expr=sum(TCP_TCC_READ_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_WRITE_REQ_sum" expr=sum(TCP_TCC_WRITE_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_ATOMIC_WITH_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITH_RET_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum" expr=sum(TCP_TCC_ATOMIC_WITHOUT_RET_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_NC_READ_REQ_sum" expr=sum(TCP_TCC_NC_READ_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_NC_WRITE_REQ_sum" expr=sum(TCP_TCC_NC_WRITE_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_NC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_NC_ATOMIC_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_UC_READ_REQ_sum" expr=sum(TCP_TCC_UC_READ_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_UC_WRITE_REQ_sum" expr=sum(TCP_TCC_UC_WRITE_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_UC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_UC_ATOMIC_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_CC_READ_REQ_sum" expr=sum(TCP_TCC_CC_READ_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_CC_WRITE_REQ_sum" expr=sum(TCP_TCC_CC_WRITE_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_CC_ATOMIC_REQ_sum" expr=sum(TCP_TCC_CC_ATOMIC_REQ,16) descr="."></metric>
|
||||
|
||||
</gfx9_expr>
|
||||
|
||||
<gfx908_expr base="gfx9_expr">
|
||||
<metric name="TCC_BUSY_avr" expr=avr(TCC_BUSY,32) descr="TCC_BUSY avr over all memory channels."></metric>
|
||||
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="TCC_REQ sum over all memory channels."></metric>
|
||||
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="Number of cache hits. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="Number of cache misses. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="Number of 32-byte TCC/EA read requests. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="Number of TCC/EA read requests (either 32-byte or 64-byte). Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_WRREQ_STALL_max" expr=max(TCC_EA_WRREQ_STALL,32) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
|
||||
|
||||
#xlu - TCP
|
||||
<metric name="TCP_TCC_RW_READ_REQ_sum" expr=sum(TCP_TCC_RW_READ_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_RW_WRITE_REQ_sum" expr=sum(TCP_TCC_RW_WRITE_REQ,16) descr="."></metric>
|
||||
<metric name="TCP_TCC_RW_ATOMIC_REQ_sum" expr=sum(TCP_TCC_RW_ATOMIC_REQ,16) descr="."></metric>
|
||||
|
||||
#xlu - TCC
|
||||
<metric name="TCC_CYCLE_sum" expr=sum(TCC_CYCLE,32) descr="."></metric>
|
||||
<metric name="TCC_BUSY_sum" expr=sum(TCC_BUSY,32) descr="."></metric>
|
||||
<metric name="TCC_REQ_sum" expr=sum(TCC_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_STREAMING_REQ_sum" expr=sum(TCC_STREAMING_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_NC_REQ_sum" expr=sum(TCC_NC_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_UC_REQ_sum" expr=sum(TCC_UC_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_CC_REQ_sum" expr=sum(TCC_CC_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_RW_REQ_sum" expr=sum(TCC_RW_REQ,32) descr="."></metric>
|
||||
<metric name="TCC_PROBE_sum" expr=sum(TCC_PROBE,32) descr="."></metric>
|
||||
<metric name="TCC_PROBE_ALL_sum" expr=sum(TCC_PROBE_ALL,32) descr="."></metric>
|
||||
<metric name="TCC_READ_sum" expr=sum(TCC_READ,32) descr="."></metric>
|
||||
<metric name="TCC_WRITE_sum" expr=sum(TCC_WRITE,32) descr="."></metric>
|
||||
<metric name="TCC_ATOMIC_sum" expr=sum(TCC_ATOMIC,32) descr="."></metric>
|
||||
<metric name="TCC_HIT_sum" expr=sum(TCC_HIT,32) descr="."></metric>
|
||||
<metric name="TCC_MISS_sum" expr=sum(TCC_MISS,32) descr="."></metric>
|
||||
<metric name="TCC_TAG_STALL_sum" expr=sum(TCC_TAG_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_WRITEBACK_sum" expr=sum(TCC_WRITEBACK,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_sum" expr=sum(TCC_EA_WRREQ,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_64B_sum" expr=sum(TCC_EA_WRREQ_64B,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WR_UNCACHED_32B_sum" expr=sum(TCC_EA_WR_UNCACHED_32B,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_STALL_sum" expr=sum(TCC_EA_WRREQ_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_IO_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_GMI_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_WRREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_TOO_MANY_EA_WRREQS_STALL_sum" expr=sum(TCC_TOO_MANY_EA_WRREQS_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_LEVEL_sum" expr=sum(TCC_EA_WRREQ_LEVEL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_LEVEL_sum" expr=sum(TCC_EA_RDREQ_LEVEL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_ATOMIC_sum" expr=sum(TCC_EA_ATOMIC,32) descr="."></metric>
|
||||
<metric name="TCC_EA_ATOMIC_LEVEL_sum" expr=sum(TCC_EA_ATOMIC_LEVEL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_sum" expr=sum(TCC_EA_RDREQ,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_32B_sum" expr=sum(TCC_EA_RDREQ_32B,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RD_UNCACHED_32B_sum" expr=sum(TCC_EA_RD_UNCACHED_32B,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_IO_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_IO_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_GMI_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_GMI_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum" expr=sum(TCC_EA_RDREQ_DRAM_CREDIT_STALL,32) descr="."></metric>
|
||||
<metric name="TCC_NORMAL_WRITEBACK_sum" expr=sum(TCC_NORMAL_WRITEBACK,32) descr="."></metric>
|
||||
<metric name="TCC_ALL_TC_OP_WB_WRITEBACK_sum" expr=sum(TCC_ALL_TC_OP_WB_WRITEBACK,32) descr="."></metric>
|
||||
<metric name="TCC_NORMAL_EVICT_sum" expr=sum(TCC_NORMAL_EVICT,32) descr="."></metric>
|
||||
<metric name="TCC_ALL_TC_OP_INV_EVICT_sum" expr=sum(TCC_ALL_TC_OP_INV_EVICT,32) descr="."></metric>
|
||||
<metric name="TCC_EA_RDREQ_DRAM_sum" expr=sum(TCC_EA_RDREQ_DRAM,32) descr="."></metric>
|
||||
<metric name="TCC_EA_WRREQ_DRAM_sum" expr=sum(TCC_EA_WRREQ_DRAM,32) descr="."></metric>
|
||||
|
||||
</gfx908_expr>
|
||||
|
||||
<gfx9 base="gfx9_expr"></gfx9>
|
||||
<gfx908 base="gfx908_expr"> </gfx908>
|
||||
@@ -0,0 +1,15 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
|
||||
|
||||
pmc: CPC_CPC_STAT_BUSY CPC_CPC_STAT_IDLE
|
||||
pmc: CPC_CPC_TCIU_BUSY CPC_CPC_TCIU_IDLE
|
||||
pmc: CPC_CPC_STAT_STALL CPC_UTCL1_STALL_ON_TRANSLATION
|
||||
pmc: CPC_CPC_UTCL2IU_BUSY CPC_CPC_UTCL2IU_IDLE
|
||||
pmc: CPC_CPC_UTCL2IU_STALL CPC_ME1_BUSY_FOR_PACKET_DECODE
|
||||
pmc: CPC_ME1_DC0_SPI_BUSY
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
|
||||
|
||||
pmc: CPF_CPF_STAT_BUSY CPF_CPF_STAT_STALL
|
||||
pmc: CPF_CPF_TCIU_BUSY CPF_CPF_TCIU_STALL
|
||||
pmc: CPF_CPF_STAT_IDLE CPF_CPF_TCIU_IDLE
|
||||
pmc: CPF_CMP_UTCL1_STALL_ON_TRANSLATION
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES GRBM_SPI_BUSY
|
||||
|
||||
pmc: SPI_CSN_WINDOW_VALID SPI_CSN_BUSY SPI_CSN_NUM_THREADGROUPS SPI_CSN_WAVE SPI_RA_REQ_NO_ALLOC SPI_RA_REQ_NO_ALLOC_CSN
|
||||
pmc: SPI_RA_RES_STALL_CSN SPI_RA_TMP_STALL_CSN SPI_RA_WAVE_SIMD_FULL_CSN SPI_RA_VGPR_SIMD_FULL_CSN SPI_RA_SGPR_SIMD_FULL_CSN SPI_RA_LDS_CU_FULL_CSN
|
||||
pmc: SPI_RA_BAR_CU_FULL_CSN SPI_RA_TGLIM_CU_FULL_CSN SPI_RA_WVLIM_STALL_CSN SPI_SWC_CSC_WR SPI_VWC_CSC_WR SPI_RA_BULKY_CU_FULL_CSN
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
#SQ
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_INSTS_VMEM_WR SQ_INSTS_VMEM_RD SQ_INSTS_VMEM SQ_INSTS_SALU SQ_INSTS_VSKIPPED
|
||||
pmc: SQ_INSTS_SMEM SQ_INSTS_FLAT SQ_INSTS_LDS SQ_INSTS_GDS SQ_INSTS_EXP_GDS SQ_INSTS_BRANCH SQ_INSTS_SENDMSG SQ_INSTS
|
||||
pmc: SQ_WAVE_CYCLES SQ_WAIT_ANY SQ_WAIT_INST_ANY SQ_ACTIVE_INST_ANY SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_INSTS_VALU
|
||||
pmc: SQ_ACTIVE_INST_VMEM SQ_ACTIVE_INST_LDS SQ_ACTIVE_INST_VALU SQ_ACTIVE_INST_SCA SQ_ACTIVE_INST_EXP_GDS SQ_ACTIVE_INST_MISC SQ_ACTIVE_INST_FLAT SQ_INST_CYCLES_VMEM_WR
|
||||
pmc: SQ_INST_CYCLES_VMEM_RD SQ_INST_CYCLES_SMEM SQ_INST_CYCLES_SALU SQ_THREAD_CYCLES_VALU SQ_IFETCH SQ_LDS_BANK_CONFLICT SQ_LDS_ADDR_CONFLICT SQ_LDS_UNALIGNED_STALL
|
||||
pmc: SQ_WAVES SQ_WAVES_EQ_64 SQ_WAVES_LT_64 SQ_WAVES_LT_48 SQ_WAVES_LT_32 SQ_WAVES_LT_16 SQ_ITEMS SQ_INSTS_VSKIPPED
|
||||
pmc: SQ_LDS_MEM_VIOLATIONS SQ_LDS_ATOMIC_RETURN SQ_LDS_IDX_ACTIVE SQ_WAVES_RESTORED SQ_WAVES_SAVED SQ_INSTS_SMEM_NORM
|
||||
|
||||
|
||||
#SQ:MI200
|
||||
#pmc: SQ_INSTS_MFMA SQ_INSTS_VALU_MFMA_I8 SQ_INSTS_VALU_MFMA_F16 SQ_INSTS_VALU_MFMA_BF16 SQ_INSTS_VALU_MFMA_F32 SQ_INSTS_VALU_MFMA_F64 SQ_VALU_MFMA_BUSY_CYCLES
|
||||
#pmc: SQ_INSTS_FLAT_LDS_ONLY SQ_INSTS_VALU_MFMA_MOPS_I8 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64
|
||||
|
||||
#SQC
|
||||
pmc: SQC_TC_INST_REQ SQC_TC_DATA_READ_REQ SQC_TC_DATA_WRITE_REQ SQC_TC_DATA_ATOMIC_REQ SQC_TC_STALL SQC_TC_REQ SQC_DCACHE_REQ_READ_16
|
||||
pmc: SQC_ICACHE_REQ SQC_ICACHE_HITS SQC_ICACHE_MISSES SQC_ICACHE_MISSES_DUPLICATE SQC_DCACHE_INPUT_VALID_READYB SQC_DCACHE_ATOMIC SQC_DCACHE_REQ_READ_8
|
||||
pmc: SQC_DCACHE_REQ SQC_DCACHE_HITS SQC_DCACHE_MISSES SQC_DCACHE_MISSES_DUPLICATE SQC_DCACHE_REQ_READ_1 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ_READ_4
|
||||
|
||||
|
||||
|
||||
########################################
|
||||
# Filtering
|
||||
########################################
|
||||
range:
|
||||
gpu:
|
||||
kernel:
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
#################################################
|
||||
# VMEM latency
|
||||
#################################################
|
||||
pmc: SQ_INSTS_VMEM SQ_INST_LEVEL_VMEM SQ_ACCUM_PREV_HIRES
|
||||
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
#################################################
|
||||
# SMEM latency
|
||||
#################################################
|
||||
pmc: SQ_INSTS_SMEM SQ_INST_LEVEL_SMEM SQ_ACCUM_PREV_HIRES
|
||||
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
#################################################
|
||||
# ifetch latency
|
||||
#################################################
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_WAVES SQ_IFETCH SQ_IFETCH_LEVEL SQ_ACCUM_PREV_HIRES
|
||||
|
||||
range:
|
||||
gpu:
|
||||
kernel:
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
#################################################
|
||||
# LDS latency
|
||||
#################################################
|
||||
pmc: SQ_INSTS_LDS SQ_INST_LEVEL_LDS SQ_ACCUM_PREV_HIRES
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE CPC_ME1_BUSY_FOR_PACKET_DECODE SQ_CYCLES SQ_WAVES SQ_WAVE_CYCLES SQ_BUSY_CYCLES SQ_LEVEL_WAVES SQ_ACCUM_PREV_HIRES
|
||||
|
||||
gpu:
|
||||
range:
|
||||
|
||||
kernel:
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
|
||||
|
||||
|
||||
pmc: SQC_TC_INST_REQ SQC_TC_DATA_READ_REQ SQC_TC_DATA_WRITE_REQ SQC_TC_DATA_ATOMIC_REQ SQC_TC_STALL SQC_TC_REQ SQC_DCACHE_REQ_READ_16
|
||||
pmc: SQC_ICACHE_REQ SQC_ICACHE_HITS SQC_ICACHE_MISSES SQC_ICACHE_MISSES_DUPLICATE SQC_DCACHE_INPUT_VALID_READYB SQC_DCACHE_ATOMIC SQC_DCACHE_REQ_READ_8
|
||||
pmc: SQC_DCACHE_REQ SQC_DCACHE_HITS SQC_DCACHE_MISSES SQC_DCACHE_MISSES_DUPLICATE SQC_DCACHE_REQ_READ_1 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ_READ_4
|
||||
|
||||
range:
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
|
||||
|
||||
pmc: TA_TA_BUSY_sum TA_SH_FIFO_BUSY_sum
|
||||
pmc: TA_SH_FIFO_CMD_BUSY_sum TA_SH_FIFO_ADDR_BUSY_sum
|
||||
pmc: TA_SH_FIFO_DATA_BUSY_sum TA_SH_FIFO_DATA_SFIFO_BUSY_sum
|
||||
pmc: TA_SH_FIFO_DATA_TFIFO_BUSY_sum TA_SQ_TA_CMD_CYCLES_sum
|
||||
pmc: TA_SP_TA_ADDR_CYCLES_sum TA_SP_TA_DATA_CYCLES_sum
|
||||
|
||||
|
||||
# Starvation
|
||||
pmc: TA_SH_FIFO_ADDR_STARVED_WHILE_BUSY_CYCLES_sum TA_SH_FIFO_CMD_STARVED_WHILE_BUSY_CYCLES_sum
|
||||
pmc: TA_SH_FIFO_DATA_STARVED_WHILE_BUSY_CYCLES_sum TA_TA_SH_FIFO_STARVED_sum
|
||||
|
||||
|
||||
|
||||
# buffer access
|
||||
pmc: TA_BUFFER_WAVEFRONTS_sum TA_BUFFER_READ_WAVEFRONTS_sum
|
||||
pmc: TA_BUFFER_WRITE_WAVEFRONTS_sum TA_BUFFER_ATOMIC_WAVEFRONTS_sum
|
||||
pmc: TA_BUFFER_TOTAL_CYCLES_sum TA_BUFFER_COALESCABLE_WAVEFRONTS_sum
|
||||
pmc: TA_BUFFER_COALESCED_READ_CYCLES_sum TA_BUFFER_COALESCED_WRITE_CYCLES_sum
|
||||
|
||||
|
||||
# stalls
|
||||
pmc: TA_ADDR_STALLED_BY_TC_CYCLES_sum TA_TOTAL_WAVEFRONTS_sum
|
||||
pmc: TA_ADDR_STALLED_BY_TD_CYCLES_sum TA_DATA_STALLED_BY_TC_CYCLES_sum
|
||||
|
||||
# flat accesses
|
||||
pmc: TA_FLAT_WAVEFRONTS_sum TA_FLAT_READ_WAVEFRONTS_sum
|
||||
pmc: TA_FLAT_WRITE_WAVEFRONTS_sum TA_FLAT_ATOMIC_WAVEFRONTS_sum
|
||||
pmc: TA_FLAT_COALESCEABLE_WAVEFRONTS_sum
|
||||
|
||||
|
||||
range:
|
||||
|
||||
gpu:
|
||||
|
||||
kernel:
|
||||
@@ -0,0 +1,26 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
|
||||
|
||||
# MI50: no TCC_RW_REQ
|
||||
pmc: TCC_CYCLE[0] TCC_HIT[0] TCC_MISS[0] TCC_CYCLE[1] TCC_HIT[1] TCC_MISS[1] TCC_CYCLE[2] TCC_HIT[2] TCC_MISS[2] TCC_CYCLE[3] TCC_HIT[3] TCC_MISS[3] TCC_CYCLE[4] TCC_HIT[4] TCC_MISS[4] TCC_CYCLE[5] TCC_HIT[5] TCC_MISS[5] TCC_CYCLE[6] TCC_HIT[6] TCC_MISS[6] TCC_CYCLE[7] TCC_HIT[7] TCC_MISS[7] TCC_CYCLE[8] TCC_HIT[8] TCC_MISS[8] TCC_CYCLE[9] TCC_HIT[9] TCC_MISS[9] TCC_CYCLE[10] TCC_HIT[10] TCC_MISS[10] TCC_CYCLE[11] TCC_HIT[11] TCC_MISS[11] TCC_CYCLE[12] TCC_HIT[12] TCC_MISS[12] TCC_CYCLE[13] TCC_HIT[13] TCC_MISS[13] TCC_CYCLE[14] TCC_HIT[14] TCC_MISS[14] TCC_CYCLE[15] TCC_HIT[15] TCC_MISS[15]
|
||||
|
||||
|
||||
|
||||
pmc: TCC_REQ[0] TCC_READ[0] TCC_WRITE[0] TCC_ATOMIC[0] TCC_REQ[1] TCC_READ[1] TCC_WRITE[1] TCC_ATOMIC[1] TCC_REQ[2] TCC_READ[2] TCC_WRITE[2] TCC_ATOMIC[2] TCC_REQ[3] TCC_READ[3] TCC_WRITE[3] TCC_ATOMIC[3] TCC_REQ[4] TCC_READ[4] TCC_WRITE[4] TCC_ATOMIC[4] TCC_REQ[5] TCC_READ[5] TCC_WRITE[5] TCC_ATOMIC[5] TCC_REQ[6] TCC_READ[6] TCC_WRITE[6] TCC_ATOMIC[6] TCC_REQ[7] TCC_READ[7] TCC_WRITE[7] TCC_ATOMIC[7] TCC_REQ[8] TCC_READ[8] TCC_WRITE[8] TCC_ATOMIC[8] TCC_REQ[9] TCC_READ[9] TCC_WRITE[9] TCC_ATOMIC[9] TCC_REQ[10] TCC_READ[10] TCC_WRITE[10] TCC_ATOMIC[10] TCC_REQ[11] TCC_READ[11] TCC_WRITE[11] TCC_ATOMIC[11] TCC_REQ[12] TCC_READ[12] TCC_WRITE[12] TCC_ATOMIC[12] TCC_REQ[13] TCC_READ[13] TCC_WRITE[13] TCC_ATOMIC[13] TCC_REQ[14] TCC_READ[14] TCC_WRITE[14] TCC_ATOMIC[14] TCC_REQ[15] TCC_READ[15] TCC_WRITE[15] TCC_ATOMIC[15]
|
||||
|
||||
|
||||
|
||||
pmc: TCC_EA_RDREQ[0] TCC_EA_RDREQ_32B[0] TCC_EA_WRREQ[0] TCC_EA_WRREQ_64B[0] TCC_EA_RDREQ[1] TCC_EA_RDREQ_32B[1] TCC_EA_WRREQ[1] TCC_EA_WRREQ_64B[1] TCC_EA_RDREQ[2] TCC_EA_RDREQ_32B[2] TCC_EA_WRREQ[2] TCC_EA_WRREQ_64B[2] TCC_EA_RDREQ[3] TCC_EA_RDREQ_32B[3] TCC_EA_WRREQ[3] TCC_EA_WRREQ_64B[3] TCC_EA_RDREQ[4] TCC_EA_RDREQ_32B[4] TCC_EA_WRREQ[4] TCC_EA_WRREQ_64B[4] TCC_EA_RDREQ[5] TCC_EA_RDREQ_32B[5] TCC_EA_WRREQ[5] TCC_EA_WRREQ_64B[5] TCC_EA_RDREQ[6] TCC_EA_RDREQ_32B[6] TCC_EA_WRREQ[6] TCC_EA_WRREQ_64B[6] TCC_EA_RDREQ[7] TCC_EA_RDREQ_32B[7] TCC_EA_WRREQ[7] TCC_EA_WRREQ_64B[7] TCC_EA_RDREQ[8] TCC_EA_RDREQ_32B[8] TCC_EA_WRREQ[8] TCC_EA_WRREQ_64B[8] TCC_EA_RDREQ[9] TCC_EA_RDREQ_32B[9] TCC_EA_WRREQ[9] TCC_EA_WRREQ_64B[9] TCC_EA_RDREQ[10] TCC_EA_RDREQ_32B[10] TCC_EA_WRREQ[10] TCC_EA_WRREQ_64B[10] TCC_EA_RDREQ[11] TCC_EA_RDREQ_32B[11] TCC_EA_WRREQ[11] TCC_EA_WRREQ_64B[11] TCC_EA_RDREQ[12] TCC_EA_RDREQ_32B[12] TCC_EA_WRREQ[12] TCC_EA_WRREQ_64B[12] TCC_EA_RDREQ[13] TCC_EA_RDREQ_32B[13] TCC_EA_WRREQ[13] TCC_EA_WRREQ_64B[13] TCC_EA_RDREQ[14] TCC_EA_RDREQ_32B[14] TCC_EA_WRREQ[14] TCC_EA_WRREQ_64B[14] TCC_EA_RDREQ[15] TCC_EA_RDREQ_32B[15] TCC_EA_WRREQ[15] TCC_EA_WRREQ_64B[15]
|
||||
|
||||
|
||||
pmc: TCC_EA_ATOMIC[0] TCC_EA_RDREQ_LEVEL[0] TCC_EA_WRREQ_LEVEL[0] TCC_EA_ATOMIC_LEVEL[0] TCC_EA_ATOMIC[1] TCC_EA_RDREQ_LEVEL[1] TCC_EA_WRREQ_LEVEL[1] TCC_EA_ATOMIC_LEVEL[1] TCC_EA_ATOMIC[2] TCC_EA_RDREQ_LEVEL[2] TCC_EA_WRREQ_LEVEL[2] TCC_EA_ATOMIC_LEVEL[2] TCC_EA_ATOMIC[3] TCC_EA_RDREQ_LEVEL[3] TCC_EA_WRREQ_LEVEL[3] TCC_EA_ATOMIC_LEVEL[3] TCC_EA_ATOMIC[4] TCC_EA_RDREQ_LEVEL[4] TCC_EA_WRREQ_LEVEL[4] TCC_EA_ATOMIC_LEVEL[4] TCC_EA_ATOMIC[5] TCC_EA_RDREQ_LEVEL[5] TCC_EA_WRREQ_LEVEL[5] TCC_EA_ATOMIC_LEVEL[5] TCC_EA_ATOMIC[6] TCC_EA_RDREQ_LEVEL[6] TCC_EA_WRREQ_LEVEL[6] TCC_EA_ATOMIC_LEVEL[6] TCC_EA_ATOMIC[7] TCC_EA_RDREQ_LEVEL[7] TCC_EA_WRREQ_LEVEL[7] TCC_EA_ATOMIC_LEVEL[7] TCC_EA_ATOMIC[8] TCC_EA_RDREQ_LEVEL[8] TCC_EA_WRREQ_LEVEL[8] TCC_EA_ATOMIC_LEVEL[8] TCC_EA_ATOMIC[9] TCC_EA_RDREQ_LEVEL[9] TCC_EA_WRREQ_LEVEL[9] TCC_EA_ATOMIC_LEVEL[9] TCC_EA_ATOMIC[10] TCC_EA_RDREQ_LEVEL[10] TCC_EA_WRREQ_LEVEL[10] TCC_EA_ATOMIC_LEVEL[10] TCC_EA_ATOMIC[11] TCC_EA_RDREQ_LEVEL[11] TCC_EA_WRREQ_LEVEL[11] TCC_EA_ATOMIC_LEVEL[11] TCC_EA_ATOMIC[12] TCC_EA_RDREQ_LEVEL[12] TCC_EA_WRREQ_LEVEL[12] TCC_EA_ATOMIC_LEVEL[12] TCC_EA_ATOMIC[13] TCC_EA_RDREQ_LEVEL[13] TCC_EA_WRREQ_LEVEL[13] TCC_EA_ATOMIC_LEVEL[13] TCC_EA_ATOMIC[14] TCC_EA_RDREQ_LEVEL[14] TCC_EA_WRREQ_LEVEL[14] TCC_EA_ATOMIC_LEVEL[14] TCC_EA_ATOMIC[15] TCC_EA_RDREQ_LEVEL[15] TCC_EA_WRREQ_LEVEL[15] TCC_EA_ATOMIC_LEVEL[15]
|
||||
|
||||
|
||||
pmc: TCC_EA_RDREQ_IO_CREDIT_STALL[0] TCC_EA_RDREQ_GMI_CREDIT_STALL[0] TCC_EA_RDREQ_DRAM_CREDIT_STALL[0] TCC_EA_RDREQ_IO_CREDIT_STALL[1] TCC_EA_RDREQ_GMI_CREDIT_STALL[1] TCC_EA_RDREQ_DRAM_CREDIT_STALL[1] TCC_EA_RDREQ_IO_CREDIT_STALL[2] TCC_EA_RDREQ_GMI_CREDIT_STALL[2] TCC_EA_RDREQ_DRAM_CREDIT_STALL[2] TCC_EA_RDREQ_IO_CREDIT_STALL[3] TCC_EA_RDREQ_GMI_CREDIT_STALL[3] TCC_EA_RDREQ_DRAM_CREDIT_STALL[3] TCC_EA_RDREQ_IO_CREDIT_STALL[4] TCC_EA_RDREQ_GMI_CREDIT_STALL[4] TCC_EA_RDREQ_DRAM_CREDIT_STALL[4] TCC_EA_RDREQ_IO_CREDIT_STALL[5] TCC_EA_RDREQ_GMI_CREDIT_STALL[5] TCC_EA_RDREQ_DRAM_CREDIT_STALL[5] TCC_EA_RDREQ_IO_CREDIT_STALL[6] TCC_EA_RDREQ_GMI_CREDIT_STALL[6] TCC_EA_RDREQ_DRAM_CREDIT_STALL[6] TCC_EA_RDREQ_IO_CREDIT_STALL[7] TCC_EA_RDREQ_GMI_CREDIT_STALL[7] TCC_EA_RDREQ_DRAM_CREDIT_STALL[7] TCC_EA_RDREQ_IO_CREDIT_STALL[8] TCC_EA_RDREQ_GMI_CREDIT_STALL[8] TCC_EA_RDREQ_DRAM_CREDIT_STALL[8] TCC_EA_RDREQ_IO_CREDIT_STALL[9] TCC_EA_RDREQ_GMI_CREDIT_STALL[9] TCC_EA_RDREQ_DRAM_CREDIT_STALL[9] TCC_EA_RDREQ_IO_CREDIT_STALL[10] TCC_EA_RDREQ_GMI_CREDIT_STALL[10] TCC_EA_RDREQ_DRAM_CREDIT_STALL[10] TCC_EA_RDREQ_IO_CREDIT_STALL[11] TCC_EA_RDREQ_GMI_CREDIT_STALL[11] TCC_EA_RDREQ_DRAM_CREDIT_STALL[11] TCC_EA_RDREQ_IO_CREDIT_STALL[12] TCC_EA_RDREQ_GMI_CREDIT_STALL[12] TCC_EA_RDREQ_DRAM_CREDIT_STALL[12] TCC_EA_RDREQ_IO_CREDIT_STALL[13] TCC_EA_RDREQ_GMI_CREDIT_STALL[13] TCC_EA_RDREQ_DRAM_CREDIT_STALL[13] TCC_EA_RDREQ_IO_CREDIT_STALL[14] TCC_EA_RDREQ_GMI_CREDIT_STALL[14] TCC_EA_RDREQ_DRAM_CREDIT_STALL[14] TCC_EA_RDREQ_IO_CREDIT_STALL[15] TCC_EA_RDREQ_GMI_CREDIT_STALL[15] TCC_EA_RDREQ_DRAM_CREDIT_STALL[15]
|
||||
|
||||
|
||||
pmc: TCC_EA_WRREQ_IO_CREDIT_STALL[0] TCC_EA_WRREQ_GMI_CREDIT_STALL[0] TCC_EA_WRREQ_DRAM_CREDIT_STALL[0] TCC_TOO_MANY_EA_WRREQS_STALL[0] TCC_EA_WRREQ_IO_CREDIT_STALL[1] TCC_EA_WRREQ_GMI_CREDIT_STALL[1] TCC_EA_WRREQ_DRAM_CREDIT_STALL[1] TCC_TOO_MANY_EA_WRREQS_STALL[1] TCC_EA_WRREQ_IO_CREDIT_STALL[2] TCC_EA_WRREQ_GMI_CREDIT_STALL[2] TCC_EA_WRREQ_DRAM_CREDIT_STALL[2] TCC_TOO_MANY_EA_WRREQS_STALL[2] TCC_EA_WRREQ_IO_CREDIT_STALL[3] TCC_EA_WRREQ_GMI_CREDIT_STALL[3] TCC_EA_WRREQ_DRAM_CREDIT_STALL[3] TCC_TOO_MANY_EA_WRREQS_STALL[3] TCC_EA_WRREQ_IO_CREDIT_STALL[4] TCC_EA_WRREQ_GMI_CREDIT_STALL[4] TCC_EA_WRREQ_DRAM_CREDIT_STALL[4] TCC_TOO_MANY_EA_WRREQS_STALL[4] TCC_EA_WRREQ_IO_CREDIT_STALL[5] TCC_EA_WRREQ_GMI_CREDIT_STALL[5] TCC_EA_WRREQ_DRAM_CREDIT_STALL[5] TCC_TOO_MANY_EA_WRREQS_STALL[5] TCC_EA_WRREQ_IO_CREDIT_STALL[6] TCC_EA_WRREQ_GMI_CREDIT_STALL[6] TCC_EA_WRREQ_DRAM_CREDIT_STALL[6] TCC_TOO_MANY_EA_WRREQS_STALL[6] TCC_EA_WRREQ_IO_CREDIT_STALL[7] TCC_EA_WRREQ_GMI_CREDIT_STALL[7] TCC_EA_WRREQ_DRAM_CREDIT_STALL[7] TCC_TOO_MANY_EA_WRREQS_STALL[7] TCC_EA_WRREQ_IO_CREDIT_STALL[8] TCC_EA_WRREQ_GMI_CREDIT_STALL[8] TCC_EA_WRREQ_DRAM_CREDIT_STALL[8] TCC_TOO_MANY_EA_WRREQS_STALL[8] TCC_EA_WRREQ_IO_CREDIT_STALL[9] TCC_EA_WRREQ_GMI_CREDIT_STALL[9] TCC_EA_WRREQ_DRAM_CREDIT_STALL[9] TCC_TOO_MANY_EA_WRREQS_STALL[9] TCC_EA_WRREQ_IO_CREDIT_STALL[10] TCC_EA_WRREQ_GMI_CREDIT_STALL[10] TCC_EA_WRREQ_DRAM_CREDIT_STALL[10] TCC_TOO_MANY_EA_WRREQS_STALL[10] TCC_EA_WRREQ_IO_CREDIT_STALL[11] TCC_EA_WRREQ_GMI_CREDIT_STALL[11] TCC_EA_WRREQ_DRAM_CREDIT_STALL[11] TCC_TOO_MANY_EA_WRREQS_STALL[11] TCC_EA_WRREQ_IO_CREDIT_STALL[12] TCC_EA_WRREQ_GMI_CREDIT_STALL[12] TCC_EA_WRREQ_DRAM_CREDIT_STALL[12] TCC_TOO_MANY_EA_WRREQS_STALL[12] TCC_EA_WRREQ_IO_CREDIT_STALL[13] TCC_EA_WRREQ_GMI_CREDIT_STALL[13] TCC_EA_WRREQ_DRAM_CREDIT_STALL[13] TCC_TOO_MANY_EA_WRREQS_STALL[13] TCC_EA_WRREQ_IO_CREDIT_STALL[14] TCC_EA_WRREQ_GMI_CREDIT_STALL[14] TCC_EA_WRREQ_DRAM_CREDIT_STALL[14] TCC_TOO_MANY_EA_WRREQS_STALL[14] TCC_EA_WRREQ_IO_CREDIT_STALL[15] TCC_EA_WRREQ_GMI_CREDIT_STALL[15] TCC_EA_WRREQ_DRAM_CREDIT_STALL[15] TCC_TOO_MANY_EA_WRREQS_STALL[15]
|
||||
|
||||
gpu:
|
||||
kernel:
|
||||
range:
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_WAVES
|
||||
|
||||
pmc: TCC_CYCLE_sum TCC_BUSY_sum TCC_PROBE_sum TCC_PROBE_ALL_sum
|
||||
pmc: TCC_NC_REQ_sum TCC_UC_REQ_sum TCC_CC_REQ_sum
|
||||
pmc: TCC_REQ_sum TCC_STREAMING_REQ_sum TCC_HIT_sum TCC_MISS_sum
|
||||
pmc: TCC_READ_sum TCC_WRITE_sum TCC_ATOMIC_sum TCC_WRITEBACK_sum
|
||||
pmc: TCC_EA_WRREQ_sum TCC_EA_WRREQ_64B_sum TCC_EA_WR_UNCACHED_32B_sum
|
||||
pmc: TCC_EA_WRREQ_STALL_sum TCC_EA_WRREQ_IO_CREDIT_STALL_sum TCC_EA_WRREQ_GMI_CREDIT_STALL_sum TCC_EA_WRREQ_DRAM_CREDIT_STALL_sum
|
||||
pmc: TCC_TOO_MANY_EA_WRREQS_STALL_sum TCC_EA_ATOMIC_sum TCC_EA_RDREQ_sum TCC_EA_RDREQ_32B_sum
|
||||
pmc: TCC_EA_RD_UNCACHED_32B_sum TCC_EA_RDREQ_IO_CREDIT_STALL_sum TCC_EA_RDREQ_GMI_CREDIT_STALL_sum TCC_EA_RDREQ_DRAM_CREDIT_STALL_sum
|
||||
pmc: TCC_NORMAL_WRITEBACK_sum TCC_ALL_TC_OP_WB_WRITEBACK_sum TCC_NORMAL_EVICT_sum
|
||||
pmc: TCC_ALL_TC_OP_INV_EVICT_sum TCC_EA_RDREQ_DRAM_sum TCC_EA_WRREQ_DRAM_sum
|
||||
pmc: TCC_EA_RDREQ_LEVEL_sum TCC_EA_WRREQ_LEVEL_sum TCC_EA_ATOMIC_LEVEL_sum
|
||||
|
||||
gpu:
|
||||
kernel:
|
||||
|
||||
range:
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
|
||||
|
||||
pmc: TCP_GATE_EN1_sum TCP_GATE_EN2_sum TCP_TD_TCP_STALL_CYCLES_sum TCP_TCR_TCP_STALL_CYCLES_sum
|
||||
pmc: TCP_READ_TAGCONFLICT_STALL_CYCLES_sum TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum TCP_TA_TCP_STATE_READ_sum
|
||||
pmc: TCP_VOLATILE_sum TCP_TOTAL_ACCESSES_sum TCP_TOTAL_READ_sum TCP_TOTAL_WRITE_sum
|
||||
pmc: TCP_TOTAL_ATOMIC_WITH_RET_sum TCP_TOTAL_ATOMIC_WITHOUT_RET_sum TCP_TOTAL_WRITEBACK_INVALIDATES_sum TCP_TOTAL_CACHE_ACCESSES_sum
|
||||
pmc: TCP_UTCL1_TRANSLATION_MISS_sum TCP_UTCL1_TRANSLATION_HIT_sum TCP_UTCL1_PERMISSION_MISS_sum TCP_UTCL1_REQUEST_sum
|
||||
pmc: TCP_TCP_LATENCY_sum TCP_TCC_READ_REQ_LATENCY_sum TCP_TCC_WRITE_REQ_LATENCY_sum TCP_TCC_READ_REQ_sum
|
||||
pmc: TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum TCP_TCC_NC_READ_REQ_sum
|
||||
pmc: TCP_TCC_NC_WRITE_REQ_sum TCP_TCC_NC_ATOMIC_REQ_sum TCP_TCC_UC_READ_REQ_sum TCP_TCC_UC_WRITE_REQ_sum
|
||||
pmc: TCP_TCC_UC_ATOMIC_REQ_sum TCP_TCC_CC_READ_REQ_sum TCP_TCC_CC_WRITE_REQ_sum TCP_TCC_CC_ATOMIC_REQ_sum
|
||||
pmc: TCP_PENDING_STALL_CYCLES_sum
|
||||
|
||||
#pmc: TCA_CYCLE_sum TCA_BUSY_sum
|
||||
|
||||
gpu:
|
||||
kernel:
|
||||
range:
|
||||
@@ -0,0 +1,12 @@
|
||||
pmc: GRBM_COUNT GRBM_GUI_ACTIVE SQ_CYCLES SQ_BUSY_CYCLES SQ_BUSY_CU_CYCLES SQ_WAVES SQ_WAVE_CYCLES
|
||||
|
||||
pmc: TD_TD_BUSY_sum TD_TC_STALL_sum
|
||||
pmc: TD_COALESCABLE_WAVEFRONT_sum TD_LOAD_WAVEFRONT_sum
|
||||
pmc: TD_ATOMIC_WAVEFRONT_sum TD_STORE_WAVEFRONT_sum
|
||||
|
||||
|
||||
|
||||
gpu:
|
||||
range:
|
||||
kernel:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
@@ -37,7 +39,6 @@ MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout
|
||||
|
||||
|
||||
def kernel_name_shortener(df, cache, level):
|
||||
|
||||
if level >= 5:
|
||||
return df
|
||||
|
||||
@@ -50,7 +51,6 @@ def kernel_name_shortener(df, cache, level):
|
||||
if columnName == "KernelName" or columnName == "Name":
|
||||
# loop through all indices
|
||||
for index in df.index:
|
||||
|
||||
original_name = df.loc[index, columnName]
|
||||
if original_name in cache:
|
||||
continue
|
||||
@@ -142,7 +142,7 @@ def parse(args, profileAndExport):
|
||||
print("Unable to parse SoC or workload name from sysinfo.csv")
|
||||
sys.exit(1)
|
||||
|
||||
db = "omniperf_" + args.team + "_" + name + "_" + soc
|
||||
db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
|
||||
|
||||
if Extractionlvl >= 5:
|
||||
print("KernelName shortening disabled")
|
||||
@@ -224,27 +224,27 @@ def convert_folder(connectionInfo, Extractionlvl):
|
||||
df_saved_file = t2.to_csv(newfilepath + file)
|
||||
|
||||
cmd = (
|
||||
'mongoimport --quiet --uri mongodb://{}:{}@{}:{} --authenticationDatabase "admin" --file {} -d {} -c {} --drop --type csv --headerline'
|
||||
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
|
||||
).format(
|
||||
connectionInfo["username"],
|
||||
connectionInfo["password"],
|
||||
connectionInfo["host"],
|
||||
connectionInfo["port"],
|
||||
newfilepath + file,
|
||||
connectionInfo["db"],
|
||||
newfilepath + file,
|
||||
fileName,
|
||||
)
|
||||
os.system(cmd)
|
||||
else:
|
||||
cmd = (
|
||||
'mongoimport --quiet --uri mongodb://{}:{}@{}:{} --authenticationDatabase "admin" --file {} -d {} -c {} --drop --type csv --headerline'
|
||||
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
|
||||
).format(
|
||||
connectionInfo["username"],
|
||||
connectionInfo["password"],
|
||||
connectionInfo["host"],
|
||||
connectionInfo["port"],
|
||||
connectionInfo["workload"] + "/" + file,
|
||||
connectionInfo["db"],
|
||||
connectionInfo["workload"] + "/" + file,
|
||||
fileName,
|
||||
)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -1,402 +1,396 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
from PyQt5.QtWidgets import (
|
||||
QMainWindow,
|
||||
QApplication,
|
||||
QTreeView,
|
||||
QTableWidget,
|
||||
QTableWidgetItem,
|
||||
)
|
||||
from PyQt5.QtWidgets import (
|
||||
QHBoxLayout,
|
||||
QWidget,
|
||||
QAction,
|
||||
QFileDialog,
|
||||
QAbstractItemView,
|
||||
qApp,
|
||||
)
|
||||
from PyQt5.QtGui import QStandardItemModel, QStandardItem
|
||||
from lxml import html
|
||||
import sys
|
||||
|
||||
|
||||
# class view(QWidget):
|
||||
class mainWindow(QMainWindow):
    """Main window of the "GFX Perfmon Builder" tool.

    The left pane is a tree of metrics imported from an XML file; the right
    pane is a table with one column per IP block listed in
    ``perfmon_config``.  Checking a metric in the tree puts its name into the
    column of its IP block, packing at most ``perfmon_config[block]`` names
    (newline separated) into each cell.  Every table row can then be exported
    as one ``pmc:`` line.
    """

    def __init__(self):
        # Zero-argument super() so QMainWindow.__init__ itself is executed;
        # super(QMainWindow, self) would start the lookup *after* QMainWindow.
        super().__init__()

        ###############################################################################
        # SOC Parameters
        ##############################################################################

        # Per IP block max number of simultaneous counters
        # GFX IP Blocks
        self.perfmon_config = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
        }

        # GFX Architectures (XML tags recognised as SoC nodes by importData)
        self.soc_arch_list = ["gfx906", "gfx908", "gfx90a"]

        ###############################################################################
        # Window layout Design
        ##############################################################################

        self.block_list = []  # block names that already own a tree node
        self.nodes_dict = {}  # SoC tag / block name / metric name -> QStandardItem

        self.tree = QTreeView(self)
        self.table = QTableWidget()

        # Tree setup
        self.tree.header().setDefaultSectionSize(180)
        self.model = QStandardItemModel()
        self.model.setHorizontalHeaderLabels(["Metric", "Block", "Event", "Definition"])

        self.tree.setModel(self.model)
        self.tree.setEditTriggers(QAbstractItemView.NoEditTriggers)

        # Set up click processing
        self.tree.clicked.connect(self.pmc_select)

        # Table setup: one column per IP block, in perfmon_config order
        tableHeader = list(self.perfmon_config.keys())
        self.table.setColumnCount(len(tableHeader))
        self.table.setHorizontalHeaderLabels(tableHeader)
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        # showGrid() only *reads* the flag; setShowGrid() is the setter.
        self.table.setShowGrid(True)

        self.setWindowTitle("GFX Perfmon Builder")

        # layout: lhs: metrics; rhs: selected perfmon
        # The layout is owned by the central widget: a QMainWindow already has
        # its own layout and must not be used as the parent of another one.
        widget = QWidget()
        layout = QHBoxLayout(widget)
        layout.addWidget(self.tree)
        layout.addWidget(self.table)
        self.setCentralWidget(widget)

        # Add Status
        self.statusBar()

        ###############################################################################
        # Window Menu Design
        ##############################################################################

        # Setup file menu
        menuBar = self.menuBar()
        menuBar.setNativeMenuBar(False)

        openAction = QAction("&Open", self)
        openAction.setShortcut("Ctrl+O")
        openAction.setStatusTip("Open GFX Metrics file")
        openAction.triggered.connect(self.openGFXDialog)

        saveAction = QAction("&Save", self)
        saveAction.setShortcut("Ctrl+S")
        saveAction.setStatusTip("Save to PMC file")
        saveAction.triggered.connect(self.exportGFXDialog)

        exitAction = QAction("&Exit", self)
        exitAction.setShortcut("Ctrl+Q")
        exitAction.setStatusTip("Exit")
        exitAction.triggered.connect(self.close)

        fileMenu = menuBar.addMenu("&File")
        fileMenu.addActions([openAction, saveAction])
        fileMenu.addSeparator()
        fileMenu.addActions([exitAction])

    def openGFXDialog(self):
        """Ask for an XML metrics file and load it into the tree."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        # The options were previously built but never handed to the dialog,
        # and the initial filter did not match the only filter offered.
        fileName, _ = QFileDialog.getOpenFileName(
            self,
            "Open GFX Metrics",
            "",
            "XML Files (*.xml)",
            "XML Files (*.xml)",
            options=options,
        )

        # Parse the xml
        if fileName:
            xmlparsed = html.parse(fileName)
            self.importData(xmlparsed)

    def exportGFXDialog(self):
        """Ask for a destination text file and write the PMC table to it."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        fileName, _ = QFileDialog.getSaveFileName(
            self,
            "Export PMC Counters",
            "",
            "Text File (*.txt)",
            "Text File (*.txt)",
            options=options,
        )

        if fileName:
            self.exportPMCCounters(fileName)

    def exportPMCCounters(self, fileName):
        """Write one ``pmc:`` line per table row to ``fileName``.

        Each line holds the metric names of every non-empty cell of the row,
        separated by single spaces, followed by the fixed ``gpu:``,
        ``dispatch:`` and ``kernel:`` trailer lines.
        """
        total_IP_blocks = len(self.perfmon_config)

        # Context manager: the file is closed even if a write fails.
        with open(fileName, "w") as f:
            for row in range(self.table.rowCount()):
                pmc_str = "pmc: "
                for col in range(total_IP_blocks):
                    cell = self.table.item(row, col)
                    if cell:
                        pmc_str = pmc_str + " ".join(cell.text().split("\n")) + " "
                f.write(pmc_str + "\n")

            # Add standard lines
            f.write("\n\n")
            f.write("gpu: \n")
            f.write("dispatch: \n")
            f.write("kernel: \n")

    def _pmc_column(self, IP_block):
        """Return ``(block, column)`` for ``IP_block`` or ``None`` if the block
        has no column.  ``SQC`` is mapped to ``SQ`` (same Perfmon block)."""
        if IP_block == "SQC":
            IP_block = "SQ"
        if IP_block not in self.perfmon_config:
            return None
        return IP_block, list(self.perfmon_config).index(IP_block)

    def _row_is_empty(self, row):
        """Return True when no cell of ``row`` holds any text."""
        for cindex in range(len(self.perfmon_config)):
            cell = self.table.item(row, cindex)
            if cell and cell.text():
                return False
        return True

    def pmc_metric_selected(self, metric_name, col):
        """Return True if ``metric_name`` is listed in any cell of column ``col``."""
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry and metric_name in entry.text().split(sep="\n"):
                return True
        return False

    def pmc_remove_metric(self, metric_name, IP_block):
        """Remove ``metric_name`` from the column of ``IP_block`` if present.

        The remaining metrics of that column are re-packed from the first row
        downwards, and table rows left without any content are dropped.
        """
        target = self._pmc_column(IP_block)
        if target is None:
            # no action if it is for a ghost IP!
            return
        IP_block, col = target

        if not self.pmc_metric_selected(metric_name, col):
            return

        # Collect every metric of the column and clear the cells; they are
        # re-allocated below.
        pmc_list = []
        for row in range(self.table.rowCount()):
            entry = self.table.takeItem(row, col)
            if entry and entry.text():
                pmc_list.extend(entry.text().split(sep="\n"))

        pmc_list.remove(metric_name)

        # allowed PMC counters per batch
        max_pmc_num = self.perfmon_config[IP_block]

        for row, start_index in enumerate(range(0, len(pmc_list), max_pmc_num)):
            pmc_str = "\n".join(pmc_list[start_index : start_index + max_pmc_num])
            self.table.setItem(row, col, QTableWidgetItem(pmc_str))

        # Drop trailing empty rows.  This also runs when the column became
        # completely empty; returning early there used to leave a blank row
        # behind, which was exported as an empty "pmc:" line.
        while self.table.rowCount() > 0 and self._row_is_empty(
            self.table.rowCount() - 1
        ):
            self.table.removeRow(self.table.rowCount() - 1)

        self.table.resizeRowsToContents()

    def pmc_add_metric(self, metric_name, IP_block):
        """Add ``metric_name`` to the column of ``IP_block`` unless already there.

        The metric goes into the first cell of the column that is empty or
        holds fewer than ``perfmon_config[IP_block]`` names; if every existing
        row is full, a new row is appended.
        """
        target = self._pmc_column(IP_block)
        if target is None:
            return
        IP_block, col = target

        # check if the metric already exists
        if self.pmc_metric_selected(metric_name, col):
            return

        limit = self.perfmon_config[IP_block]
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is None or not entry.text():
                # put it into the empty cell
                self.table.setItem(row, col, QTableWidgetItem(metric_name))
                return

            pmc_list = entry.text().split(sep="\n")
            if len(pmc_list) < limit:
                # per-IP HW counter limit not reached yet: extend this cell
                pmc_list.append(metric_name)
                self.table.setItem(row, col, QTableWidgetItem("\n".join(pmc_list)))
                self.table.resizeRowsToContents()
                return

        # Table is empty or every row of this column is full: start a new row
        row = self.table.rowCount()
        self.table.insertRow(row)
        self.table.setItem(row, col, QTableWidgetItem(metric_name))

    def pmc_select(self, item):
        """Slot for ``tree.clicked``: mirror a metric's check state in the table.

        ``item`` is the clicked model index.  Clicks on anything that is not a
        checkable metric node are ignored.
        """
        metric_name = item.data()
        node = self.nodes_dict.get(metric_name)
        if node is None or not node.isCheckable():
            return

        # only proper metrics check/uncheck is processed here.
        # The IP block is the part of the metric name before the first "_".
        IP_block = metric_name.split(sep="_")[0]

        state = node.checkState()
        if state == 0:  # Qt.Unchecked
            # unselect the metric in the table if it is currently selected
            self.pmc_remove_metric(metric_name, IP_block)
        elif state == 2:  # Qt.Checked
            self.pmc_add_metric(metric_name, IP_block)

    # Function to populate the treeview from a parsed XML document
    def importData(self, xmlparsed, root=None):
        """Rebuild the metric tree from ``xmlparsed``.

        Elements whose tag is in ``soc_arch_list`` become top-level SoC nodes.
        Each ``metric`` element directly below such an element is added under
        a node named after its ``block`` attribute, as a checkable row showing
        its ``name``, ``block``, ``event`` and ``descr`` attributes.
        """
        self.model.setRowCount(0)
        # Clearing the model destroys its items, so the bookkeeping that
        # points at them has to be reset as well; otherwise a second import
        # would reuse stale entries.
        self.block_list = []
        self.nodes_dict = {}

        if root is None:
            root = self.model.invisibleRootItem()

        # iter() replaces the deprecated getiterator()
        for x in xmlparsed.iter():
            # Add SoC node to Root
            if x.tag in self.soc_arch_list:
                root.appendRow([QStandardItem(x.tag)])
                self.nodes_dict[x.tag] = root.child(root.rowCount() - 1)

            xml_parent = x.getparent()
            if (
                x.tag != "metric"
                or xml_parent is None
                or xml_parent.tag not in self.soc_arch_list
            ):
                continue

            block = x.attrib["block"]

            # New IP block (e.g., SQ) detected, create a new hierarchy for the block
            if block not in self.block_list:
                self.block_list.append(block)
                soc_node = self.nodes_dict[xml_parent.tag]
                soc_node.appendRow(
                    [
                        QStandardItem(block),
                        QStandardItem(""),
                        QStandardItem(""),
                        QStandardItem(""),
                    ]
                )
                # record the tree node for the block
                self.nodes_dict[block] = soc_node.child(soc_node.rowCount() - 1)

            # Add metric node to the Block node
            block_node = self.nodes_dict[block]
            metric_item = QStandardItem(x.attrib["name"])
            metric_item.setCheckable(True)
            block_node.appendRow(
                [
                    metric_item,
                    QStandardItem(block),
                    QStandardItem(x.attrib["event"]),
                    QStandardItem(x.attrib["descr"]),
                ]
            )
            self.nodes_dict[x.attrib["name"]] = block_node.child(
                block_node.rowCount() - 1
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Create the Qt application with the "Fusion" style.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle("Fusion")

    # Build the main window, place/size it and make it visible.
    main_win = mainWindow()
    main_win.setGeometry(300, 100, 600, 300)
    main_win.show()

    # Run the event loop and use its result as the process exit status.
    exit_status = qt_app.exec_()
    sys.exit(exit_status)
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
from PyQt5.QtWidgets import (
|
||||
QMainWindow,
|
||||
QApplication,
|
||||
QTreeView,
|
||||
QTableWidget,
|
||||
QTableWidgetItem,
|
||||
)
|
||||
from PyQt5.QtWidgets import (
|
||||
QHBoxLayout,
|
||||
QWidget,
|
||||
QAction,
|
||||
QFileDialog,
|
||||
QAbstractItemView,
|
||||
qApp,
|
||||
)
|
||||
from PyQt5.QtGui import QStandardItemModel, QStandardItem
|
||||
from lxml import html
|
||||
import sys
|
||||
|
||||
|
||||
# class view(QWidget):
|
||||
class mainWindow(QMainWindow):
    """Main window of the "GFX Perfmon Builder" tool.

    The left pane is a tree of metrics imported from an XML file; the right
    pane is a table with one column per IP block listed in
    ``perfmon_config``.  Checking a metric in the tree puts its name into the
    column of its IP block, packing at most ``perfmon_config[block]`` names
    (newline separated) into each cell.  Every table row can then be exported
    as one ``pmc:`` line.
    """

    def __init__(self):
        # Zero-argument super() so QMainWindow.__init__ itself is executed;
        # super(QMainWindow, self) would start the lookup *after* QMainWindow.
        super().__init__()

        ###############################################################################
        # SOC Parameters
        ##############################################################################

        # Per IP block max number of simultaneous counters
        # GFX IP Blocks
        self.perfmon_config = {
            "SQ": 8,
            "TA": 2,
            "TD": 2,
            "TCP": 4,
            "TCC": 4,
            "CPC": 2,
            "CPF": 2,
            "SPI": 2,
            "GRBM": 2,
            "GDS": 4,
        }

        # GFX Architectures (XML tags recognised as SoC nodes by importData)
        self.soc_arch_list = ["gfx906", "gfx908", "gfx90a"]

        ###############################################################################
        # Window layout Design
        ##############################################################################

        self.block_list = []  # block names that already own a tree node
        self.nodes_dict = {}  # SoC tag / block name / metric name -> QStandardItem

        self.tree = QTreeView(self)
        self.table = QTableWidget()

        # Tree setup
        self.tree.header().setDefaultSectionSize(180)
        self.model = QStandardItemModel()
        self.model.setHorizontalHeaderLabels(["Metric", "Block", "Event", "Definition"])

        self.tree.setModel(self.model)
        self.tree.setEditTriggers(QAbstractItemView.NoEditTriggers)

        # Set up click processing
        self.tree.clicked.connect(self.pmc_select)

        # Table setup: one column per IP block, in perfmon_config order
        tableHeader = list(self.perfmon_config.keys())
        self.table.setColumnCount(len(tableHeader))
        self.table.setHorizontalHeaderLabels(tableHeader)
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        # showGrid() only *reads* the flag; setShowGrid() is the setter.
        self.table.setShowGrid(True)

        self.setWindowTitle("GFX Perfmon Builder")

        # layout: lhs: metrics; rhs: selected perfmon
        # The layout is owned by the central widget: a QMainWindow already has
        # its own layout and must not be used as the parent of another one.
        widget = QWidget()
        layout = QHBoxLayout(widget)
        layout.addWidget(self.tree)
        layout.addWidget(self.table)
        self.setCentralWidget(widget)

        # Add Status
        self.statusBar()

        ###############################################################################
        # Window Menu Design
        ##############################################################################

        # Setup file menu
        menuBar = self.menuBar()
        menuBar.setNativeMenuBar(False)

        openAction = QAction("&Open", self)
        openAction.setShortcut("Ctrl+O")
        openAction.setStatusTip("Open GFX Metrics file")
        openAction.triggered.connect(self.openGFXDialog)

        saveAction = QAction("&Save", self)
        saveAction.setShortcut("Ctrl+S")
        saveAction.setStatusTip("Save to PMC file")
        saveAction.triggered.connect(self.exportGFXDialog)

        exitAction = QAction("&Exit", self)
        exitAction.setShortcut("Ctrl+Q")
        exitAction.setStatusTip("Exit")
        exitAction.triggered.connect(self.close)

        fileMenu = menuBar.addMenu("&File")
        fileMenu.addActions([openAction, saveAction])
        fileMenu.addSeparator()
        fileMenu.addActions([exitAction])

    def openGFXDialog(self):
        """Ask for an XML metrics file and load it into the tree."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        # The options were previously built but never handed to the dialog,
        # and the initial filter did not match the only filter offered.
        fileName, _ = QFileDialog.getOpenFileName(
            self,
            "Open GFX Metrics",
            "",
            "XML Files (*.xml)",
            "XML Files (*.xml)",
            options=options,
        )

        # Parse the xml
        if fileName:
            xmlparsed = html.parse(fileName)
            self.importData(xmlparsed)

    def exportGFXDialog(self):
        """Ask for a destination text file and write the PMC table to it."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        fileName, _ = QFileDialog.getSaveFileName(
            self,
            "Export PMC Counters",
            "",
            "Text File (*.txt)",
            "Text File (*.txt)",
            options=options,
        )

        if fileName:
            self.exportPMCCounters(fileName)

    def exportPMCCounters(self, fileName):
        """Write one ``pmc:`` line per table row to ``fileName``.

        Each line holds the metric names of every non-empty cell of the row,
        separated by single spaces, followed by the fixed ``gpu:``,
        ``dispatch:`` and ``kernel:`` trailer lines.
        """
        total_IP_blocks = len(self.perfmon_config)

        # Context manager: the file is closed even if a write fails.
        with open(fileName, "w") as f:
            for row in range(self.table.rowCount()):
                pmc_str = "pmc: "
                for col in range(total_IP_blocks):
                    cell = self.table.item(row, col)
                    if cell:
                        pmc_str = pmc_str + " ".join(cell.text().split("\n")) + " "
                f.write(pmc_str + "\n")

            # Add standard lines
            f.write("\n\n")
            f.write("gpu: \n")
            f.write("dispatch: \n")
            f.write("kernel: \n")

    def _pmc_column(self, IP_block):
        """Return ``(block, column)`` for ``IP_block`` or ``None`` if the block
        has no column.  ``SQC`` is mapped to ``SQ`` (same Perfmon block)."""
        if IP_block == "SQC":
            IP_block = "SQ"
        if IP_block not in self.perfmon_config:
            return None
        return IP_block, list(self.perfmon_config).index(IP_block)

    def _row_is_empty(self, row):
        """Return True when no cell of ``row`` holds any text."""
        for cindex in range(len(self.perfmon_config)):
            cell = self.table.item(row, cindex)
            if cell and cell.text():
                return False
        return True

    def pmc_metric_selected(self, metric_name, col):
        """Return True if ``metric_name`` is listed in any cell of column ``col``."""
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry and metric_name in entry.text().split(sep="\n"):
                return True
        return False

    def pmc_remove_metric(self, metric_name, IP_block):
        """Remove ``metric_name`` from the column of ``IP_block`` if present.

        The remaining metrics of that column are re-packed from the first row
        downwards, and table rows left without any content are dropped.
        """
        target = self._pmc_column(IP_block)
        if target is None:
            # no action if it is for a ghost IP!
            return
        IP_block, col = target

        if not self.pmc_metric_selected(metric_name, col):
            return

        # Collect every metric of the column and clear the cells; they are
        # re-allocated below.
        pmc_list = []
        for row in range(self.table.rowCount()):
            entry = self.table.takeItem(row, col)
            if entry and entry.text():
                pmc_list.extend(entry.text().split(sep="\n"))

        pmc_list.remove(metric_name)

        # allowed PMC counters per batch
        max_pmc_num = self.perfmon_config[IP_block]

        for row, start_index in enumerate(range(0, len(pmc_list), max_pmc_num)):
            pmc_str = "\n".join(pmc_list[start_index : start_index + max_pmc_num])
            self.table.setItem(row, col, QTableWidgetItem(pmc_str))

        # Drop trailing empty rows.  This also runs when the column became
        # completely empty; returning early there used to leave a blank row
        # behind, which was exported as an empty "pmc:" line.
        while self.table.rowCount() > 0 and self._row_is_empty(
            self.table.rowCount() - 1
        ):
            self.table.removeRow(self.table.rowCount() - 1)

        self.table.resizeRowsToContents()

    def pmc_add_metric(self, metric_name, IP_block):
        """Add ``metric_name`` to the column of ``IP_block`` unless already there.

        The metric goes into the first cell of the column that is empty or
        holds fewer than ``perfmon_config[IP_block]`` names; if every existing
        row is full, a new row is appended.
        """
        target = self._pmc_column(IP_block)
        if target is None:
            return
        IP_block, col = target

        # check if the metric already exists
        if self.pmc_metric_selected(metric_name, col):
            return

        limit = self.perfmon_config[IP_block]
        for row in range(self.table.rowCount()):
            entry = self.table.item(row, col)
            if entry is None or not entry.text():
                # put it into the empty cell
                self.table.setItem(row, col, QTableWidgetItem(metric_name))
                return

            pmc_list = entry.text().split(sep="\n")
            if len(pmc_list) < limit:
                # per-IP HW counter limit not reached yet: extend this cell
                pmc_list.append(metric_name)
                self.table.setItem(row, col, QTableWidgetItem("\n".join(pmc_list)))
                self.table.resizeRowsToContents()
                return

        # Table is empty or every row of this column is full: start a new row
        row = self.table.rowCount()
        self.table.insertRow(row)
        self.table.setItem(row, col, QTableWidgetItem(metric_name))

    def pmc_select(self, item):
        """Slot for ``tree.clicked``: mirror a metric's check state in the table.

        ``item`` is the clicked model index.  Clicks on anything that is not a
        checkable metric node are ignored.
        """
        metric_name = item.data()
        node = self.nodes_dict.get(metric_name)
        if node is None or not node.isCheckable():
            return

        # only proper metrics check/uncheck is processed here.
        # The IP block is the part of the metric name before the first "_".
        IP_block = metric_name.split(sep="_")[0]

        state = node.checkState()
        if state == 0:  # Qt.Unchecked
            # unselect the metric in the table if it is currently selected
            self.pmc_remove_metric(metric_name, IP_block)
        elif state == 2:  # Qt.Checked
            self.pmc_add_metric(metric_name, IP_block)

    # Function to populate the treeview from a parsed XML document
    def importData(self, xmlparsed, root=None):
        """Rebuild the metric tree from ``xmlparsed``.

        Elements whose tag is in ``soc_arch_list`` become top-level SoC nodes.
        Each ``metric`` element directly below such an element is added under
        a node named after its ``block`` attribute, as a checkable row showing
        its ``name``, ``block``, ``event`` and ``descr`` attributes.
        """
        self.model.setRowCount(0)
        # Clearing the model destroys its items, so the bookkeeping that
        # points at them has to be reset as well; otherwise a second import
        # would reuse stale entries.
        self.block_list = []
        self.nodes_dict = {}

        if root is None:
            root = self.model.invisibleRootItem()

        # iter() replaces the deprecated getiterator()
        for x in xmlparsed.iter():
            # Add SoC node to Root
            if x.tag in self.soc_arch_list:
                root.appendRow([QStandardItem(x.tag)])
                self.nodes_dict[x.tag] = root.child(root.rowCount() - 1)

            xml_parent = x.getparent()
            if (
                x.tag != "metric"
                or xml_parent is None
                or xml_parent.tag not in self.soc_arch_list
            ):
                continue

            block = x.attrib["block"]

            # New IP block (e.g., SQ) detected, create a new hierarchy for the block
            if block not in self.block_list:
                self.block_list.append(block)
                soc_node = self.nodes_dict[xml_parent.tag]
                soc_node.appendRow(
                    [
                        QStandardItem(block),
                        QStandardItem(""),
                        QStandardItem(""),
                        QStandardItem(""),
                    ]
                )
                # record the tree node for the block
                self.nodes_dict[block] = soc_node.child(soc_node.rowCount() - 1)

            # Add metric node to the Block node
            block_node = self.nodes_dict[block]
            metric_item = QStandardItem(x.attrib["name"])
            metric_item.setCheckable(True)
            block_node.appendRow(
                [
                    metric_item,
                    QStandardItem(block),
                    QStandardItem(x.attrib["event"]),
                    QStandardItem(x.attrib["descr"]),
                ]
            )
            self.nodes_dict[x.attrib["name"]] = block_node.child(
                block_node.rowCount() - 1
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Create the Qt application with the "Fusion" style.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle("Fusion")

    # Build the main window, place/size it and make it visible.
    main_win = mainWindow()
    main_win.setGeometry(300, 100, 600, 300)
    main_win.show()

    # Run the event loop and use its result as the process exit status.
    exit_status = qt_app.exec_()
    sys.exit(exit_status)
|
||||
|
||||
+23
-13
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import sys, os, pathlib, shutil, subprocess, argparse, glob, re
|
||||
import numpy as np
|
||||
@@ -29,6 +31,19 @@ prog = "omniperf"
|
||||
# Per IP block max number of simultaneous counters
|
||||
# GFX IP Blocks
|
||||
perfmon_config = {
|
||||
"vega10": {
|
||||
"SQ": 8,
|
||||
"TA": 2,
|
||||
"TD": 2,
|
||||
"TCP": 4,
|
||||
"TCC": 4,
|
||||
"CPC": 2,
|
||||
"CPF": 2,
|
||||
"SPI": 2,
|
||||
"GRBM": 2,
|
||||
"GDS": 4,
|
||||
"TCC_channels": 16,
|
||||
},
|
||||
"mi50": {
|
||||
"SQ": 8,
|
||||
"TA": 2,
|
||||
@@ -72,7 +87,6 @@ perfmon_config = {
|
||||
|
||||
|
||||
def perfmon_coalesce(pmc_files_list, workload_dir, soc):
|
||||
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
|
||||
# match pattern for pmc counters
|
||||
@@ -97,7 +111,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
|
||||
|
||||
# Extract all PMC counters and store in separate buckets
|
||||
for fname in pmc_files_list:
|
||||
|
||||
lines = open(fname, "r").read().splitlines()
|
||||
|
||||
for line in lines:
|
||||
@@ -170,7 +183,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
|
||||
|
||||
|
||||
def perfmon_emit(pmc_list, workload_dir, soc):
|
||||
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
|
||||
# Calculate the minimum number of iteration to save the pmc counters
|
||||
@@ -233,7 +245,6 @@ def perfmon_emit(pmc_list, workload_dir, soc):
|
||||
|
||||
|
||||
def perfmon_filter(workload_dir, perfmon_dir, args):
|
||||
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
soc = args.target
|
||||
|
||||
@@ -275,7 +286,6 @@ def perfmon_filter(workload_dir, perfmon_dir, args):
|
||||
|
||||
|
||||
def pmc_filter(workload_dir, perfmon_dir, soc):
|
||||
|
||||
workload_perfmon_dir = workload_dir + "/perfmon"
|
||||
|
||||
if not os.path.isdir(workload_perfmon_dir):
|
||||
|
||||
@@ -1,661 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
from linecache import cache
|
||||
import subprocess
|
||||
from operator import sub
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.pyplot import get, text
|
||||
from math import log, pi, sqrt
|
||||
import pandas as pd
|
||||
import pylab
|
||||
|
||||
from dataclasses import dataclass
|
||||
import csv
|
||||
|
||||
|
||||
################################################
|
||||
# Global vars
|
||||
################################################
|
||||
|
||||
# Prefix of the generated plot's file name.
IMGNAME = "empirRoof"

# Multiplier used when converting LDS counters to bytes (default assuming mi200).
L2_BANKS = 32

# Horizontal extent (arithmetic intensity) over which the roofline is drawn.
XMIN, XMAX = 0.01, 1000

# Default font attributes shared by every label on the plot.
FONT_SIZE, FONT_COLOR, FONT_WEIGHT = 16, "black", "bold"

# Targets for which roofline analysis is available.
SUPPORTED_SOC = ["mi200"]
|
||||
|
||||
################################################
|
||||
# Helper funcs
|
||||
################################################
|
||||
@dataclass
class AI_Data:
    """Aggregated counters for one kernel (or one dispatch) on the roofline plot.

    Instances are built positionally, so the field order below is part of the
    constructor contract and must not be rearranged.
    """

    # --- identification -----------------------------------------------------
    KernelName: str
    numCalls: float  # number of dispatches folded into this record

    # --- floating-point work --------------------------------------------------
    total_flops: float
    valu_flops: float
    mfma_flops_f16: float
    mfma_flops_bf16: float
    mfma_flops_f32: float
    mfma_flops_f64: float

    # --- data moved at each memory level ---------------------------------------
    lds_data: float
    L1cache_data: float
    L2cache_data: float
    hbm_data: float

    # --- timing (presumably nanoseconds, derived from EndNs - BeginNs) ---------
    totalDuration: float
    avgDuration: float
|
||||
|
||||
|
||||
def get_font():
    """Return the default font settings as text keyword arguments.

    A fresh dict is created on every call, so a caller may modify the result
    (for example to enlarge the title font) without affecting other labels.
    """
    font = dict(size=FONT_SIZE, color=FONT_COLOR, weight=FONT_WEIGHT)
    font["family"] = "serif"
    return font
|
||||
|
||||
|
||||
def get_color(catagory):
    """Map an arithmetic-intensity series name to its marker colour.

    Args:
        catagory: one of "curr_ai_l1", "curr_ai_l2" or "curr_ai_hbm".

    Returns:
        The colour name used for that series.

    Raises:
        RuntimeError: if ``catagory`` is not one of the known series names.
    """
    palette = (
        ("curr_ai_l1", "green"),
        ("curr_ai_l2", "blue"),
        ("curr_ai_hbm", "red"),
    )
    # Linear scan with == keeps the comparison semantics of a plain if/elif chain.
    for series, colour in palette:
        if catagory == series:
            return colour
    raise RuntimeError("Invalid catagory passed to get_color()")
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Plot BW at each cache level
|
||||
# -------------------------------------------------------------------------------------
|
||||
def plot_roof(inputs, roof_data):
    """Draw the roofline ceilings on the current matplotlib axes.

    For every requested memory level a bandwidth line is drawn from ``XMIN`` up
    to the VALU peak and then extended up to the MFMA peak. Afterwards the two
    horizontal compute ceilings (VALU and MFMA) are drawn and labelled.

    Args:
        inputs: dict; ``inputs["mem"]`` is either "ALL" or a single level name
            that prefixes a ``<level>Bw`` entry of ``roof_data``;
            ``inputs["axes"]`` is falsy, or a sequence whose first two items
            are the x-axis limits of a zoomed plot.
        roof_data: mapping of column name to a list of values; only element 0
            of each list is read.

    Returns:
        "FP32" if the FP32 VALU peak exceeds the FP64 one, otherwise "FP64".
    """
    mem_levels = ["HBM", "L2", "L1", "LDS"] if inputs["mem"] == "ALL" else [inputs["mem"]]

    fp32_peak = float(roof_data["FP32Flops"][0])
    fp64_peak = float(roof_data["FP64Flops"][0])
    targ_dtype = "FP32" if fp32_peak > fp64_peak else "FP64"
    print("Dtype: ", targ_dtype)
    print(inputs["mem"])

    # Both compute peaks are independent of the memory level, so read them once.
    peakOps = float(roof_data[targ_dtype + "Flops"][0])
    peakMFMA = float(roof_data["MFMAF{}Flops".format(targ_dtype[2:])][0])

    zoomed = bool(inputs["axes"])

    # mem_levels always holds at least one entry, so these are overwritten
    # before the compute roofs below use them.
    x2 = x2_mfma = -1
    for level in mem_levels:
        peakBw = float(roof_data[level + "Bw"][0])

        # Bandwidth line: from the left edge up to the VALU ridge point.
        x1, y1 = float(XMIN), float(XMIN) * peakBw
        x2, y2 = peakOps / peakBw, peakOps
        plt.plot([x1, x2], [y1, y2], color="magenta")

        # Extension of the same line up to the MFMA ridge point
        # (NOTE: assuming MI200 soc).
        x2_mfma = peakMFMA / peakBw
        plt.plot([x2, x2_mfma], [peakOps, peakMFMA], color="blue")

        # Label position: midpoint of the bandwidth line in log-log space.
        x1log = log(x1) / log(10)
        x2log = log(x2) / log(10)
        y1log = log(y1) / log(10)
        y2log = log(y2) / log(10)
        x_text = 10 ** ((x1log + x2log) / 2)
        y_text = 10 ** ((y1log + y2log) / 2)

        # TODO: improve accuracy of text angle (tilt); this ignores the
        # figure's aspect ratio.
        dx = abs(log(x2) - log(x1))
        dy = abs(log(y2) - log(y1))
        angle = (180.0 / pi) * numpy.arctan(dy / dx)

        # The L1 cache is presented to the user as "vL1D" in both the normal
        # and the zoomed layout.
        shown_level = "vL1D" if level.upper() == "L1" else level.upper()
        bw_label = "{} {} GB/s".format(int(peakBw), shown_level)

        if not zoomed:
            text(
                x_text,
                y_text,
                bw_label,
                rotation=angle,
                rotation_mode="anchor",
                **get_font(),
            )
        else:
            # Only label the line when its ridge point is inside the zoomed range.
            print("if {} < {}".format(inputs["axes"][0], 10**x2log))
            if inputs["axes"][0] < 10**x2log:
                text(
                    10**x2log,
                    10**y2log,
                    bw_label,
                    rotation=angle,
                    rotation_mode="anchor",
                    **get_font(),
                )

    # ---------------------------------------------------------------------
    # Compute roofs, starting at the ridge point of the last memory level.
    # ---------------------------------------------------------------------
    label_x = XMAX if not zoomed else inputs["axes"][1]

    # VALU (FMA) roof
    plt.plot([min(XMAX, x2), XMAX], [peakOps, peakOps], color="magenta")
    text(
        label_x,
        peakOps - 4000,  # fixed vertical offset placing the label under the roof
        "{} VALU GFLOP/sec".format(int(peakOps)),
        horizontalalignment="right",
        **get_font(),
    )

    # MFMA roof
    plt.plot([min(XMAX, x2_mfma), XMAX], [peakMFMA, peakMFMA], color="blue")
    text(
        label_x,
        peakMFMA + 1000,
        "{} MFMA GFLOP/sec".format(int(peakMFMA)),
        horizontalalignment="right",
        **get_font(),
    )

    return targ_dtype
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Overlay application performance
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Calculate relevant metrics for the arithmetic intensity (AI) calculation
|
||||
def plot_application(inputs, verbose):
    """Overlay the application's measured performance on the roofline plot.

    Reads ``<inputs["path"]>/pmc_perf.csv``, aggregates the hardware counters
    either per kernel (``inputs["sort"] == "kernels"``, values averaged over
    the kernel's dispatches) or per dispatch (``"dispatches"``), keeps the ten
    entries with the largest total duration and scatter-plots their arithmetic
    intensity against L1, L2 and HBM traffic versus achieved performance.

    Counter groups whose CSV columns are missing are skipped for that row; with
    ``verbose >= 2`` a message is printed for every skipped group.

    Args:
        inputs: dict providing at least the "path" and "sort" entries.
        verbose: integer verbosity level.
    """
    df = pd.read_csv(inputs["path"] + "/pmc_perf.csv")
    # Sorting by name makes all dispatches of one kernel contiguous, which the
    # per-kernel aggregation below relies on.
    df = df.sort_values(by=["KernelName"]).reset_index(drop=True)

    # Accumulated fields, listed in the positional order AI_Data expects them
    # (right after KernelName and numCalls).
    record_fields = (
        "total_flops",
        "valu_flops",
        "mfma_flops_f16",
        "mfma_flops_bf16",
        "mfma_flops_f32",
        "mfma_flops_f64",
        "lds_data",
        "L1cache_data",
        "L2cache_data",
        "hbm_data",
    )
    mfma_flop_columns = (
        ("mfma_flops_f16", "SQ_INSTS_VALU_MFMA_MOPS_F16"),
        ("mfma_flops_bf16", "SQ_INSTS_VALU_MFMA_MOPS_BF16"),
        ("mfma_flops_f32", "SQ_INSTS_VALU_MFMA_MOPS_F32"),
        ("mfma_flops_f64", "SQ_INSTS_VALU_MFMA_MOPS_F64"),
    )

    def new_totals():
        """Return a zeroed accumulator."""
        totals = dict.fromkeys(record_fields, 0.0)
        # mfma_iops_i8 is tracked but is not part of AI_Data.
        totals.update(mfma_iops_i8=0.0, duration=0.0, calls=0.0)
        return totals

    def valu_flops_of(row):
        """FLOPs issued by VALU instructions in one dispatch (64 per instruction)."""
        f16, f32, f64 = (
            64
            * (
                row[f"SQ_INSTS_VALU_ADD_{precision}"]
                + row[f"SQ_INSTS_VALU_MUL_{precision}"]
                + (2 * row[f"SQ_INSTS_VALU_FMA_{precision}"])
                + row[f"SQ_INSTS_VALU_TRANS_{precision}"]
            )
            for precision in ("F16", "F32", "F64")
        )
        return f16 + f32 + f64

    def report_skip(what, index):
        if verbose >= 2:
            print("Skipped {} at index {}".format(what, index))

    myList = []

    def flush(name, totals):
        """Turn the accumulator into an AI_Data record, averaged over its calls."""
        calls = totals["calls"]
        myList.append(
            AI_Data(
                name,
                calls,
                *(totals[field] / calls for field in record_fields),
                totals["duration"],
                totals["duration"] / calls,
            )
        )

    per_kernel = inputs["sort"] == "kernels"
    per_dispatch = inputs["sort"] == "dispatches"

    totals = new_totals()
    kernelName = ""
    for index, row in df.iterrows():
        # CASE: Top kernels -- a new kernel name closes the previous group.
        if per_kernel and totals["calls"] and row["KernelName"] != kernelName:
            flush(kernelName, totals)
            if verbose >= 2:
                print(
                    "Just added {} to AI_Data at index {}. # of calls: {}".format(
                        kernelName, index, totals["calls"]
                    )
                )
            totals = new_totals()

        kernelName = row["KernelName"]

        try:
            flops = valu_flops_of(row)
            for _, column in mfma_flop_columns:
                flops = flops + (row[column] * 512)
            totals["total_flops"] += flops
        except KeyError:
            report_skip("total_flops", index)

        try:
            totals["valu_flops"] += valu_flops_of(row)
        except KeyError:
            report_skip("valu_flops", index)

        try:
            for field, column in mfma_flop_columns:
                totals[field] += row[column] * 512
            totals["mfma_iops_i8"] += row["SQ_INSTS_VALU_MFMA_MOPS_I8"] * 512
        except KeyError:
            report_skip("mfma ops", index)

        try:
            totals["lds_data"] += (
                (row["SQ_LDS_IDX_ACTIVE"] - row["SQ_LDS_BANK_CONFLICT"]) * 4 * L2_BANKS
            )  # L2_BANKS = 32 (since assuming mi200)
        except KeyError:
            report_skip("lds_data", index)

        try:
            totals["L1cache_data"] += row["TCP_TOTAL_CACHE_ACCESSES_sum"] * 64
        except KeyError:
            report_skip("L1cache_data", index)

        try:
            totals["L2cache_data"] += (
                row["TCP_TCC_WRITE_REQ_sum"] * 64
                + row["TCP_TCC_ATOMIC_WITH_RET_REQ_sum"] * 64
                + row["TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum"] * 64
                + row["TCP_TCC_READ_REQ_sum"] * 64
            )
        except KeyError:
            report_skip("L2cache_data", index)

        try:
            totals["hbm_data"] += (
                (row["TCC_EA_RDREQ_32B_sum"] * 32)
                + ((row["TCC_EA_RDREQ_sum"] - row["TCC_EA_RDREQ_32B_sum"]) * 64)
                + (row["TCC_EA_WRREQ_64B_sum"] * 64)
                + ((row["TCC_EA_WRREQ_sum"] - row["TCC_EA_WRREQ_64B_sum"]) * 32)
            )
        except KeyError:
            report_skip("hbm_data", index)

        totals["duration"] += row["EndNs"] - row["BeginNs"]
        totals["calls"] += 1

        # CASE: Top dispatches -- every row is its own record.
        if per_dispatch:
            flush(kernelName, totals)
            totals = new_totals()

    # The last kernel group is only complete once every row has been
    # accumulated, so it is emitted after the loop rather than inside it.
    if per_kernel and totals["calls"]:
        flush(kernelName, totals)
        if verbose >= 2:
            print(
                "Just added {} to AI_Data at index {}. # of calls: {}".format(
                    kernelName, df.shape[0] - 1, totals["calls"]
                )
            )

    myList.sort(key=lambda entry: entry.totalDuration, reverse=True)

    print("Top 10 intensities ('{}')...".format(inputs["sort"]))
    intensities = {"curr_ai_l1": [], "curr_ai_l2": [], "curr_ai_hbm": []}
    curr_perf = []
    # Keep the 10 longest-running entries; a zero denominator yields 0.
    for entry in myList[:10]:
        intensities["curr_ai_l1"].append(
            entry.total_flops / entry.L1cache_data if entry.L1cache_data else 0
        )
        intensities["curr_ai_l2"].append(
            entry.total_flops / entry.L2cache_data if entry.L2cache_data else 0
        )
        intensities["curr_ai_hbm"].append(
            entry.total_flops / entry.hbm_data if entry.hbm_data else 0
        )
        curr_perf.append(
            entry.total_flops / entry.avgDuration if entry.avgDuration else 0
        )

    print(intensities)

    plotted_spots = []
    labels = []
    for series, values in intensities.items():
        spots = plt.scatter(
            list(values), curr_perf[: len(values)], c=get_color(series), marker="o"
        )
        plotted_spots.append(spots)
        labels.append(series)

    try:
        pylab.legend(
            plotted_spots,
            labels,
            prop={"size": (FONT_SIZE - 2)},
            bbox_to_anchor=(1.04, 1),
            loc="upper left",
            title="Top {}".format(inputs["sort"]),
            title_fontsize=FONT_SIZE,
        )
    except Exception as e:
        # Fall back to a plain legend when the full set of options is rejected.
        sys.stderr.write(f"{e}\n")
        pylab.legend(
            plotted_spots,
            labels,
            prop={"size": (FONT_SIZE - 2)},
        )
|
||||
|
||||
|
||||
def empirical_roof(args):
    """Generate the empirical roofline PDF for a profiled workload.

    Validates the command-line options, loads ``roofline.csv`` from the
    workload directory, draws the roofline ceilings and the application's
    measured points, and saves the figure next to the input data as
    ``<IMGNAME>_gpu-<device>_<dtype>.pdf``.

    Args:
        args: parsed command-line namespace; the attributes read are
            ``target``, ``sort``, ``mem_level``, ``device``, ``path``,
            ``remaining``, ``axes`` and ``verbose``.

    Exits the process (``sys.exit``) on an invalid sort key, an invalid memory
    level, or an unsupported SoC.
    """
    soc = args.target
    sort_key = args.sort.lower()
    mem_level = args.mem_level.upper()

    if sort_key not in ("kernels", "dispatches"):
        sys.exit("Invalid sort. Must be either 'kernels' or 'dispatches'")
    if mem_level not in ("HBM", "VL1D", "L2", "LDS", "ALL"):
        sys.exit(
            "Invalid mem-level. Must be one of these option 'LDS', 'L2', 'vL1D', or 'HBM'"
        )
    # The user-facing name "vL1D" corresponds to the "L1" columns of roofline.csv.
    if mem_level == "VL1D":
        mem_level = "L1"

    inputs = {
        "sort": sort_key,
        "mem": mem_level,
        "device": int(args.device),
        "path": args.path,
        "cmd": args.remaining,
        "axes": args.axes,
    }

    if soc not in SUPPORTED_SOC:
        sys.exit("SoC not yet supported for Roofline Analysis")

    # Basic Info
    print("Path: ", inputs["path"])
    print("Target: ", soc)
    print("Memory Level: ", inputs["mem"])

    roofPath = inputs["path"] + "/roofline.csv"
    # -----------------------------------------------------
    # Initialize roofline data dictionary from roofline.csv
    # -----------------------------------------------------
    roof_data = {}
    headers = None
    with open(roofPath, "r") as csvfile:
        for row in csv.reader(csvfile, delimiter=","):
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to parse.
                continue
            fields = row[1:]  # drop the leading devID column
            if headers is None:
                headers = fields
                roof_data = {name: [] for name in headers}
            else:
                for column, name in enumerate(headers):
                    roof_data[name].append(fields[column])

    # Initialize plot
    figure = plt.figure(figsize=(1600 / 100, 1200 / 100), dpi=100)
    figure.add_subplot(111)

    title_font = get_font()
    title_font["size"] += 8

    plt.title("Empirical Roofline", **title_font)
    plt.xlabel("Arithmetic Intensity (FLOP/Byte)", **get_font())
    plt.ylabel("Performance (GFLOP/sec)", **get_font())
    plt.grid(True, which="major", ls="--", lw=1)
    plt.grid(True, which="minor", ls="--", lw=0.5)
    plt.yscale("log")
    plt.xscale("log")
    # Adjust axes if instructed
    if inputs["axes"]:
        plt.xlim(inputs["axes"][0], inputs["axes"][1])
        plt.ylim(inputs["axes"][2], inputs["axes"][3])

    # ------------------
    # Generate Roofline
    # ------------------
    dtype = plot_roof(inputs, roof_data)  # also returns the chosen dtype
    plot_application(inputs, args.verbose)

    filename = "{}_gpu-{}_{}.pdf".format(IMGNAME, inputs["device"], dtype)
    path_to_output = os.path.join(os.path.abspath(inputs["path"]), filename)

    print('Saving plot: "{}"...'.format(filename))
    plt.savefig(path_to_output, bbox_inches="tight", format="pdf")
    print('File saved to: "{}"'.format(path_to_output))
    plt.close()
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,22 +12,23 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import argparse
|
||||
import getpass
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
# Verify target directory and setup connection
|
||||
def remove_workload(args):
|
||||
# parser = argparse.ArgumentParser(description='Remove a workload from an Omniperf Instance')
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -8,17 +10,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
+14
-13
@@ -1,7 +1,9 @@
|
||||
"""Get host/gpu specs."""
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -10,17 +12,17 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
import os
|
||||
import re
|
||||
@@ -32,7 +34,7 @@ from dataclasses import dataclass
|
||||
from pathlib import Path as path
|
||||
from textwrap import dedent
|
||||
|
||||
gpu_list = {"gfx906", "gfx908", "gfx90a"}
|
||||
gpu_list = {"gfx906", "gfx908", "gfx90a", "gfx900"}
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -84,7 +86,6 @@ class MachineSpecs:
|
||||
|
||||
|
||||
def gpuinfo():
|
||||
|
||||
rocminfo = run(["rocminfo"]).split("\n")
|
||||
|
||||
for idx1, linetext in enumerate(rocminfo):
|
||||
@@ -97,7 +98,6 @@ def gpuinfo():
|
||||
|
||||
L1, L2 = "", ""
|
||||
for idx2, linetext in enumerate(rocminfo[idx1 + 1 :]):
|
||||
|
||||
key = search(r"^\s*L1:\s+ ([a-zA-Z0-9]+)\s*", linetext)
|
||||
if key != None:
|
||||
L1 = key
|
||||
@@ -148,6 +148,9 @@ def gpuinfo():
|
||||
|
||||
def run(cmd):
    """Execute ``cmd`` and return everything it wrote to stdout.

    Both stdout and stderr are captured; only stdout is returned, decoded as
    ASCII. When the command is ``rocm-smi`` and it exits with status 8, an
    error is printed and the process exits with status 1.

    Args:
        cmd: argument sequence whose first element is the program name.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    is_rocm_smi = cmd[0] == "rocm-smi"
    if is_rocm_smi and completed.returncode == 8:
        print("ERROR: No GPU detected. Unable to load rocm-smi")
        sys.exit(1)

    captured = completed.stdout
    return captured.decode("ascii")
|
||||
|
||||
|
||||
@@ -159,7 +162,6 @@ def search(pattern, string):
|
||||
|
||||
|
||||
def get_machine_specs(devicenum):
|
||||
|
||||
cpuinfo = path("/proc/cpuinfo").read_text()
|
||||
meminfo = path("/proc/meminfo").read_text()
|
||||
version = path("/proc/version").read_text()
|
||||
@@ -180,7 +182,6 @@ def get_machine_specs(devicenum):
|
||||
for itr in version_loc:
|
||||
_path = os.path.join(os.getenv("ROCM_PATH", "/opt/rocm"), ".info", itr)
|
||||
if os.path.exists(_path):
|
||||
print(_path)
|
||||
rocm_ver = path(_path).read_text()
|
||||
rocmFound = True
|
||||
break
|
||||
|
||||
@@ -145,7 +145,7 @@ def test_filter_dispatch_ids_mi100():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi100",
|
||||
"--filter-dispatch-ids",
|
||||
"--dispatch",
|
||||
"0",
|
||||
],
|
||||
):
|
||||
@@ -162,7 +162,7 @@ def test_filter_dispatch_ids_inv_mi100():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi100",
|
||||
"--filter-dispatch-ids",
|
||||
"--dispatch",
|
||||
"99",
|
||||
],
|
||||
):
|
||||
@@ -179,7 +179,7 @@ def test_filter_gpu_ids_mi100():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi100",
|
||||
"--filter-gpu-ids",
|
||||
"--gpu-id",
|
||||
"0",
|
||||
],
|
||||
):
|
||||
@@ -196,7 +196,7 @@ def test_filter_gpu_ids_inv_mi100():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi100",
|
||||
"--filter-gpu-ids",
|
||||
"--gpu-id",
|
||||
"99",
|
||||
],
|
||||
):
|
||||
@@ -490,7 +490,7 @@ def test_filter_dispatch_ids_mi200():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi200",
|
||||
"--filter-dispatch-ids",
|
||||
"--dispatch",
|
||||
"0",
|
||||
],
|
||||
):
|
||||
@@ -507,7 +507,7 @@ def test_filter_dispatch_ids_inv_mi200():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi200",
|
||||
"--filter-dispatch-ids",
|
||||
"--dispatch",
|
||||
"99",
|
||||
],
|
||||
):
|
||||
@@ -524,7 +524,7 @@ def test_filter_gpu_ids_mi200():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi200",
|
||||
"--filter-gpu-ids",
|
||||
"--gpu-id",
|
||||
"0",
|
||||
],
|
||||
):
|
||||
@@ -541,7 +541,7 @@ def test_filter_gpu_ids_inv_mi200():
|
||||
"analyze",
|
||||
"--path",
|
||||
"tests/workloads/mixbench/mi200",
|
||||
"--filter-gpu-ids",
|
||||
"--gpu-id",
|
||||
"99",
|
||||
],
|
||||
):
|
||||
|
||||
Executable
+130
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
# -------------------------------------------------------------------------------
|
||||
# Support script for license header management.
|
||||
# -------------------------------------------------------------------------------
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import filecmp
|
||||
import shutil
|
||||
|
||||
begDelim = "######bl$"
|
||||
endDelim = "######el$"
|
||||
maxHeaderLines = 200
|
||||
|
||||
|
||||
def cacheLicenseFile(infile, comment="#"):
    """Read a license file and return its text as a block of comment lines.

    Each input line is prefixed with ``comment``. Non-blank lines get one
    space between the prefix and the text; blank (whitespace-only) lines get
    the bare prefix so no trailing space is introduced.

    Args:
        infile: Path to the license text file.
        comment: Comment leader written at the start of every line.

    Returns:
        The commented license text as a single string. Every line, including
        the last one, is terminated by a newline.

    Exits:
        Logs an error and terminates the process with status 1 when
        ``infile`` is not an existing regular file.
    """
    if not os.path.isfile(infile):
        logging.error("Unable to access license file - >%s" % infile)
        sys.exit(1)

    commented = []
    with open(infile, "r") as file_in:
        for line in file_in:
            prefix = comment if line.strip() == "" else comment + " "
            # A final line lacking a newline would otherwise be glued to
            # whatever the caller writes next (the end-delimiter line).
            if not line.endswith("\n"):
                line += "\n"
            commented.append(prefix + line)
    return "".join(commented)
|
||||
|
||||
|
||||
# Command-line interface: a license file and a source directory are mandatory,
# and exactly one of --extension / --files selects which files are processed.
parser = argparse.ArgumentParser()
parser.add_argument("--license", required=True, help="License File")
parser.add_argument("--source", required=True, help="Source directory")
parser.add_argument("--dryrun", help="enable dryrun mode", action="store_true")

group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--extension", help="file extension to parse")
group.add_argument("--files", help="specific file(s) to parse")

logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)

args = parser.parse_args()

# Normalise the selection options; an unset or empty option becomes None.
srcDir = args.source
fileExtension = args.extension if args.extension else None
# --files takes a comma-separated list of paths relative to the source directory
specificFiles = args.files.split(",") if args.files else None

# Echo the effective configuration
print("")
logging.info("Source directory = %s" % srcDir)
for enabled, message in (
    (fileExtension, "File extension = %s" % fileExtension),
    (specificFiles, "Specific files = %s" % specificFiles),
):
    if enabled:
        logging.info(message)

# Load the license text once, already formatted as comment lines
license = cacheLicenseFile(args.license)
|
||||
|
||||
# Scan files in provided source directory and refresh the license text found
# between the begin/end delimiter lines of every selected file.
for filename in glob.iglob(srcDir + "/**", recursive=True):
    # skip directories
    if os.path.isdir(filename):
        continue

    # File matching options:

    # (1) filter non-matching extensions
    if fileExtension and not filename.endswith(fileExtension):
        continue

    # or, (2) filter for specific filename (paths are relative to srcDir)
    if specificFiles and not any(
        os.path.join(srcDir, name) == filename for name in specificFiles
    ):
        continue

    logging.debug("Examining %s for license..." % filename)

    # The rewritten copy goes to a hidden sibling file so the original stays
    # untouched until we know the result is complete and actually different.
    tmpFile = os.path.dirname(filename) + "/." + os.path.basename(filename) + ".tmp"
    headerTerminated = True

    # Both handles are context-managed so they are closed on every exit path.
    with open(filename, "r") as file_in, open(tmpFile, "w") as file_out:
        for line in file_in:
            if not re.search(begDelim, line):
                file_out.write(line)
                continue

            logging.debug("Found beginning delimiter")
            file_out.write(line)
            file_out.write(license)

            # Discard the old header body: read ahead (at most maxHeaderLines
            # lines) until the end delimiter, which is copied through.
            headerTerminated = False
            for _ in range(maxHeaderLines):
                line = file_in.readline()
                if line == "":
                    break  # end of file reached without an end delimiter
                if re.search(endDelim, line):
                    logging.debug("Found ending delimiter")
                    file_out.write(line)
                    headerTerminated = True
                    break

            if not headerTerminated:
                break

    if not headerTerminated:
        # Do not leave a partially written temp file behind on failure.
        os.unlink(tmpFile)
        logging.error("Unable to find end of delimited header")
        sys.exit(1)

    # Check if file changed and update
    if filecmp.cmp(filename, tmpFile, shallow=False):
        os.unlink(tmpFile)
        continue

    logging.info("%s changed" % filename)
    if args.dryrun:
        # Dry run only reports the change; remove the temp copy instead of
        # leaving a stray hidden file next to the source.
        os.unlink(tmpFile)
    else:
        # Preserve permission bits and timestamps of the original file.
        shutil.copystat(filename, tmpFile)
        os.rename(tmpFile, filename)
|
||||
Viittaa uudessa ongelmassa
Block a user