Format source code to PEP8 using Ruff (#36)

* added ruff docs

* style: Run ruff and black before yapf pass

* yapf -r -i (23 fixes)

* fixed conf.py and ran ruff format .

* fixed conf.py 2

* formatted argparser.py

* formatted src/rocprof_compute_analyze

* formatted src/rocprof_compute_profile

* formatted soc_base.py

* formatted rocprof_compute_tui

* formatted gui_components

* formatted src/utils

* formatted tests/

* format extra files

* cleanup

* fix test_utils.py

* fixed typos

* Update pyproject.toml

* Update README.md

* Update test_utils.py

---------

Signed-off-by: jamessiddeley-amd <James.Siddeley@amd.com>
Co-authored-by: James Siddeley <James.Siddeley@amd.com>
Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
Этот коммит содержится в:
systems-assistant[bot]
2025-08-08 15:32:30 -04:00
коммит произвёл GitHub
родитель d3f9ab25eb
Коммит 58d2a016ce
72 изменённых файлов: 3272 добавлений и 2395 удалений
+19 -7
Просмотреть файл
@@ -25,14 +25,26 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install black isort
python -m pip install ruff
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
- name: Run black formatter
uses: psf/black@stable
with:
use_pyproject: true
- name: Run isort formatter
uses: isort/isort-action@master
- name: Run Ruff Linter and Import Sorter
run: |
ruff check . --fix --exit-zero
- name: Run Ruff Formatter
run: |
ruff format .
- name: Check for formatting/linting changes
run: |
git config --global user.name 'github-actions'
git config --global user.email 'github-actions@github.com'
git add -A .
if ! git diff --cached --quiet; then
echo "::error::Files were modified by ruff. Please run 'ruff check . --fix && ruff format .' locally and commit the changes."
git diff --cached --patch # Show the diff in the logs
exit 1
else
echo "Ruff found no issues or all issues were fixed and files are clean."
fi
cmake:
runs-on: ubuntu-22.04
+8 -9
Просмотреть файл
@@ -7,13 +7,12 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
# Python import sorting
- repo: https://github.com/pycqa/isort
rev: 6.0.1
# Python import sorting and formatting
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility,
# for the latest ruff version supported by the hook.
rev: v0.12.7
hooks:
- id: isort
# Python formatting (Using this mirror lets us use mypyc-compiled black, which is about 2x faster)
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.1.0
hooks:
- id: black
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
+39
Просмотреть файл
@@ -104,3 +104,42 @@ style reference is provided below for convenience:
url = {https://doi.org/10.5281/zenodo.7314631}
}
```
### Contribution Guidelines
To ensure code quality and consistency, we use **Ruff**, a fast Python linter and formatter. Before submitting a pull request, please ensure your code is formatted and linted correctly.
-----
### Installing and Running Ruff
Ruff is available on PyPI and can be installed using `pip`:
```bash
pip install ruff
```
Once installed, you can run Ruff from the command line. To check for linting errors and formatting issues, navigate to the project root and run:
```bash
ruff check .
ruff format --check .
```
To automatically fix most of the issues detected, you can use the `--fix` flag with the `check` command and run the `format` command without the `--check` flag:
```bash
ruff check --fix .
ruff format .
```
-----
### Disabling Formatting for Specific Sections
There may be instances where you need to disable Ruff's formatting on a specific block of code. You can do this using special comments:
* **`# fmt: off`** and **`# fmt: on`**: These comments can be used to disable and re-enable formatting for a block of code.
* **`# fmt: skip`**: This comment, placed at the end of a line, will prevent Ruff from formatting that specific statement.
You can also disable specific linting rules for a line by using `# noqa: <rule_code>`.
+6 -9
Просмотреть файл
@@ -13,10 +13,14 @@ import sys
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use str(Path(<rel_path>).absolute().resolve()) to make it absolute, like shown here.
#
# documentation root, use str(Path(<rel_path>).absolute().resolve())
# to make it absolute, like shown here.
from pathlib import Path
from pygments.styles import get_all_styles
from recommonmark.parser import CommonMarkParser
from recommonmark.transform import AutoStructify
sys.path.insert(0, str(Path("..").absolute().resolve()))
repo_version = "unknown"
@@ -68,7 +72,6 @@ source_suffix = {
".md": "markdown",
}
from recommonmark.parser import CommonMarkParser
source_parsers = {".md": CommonMarkParser}
@@ -94,7 +97,6 @@ pygments_style = None
latex_engine = "lualatex"
latex_show_urls = "footnote"
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
@@ -113,7 +115,6 @@ html_theme = "sphinx_rtd_theme"
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
@@ -136,8 +137,6 @@ html_theme_options = {
"titles_only": False,
}
from pygments.styles import get_all_styles
# The name of the Pygments (syntax highlighting) style to use.
styles = list(get_all_styles())
preferences = ("emacs", "pastie", "colorful")
@@ -146,8 +145,6 @@ for pref in preferences:
pygments_style = pref
break
from recommonmark.transform import AutoStructify
# app setup hook
def setup(app):
+6 -12
Просмотреть файл
@@ -13,10 +13,14 @@ import sys
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use str(Path(<rel_path>).absolute().resolve()) to make it absolute, like shown here.
#
# documentation root, use str(Path(<rel_path>).absolute().resolve())
# to make it absolute, like shown here.
from pathlib import Path
from pygments.styles import get_all_styles
from recommonmark.parser import CommonMarkParser
from recommonmark.transform import AutoStructify
sys.path.insert(0, str(Path("..").absolute().resolve()))
repo_version = "unknown"
@@ -56,12 +60,10 @@ extensions = [
show_authors = True
myst_heading_anchors = 4
# enable replacement of (tm) & friends
myst_enable_extensions = ["replacements", "dollarmath"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -84,8 +86,6 @@ source_suffix = {
# sphinxmark_text_spacing = 800
# sphinxmark_text_opacity = 30
from recommonmark.parser import CommonMarkParser
source_parsers = {".md": CommonMarkParser}
# The master toctree document.
@@ -110,7 +110,6 @@ pygments_style = None
latex_engine = "lualatex"
latex_show_urls = "footnote"
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
@@ -133,7 +132,6 @@ latex_elements = {
"sphinxsetup": "verbatimwrapslines=true, verbatimforcewraps=true",
}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
@@ -157,8 +155,6 @@ html_theme_options = {
"titles_only": False,
}
from pygments.styles import get_all_styles
# The name of the Pygments (syntax highlighting) style to use.
styles = list(get_all_styles())
preferences = ("emacs", "pastie", "colorful")
@@ -167,8 +163,6 @@ for pref in preferences:
pygments_style = pref
break
from recommonmark.transform import AutoStructify
# app setup hook
def setup(app):
-1
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
+40 -26
Просмотреть файл
@@ -4,36 +4,50 @@ requires-python = ">=3.8"
[project.optional-dependencies]
developer = [
"black>=22.6.0",
"isort>=5.12.0",
"ruff>=0.12.7",
"pre-commit",
]
[tool.black]
line-length = 90
include = '\.py$'
exclude = '''
(
/(
\.eggs
| \.git
| \.github
| \.tox
| \.venv
| \.misc
| \.vscode
| \.pyc
| dist
| external
| .pytest_cache
| build
| build-rocprof_compute
)/
)
'''
[tool.ruff]
line-length = 88
# Ruff's default excludes cover: .bzr, .direnv, .eggs, .git, .git-rewrite, .hg, .ipynb_checkpoints,
# .mypy_cache, .nox, .pants.d, .pyenv, .pytest_cache, .pytype, .ruff_cache, .svn,
# .tox, .venv, .vscode, __pypackages__, _build, buck-out, build, dist, node_modules,
# site-packages, venv
extend-exclude = [
".github",
".misc",
"external",
"build-rocprof_compute",
]
[tool.ruff.lint]
# Enable Pyflakes (F), pycodestyle (E, W for PEP8), and isort (I) rules.
select = ["E", "W", "F", "I"]
ignore = ["E713", "E711"]
fixable = ["ALL"]
unfixable = []
[tool.ruff.format]
preview = true
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
docstring-code-format = true
docstring-code-line-length = "dynamic"
[tool.isort]
profile = "black"
[tool.pytest.ini_options]
addopts = [
+174 -40
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import argparse
import os
import re
@@ -87,7 +86,8 @@ def omniarg_parser(
help="Profile the target application",
usage="""
rocprof-compute profile --name <workload_name> [profile options] [roofline options] -- <profile_cmd>
`rocprof-compute profile --name <workload_name>
[profile options] [roofline options] -- <profile_cmd>`
---------------------------------------------------------------------------------
Examples:
@@ -118,7 +118,9 @@ Examples:
dest="name",
help="\t\t\tAssign a name to workload.",
)
profile_group.add_argument("--target", type=str, default=None, help=argparse.SUPPRESS)
profile_group.add_argument(
"--target", type=str, default=None, help=argparse.SUPPRESS
)
profile_group.add_argument(
"-p",
"--path",
@@ -127,8 +129,9 @@ Examples:
dest="path",
default=str(Path(os.getcwd()).joinpath("workloads")),
required=False,
help="\t\t\tSpecify path to save workload.\n\t\t\t(DEFAULT: {}/workloads/<name>)".format(
os.getcwd()
help=(
"\t\t\tSpecify path to save workload.\n\t\t\t"
"(DEFAULT: {}/workloads/<name>)".format(os.getcwd())
),
)
profile_group.add_argument(
@@ -138,7 +141,9 @@ Examples:
dest="subpath",
default="gpu",
required=False,
help="\t\t\tSpecify the type of subpath to save workload: node_name, gpu_model.",
help=(
"\t\t\tSpecify the type of subpath to save workload: node_name, gpu_model."
),
)
profile_group.add_argument(
"--hip-trace",
@@ -146,7 +151,10 @@ Examples:
required=False,
default=False,
action="store_true",
help="\t\t\tHIP trace, execturion trace for the entire application at the HIP level.",
help=(
"\t\t\tHIP trace, execturion trace for the entire application at the HIP "
"level."
),
)
profile_group.add_argument(
"--kokkos-trace",
@@ -195,7 +203,11 @@ Examples:
nargs="+",
required=False,
default=[],
help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 12, 12.1, 12.1.1).\n\t\t\tCan provide multiple space separated arguments.""",
help=(
"\t\t\tSpecify metric id(s) from --list-metrics for filtering "
"(e.g. 12, 12.1, 12.1.1).\n"
"\t\t\tCan provide multiple space separated arguments."
),
)
profile_group.add_argument(
"--list-metrics",
@@ -232,7 +244,11 @@ Examples:
required=False,
choices=["kernel", "grid"],
default="grid",
help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. By unique kernel name + grid size dispatches)",
help=(
"\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n"
"\t\t\t kernel (i.e. By unique kernel name dispatches)\n"
"\t\t\t grid (i.e. By unique kernel name + grid size dispatches)"
),
)
profile_group.add_argument(
"--no-roof",
@@ -274,7 +290,10 @@ Examples:
metavar="",
dest="pc_sampling_method",
default="stochastic",
help="\t\t\tSet the method of pc sampling, stochastic or host_trap. Support stochastic only >= MI300",
help=(
"\t\t\tSet the method of pc sampling, stochastic or host_trap. "
"Support stochastic only >= MI300"
),
)
profile_group.add_argument(
@@ -283,7 +302,12 @@ Examples:
metavar="",
dest="pc_sampling_interval",
default=1048576,
help="\t\t\tSet the interval of pc sampling.\n\t\t\t For stochastic sampling, the interval is in cycles.\n\t\t\t For host_trap sampling, the interval is in microsecond (DEFAULT: 1048576).",
help=(
"\t\t\tSet the interval of pc sampling.\n"
"\t\t\t For stochastic sampling, the interval is in cycles.\n"
"\t\t\t For host_trap sampling, the interval is in microsecond "
"(DEFAULT: 1048576)."
)
)
profile_group.add_argument(
@@ -299,7 +323,10 @@ Examples:
required=False,
default=False,
action="store_true",
help="\t\t\tRetain the large raw rocpd database in workload directory.\n\t\t\tThis option requires --format-rocprof-output rocpd.",
help=(
"\t\t\tRetain the large raw rocpd database in workload directory.\n"
"\t\t\tThis option requires --format-rocprof-output rocpd."
),
)
## Roofline Command Line Options
@@ -317,7 +344,11 @@ Examples:
type=str,
default="kernels",
choices=["kernels", "dispatches"],
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
help=(
"\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n"
"\t\t\t kernels\n"
"\t\t\t dispatches"
),
)
roofline_group.add_argument(
"-m",
@@ -328,7 +359,13 @@ Examples:
nargs="+",
type=str,
default="ALL",
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
help=(
"\t\t\tFilter by memory level: (DEFAULT: ALL)\n"
"\t\t\t HBM\n"
"\t\t\t L2\n"
"\t\t\t vL1D\n"
"\t\t\t LDS"
),
)
roofline_group.add_argument(
"--device",
@@ -350,19 +387,58 @@ Examples:
"-R",
"--roofline-data-type",
required=False,
choices=["FP4", "FP6", "FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
choices=[
"FP4",
"FP6",
"FP8",
"FP16",
"BF16",
"FP32",
"FP64",
"I8",
"I32",
"I64",
],
metavar="",
nargs="+",
type=str,
default=["FP32"],
help="\t\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n\t\t\t FP4\n\t\t\t FP6\n\t\t\t FP8\n\t\t\t FP16\n\t\t\t BF16\n\t\t\t FP32\n\t\t\t FP64\n\t\t\t I8\n\t\t\t I32\n\t\t\t I64\n\t\t\t ",
help=(
"\t\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n"
"\t\t\t FP4\n"
"\t\t\t FP6\n"
"\t\t\t FP8\n"
"\t\t\t FP16\n"
"\t\t\t BF16\n"
"\t\t\t FP32\n"
"\t\t\t FP64\n"
"\t\t\t I8\n"
"\t\t\t I32\n"
"\t\t\t I64\n"
"\t\t\t "
),
)
# roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)")
# roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)")
# roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)")
# roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)")
# roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)")
# roofline_group.add_argument(
# '-w', '--workgroups', required=False, default=-1, type=int,
# help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)"
# )
# roofline_group.add_argument(
# '--wsize', required=False, default=-1, type=int,
# help="\t\t\tWorkgroup size (DEFAULT: 256)"
# )
# roofline_group.add_argument(
# '--dataset', required=False, default=-1, type=int,
# help="\t\t\tDataset size (DEFAULT: 536M)"
# )
# roofline_group.add_argument(
# '-e', '--experiments', required=False, default=-1, type=int,
# help="\t\t\tNumber of experiments (DEFAULT: 100)"
# )
# roofline_group.add_argument(
# '--iter', required=False, default=-1, type=int,
# help="\t\t\tNumber of iterations (DEFAULT: 10)"
# )
## Database Command Line Options
## ----------------------------
@@ -374,9 +450,11 @@ Examples:
\n\n-------------------------------------------------------------------------------
\nExamples:
\n\trocprof-compute database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/
\n\trocprof-compute database --remove -H pavii1 -u temp -w rocprofiler-compute_asw_sample_mi200
\n-------------------------------------------------------------------------------\n
\n\trocprof-compute database --import -H pavii1 -u temp -t asw -w "
"workloads/vcopy/mi200/"
"\n\trocprof-compute database --remove -H pavii1 -u temp -w "
"rocprofiler-compute_asw_sample_mi200"
"\n-------------------------------------------------------------------------------\n"
""",
prog="tool",
allow_abbrev=False,
@@ -445,13 +523,20 @@ Examples:
required=True,
metavar="",
dest="workload",
help="\t\t\t\tSpecify name of workload (to remove) or path to workload (to import)",
help=(
"\t\t\t\tSpecify name of workload (to remove) or path to workload "
"(to import)"
),
)
connection_group.add_argument(
"--kernel-verbose",
required=False,
metavar="",
help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)",
help=(
"\t\tSpecify Kernel Name verbose level 1-5. "
"Lower the level, shorter the kernel name. "
"(DEFAULT: 5) (DISABLE: 5)"
),
default=5,
type=int,
)
@@ -558,23 +643,50 @@ Examples:
type=int,
nargs="?",
const=8050,
help="\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n\t\tOptionally, specify port to launch application (DEFAULT: 8050)",
help=(
"\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n"
"\t\tOptionally, specify port to launch application (DEFAULT: 8050)"
),
)
analyze_group.add_argument(
"--tui",
action="store_true",
help="\t\tActivate a Textual User Interface (TUI) to interact with rocprofiler-compute metrics.",
help="\t\tActivate a Textual User Interface (TUI) to "
"interact with rocprofiler-compute metrics.",
)
analyze_group.add_argument(
"-R",
"--roofline-data-type",
required=False,
choices=["FP4", "FP6", "FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
choices=[
"FP4",
"FP6",
"FP8",
"FP16",
"BF16",
"FP32",
"FP64",
"I8",
"I32",
"I64",
],
metavar="",
nargs="+",
type=str,
default=["FP32"],
help="\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n\t\t\t FP4\n\t\t\t FP6\n\t\t\t FP8\n\t\t\t FP16\n\t\t\t BF16\n\t\t\t FP32\n\t\t\t FP64\n\t\t\t I8\n\t\t\t I32\n\t\t\t I64\n\t\t\t ",
help=(
"\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n"
"\t\t\t FP4\n"
"\t\t\t FP6\n"
"\t\t\t FP8\n"
"\t\t\t FP16\n"
"\t\t\t BF16\n"
"\t\t\t FP32\n"
"\t\t\t FP64\n"
"\t\t\t I8\n"
"\t\t\t I32\n"
"\t\t\t I64\n\t\t\t "
),
)
analyze_group.add_argument(
@@ -584,13 +696,15 @@ Examples:
dest="pc_sampling_sorting_type",
default="offset",
type=str,
help="\t\tSet the sorting type of pc sampling: offset or count (DEFAULT: offset).",
help="\t\tSet the sorting type of pc sampling: "
"offset or count (DEFAULT: offset).",
)
analyze_advanced_group.add_argument(
"--random-port",
action="store_true",
help="\t\tRandomly generate a port to launch GUI application.\n\t\tRegistered Ports range inclusive (1024-49151).",
help="\t\tRandomly generate a port to launch GUI application.\n"
"\t\tRegistered Ports range inclusive (1024-49151).",
)
analyze_advanced_group.add_argument(
"--max-stat-num",
@@ -598,7 +712,8 @@ Examples:
metavar="",
type=int,
default=10,
help='\t\tSpecify the maximum number of stats shown in "Top Stats" tables (DEFAULT: 10)',
help="\t\tSpecify the maximum number of stats shown in "
'"Top Stats" tables (DEFAULT: 10)',
)
analyze_advanced_group.add_argument(
"-n",
@@ -607,7 +722,11 @@ Examples:
metavar="",
default="per_kernel",
choices=["per_wave", "per_cycle", "per_second", "per_kernel"],
help="\t\tSpecify the normalization unit: (DEFAULT: per_kernel)\n\t\t per_wave\n\t\t per_cycle\n\t\t per_second\n\t\t per_kernel",
help="\t\tSpecify the normalization unit: (DEFAULT: per_kernel)\n"
"\t\t per_wave\n"
"\t\t per_cycle\n"
"\t\t per_second\n"
"\t\t per_kernel",
)
analyze_advanced_group.add_argument(
"-t",
@@ -616,7 +735,11 @@ Examples:
metavar="",
default="ns",
choices=["s", "ms", "us", "ns"],
help="\t\tSpecify display time unit: (DEFAULT: ns)\n\t\t s\n\t\t ms\n\t\t us\n\t\t ns",
help="\t\tSpecify display time unit: (DEFAULT: ns)\n"
"\t\t s\n"
"\t\t ms\n"
"\t\t us\n"
"\t\t ns",
)
analyze_advanced_group.add_argument(
"--decimal",
@@ -644,7 +767,10 @@ Examples:
dest="cols",
metavar="",
nargs="+",
help="\t\tSpecify column indices to display.\n\t\tDefaults to display all columns.",
help=(
"\t\tSpecify column indices to display.\n"
"\t\tDefaults to display all columns."
),
)
analyze_advanced_group.add_argument(
"--include-cols",
@@ -653,7 +779,8 @@ Examples:
nargs="+",
help=(
"\t\tSpecify which hidden column names should be included in cli output.\n"
"\t\tFor example, to show 'Description' column which is hidden by default in cli output,\n"
"\t\tFor example, to show 'Description' column which is hidden by "
"default in cli output,\n"
"\t\tuse the option --include-cols Description."
),
)
@@ -669,7 +796,9 @@ Examples:
"--kernel-verbose",
required=False,
metavar="",
help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)",
help="\t\tSpecify Kernel Name verbose level 1-5. "
"Lower the level, shorter the kernel name. "
"(DEFAULT: 5) (DISABLE: 5)",
default=5,
type=int,
)
@@ -680,7 +809,9 @@ Examples:
"--specs-correction",
type=str,
metavar="",
help="\t\tSpecify the specs to correct. e.g. --specs-correction='specname1:specvalue1,specname2:specvalue2'",
help="\t\tSpecify the specs to correct. e.g. "
"--specs-correction='specname1:specvalue1,"
"specname2:specvalue2'",
)
analyze_advanced_group.add_argument(
"--list-nodes",
@@ -693,5 +824,8 @@ Examples:
type=str,
dest="nodes",
nargs="*",
help="\t\tMulti-node option: filter with node names. Enable it without node names means ALL.",
help=(
"\t\tMulti-node option: filter with node names. "
"Enable it without node names means ALL."
),
)
-2
Просмотреть файл
@@ -23,8 +23,6 @@
##############################################################################
import re
from pathlib import Path
# NB: Creating a new module to share global vars across modules
+10 -17
Просмотреть файл
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
"""Main entry point for rocprof-compute"""
##############################################################################bl
@@ -80,11 +79,10 @@ def verify_deps():
dependencies are not available."""
# Check which version of python is being used
if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 8):
print(
"[ERROR] Python 3.8 or higher is required to run rocprofiler-compute."
f" The current version is {sys.version_info[0]}.{sys.version_info[1]}."
)
if sys.version_info[0] < 3 or (sys.version_info[0] == 3
and sys.version_info[1] < 8):
print("[ERROR] Python 3.8 or higher is required to run rocprofiler-compute."
f" The current version is {sys.version_info[0]}.{sys.version_info[1]}.")
sys.exit(1)
bindir = str(Path(__file__).resolve().parent)
@@ -112,17 +110,14 @@ def verify_deps():
localVersion = metadata.distribution(package).version
except metadata.PackageNotFoundError:
error = True
print(
f"[ERROR] The '{dependency}' package was not found "
"in the current execution environment."
)
print(f"[ERROR] The '{dependency}' package was not found "
"in the current execution environment.")
# check version requirement
if not error:
if desiredVersion:
if not verify_deps_version(
localVersion, desiredVersion, operator
):
if not verify_deps_version(localVersion, desiredVersion,
operator):
print(
f"[ERROR] the '{dependency}' distribution does "
"not meet version requirements to use rocprofiler-compute."
@@ -132,10 +127,8 @@ def verify_deps():
if error:
print("")
print(
"Please verify all of the python dependencies called out "
"in the requirements file"
)
print("Please verify all of the python dependencies called out "
"in the requirements file")
print("are installed locally prior to running rocprofiler-compute.")
print("")
print(f"See: {checkFile}")
+11 -7
Просмотреть файл
@@ -23,12 +23,10 @@
##############################################################################
import copy
import os
import sys
import textwrap
from abc import ABC, abstractmethod
from abc import abstractmethod
from collections import OrderedDict
from pathlib import Path
@@ -78,7 +76,9 @@ class OmniAnalyze_Base:
# TODO: filter_metrics should/might be one per arch
# print(ac)
parser.build_dfs(archConfigs=ac, filter_metrics=filter_metrics, sys_info=sys_info)
parser.build_dfs(
archConfigs=ac, filter_metrics=filter_metrics, sys_info=sys_info
)
self._arch_configs[arch] = ac
return self._arch_configs
@@ -192,7 +192,9 @@ class OmniAnalyze_Base:
arch = w.sys_info.iloc[0]["gpu_arch"]
mspec = self.get_socs()[arch]._mspec
if self.__args.specs_correction:
w.sys_info = parser.correct_sys_info(mspec, self.__args.specs_correction)
w.sys_info = parser.correct_sys_info(
mspec, self.__args.specs_correction
)
w.avail_ips = w.sys_info["ip_blocks"].item().split("|")
w.dfs = copy.deepcopy(self._arch_configs[arch].dfs)
w.dfs_type = self._arch_configs[arch].dfs_type
@@ -222,7 +224,7 @@ class OmniAnalyze_Base:
# Todo: more err check
if not (
self.__args.nodes != None
self.__args.nodes is not None
or self.__args.list_nodes
or self.__args.spatial_multiplexing
):
@@ -266,7 +268,9 @@ class OmniAnalyze_Base:
console_log("analysis", "deriving rocprofiler-compute metrics...")
# initialize output file
self._output = (
open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout
open(self.__args.output_file, "w+")
if self.__args.output_file
else sys.stdout
)
# Read profiling config
-2
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from rocprof_compute_analyze.analysis_base import OmniAnalyze_Base
from utils import file_io, parser, tty
from utils.kernel_name_shortener import kernel_name_shortener
@@ -41,7 +40,6 @@ class cli_analysis(OmniAnalyze_Base):
if self.get_args().random_port:
console_error("--gui flag is required to enable --random-port")
for d in self.get_args().path:
# create 'mega dataframe'
self._runs[d[0]].raw_pmc = file_io.create_df_pmc(
d[0],
+15 -7
Просмотреть файл
@@ -23,9 +23,7 @@
##############################################################################
import copy
import os
import random
from pathlib import Path
@@ -228,7 +226,10 @@ class webui_analysis(OmniAnalyze_Base):
for t_type, table_config in data_source.items():
original_df = base_data[base_run].dfs[table_config["id"]]
# The sys info table needs its index added back
if t_type == "raw_csv_table" and "Info" in original_df.keys():
if (
t_type == "raw_csv_table"
and "Info" in original_df.keys()
):
original_df.reset_index(inplace=True)
content = determine_chart_type(
@@ -279,7 +280,13 @@ class webui_analysis(OmniAnalyze_Base):
id="popup",
children=[
html.Div(
children="To dive deeper, use the top drop down menus to isolate particular kernel(s) or dispatch(s). You will then see the web page update with additional low-level metrics specific to the filter you've applied.",
children=(
"To dive deeper, use the top drop down menus to "
"isolate particular kernel(s) or dispatch(s). "
"You will then see the web page update with "
"additional low-level metrics specific to the "
"filter you've applied."
),
),
],
)
@@ -308,7 +315,9 @@ class webui_analysis(OmniAnalyze_Base):
)
if self.get_args().spatial_multiplexing:
self._runs[self.dest_dir].raw_pmc = self.spatial_multiplex_merge_counters(
self._runs[
self.dest_dir
].raw_pmc = self.spatial_multiplex_merge_counters(
self._runs[self.dest_dir].raw_pmc
)
@@ -386,8 +395,7 @@ def determine_chart_type(
d_figs = build_bar_chart(display_df, table_config, barchart_elements, norm_filt)
# Smaller formatting if barchart yields several graphs
if (
len(d_figs)
> 2
len(d_figs) > 2
# and not table_config["id"]
# in barchart_elements["l2_cache_per_chan"]
):
+18 -9
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import argparse
import importlib
import os
@@ -159,7 +158,9 @@ class RocProfCompute:
def parse_args(self):
parser = argparse.ArgumentParser(
description="Command line interface for AMD's GPU profiler, ROCm Compute Profiler",
description=(
"Command line interface for AMD's GPU profiler, ROCm Compute Profiler"
),
prog="tool",
formatter_class=lambda prog: argparse.RawTextHelpFormatter(
prog, max_help_position=30
@@ -176,7 +177,11 @@ class RocProfCompute:
and self.__args.format_rocprof_output != "rocpd"
):
console_warning(
f"The option --format-rocprof-output currently set to {self.__args.format_rocprof_output} will default to rocpd in a future release."
(
f"The option --format-rocprof-output currently set to "
f"{self.__args.format_rocprof_output} will default to rocpd "
"in a future release."
)
)
if self.__args.mode == None:
@@ -188,7 +193,6 @@ class RocProfCompute:
"rocprof-compute requires you to pass a valid mode. Detected None."
)
elif self.__args.mode == "profile":
# FIXME:
# Might want to get host name from detected spec
if self.__args.subpath == "node_name":
@@ -312,7 +316,9 @@ class RocProfCompute:
# Update default path
if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
self.__args.path = str(
Path(self.__args.path).joinpath(self.__args.name, self.__mspec.gpu_model)
Path(self.__args.path).joinpath(
self.__args.name, self.__mspec.gpu_model
)
)
# instantiate desired profiler
@@ -376,7 +382,10 @@ class RocProfCompute:
profiler.run_profiling(self.__version["ver"], config.PROJECT_NAME)
time_end_prof = time.time()
console_debug(
'finished "run_profiling" and finished rocprof\'s workload, time taken was {} m {} sec'.format(
(
'finished "run_profiling" and finished rocprof\'s workload, '
"time taken was {} m {} sec"
).format(
int((time_end_prof - time_start_prof) / 60),
str((time_end_prof - time_start_prof) % 60),
)
@@ -385,8 +394,7 @@ class RocProfCompute:
time_end_post = time.time()
console_debug(
'time taken for "post_processing" was {} seconds'.format(
int((time_end_post - time_end_prof) / 60),
str((time_end_post - time_end_prof) % 60),
int(time_end_post - time_end_prof)
)
)
self.__soc[self.__mspec.gpu_arch].post_profiling()
@@ -398,7 +406,8 @@ class RocProfCompute:
self.print_graphic()
console_warning(
"Database update mode is deprecated and will be removed in a future release "
"Database update mode is deprecated and will "
"be removed in a future release "
"and no fixes will be made for this mode."
)
+47 -37
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import csv
import glob
import os
@@ -96,7 +95,7 @@ class RocProfCompute_Base:
return
# Set default output directory if not specified
if type(self.__args.path) == str:
if isinstance(self.__args.path, str):
if out is None:
out = self.__args.path + "/pmc_perf.csv"
files = glob.glob(self.__args.path + "/" + "pmc_perf_*.csv")
@@ -121,7 +120,7 @@ class RocProfCompute_Base:
os.path.basename(f)
)
]
elif type(self.__args.path) == list:
elif isinstance(self.__args.path, list):
files = self.__args.path
else:
console_error(
@@ -130,7 +129,7 @@ class RocProfCompute_Base:
df = None
for i, file in enumerate(files):
_df = pd.read_csv(file) if type(self.__args.path) == str else file
_df = pd.read_csv(file) if isinstance(self.__args.path, str) else file
if self.__args.join_type == "kernel":
key = _df.groupby("Kernel_Name").cumcount()
_df["key"] = _df.Kernel_Name + " - " + key.astype(str)
@@ -145,7 +144,8 @@ class RocProfCompute_Base:
)
else:
console_error(
"%s is an unrecognized option for --join-type" % self.__args.join_type
"%s is an unrecognized option for --join-type"
% self.__args.join_type
)
if df is None:
@@ -174,7 +174,9 @@ class RocProfCompute_Base:
}
# Check for vgpr counter in ROCm < 5.3
if "vgpr" in df.columns:
duplicate_cols["vgpr"] = [col for col in df.columns if col.startswith("vgpr")]
duplicate_cols["vgpr"] = [
col for col in df.columns if col.startswith("vgpr")
]
# Check for vgpr counter in ROCm >= 5.3
else:
duplicate_cols["Arch_VGPR"] = [
@@ -235,7 +237,8 @@ class RocProfCompute_Base:
)
]
]
#   B) any timestamps that are _not_ the duration, which is the one we care about
# B) any timestamps that are _not_ the duration,
# which is the one we care about
df = df[
[
k
@@ -275,8 +278,9 @@ class RocProfCompute_Base:
df["End_Timestamp"] = endNs
# finally, join the drop key
df = df.drop(columns=["key"])
# save to file and delete old file(s), skip if we're being called outside of rocprof-compute
if type(self.__args.path) == str:
# save to file and delete old file(s)
# skip if we're being called outside of rocprof-compute
if isinstance(self.__args.path, str):
df.to_csv(out, index=False)
if not self.__args.verbose:
for file in files:
@@ -322,7 +326,12 @@ class RocProfCompute_Base:
self.__args.remaining = " ".join(self.__args.remaining)
else:
console_error(
"Profiling command required. Pass application executable after -- at the end of options.\n\t\ti.e. rocprof-compute profile -n vcopy -- ./vcopy -n 1048576 -b 256"
(
"Profiling command required. Pass application executable after -- "
"at the end of options.\n"
"\t\ti.e. rocprof-compute profile -n vcopy -- "
"./vcopy -n 1048576 -b 256"
)
)
gen_sysinfo(
@@ -380,27 +389,28 @@ class RocProfCompute_Base:
time_left_seconds = (total_runs - run_number) * avg_profiling_time
time_left = format_time(time_left_seconds)
console_log(
f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]..."
f"[Run {run_number}/{total_runs}]"
f"[Approximate profiling time left: {time_left}]..."
)
else:
console_log(
f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]"
f"[Run {run_number}/{total_runs}]"
"[Approximate profiling time left: "
"pending first measurement...]"
)
# Kernel filtering (in-place replacement)
if not self.__args.kernel == None:
success, output = capture_subprocess_output(
[
"sed",
"-i",
"-r",
"s%^(kernel:).*%"
+ "kernel: "
+ ",".join(self.__args.kernel)
+ "%g",
fname,
]
)
success, output = capture_subprocess_output([
"sed",
"-i",
"-r",
"s%^(kernel:).*%"
+ "kernel: "
+ ",".join(self.__args.kernel)
+ "%g",
fname,
])
# log output from profile filtering
if not success:
console_error(output)
@@ -409,18 +419,16 @@ class RocProfCompute_Base:
# Dispatch filtering (inplace replacement)
if not self.__args.dispatch == None:
success, output = capture_subprocess_output(
[
"sed",
"-i",
"-r",
"s%^(range:).*%"
+ "range: "
+ " ".join(self.__args.dispatch)
+ "%g",
fname,
]
)
success, output = capture_subprocess_output([
"sed",
"-i",
"-r",
"s%^(range:).*%"
+ "range: "
+ " ".join(self.__args.dispatch)
+ "%g",
fname,
])
# log output from profile filtering
if not success:
console_error(output)
@@ -462,7 +470,9 @@ class RocProfCompute_Base:
"rocprofv3",
"rocprofiler-sdk",
):
console_log(f"[Run {total_runs+1}/{total_runs+1}][PC sampling profile run]")
console_log(
f"[Run {total_runs + 1}/{total_runs + 1}][PC sampling profile run]"
)
start_run_prof = time.time()
pc_sampling_prof(
method=self.get_args().pc_sampling_method,
+7 -11
Просмотреть файл
@@ -23,8 +23,6 @@
##############################################################################
import os
from pathlib import Path
import config
@@ -67,15 +65,13 @@ class rocprof_v1_profiler(RocProfCompute_Base):
'"' + app_cmd + '"',
]
# store original args for debug message
store_app_cmd(
[
"--timestamp",
"on",
"-o",
self.get_args().path + "/" + fbase + ".csv",
app_cmd,
]
)
store_app_cmd([
"--timestamp",
"on",
"-o",
self.get_args().path + "/" + fbase + ".csv",
app_cmd,
])
return args
# -----------------------
-3
Просмотреть файл
@@ -23,8 +23,6 @@
##############################################################################
import os
import shlex
from pathlib import Path
@@ -44,7 +42,6 @@ class rocprof_v2_profiler(RocProfCompute_Base):
)
def get_profiler_options(self, fname, soc):
fbase = Path(fname).stem
app_cmd = shlex.split(self.get_args().remaining)
args = []
+3 -4
Просмотреть файл
@@ -23,12 +23,9 @@
##############################################################################
import os
import shlex
from pathlib import Path
import config
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
from utils.logger import console_error, console_log, demarcate
@@ -49,7 +46,9 @@ class rocprof_v3_profiler(RocProfCompute_Base):
trace_option = "--kokkos-trace"
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
console_error(
"The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
"The option '--kokkos-trace' is not supported in the current "
"version of rocprof-compute. This functionality is planned for a "
"future release. Please adjust your profiling options accordingly."
)
if self.get_args().hip_trace:
trace_option = "--hip-trace"
+3 -3
Просмотреть файл
@@ -23,8 +23,6 @@
##############################################################################
import os
import shlex
from pathlib import Path
@@ -64,7 +62,9 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
if self.get_args().kokkos_trace:
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
console_error(
"The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
"The option '--kokkos-trace' is not supported in the current "
"version of rocprof-compute. This functionality is planned for a "
"future release. Please adjust your profiling options accordingly."
)
if self.get_args().hip_trace:
options["ROCPROF_HIP_COMPILER_API_TRACE"] = "1"
+109 -57
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import glob
import json
import math
@@ -34,7 +33,6 @@ import sys
from abc import abstractmethod
from pathlib import Path
import pandas as pd
import yaml
import config
@@ -68,9 +66,8 @@ class OmniSoC_Base:
self.__arch = None
self._mspec = mspec
self.__perfmon_dir = None
self.__perfmon_config = (
{}
) # Per IP block max number of simulutaneous counters. GFX IP Blocks
# Per IP block, max number of simultaneous counters. GFX IP Blocks.
self.__perfmon_config = {}
self.__soc_params = {} # SoC specifications
self.__compatible_profilers = [] # Store profilers compatible with SoC
self.populate_mspec()
@@ -169,14 +166,20 @@ class OmniSoC_Base:
)
# Parse json from amd-smi static --clock
amd_smi_mclk = run(["amd-smi", "static", "--clock", "--json"], exit_on_error=True)
amd_smi_mclk = run(
["amd-smi", "static", "--clock", "--json"], exit_on_error=True
)
amd_smi_mclk = json.loads(amd_smi_mclk)
if isinstance(amd_smi_mclk, dict):
# The output of `amd-smi static --clock --json` is a dict with amd-smi>=26.0.0.
amd_smi_mclk = amd_smi_mclk["gpu_data"][0]["clock"]["mem"]["frequency_levels"]
# The output of `amd-smi static --clock --json` is a dict with
# amd-smi>=26.0.0.
amd_smi_mclk = amd_smi_mclk["gpu_data"][0]["clock"]["mem"][
"frequency_levels"
]
else:
# For backward compatibility: the output of `amd-smi static --clock --json` used to be a list for amd-smi<26.0.0.
# For backward compatibility: the output of `amd-smi static --clock --json`
# used to be a list for amd-smi<26.0.0.
amd_smi_mclk = amd_smi_mclk[0]["clock"]["mem"]["frequency_levels"]
# Choose the highest level of memory clock frequency
@@ -186,8 +189,8 @@ class OmniSoC_Base:
console_debug("max mem clock is {}".format(self._mspec.max_mclk))
# these are just max's now, because the parsing was broken and this was inconsistent
# with how we use the clocks elsewhere (all max, all the time)
# These are just max values now, because the parsing was broken and this was
# inconsistent with how we use the clocks elsewhere (all max, all the time)
self._mspec.cur_sclk = self._mspec.max_sclk
self._mspec.cur_mclk = self._mspec.max_mclk
@@ -202,7 +205,9 @@ class OmniSoC_Base:
self._mspec.num_xcd = str(
mi_gpu_specs.get_num_xcds(
self._mspec.gpu_arch, self._mspec.gpu_model, self._mspec.compute_partition
self._mspec.gpu_arch,
self._mspec.gpu_model,
self._mspec.compute_partition,
)
)
@@ -218,7 +223,8 @@ class OmniSoC_Base:
# TODO: use amd-smi python api when available
amd_smi_static = run(["amd-smi", "static", "--gpu=0"], exit_on_error=True)
# Purposely search for patterns without variants suffix to try and match a known GPU model.
# Purposely search for patterns without variants suffix to try and match a known
# GPU model.
detection_methods = [
{
"name": "Market Name",
@@ -228,7 +234,10 @@ class OmniSoC_Base:
"name": "VBIOS Name",
"pattern": r"NAME:\s*.*(mi|MI\d*[a-zA-Z]*)",
},
{"name": "Product Name", "pattern": r"PRODUCT_NAME:\s*.*(mi|MI\d*[a-zA-Z]*)"},
{
"name": "Product Name",
"pattern": r"PRODUCT_NAME:\s*.*(mi|MI\d*[a-zA-Z]*)",
},
]
gpu_model = None
@@ -267,7 +276,8 @@ class OmniSoC_Base:
def detect_counters(self):
"""
Create a set of counters required for the selected report sections.
Parse analysis report configuration files based on the selected report sections to be filtered.
Parse analysis report configuration files based on the selected report
sections to be filtered.
"""
# Read the analysis config files and filter
config_root_dir = f"{self.get_args().config_dir}/{self.__arch}"
@@ -306,7 +316,10 @@ class OmniSoC_Base:
# File id filtering
if file_id not in config_filename_dict:
console_warning(
f"Skipping {block_id}: file id {file_id} not found in {config_root_dir}"
(
f"Skipping {block_id}: file id {file_id} not found in "
f"{config_root_dir}"
)
)
continue
with open(config_filename_dict[file_id], "r") as stream:
@@ -324,7 +337,10 @@ class OmniSoC_Base:
}
if panel_id not in panel_dict:
console_warning(
f"Skipping {block_id}: metric table {panel_id} not found in {config_filename_dict[file_id]}"
(
f"Skipping {block_id}: metric table {panel_id} not found in "
f"{config_filename_dict[file_id]}"
)
)
continue
if metric_id is None:
@@ -338,14 +354,17 @@ class OmniSoC_Base:
}
if metric_id not in metric_dict:
console_warning(
f"Skipping {block_id}: metric id {metric_id} not found in panel id {panel_id}"
(
f"Skipping {block_id}: metric id {metric_id} not found in "
f"panel id {panel_id}"
)
)
continue
texts.append(yaml.dump(metric_dict[metric_id], sort_keys=False))
counters = self.parse_counters("\n".join(texts))
# Handle TCC channel counters: if hw_counter_matches has elements ending with '['
# Handle TCC channel counters: if hw_counter_matches has elems ending with '['
# Expand and interleave the TCC channel counters
# e.g. TCC_HIT[0] TCC_ATOMIC[0] ... TCC_HIT[1] TCC_ATOMIC[1] ...
if using_v3():
@@ -356,12 +375,10 @@ class OmniSoC_Base:
if counter_name.startswith("TCC") and counter_name.endswith("["):
counters.remove(counter_name)
counter_name = counter_name.split("[")[0]
counters = counters.union(
{
f"{counter_name}[{i}]"
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
}
)
counters = counters.union({
f"{counter_name}[{i}]"
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
})
return counters
@@ -394,7 +411,10 @@ class OmniSoC_Base:
if not using_v3():
# Counters not supported in rocprof v1 / v2
counters = counters - {"SQ_INSTS_VALU_MFMA_F8", "SQ_INSTS_VALU_MFMA_MOPS_F8"}
counters = counters - {
"SQ_INSTS_VALU_MFMA_F8",
"SQ_INSTS_VALU_MFMA_MOPS_F8",
}
# TCP_TCP_LATENCY_sum not supported for MI300 (gfx940, gfx941, gfx942)
if self.__arch in ("gfx940", "gfx941", "gfx942"):
@@ -409,7 +429,8 @@ class OmniSoC_Base:
@demarcate
def parse_counters(self, config_text):
"""
Create a set of all hardware counters mentioned in the given config file content string
Create a set of all hardware counters mentioned in the given config file
content string.
"""
hw_counter_matches, variable_matches = self.parse_counters_text(config_text)
@@ -440,7 +461,8 @@ class OmniSoC_Base:
def parse_counters_text(self, text):
"""Parse out hardware counters and variables from given text"""
# hw counter name should start with ip block name
# hw counter name should have all capital letters or digits and should not end with underscore
# hw counter name should have all capital letters or digits
# and should not end with underscore
# hw counter name can optionally end with either '[' or '_sum'
hw_counter_regex = (
r"(?:SQ|SQC|TA|TD|TCP|TCC|CPC|CPF|SPI|GRBM)_[0-9A-Z_]*[0-9A-Z](?:\[|_sum)*"
@@ -458,11 +480,11 @@ class OmniSoC_Base:
if rocprof_cmd != "rocprofiler-sdk":
console_warning(
"rocprof v1 / v2 / v3 interfaces will be removed in favor of "
"rocprofiler-sdk interface in a future release. To use rocprofiler-sdk "
"interface, please set the environment variable ROCPROF to 'rocprofiler-sdk' "
"and optionally provide the path to librocprofiler-sdk.so library via the "
"--rocprofiler-sdk-library-path option."
"rocprof v1/v2/v3 interfaces will be removed in favor of "
"rocprofiler-sdk interface in a future release. To use "
"rocprofiler-sdk, set ROCPROF to 'rocprofiler-sdk' and "
"optionally provide the path to librocprofiler-sdk.so via "
"--rocprofiler-sdk-library-path."
)
rocprof_counters = set()
@@ -473,7 +495,8 @@ class OmniSoC_Base:
# return code should be 1 so success should be False
if success:
console_error(
f"Failed to list rocprof supported counters using command: {command}"
"Failed to list rocprof supported counters using command: %s"
% command
)
for line in output.splitlines():
if "gpu-agent" in line:
@@ -485,7 +508,8 @@ class OmniSoC_Base:
# return code should be 1 so success should be False
if success:
console_error(
f"Failed to list rocprof supported counters using command: {command}"
"Failed to list rocprof supported counters using command: %s"
% command
)
for line in output.splitlines():
if "gpu-agent" in line:
@@ -498,13 +522,13 @@ class OmniSoC_Base:
# return code should be 1 so success should be False
if success:
console_error(
f"Failed to list rocprof supported counters using command: {command}"
"Failed to list rocprof supported counters using command: %s"
% command
)
for line in output.splitlines():
if "gfx" in line:
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
rocprof_counters.update(counters)
elif (
str(rocprof_cmd).endswith("rocprofv3")
or str(rocprof_cmd) == "rocprofiler-sdk"
@@ -550,7 +574,10 @@ class OmniSoC_Base:
@demarcate
def perfmon_coalesce(self, counters):
"""Sort and bucket all related performance counters to minimize required application passes"""
"""
Sort and bucket all related performance counters to minimize required
application passes
"""
# Create workload directory
# In some cases (i.e. --specs) path will not be given
@@ -577,20 +604,26 @@ class OmniSoC_Base:
# Sanity check whether counters are supported by underlying rocprof tool
rocprof_counters = self.get_rocprof_supported_counters()
# rocprof does not support TCC channel counters in the avail output, so remove channel suffix for comparison
# rocprof does not support TCC channel counters in the avail output,
# so remove channel suffix for comparison
not_supported_counters = {
counter.split("[")[0] if is_tcc_channel_counter(counter) else counter
for counter in counters
} - rocprof_counters
if not_supported_counters:
console_warning(
f"Following counters might not be supported by rocprof: {', '.join(not_supported_counters)} "
"Following counters might not be supported by rocprof: %s"
% ", ".join(not_supported_counters)
)
# We might be providing definitions of unsupported counters, so still try to collect them
# We might be providing definitions of unsupported counters, so still try to
# collect them
if not counters:
console_error(
"profiling",
"No performance counters to collect, please check the provided profiling filters",
(
"No performance counters to collect, "
"please check the provided profiling filters"
),
)
else:
console_debug(f"Collecting following counters: {', '.join(counters)} ")
@@ -607,13 +640,15 @@ class OmniSoC_Base:
and not is_tcc_channel_counter(counter)
):
counters.remove(counter)
output_files.append(CounterFile(counter + ".txt", self.__perfmon_config))
output_files.append(
CounterFile(counter + ".txt", self.__perfmon_config)
)
output_files[-1].add(counter)
if using_v3():
# v3 does not support SQ_ACCUM_PREV_HIRES. Instead we defined our own
# counters in counter_defs.yaml that use the accumulate() function. These
# use the name of the accumulate counter with _ACCUM appended to them.
output_files[-1].add(counter + "_ACCUM")
# v3 does not support SQ_ACCUM_PREV_HIRES. Use custom counters
# defined in counter_defs.yaml that utilize accumulate(),
# with _ACCUM suffix.
output_files[-1].add(f"{counter}_ACCUM")
else:
output_files[-1].add("SQ_ACCUM_PREV_HIRES")
accu_file_count += 1
@@ -635,7 +670,9 @@ class OmniSoC_Base:
added = True
# Store all channels for a TCC channel counter in the same file
if is_tcc_channel_counter(ctr):
tcc_channel_counter_file_map[ctr.split("[")[0]] = output_files[i]
tcc_channel_counter_file_map[ctr.split("[")[0]] = output_files[
i
]
break
# All files are full, create a new file
@@ -652,7 +689,6 @@ class OmniSoC_Base:
# TODO: rewrite the above logic for spatial_multiplexing later
if self.get_args().spatial_multiplexing:
# TODO: more error checking
if len(self.get_args().spatial_multiplexing) != 3:
console_error(
@@ -676,9 +712,11 @@ class OmniSoC_Base:
console_debug(
"profiling",
"spatial_multiplexing node_idx %s, node_count %s, gpu_count: %s, old_group_num %s, "
"new_bucket_count %s, groups_per_bucket %s, max_groups_per_node %s, "
"group_start %s, group_end %s"
(
"spatial_multiplexing node_idx %s, node_count %s, gpu_count: %s,\n"
"old_group_num %s, new_bucket_count %s, groups_per_bucket %s,\n"
"max_groups_per_node %s, group_start %s, group_end %s"
)
% (
node_idx,
node_count,
@@ -695,7 +733,12 @@ class OmniSoC_Base:
for f_idx in range(groups_per_bucket):
file_name = str(
Path(workload_perfmon_dir).joinpath(
"pmc_perf_" + "node_" + str(node_idx) + "_" + str(f_idx) + ".txt"
"pmc_perf_"
+ "node_"
+ str(node_idx)
+ "_"
+ str(f_idx)
+ ".txt"
)
)
@@ -719,7 +762,9 @@ class OmniSoC_Base:
else:
# Output to files
for f in output_files:
file_name_txt = str(Path(workload_perfmon_dir).joinpath(f.file_name_txt))
file_name_txt = str(
Path(workload_perfmon_dir).joinpath(f.file_name_txt)
)
file_name_yaml = str(
Path(workload_perfmon_dir).joinpath(f.file_name_yaml)
)
@@ -739,12 +784,19 @@ class OmniSoC_Base:
idx = int(ctr.split("[")[1].split("]")[0])
xcd_idx = idx // int(self._mspec._l2_banks)
channel_idx = idx % int(self._mspec._l2_banks)
expression = f"select({counter_name},[DIMENSION_XCC=[{xcd_idx}], DIMENSION_INSTANCE=[{channel_idx}]])"
discription = f"{counter_name} on {xcd_idx}th XCC and {channel_idx}th channel"
expression = (
f"select({counter_name},"
f"[DIMENSION_XCC=[{xcd_idx}], "
f"DIMENSION_INSTANCE=[{channel_idx}]])"
)
description = (
f"{counter_name} on {xcd_idx}th XCC and "
f"{channel_idx}th channel"
)
counter_def = add_counter_extra_config_input_yaml(
counter_def,
ctr,
discription,
description,
expression,
[self.__arch],
)
-4
Просмотреть файл
@@ -23,10 +23,6 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from utils.logger import console_error, demarcate
from utils.mi_gpu_spec import mi_gpu_specs
+6 -4
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from pathlib import Path
import config
@@ -49,9 +48,12 @@ class gfx90a_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx90a"))
# Create roofline object if mode is provided; skip for --specs
+7 -5
Просмотреть файл
@@ -23,13 +23,12 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_error, console_log, console_warning, demarcate
from utils.logger import console_log, console_warning, demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
@@ -49,9 +48,12 @@ class gfx940_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx940"))
# Create roofline object if mode is provided; skip for --specs
+7 -5
Просмотреть файл
@@ -23,13 +23,12 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_error, console_log, console_warning, demarcate
from utils.logger import console_log, console_warning, demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
@@ -49,9 +48,12 @@ class gfx941_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx941"))
# Create roofline object if mode is provided; skip for --specs
+7 -5
Просмотреть файл
@@ -23,13 +23,12 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_error, console_log, console_warning, demarcate
from utils.logger import console_log, console_warning, demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
@@ -49,9 +48,12 @@ class gfx942_soc(OmniSoC_Base):
)
)
)
self.set_compatible_profilers(
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
)
self.set_compatible_profilers([
"rocprofv1",
"rocprofv2",
"rocprofv3",
"rocprofiler-sdk",
])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx942"))
# Create roofline object if mode is provided; skip for --specs
+1 -2
Просмотреть файл
@@ -23,13 +23,12 @@
##############################################################################
from pathlib import Path
import config
from rocprof_compute_soc.soc_base import OmniSoC_Base
from roofline import Roofline
from utils.logger import console_error, console_log, console_warning, demarcate
from utils.logger import console_log, console_warning, demarcate
from utils.mi_gpu_spec import mi_gpu_specs
from utils.utils import mibench
-1
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import copy
from pathlib import Path
-2
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
Configuration Module
-------------------
+1 -3
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
ROCm Compute Profiler TUI - Main Application with Analysis Methods
----------------------------------------------------------------
@@ -168,7 +166,7 @@ class RocprofTUIApp(App):
if opened := await self.push_screen_wait(SelectDirectory()):
self.add_to_recent(str(opened))
self.main_view.selected_path = opened
dropdown = self.query_one(f"#file-dropdown", DropdownMenu)
dropdown = self.query_one("#file-dropdown", DropdownMenu)
dropdown.add_class("hidden")
self.main_view.run_analysis()
+8 -4
Просмотреть файл
@@ -3,7 +3,6 @@ from collections import defaultdict
from datetime import datetime
from enum import Enum
import numpy as np
import pandas as pd
import config
@@ -61,9 +60,12 @@ class Logger:
if hasattr(self.output_area, "text"):
current_text = self.output_area.text
self.output_area.text = (
f"{current_text}\n{formatted_msg}" if current_text else formatted_msg
f"{current_text}\n{formatted_msg}"
if current_text
else formatted_msg
)
# HACK: moving curson to end of outpu (Is there a better way to achieve this?)
# HACK: moving cursor to end of output
# (Is there a better way to achieve this?)
self.output_area.cursor_location = (999999, 0)
def info(self, message, update_ui=True):
@@ -192,7 +194,9 @@ def apply_rounding_logic(df, decimal_precision):
if df_copy[column].dtype == "object":
df_copy[column] = df_copy[column].combine(
rounded_series,
lambda orig, rounded: rounded if pd.notna(rounded) else orig,
lambda orig, rounded: (
rounded if pd.notna(rounded) else orig
),
)
else:
df_copy[column] = rounded_series
+5 -2
Просмотреть файл
@@ -77,7 +77,10 @@ class KernelView(Container):
"""
with VerticalScroll(id="top-container"):
yield Label(
"Open a workload directory to run analysis and view individual kernel analysis results.",
(
"Open a workload directory to run analysis and view individual "
"kernel analysis results."
),
classes="placeholder",
)
@@ -177,7 +180,7 @@ class KernelView(Container):
bottom_container.remove_children()
bottom_container.mount(
Label(f"Toggle kernel selection to view detailed analysis.")
Label("Toggle kernel selection to view detailed analysis.")
)
if self.current_selection and self.current_selection in self.dfs:
+22 -11
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
Main View Module
---------------
@@ -59,9 +57,7 @@ class MainView(Horizontal):
super().__init__(id="main-container")
self.start_path = (
# NOTE: is cwd the best choice?
Path.cwd()
if DEFAULT_START_PATH is None
else Path(DEFAULT_START_PATH)
Path.cwd() if DEFAULT_START_PATH is None else Path(DEFAULT_START_PATH)
)
self.logger = Logger()
@@ -157,7 +153,9 @@ class MainView(Horizontal):
analyzer.sanitize()
self.logger.info("Step 2: Analyzer sanitized successfully")
except Exception as e:
self.logger.error(f"Step 2 failed - Error sanitizing analyzer: {str(e)}")
self.logger.error(
f"Step 2 failed - Error sanitizing analyzer: {str(e)}"
)
raise
# Step 3: Load sys_info
@@ -172,7 +170,13 @@ class MainView(Horizontal):
sys_info_df = file_io.load_sys_info(sysinfo_path)
self.logger.info(f"Step 3: sys_info_df type = {type(sys_info_df)}")
self.logger.info(
f"Step 3: sys_info_df shape = {sys_info_df.shape if hasattr(sys_info_df, 'shape') else 'No shape attribute'}"
f"Step 3: sys_info_df shape = {
(
sys_info_df.shape
if hasattr(sys_info_df, 'shape')
else 'No shape attribute'
)
}"
)
except Exception as e:
@@ -193,12 +197,16 @@ class MainView(Horizontal):
# If it's already a dict
sys_info = sys_info_df
else:
raise TypeError(f"Unexpected type for sys_info: {type(sys_info_df)}")
raise TypeError(
f"Unexpected type for sys_info: {type(sys_info_df)}"
)
self.logger.info(f"Step 4: sys_info converted = {sys_info}")
except Exception as e:
self.logger.error(f"Step 4 failed - Error converting sys_info: {str(e)}")
self.logger.error(
f"Step 4 failed - Error converting sys_info: {str(e)}"
)
raise
# Step 5: Load SoC specs
@@ -236,7 +244,10 @@ class MainView(Horizontal):
# TODO: add per kernel Roofline support when available
if not self.per_kernel_dfs or not self.top_kernels:
warning_msg = "Step 8: Per Kernel Analysis completed but not all data was returned"
warning_msg = (
"Step 8: Per Kernel Analysis completed but not all data "
"was returned"
)
self._update_view(warning_msg, LogLevel.WARNING)
self.logger.warning(warning_msg)
else:
@@ -289,7 +300,7 @@ class MainView(Horizontal):
return
kernel_view.update_results(self.per_kernel_dfs, self.top_kernels)
self.logger.success(f"Results displayed successfully.")
self.logger.success("Results displayed successfully.")
except Exception as e:
self.logger.error(f"Error refreshing results: {str(e)}")
-2
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
Panel Widget Modules
-------------------
+3 -4
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from __future__ import annotations
import math
@@ -35,7 +34,6 @@ import pandas as pd
import plotext as plt
import plotly.express as px
from textual.widgets import Static
from textual_plotext import PlotextPlot
from utils.mem_chart import plot_mem_chart
@@ -58,7 +56,9 @@ def simple_bar(df, title=None):
.to_dict()["Avg"]
)
else:
raise NameError(f"simple_bar: No Metric or Avg in df columns: {str(df.columns)}")
raise NameError(
f"simple_bar: No Metric or Avg in df columns: {str(df.columns)}"
)
plt.clear_figure()
@@ -389,7 +389,6 @@ class SimpleBar(Static):
class SimpleBox(Static):
DEFAULT_CSS = """
SimpleBox {
padding: 0;
+23 -8
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from typing import Any, Dict, List, Optional
import pandas as pd
@@ -58,7 +57,10 @@ def load_config(config_path) -> Dict[str, Any]:
return yaml.safe_load(file)
except FileNotFoundError:
raise FileNotFoundError(
f"Configuration file {config_path} not found, \nplease populate the analysis_config.yaml file."
(
f"Configuration file {config_path} not found, \n"
"please populate the analysis_config.yaml file."
)
)
except yaml.YAMLError as e:
raise ValueError(f"Error parsing YAML configuration: {e}")
@@ -86,7 +88,7 @@ def get_tui_style_from_path(dfs: Dict[str, Any], path: List[str]) -> Optional[st
def create_widget_from_data(df: pd.DataFrame, tui_style: Optional[str] = None) -> Any:
if df is not None and not df.empty:
match tui_style:
match tui_style: # noqa
case None:
return create_table(df)
@@ -141,7 +143,9 @@ def build_subsection(
widgets = []
if "header_label" in subsection_config:
header_class = subsection_config.get("header_class", "")
widgets.append(Label(subsection_config["header_label"], classes=header_class))
widgets.append(
Label(subsection_config["header_label"], classes=header_class)
)
widgets.append(widget)
@@ -193,7 +197,10 @@ def build_kernel_sections(
def create_safe_widget(subsection_name: str, data: dict, section_name: str):
if not (isinstance(data, dict) and "df" in data):
add_warning(
f"Invalid data structure for '{subsection_name}' in section '{section_name}'"
(
f"Invalid data structure for '{subsection_name}' "
f"in section '{section_name}'"
)
)
return None
@@ -240,7 +247,10 @@ def build_kernel_sections(
kernel_children.append(collapsible)
except Exception as e:
add_warning(
f"Error processing subsection '{subsection_name}' in section '{section_name}': {str(e)}"
(
f"Error processing subsection '{subsection_name}' "
f"in section '{section_name}': {str(e)}"
)
)
if kernel_children:
@@ -251,7 +261,10 @@ def build_kernel_sections(
children.append(section_collapsible)
except Exception as e:
add_warning(
f"Failed to create collapsible for section '{section_name}': {str(e)}"
(
"Failed to create collapsible for section "
f"'{section_name}': {str(e)}"
)
)
except Exception as e:
@@ -289,7 +302,9 @@ def build_section_from_config(
if subsection:
children.append(subsection)
except Exception as e:
error_msg = f"{subsection_config.get('title', 'Unknown')} error: {str(e)}"
error_msg = (
f"{subsection_config.get('title', 'Unknown')} error: {str(e)}"
)
children.append(Label(error_msg, classes="warning"))
else:
children = [Label("No configuration provided for this section")]
+2 -2
Просмотреть файл
@@ -1,4 +1,4 @@
from textual import on, work
from textual import on
from textual.app import ComposeResult
from textual.containers import Container, Horizontal
from textual.reactive import reactive
@@ -74,7 +74,7 @@ class MenuBar(Container):
def on_recent_selected(selected_dir: str) -> None:
if selected_dir:
self.parent_main_view.selected_path = selected_dir
dropdown = self.query_one(f"#file-dropdown", DropdownMenu)
dropdown = self.query_one("#file-dropdown", DropdownMenu)
dropdown.add_class("hidden")
self.parent_main_view.run_analysis()
-1
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from typing import List
from textual.app import ComposeResult
-2
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
Panel Widget Modules
-------------------
-3
Просмотреть файл
@@ -22,15 +22,12 @@
# THE SOFTWARE.
##############################################################################
"""
Specialized Widget Modules
-------------------------
Contains custom widget implementations for the application.
"""
import pandas as pd
from textual.events import MouseDown, MouseMove, MouseUp
from textual.widgets import Static
-1
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from textual.binding import Binding
from textual.widgets import TabbedContent, Tabs
-2
Просмотреть файл
@@ -22,8 +22,6 @@
# THE SOFTWARE.
##############################################################################
"""
Panel Widget Modules
-------------------
+3 -2
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import os
import platform
import subprocess
@@ -81,7 +80,9 @@ class Terimnal(Container):
def update_prompt(self) -> None:
"""Update the command prompt in the input field."""
input_widget = self.query_one("#terminal-input")
current_path = os.path.basename(self.current_directory) or self.current_directory
current_path = (
os.path.basename(self.current_directory) or self.current_directory
)
if platform.system() != "Windows":
prompt = f"{current_path} $ "
+42 -23
Просмотреть файл
@@ -23,11 +23,10 @@
##############################################################################
import os
import textwrap
import time
from abc import ABC, abstractmethod
from abc import abstractmethod
from collections import OrderedDict
from pathlib import Path
@@ -77,7 +76,8 @@ class Roofline:
run_parameters
if run_parameters
else {
"workload_dir": None, # in some cases (i.e. --specs) path will not be given
"workload_dir": None, # in some cases (i.e. --specs),
# path will not be given
"device_id": 0,
"sort_type": "kernels",
"mem_level": "ALL",
@@ -92,7 +92,7 @@ class Roofline:
# Set roofline run parameters from args
if hasattr(self.__args, "path") and not run_parameters:
self.__run_parameters["workload_dir"] = self.__args.path
if hasattr(self.__args, "no_roof") and self.__args.no_roof == False:
if hasattr(self.__args, "no_roof") and not self.__args.no_roof:
self.__run_parameters["is_standalone"] = True
if hasattr(self.__args, "kernel_names") and self.__args.kernel_names:
self.__run_parameters["include_kernel_names"] = True
@@ -122,7 +122,8 @@ class Roofline:
if isinstance(workload_dir_val, list):
if not workload_dir_val or not workload_dir_val[0]:
console_error(
"Workload directory list is empty or invalid. Cannot perform setup.",
"Workload directory list is empty or invalid. "
"Cannot perform setup.",
exit=False,
)
return
@@ -139,7 +140,6 @@ class Roofline:
base_path = Path(base_dir)
if base_path.name == "workloads" and base_path.parent == Path(os.getcwd()):
app_name = getattr(self.__args, "name", "default_app_name")
gpu_model_name = getattr(self.__mspec, "gpu_model", "default_gpu_model")
@@ -169,14 +169,19 @@ class Roofline:
self,
ret_df,
):
"""Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data"""
"""
Generate a set of empirical roofline plots given a directory containing
required profiling and benchmarking data.
"""
if (
not isinstance(self.__run_parameters["workload_dir"], list)
and self.__run_parameters["workload_dir"] != None
):
self.roof_setup()
console_debug("roofline", "Path: %s" % self.__run_parameters.get("workload_dir"))
console_debug(
"roofline", "Path: %s" % self.__run_parameters.get("workload_dir")
)
self.__ai_data = calc_ai(
self.__mspec, self.__run_parameters.get("sort_type"), ret_df
)
@@ -197,8 +202,11 @@ class Roofline:
or str(dt) not in SUPPORTED_DATATYPES[gpu_arch]
):
console_error(
"{} is not a supported datatype for roofline profiling on {} (arch: {})".format(
str(dt), getattr(self.__mspec, "gpu_model", "N/A"), gpu_arch
"{} is not a supported datatype for roofline profiling on {} "
"(arch: {})".format(
str(dt),
getattr(self.__mspec, "gpu_model", "N/A"),
gpu_arch,
),
exit=False,
)
@@ -230,7 +238,8 @@ class Roofline:
if self.__run_parameters.get("include_kernel_names", False):
if self.__ai_data is None:
console_error(
"Roofline Error: self.__ai_data is not populated. Cannot generate kernel names info.",
"Roofline Error: self.__ai_data is not populated. "
"Cannot generate kernel names info.",
exit=False,
)
original_kernel_names = []
@@ -245,7 +254,8 @@ class Roofline:
if num_kernels == 0:
console_log(
"roofline",
"No kernel names found to generate 'Kernel Names and Markers' info.",
"No kernel names found to generate "
"'Kernel Names and Markers' info.",
)
self.__figure.add_annotation(
text="No kernel names to display.",
@@ -356,7 +366,8 @@ class Roofline:
)
# Output will be different depending on interaction type:
# Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output
# Save PDFs if we're in "standalone roofline" mode,
# otherwise return HTML to be used in GUI output
if self.__run_parameters["is_standalone"]:
dev_id = str(self.__run_parameters["device_id"])
@@ -497,7 +508,8 @@ class Roofline:
)
console_debug(
"roofline",
"Roofline analysis only supports AI for floating point calculations at this time",
"Roofline analysis only supports AI for "
"floating point calculations at this time",
)
#######################
@@ -515,15 +527,17 @@ class Roofline:
# Plot peak BW ceiling(s)
for cache_level in cache_hierarchy:
if (
not self.__ceiling_data
or cache_level.lower() not in self.__ceiling_data
or not isinstance(self.__ceiling_data[cache_level.lower()], (list, tuple))
or not isinstance(
self.__ceiling_data[cache_level.lower()], (list, tuple)
)
or len(self.__ceiling_data[cache_level.lower()]) < 3
):
console_error(
f"Ceiling data for {cache_level} is missing or malformed for dtype {dtype}.",
f"Ceiling data for {cache_level} is missing "
f"or malformed for dtype {dtype}.",
exit=False,
)
continue
@@ -612,7 +626,8 @@ class Roofline:
:param dtype: The datatype to be profiled
:type method: str
:return: Build the current figure using plot.build(), or None if datatype is not valid for the architecture
:return: Build the current figure using plot.build(),
or None if datatype is not valid for the architecture
:rtype: str or None
"""
console_debug("roofline", "Generating roofline plot for CLI")
@@ -635,7 +650,8 @@ class Roofline:
)
return
# Extract base directory path regardless of whether workload_dir is list or string
# Extract base directory path regardless of
# whether workload_dir is a list or a string
if isinstance(workload_dir, list):
if not workload_dir or not workload_dir[0]:
console_error(
@@ -893,17 +909,20 @@ class Roofline:
if not self.__args.remaining:
console_error(
"profiling"
"An <app_cmd> is required to run.\rrocprof-compute profile -n test -- <app_cmd>"
"An <app_cmd> is required to run.\r"
"rocprof-compute profile -n test -- <app_cmd>"
)
# TODO: Add an equivelent of characterize_app() to run profiling directly out of this module
# TODO: Add an equivalent of characterize_app() to run profiling
# directly out of this module
elif self.__args.no_roof:
console_log("roofline", "Skipping roofline.")
else:
mibench(self.__args, self.__mspec)
# NB: Currently the post_prossesing() method is the only one being used by rocprofiler-compute,
# we include pre_processing() and profile() methods for those who wish to borrow the roofline module
# NB: Currently the post_processing() method is the only one being used by
# rocprofiler-compute; we include pre_processing() and profile() methods for
# those who wish to borrow the roofline module
@abstractmethod
def post_processing(self):
if self.__run_parameters["is_standalone"]:
+12 -10
Просмотреть файл
@@ -23,10 +23,9 @@
##############################################################################
import getpass
import os
from abc import ABC, abstractmethod
from abc import abstractmethod
from pathlib import Path
import pandas as pd
@@ -75,7 +74,8 @@ class DatabaseConnector:
name = sys_info["workload_name"][0].strip()
except KeyError as e:
console_error(
f"Outdated workload. Cannot find {e} field. Please reprofile to update."
f"Outdated workload. "
f"Cannot find {e} field. Please reprofile to update."
)
else:
console_error(
@@ -140,7 +140,6 @@ class DatabaseConnector:
db_to_remove = self.client[self.connection_info["workload"]]
# check the collection names on the database
col_list = db_to_remove.list_collection_names()
self.client.drop_database(db_to_remove)
db = self.client["workload_names"]
col = db["names"]
@@ -166,15 +165,17 @@ class DatabaseConnector:
is_full_workload_name = self.args.workload.count("_") >= 3
if not is_full_workload_name:
console_error(
"-w/--workload is not valid. Please use full workload name as seen in GUI when removing (i.e. rocprofiler-compute_asw_vcopy_mi200)"
"-w/--workload is not valid. Please use full workload name "
"as seen in GUI when removing (i.e. "
"rocprofiler-compute_asw_vcopy_mi200)"
)
if (
self.connection_info["host"] == None
or self.connection_info["username"] == None
):
console_error(
"-H/--host and -u/--username are required when interaction type is set to %s"
% self.interaction_type
"-H/--host and -u/--username are required when "
"interaction type is set to %s" % self.interaction_type
)
if (
self.connection_info["workload"] == "admin"
@@ -192,7 +193,8 @@ class DatabaseConnector:
or self.connection_info["workload"] == None
):
console_error(
"-H/--host, -w/--workload, -u/--username, and -t/--team are all required when interaction type is set to %s"
"-H/--host, -w/--workload, -u/--username, and -t/--team are all "
"required when interaction type is set to %s"
% self.interaction_type
)
@@ -220,7 +222,7 @@ class DatabaseConnector:
else:
console_log("database", "Password received")
else:
password = self.connection_info["password"]
pass
# Establish client connection
connection_str = (
@@ -239,5 +241,5 @@ class DatabaseConnector:
)
try:
self.client.server_info()
except:
except Exception:
console_error("database", "Unable to connect to the DB server.")
+15 -10
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import os
import re
from collections import OrderedDict
@@ -50,7 +49,9 @@ top_stats_build_in_config = {
1: {
"id": 1,
"title": "Dispatch List",
"data source": [{"raw_csv_table": {"id": 2, "source": "pmc_dispatch_info.csv"}}],
"data source": [
{"raw_csv_table": {"id": 2, "source": "pmc_dispatch_info.csv"}}
],
},
}
@@ -72,7 +73,8 @@ def load_panel_configs(dir):
if f.endswith(".yaml"):
with open(str(Path(root).joinpath(f))) as file:
config = yaml.safe_load(file)
# metric key can be None due to some metric tables not having any metrics
# metric key can be None due to some metric tables
# not having any metrics
# metric key should be empty dict instead of None
for data_source in config["Panel Config"]["data source"]:
metric_table = data_source.get("metric_table")
@@ -80,7 +82,8 @@ def load_panel_configs(dir):
metric_table["metric"] = {}
d[config["Panel Config"]["id"]] = config["Panel Config"]
# TODO: sort metrics as the header order in case they are not defined in the same order
# TODO: sort metrics in the header order in case they
# are not defined in the same order
od = OrderedDict(sorted(d.items()))
# for key, value in od.items():
@@ -157,9 +160,9 @@ def create_df_kernel_top_stats(
axis=1,
)
grouped = time_stats.groupby(by=["Kernel_Name"]).agg(
{"ExeTime": ["count", "sum", "mean", "median"]}
)
grouped = time_stats.groupby(by=["Kernel_Name"]).agg({
"ExeTime": ["count", "sum", "mean", "median"]
})
time_unit_str = "(" + time_unit + ")"
grouped.columns = [
@@ -205,8 +208,8 @@ def create_df_pmc(
dfs = []
coll_levels = []
df = pd.DataFrame()
new_df = pd.DataFrame()
df = pd.DataFrame() # noqa: F841
new_df = pd.DataFrame() # noqa: F841
for root, dirs, files in os.walk(raw_data_dir):
for f in files:
# print("file ", f)
@@ -250,7 +253,9 @@ def create_df_pmc(
else:
# regular single node case
if nodes is None:
return create_single_df_pmc(raw_data_root_dir, None, kernel_verbose, verbose)
return create_single_df_pmc(
raw_data_root_dir, None, kernel_verbose, verbose
)
# "empty list" means all nodes
elif not nodes:
+21 -20
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import colorlover
import pandas as pd
import plotly.express as px
@@ -87,23 +86,21 @@ def discrete_background_color_bins(df, n_bins=5, columns="all"):
color = "white" if i > len(bounds) / 2.0 else "inherit"
for column in df_numeric_columns:
styles.append(
{
"if": {
"filter_query": (
"{{{column}}} >= {min_bound}"
+ (
" && {{{column}}} < {max_bound}"
if (i < len(bounds) - 1)
else ""
)
).format(column=column, min_bound=min_bound, max_bound=max_bound),
"column_id": column,
},
"backgroundColor": backgroundColor,
"color": color,
}
)
styles.append({
"if": {
"filter_query": (
"{{{column}}} >= {min_bound}"
+ (
" && {{{column}}} < {max_bound}"
if (i < len(bounds) - 1)
else ""
)
).format(column=column, min_bound=min_bound, max_bound=max_bound),
"column_id": column,
},
"backgroundColor": backgroundColor,
"color": color,
})
legend.append(
html.Div(
style={"display": "inline-block", "width": "60px"},
@@ -203,7 +200,9 @@ def build_bar_chart(display_df, table_config, barchart_elements, norm_filt):
# Speed-of-light bar chart
elif table_config["id"] in barchart_elements["sol"]:
display_df["Avg"] = [float(x) if x != "" else float(0) for x in display_df["Avg"]]
display_df["Avg"] = [
float(x) if x != "" else float(0) for x in display_df["Avg"]
]
if table_config["id"] == 1701:
# special layout for L2 Cache SOL
d_figs.append(
@@ -265,7 +264,9 @@ def build_bar_chart(display_df, table_config, barchart_elements, norm_filt):
).update_xaxes(range=[0, 110])
)
else:
console_error("Table id %s. Cannot determine barchart type." % table_config["id"])
console_error(
"Table id %s. Cannot determine barchart type." % table_config["id"]
)
# update layout for each of the charts
for fig in d_figs:
+25 -13
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import dash_bootstrap_components as dbc
from dash import dcc, html
@@ -67,7 +66,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
children=[
dbc.DropdownMenu(
[
dbc.DropdownMenuItem("Overview", header=True),
dbc.DropdownMenuItem(
"Overview", header=True
),
dbc.DropdownMenuItem(
"Roofline",
href="#roofline",
@@ -88,7 +89,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
href="#system_speed-of-light",
external_link=True,
),
dbc.DropdownMenuItem("Compute", header=True),
dbc.DropdownMenuItem(
"Compute", header=True
),
dbc.DropdownMenuItem(
"Command Processor (CPF/CPC)",
href="#command_processor_cpccpf",
@@ -131,8 +134,14 @@ def get_header(raw_pmc, input_filters, kernel_names):
external_link=True,
),
dbc.DropdownMenuItem(
"Address Processing Unit and Data Return Path (TA/TD)",
href="#address_processing_unit_and_data_return_path_tatd",
(
"Address Processing Unit and "
"Data Return Path (TA/TD)"
),
href=(
"#address_processing_unit_and"
"_data_return_path_tatd"
),
external_link=True,
),
dbc.DropdownMenuItem(
@@ -199,9 +208,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
), # list avail gcd ids
id="gcd-filt",
multi=True,
value=input_filters[
"gpu"
], # default to any gpu filters passed as args
# default to any gpu filters
# passed as args
value=input_filters["gpu"],
placeholder="ALL",
clearable=False,
style={"width": "60px"},
@@ -230,9 +239,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
),
id="disp-filt",
multi=True,
value=input_filters[
"dispatch"
], # default to any dispatch filters passed as args
# default to any dispatch
# filters passed as args
value=input_filters["dispatch"],
placeholder="ALL",
style={"width": "150px"},
),
@@ -254,7 +263,8 @@ def get_header(raw_pmc, input_filters, kernel_names):
id="top-n-filt",
value=input_filters[
"top_n"
], # default to any dispatch filters passed as args
], # default to any dispatch filters
# passed as args
clearable=False,
style={"width": "50px"},
),
@@ -287,7 +297,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
optionHeight=150,
placeholder="ALL",
style={
"width": "600px", # TODO: Change these widths to % rather than fixed value
"width": "600px",
# TODO: Change these widths to
# % rather than fixed value
},
),
]
+123 -59
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
from dash import html
from dash_svg import G, Path, Rect, Svg, Text
@@ -580,21 +579,31 @@ def get_memchart(mem_data, base_data):
Rect(x="319", y="329.5", rx="3", ry="3"),
Rect(x="319", y="382", rx="3", ry="3"),
Rect(x="1367.69", y="271", rx="3", ry="3"),
Rect(x="1367.69", y="296.5", rx="3", ry="3"),
Rect(x="1367.69", y="322.5", rx="3", ry="3"),
Rect(
x="1367.69", y="296.5", rx="3", ry="3"
),
Rect(
x="1367.69", y="322.5", rx="3", ry="3"
),
Rect(x="1078", y="199", rx="3", ry="3"),
Rect(x="1078", y="224.5", rx="3", ry="3"),
Rect(x="1078", y="250.5", rx="3", ry="3"),
Rect(x="771.44", y="103", rx="3", ry="3"),
Rect(x="770.44", y="358.75", rx="3", ry="3"),
Rect(x="770.44", y="390.25", rx="3", ry="3"),
Rect(
x="770.44", y="358.75", rx="3", ry="3"
),
Rect(
x="770.44", y="390.25", rx="3", ry="3"
),
Rect(x="769.44", y="477", rx="3", ry="3"),
Rect(x="769.44", y="508.5", rx="3", ry="3"),
Rect(x="1078", y="278", rx="3", ry="3"),
Rect(x="1078", y="342.5", rx="3", ry="3"),
Rect(x="1078", y="368.5", rx="3", ry="3"),
Rect(x="772.44", y="179", rx="3", ry="3"),
Rect(x="772.44", y="210.18", rx="3", ry="3"),
Rect(
x="772.44", y="210.18", rx="3", ry="3"
),
Rect(x="771.44", y="71.28", rx="3", ry="3"),
Rect(x="772.44", y="242", rx="3", ry="3"),
Rect(x="770.44", y="274.5", rx="3", ry="3"),
@@ -617,21 +626,31 @@ def get_memchart(mem_data, base_data):
className="val-3",
children=[
Rect(x="1410.69", y="271", rx="3", ry="3"),
Rect(x="1410.69", y="296.5", rx="3", ry="3"),
Rect(x="1410.69", y="322.5", rx="3", ry="3"),
Rect(
x="1410.69", y="296.5", rx="3", ry="3"
),
Rect(
x="1410.69", y="322.5", rx="3", ry="3"
),
Rect(x="1121", y="199", rx="3", ry="3"),
Rect(x="1121", y="224.5", rx="3", ry="3"),
Rect(x="1121", y="250.5", rx="3", ry="3"),
Rect(x="814.44", y="103", rx="3", ry="3"),
Rect(x="813.44", y="358.75", rx="3", ry="3"),
Rect(x="813.44", y="390.25", rx="3", ry="3"),
Rect(
x="813.44", y="358.75", rx="3", ry="3"
),
Rect(
x="813.44", y="390.25", rx="3", ry="3"
),
Rect(x="812.44", y="477", rx="3", ry="3"),
Rect(x="812.44", y="508.5", rx="3", ry="3"),
Rect(x="1121", y="278", rx="3", ry="3"),
Rect(x="1121", y="342.5", rx="3", ry="3"),
Rect(x="1121", y="368.5", rx="3", ry="3"),
Rect(x="815.44", y="179", rx="3", ry="3"),
Rect(x="815.44", y="210.18", rx="3", ry="3"),
Rect(
x="815.44", y="210.18", rx="3", ry="3"
),
Rect(x="814.44", y="71.28", rx="3", ry="3"),
Rect(x="815.44", y="242", rx="3", ry="3"),
Rect(x="813.44", y="274.5", rx="3", ry="3"),
@@ -694,7 +713,8 @@ def get_memchart(mem_data, base_data):
children=[
Path(
id="p1",
d="M 100 243.72 L 120 220.28 L 475 220.28 L 495 243.72 Z",
d="M 100 243.72 L 120 220.28 L "
"475 220.28 L 495 243.72 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -744,7 +764,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 62 L 411.89 65.51 L 413.63 62.01 L 411.88 58.51 Z",
d="M 418.88 62 L 411.89 65.51 L "
"413.63 62.01 L 411.88 58.51 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -754,7 +775,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 112 L 411.88 115.49 L 413.63 112 L 411.88 108.49 Z",
d="M 418.88 112 L 411.88 115.49 L "
"413.63 112 L 411.88 108.49 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -764,7 +786,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 162 L 411.88 165.51 L 413.63 162 L 411.88 158.51 Z",
d="M 418.88 162 L 411.88 165.51 L "
"413.63 162 L 411.88 158.51 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -774,7 +797,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 212 L 411.88 215.5 L 413.63 212 L 411.88 208.5 Z",
d="M 418.88 212 L 411.88 215.5 "
"L 413.63 212 L 411.88 208.5 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -784,7 +808,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 262 L 411.88 265.5 L 413.63 262 L 411.88 258.5 Z",
d="M 418.88 262 L 411.88 265.5 L "
"413.63 262 L 411.88 258.5 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -794,7 +819,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 312 L 411.89 315.51 L 413.63 312.01 L 411.88 308.51 Z",
d="M 418.88 312 L 411.89 315.51 L "
"413.63 312.01 L 411.88 308.51 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -804,7 +830,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 362 L 411.88 365.5 L 413.63 362 L 411.88 358.5 Z",
d="M 418.88 362 L 411.88 365.5 L "
"413.63 362 L 411.88 358.5 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -814,7 +841,8 @@ def get_memchart(mem_data, base_data):
stroke="#ff8000",
),
Path(
d="M 418.88 409 L 411.87 412.47 L 413.63 408.97 L 411.9 405.47 Z",
d="M 418.88 409 L 411.87 412.47 L "
"413.63 408.97 L 411.9 405.47 Z",
fill="#ff8000",
stroke="#ff8000",
),
@@ -824,7 +852,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 647.12 207 L 654.12 203.5 L 652.37 207 L 654.12 210.5 Z",
d="M 647.12 207 L 654.12 203.5 L "
"652.37 207 L 654.12 210.5 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -834,17 +863,20 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 748.88 236.01 L 741.9 239.54 L 743.63 236.03 L 741.86 232.54 Z",
d="M 748.88 236.01 L 741.9 239.54 L "
"743.63 236.03 L 741.86 232.54 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 750 502 L 110 502 Q 100 502 100 492 L 100 468.37",
d="M 750 502 L 110 502 Q 100 "
"502 100 492 L 100 468.37",
fill="none",
stroke="#00cccc",
),
Path(
d="M 100 463.12 L 103.5 470.12 L 100 468.37 L 96.5 470.12 Z",
d="M 100 463.12 L 103.5 470.12 L "
"100 468.37 L 96.5 470.12 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -854,7 +886,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 937.12 504 L 944.12 500.5 L 942.37 504 L 944.12 507.5 Z",
d="M 937.12 504 L 944.12 500.5 L "
"942.37 504 L 944.12 507.5 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -864,7 +897,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1528.81 242.93 L 1535.81 239.43 L 1534.06 242.93 L 1535.81 246.43 Z",
d="M 1528.81 242.93 L 1535.81 239.43 L "
"1534.06 242.93 L 1535.81 246.43 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -874,7 +908,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1596.57 274.64 L 1589.57 278.14 L 1591.32 274.64 L 1589.57 271.14 Z",
d="M 1596.57 274.64 L 1589.57 278.14 L "
"1591.32 274.64 L 1589.57 271.14 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -884,7 +919,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1413 127.12 L 1416.5 134.12 L 1413 132.37 L 1409.5 134.12 Z",
d="M 1413 127.12 L 1416.5 134.12 L "
"1413 132.37 L 1409.5 134.12 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -894,7 +930,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1443 194.88 L 1439.5 187.88 L 1443 189.63 L 1446.5 187.88 Z",
d="M 1443 194.88 L 1439.5 187.88 L "
"1443 189.63 L 1446.5 187.88 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -904,7 +941,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1413.01 372.12 L 1416.54 379.1 L 1413.03 377.37 L 1409.54 379.14 Z",
d="M 1413.01 372.12 L 1416.54 379.1 L "
"1413.03 377.37 L 1409.54 379.14 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -914,12 +952,14 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1443.35 437.38 L 1439.82 430.4 L 1443.33 432.13 L 1446.82 430.36 Z",
d="M 1443.35 437.38 L 1439.82 430.4 L "
"1443.33 432.13 L 1446.82 430.36 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 1145.25 341.38 L 1141.75 334.38 L 1145.25 336.13 L 1148.75 334.38 Z",
d="M 1145.25 341.38 L 1141.75 334.38 L "
"1145.25 336.13 L 1148.75 334.38 Z",
fill="rgb(0, 0, 0)",
stroke="rgb(0, 0, 0)",
),
@@ -929,7 +969,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 647.12 82 L 654.12 78.5 L 652.37 82 L 654.12 85.5 Z",
d="M 647.12 82 L 654.12 78.5 L 652.37 "
"82 L 654.12 85.5 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -939,7 +980,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 651.12 386.75 L 658.12 383.25 L 656.37 386.75 L 658.12 390.25 Z",
d="M 651.12 386.75 L 658.12 383.25 L "
"656.37 386.75 L 658.12 390.25 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -949,12 +991,14 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 646.12 269 L 653.12 265.5 L 651.37 269 L 653.12 272.5 Z",
d="M 646.12 269 L 653.12 265.5 L "
"651.37 269 L 653.12 272.5 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 748.88 269 L 741.88 272.5 L 743.63 269 L 741.88 265.5 Z",
d="M 748.88 269 L 741.88 272.5 L "
"743.63 269 L 741.88 265.5 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -964,7 +1008,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 939.12 206.41 L 946.12 202.91 L 944.37 206.41 L 946.12 209.91 Z",
d="M 939.12 206.41 L 946.12 202.91 L "
"944.37 206.41 L 946.12 209.91 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -974,7 +1019,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1040.88 235.42 L 1033.9 238.95 L 1035.63 235.44 L 1033.86 231.95 Z",
d="M 1040.88 235.42 L 1033.9 238.95 L "
"1035.63 235.44 L 1033.86 231.95 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -984,12 +1030,14 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 938.12 268.41 L 945.12 264.91 L 943.37 268.41 L 945.12 271.91 Z",
d="M 938.12 268.41 L 945.12 264.91 L "
"943.37 268.41 L 945.12 271.91 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 1040.88 268.41 L 1033.88 271.91 L 1035.63 268.41 L 1033.88 264.91 Z",
d="M 1040.88 268.41 L 1033.88 271.91 L "
"1035.63 268.41 L 1033.88 264.91 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -999,7 +1047,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 939.12 354.32 L 946.12 350.82 L 944.37 354.32 L 946.12 357.82 Z",
d="M 939.12 354.32 L 946.12 350.82 L "
"944.37 354.32 L 946.12 357.82 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -1009,7 +1058,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1040.88 383.33 L 1033.9 386.86 L 1035.63 383.35 L 1033.86 379.86 Z",
d="M 1040.88 383.33 L 1033.9 386.86 L "
"1035.63 383.35 L 1033.86 379.86 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -1019,12 +1069,14 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 938.12 416.32 L 945.12 412.82 L 943.37 416.32 L 945.12 419.82 Z",
d="M 938.12 416.32 L 945.12 412.82 L "
"943.37 416.32 L 945.12 419.82 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 1040.88 416.32 L 1033.88 419.82 L 1035.63 416.32 L 1033.88 412.82 Z",
d="M 1040.88 416.32 L 1033.88 419.82 L "
"1035.63 416.32 L 1033.88 412.82 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -1034,7 +1086,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1252.12 245.75 L 1259.12 242.25 L 1257.37 245.75 L 1259.12 249.25 Z",
d="M 1252.12 245.75 L 1259.12 242.25 L "
"1257.37 245.75 L 1259.12 249.25 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -1044,7 +1097,8 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1353.88 274.76 L 1346.9 278.29 L 1348.63 274.78 L 1346.86 271.29 Z",
d="M 1353.88 274.76 L 1346.9 278.29 L "
"1348.63 274.78 L 1346.86 271.29 Z",
fill="#00cccc",
stroke="#00cccc",
),
@@ -1054,18 +1108,21 @@ def get_memchart(mem_data, base_data):
stroke="#00cccc",
),
Path(
d="M 1251.12 307.75 L 1258.12 304.25 L 1256.37 307.75 L 1258.12 311.25 Z",
d="M 1251.12 307.75 L 1258.12 304.25 L "
"1256.37 307.75 L 1258.12 311.25 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
d="M 1353.88 307.75 L 1346.88 311.25 L 1348.63 307.75 L 1346.88 304.25 Z",
d="M 1353.88 307.75 L 1346.88 311.25 L "
"1348.63 307.75 L 1346.88 304.25 Z",
fill="#00cccc",
stroke="#00cccc",
),
Path(
id="p2",
d="M 235 67 L 245 57 L 265 57 L 275 67 Z",
d="M 235 67 L 245 57 L "
"265 57 L 275 67 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1091,7 +1148,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p3",
d="M 235 117 L 245 107 L 265 107 L 275 117 Z",
d="M 235 117 L 245 107 L 265 "
"107 L 275 117 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1117,7 +1175,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p4",
d="M 235 167 L 245 157 L 265 157 L 275 167 Z",
d="M 235 167 L 245 157 L "
"265 157 L 275 167 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1143,7 +1202,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p5",
d="M 235 217 L 245 207 L 265 207 L 275 217 Z",
d="M 235 217 L 245 207 L "
"265 207 L 275 217 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1169,7 +1229,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p6",
d="M 235 267 L 245 257 L 265 257 L 275 267 Z",
d="M 235 267 L 245 257 L "
"265 257 L 275 267 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1195,7 +1256,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p7",
d="M 235 317 L 245 307 L 265 307 L 275 317 Z",
d="M 235 317 L 245 307 L "
"265 307 L 275 317 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1221,7 +1283,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p8",
d="M 235 367 L 245 357 L 265 357 L 275 367 Z",
d="M 235 367 L 245 357 L "
"265 357 L 275 367 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1247,7 +1310,8 @@ def get_memchart(mem_data, base_data):
),
Path(
id="p9",
d="M 235 417 L 245 407 L 265 407 L 275 417 Z",
d="M 235 417 L 245 407 L "
"265 407 L 275 417 Z",
fill="#ffffff",
stroke="#ff8000",
),
@@ -1986,11 +2050,11 @@ def format_value_for_display(value, max_length=6):
if isinstance(value, (int, float)):
value = abs(value)
if value >= 1000000000:
value = f"{value/1000000000:.1f}B"
value = f"{value / 1000000000:.1f}B"
elif value >= 1000000:
value = f"{value/1000000:.1f}M"
value = f"{value / 1000000:.1f}M"
elif value >= 1000:
value = f"{value/1000:.1f}K"
value = f"{value / 1000:.1f}K"
elif value == int(value):
value = str(int(value))
else:
+6 -4
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import re
import subprocess
from pathlib import Path
@@ -70,7 +69,8 @@ def kernel_name_shortener(df, level):
r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?"
)
# works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
# works for name:
# Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
if names_and_args.search(demangled_name):
matches = names_and_args.findall(demangled_name)
else:
@@ -82,7 +82,8 @@ def kernel_name_shortener(df, level):
current_level = 0
for name in matches:
##can cause errors if a function name or argument is equal to 'clone'
# can cause errors if a function name
# or argument is equal to 'clone'
if name[0] == "clone":
continue
if len(name) == 3:
@@ -101,7 +102,8 @@ def kernel_name_shortener(df, level):
current_level += name[1].count("<")
curr_index = 0
# cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
# cases include '>' and '> >, '; we have to go in depth here so as
# not to lose track of commas and the current level
while name[1].count(">") > 0 and curr_index < len(name[1]):
if current_level < level:
new_name += name[1][curr_index:]
+11 -6
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import logging
import os
import sys
@@ -97,7 +96,9 @@ class ColoredFormatter(logging.Formatter):
def format(self, record):
levelname = record.levelname
if levelname in COLORS:
levelname_color = COLOR_SEQ % (30 + COLORS[levelname]) + levelname + RESET_SEQ
levelname_color = (
COLOR_SEQ % (30 + COLORS[levelname]) + levelname + RESET_SEQ
)
record.levelname = levelname_color
return logging.Formatter.format(self, record)
@@ -107,9 +108,12 @@ class ColoredFormatterAll(logging.Formatter):
levelname = record.levelname
if levelname in COLORS:
if levelname == "INFO":
log_fmt = f"%(message)s"
log_fmt = "%(message)s"
else:
log_fmt = f"{COLOR_SEQ % (30 + COLORS[levelname])}%(levelname)s: %(message)s{RESET_SEQ}"
log_fmt = (
f"{COLOR_SEQ % (30 + COLORS[levelname])}"
f"%(levelname)s: %(message)s{RESET_SEQ}"
)
formatter = logging.Formatter(log_fmt)
return formatter.format(record)
@@ -171,7 +175,6 @@ def setup_file_handler(loglevel, workload_dir):
# Setup logger priority - called after argument parsing
def setup_logging_priority(verbosity, quietmode, appmode, guimode):
# set loglevel based on selected verbosity and quietmode
levels = [logging.INFO, logging.DEBUG, logging.TRACE]
@@ -197,7 +200,9 @@ def setup_logging_priority(verbosity, quietmode, appmode, guimode):
elif loglevel in {"ERROR", "error"}:
loglevel = logging.ERROR
else:
print("Ignoring unsupported ROCPROFCOMPUTE_LOGLEVEL setting (%s)" % loglevel)
print(
"Ignoring unsupported ROCPROFCOMPUTE_LOGLEVEL setting (%s)" % loglevel
)
sys.exit(1)
# update console loglevel based on command-line args/env settings
+21 -10
Просмотреть файл
@@ -24,8 +24,7 @@
from dataclasses import dataclass, field
from decimal import Decimal
from types import SimpleNamespace as NS
from typing import Dict, Generator, List, Mapping
from typing import Dict
from plotille import Canvas
@@ -60,7 +59,8 @@ def make_format_spec(num, align=">"):
def is_value_valid(value):
"""
Check if a value is valid and display N/A if not(to be valid, it needs to be not None, and be int or float)
Check if a value is valid and display N/A if not
(to be valid, it needs to be not None, and be int or float)
"""
if value is None:
return False
@@ -82,7 +82,8 @@ def format_text(
value_align=">",
):
"""
Format a text string for canvas to display according to input key value pair and make proper aligment
Format a text string for canvas to display according to
input key value pair and make proper alignment
For invalid value, it displays N/A
All strings to be displayed on Canvas need to use this method
"""
@@ -102,12 +103,16 @@ def format_text(
value_str = f"{'N/A':{align}{width}}"
key_format = (
make_format_spec(key_step_prec_leftalign, key_align) if key is not None else None
make_format_spec(key_step_prec_leftalign, key_align)
if key is not None
else None
)
key_str = (
"{key:{key_format}}".format(key=key, key_format=key_format)
if key and isinstance(key, (int, float))
else str(key) if key else None
else str(key)
if key
else None
)
unit_string = post_description_with_space if not "N/A" in value_str else ""
@@ -913,7 +918,9 @@ class Fabric(RectFrame):
canvas.rect(self.x_min, self.y_min, self.x_max, self.y_max)
canvas.text(self.x_min + 6.0, self.y_max - 2.0, " " + self.label)
canvas.text(self.x_min + 2.0, self.y_max - 4.0, "Latency (cycles)")
canvas.rect(self.x_min + 2.0, self.y_max - 9, self.x_max - 2.0, self.y_max - 4.5)
canvas.rect(
self.x_min + 2.0, self.y_max - 9, self.x_max - 2.0, self.y_max - 4.5
)
i = 1
for k, v in self.lat.items():
@@ -960,7 +967,9 @@ class Wire_Fabric_HBM(RectFrame):
value_step_prec_rightalign=4.0,
),
)
canvas.text(self.x_min + self.text_x_offset - 2, self.y_max - 1.0, "<-----------")
canvas.text(
self.x_min + self.text_x_offset - 2, self.y_max - 1.0, "<-----------"
)
canvas.text(
self.x_min + self.text_x_offset,
self.y_max - 2.0,
@@ -971,7 +980,9 @@ class Wire_Fabric_HBM(RectFrame):
value_step_prec_rightalign=4.0,
),
)
canvas.text(self.x_min + self.text_x_offset - 2, self.y_max - 3.0, "----------->")
canvas.text(
self.x_min + self.text_x_offset - 2, self.y_max - 3.0, "----------->"
)
# HBM
@@ -1001,7 +1012,7 @@ class MemChart:
# Fixme: this is temp solution to filter out non-numeric string
for k, v in metric_dict.items():
# print(k, type(v))
metric_dict[k] = None if type(v) == str else v
metric_dict[k] = None if isinstance(v, str) else v
# Typically, the drawing order would be: left->right, top->down
+34 -20
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import os
from dataclasses import dataclass
from typing import Any, Dict
@@ -48,7 +47,6 @@ MI_CONSTANS = {
MI350: "mi350",
}
# ----------------------------
# Data Class handling to preserve the hierarchical gpu information
# ----------------------------
@@ -64,9 +62,9 @@ class MIGPUSpecs:
_chip_id_dict = {} # key: chip_id (int)
_perfmon_config = {} # key: gpu_arch
_gpu_arch_to_compute_partition_dict = (
{}
) # key: gpu_arch, used for gpu archs containing only one gpu model and thus one compute partition
_gpu_arch_to_compute_partition_dict = {} # key: gpu_arch, used for gpu archs
# containing only one gpu model and
# thus one compute partition
_initialized = False
@@ -109,7 +107,8 @@ class MIGPUSpecs:
console_error(f"Error parsing YAML file '{file_path}': {exc}")
except Exception as e:
console_error(
f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
f"An unexpected error occurred while loading YAML "
f"file '{file_path}': {e}"
)
@classmethod
@@ -152,9 +151,13 @@ class MIGPUSpecs:
.get("num_xcds", {})
)
if "chip_ids" in models and "physical" in models["chip_ids"]:
cls._chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model
cls._chip_id_dict[models["chip_ids"]["physical"]] = (
curr_gpu_model
)
if "chip_ids" in models and "virtual" in models["chip_ids"]:
cls._chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model
cls._chip_id_dict[models["chip_ids"]["virtual"]] = (
curr_gpu_model
)
# detect gpu arch to compute partition relationships
cls._populate_gpu_arch_to_compute_partition_dict()
@@ -171,10 +174,12 @@ class MIGPUSpecs:
compute_partition = cls._num_xcds_dict.get(single_model)
if compute_partition is not None:
cls._gpu_arch_to_compute_partition_dict[gpu_arch] = compute_partition
cls._gpu_arch_to_compute_partition_dict[gpu_arch] = (
compute_partition
)
console_debug(
"[populate_single_arch_partition_dict] Single model arch found: "
"%s -> %s (partition: %s)"
"[populate_single_arch_partition_dict] Single model "
"arch found: %s -> %s (partition: %s)"
% (gpu_arch, single_model, compute_partition)
)
@@ -254,14 +259,16 @@ class MIGPUSpecs:
@classmethod
def set_default_gpu_settings(self, gpu_arch, gpu_model, compute_partition):
"""
Set default GPU settings when model is unknown or cannot be determined.
NOTE: This is a fallback to gfx942 settings - consider making this architecture-specific.
Set default GPU settings when model is unknown or cannot be
determined. NOTE: This is a fallback to gfx942 settings -
consider making this architecture-specific.
"""
DEFAULT_COMPUTE_PARTITION = "SPX"
DEFAULT_NUM_XCD = 8
console_warning(
f"Unable to determine xcd count from:\n\t"
f"GPU arch: '{gpu_arch}', model: '{gpu_model}', partition: '{compute_partition}'"
"Unable to determine xcd count from:\n\t"
f"GPU arch: '{gpu_arch}', model: '{gpu_model}',\n\t"
f"partition: '{compute_partition}'"
)
console_warning(
f"Applying default gfx942 settings:\n"
@@ -276,7 +283,8 @@ class MIGPUSpecs:
cls, gpu_arch: str = None, gpu_model: str = None, compute_partition: str = None
):
"""
Retrieve the number of XCDs based on GPU architecture, model, and compute partition.
Retrieve the number of XCDs based on GPU architecture, model,
and compute partition.
Priority order:
1. Legacy GPU check (returns 1 XCD for older architectures/models)
@@ -307,7 +315,8 @@ class MIGPUSpecs:
return num_xcds
else:
console_warning(
f"No compute partition data found for architecture '{gpu_arch.upper()}'"
f"No compute partition data found for "
f"architecture '{gpu_arch.upper()}'"
)
# 3. Fall back to model + partition-based lookup
@@ -315,7 +324,8 @@ class MIGPUSpecs:
# Validate XCD dictionary is populated
if not hasattr(cls, "_num_xcds_dict") or not cls._num_xcds_dict:
console_error(
"mi300_num_xcds_dict not populated. Did you run parse_mi_gpu_spec()?"
"mi300_num_xcds_dict not populated. "
"Did you run parse_mi_gpu_spec()?"
)
elif gpu_model_norm not in cls._num_xcds_dict:
console_warning(
@@ -330,7 +340,9 @@ class MIGPUSpecs:
)
elif partition_norm not in model_dict:
console_warning(
f"Unknown compute partition '{compute_partition}' for model '{gpu_model}'"
f"Unknown compute partition "
f"'{compute_partition}' for model "
f"'{gpu_model}'"
)
else:
num_xcds = model_dict[partition_norm]
@@ -338,8 +350,10 @@ class MIGPUSpecs:
return num_xcds
else:
console_warning(
f"Unknown compute partition found for {compute_partition} / {gpu_model}"
"Unknown compute partition found "
f"for {compute_partition} / {gpu_model}"
)
else:
console_warning("No gpu model provided for num xcds lookup.")
+195 -90
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import ast
import json
import re
@@ -66,10 +65,9 @@ pmc_kernel_top_table_id = 1
# },
# {
# "case": { "$eq": [ $normUnit, "per Sec"]} ,
# "then": {"$divide":[{"$subtract": ["&End_Timestamp", "&Start_Timestamp" ]}, 1000000000]}
# }
# ],
# "default": 1
# "then": {"$divide":[{"$subtract": ["&End_Timestamp",
# "&Start_Timestamp" ]},
# 1000000000]}
# }
# }
supported_denom = {
@@ -84,16 +82,19 @@ build_in_vars = {
"GRBM_GUI_ACTIVE_PER_XCD": "(GRBM_GUI_ACTIVE / $num_xcd)",
"GRBM_COUNT_PER_XCD": "(GRBM_COUNT / $num_xcd)",
"GRBM_SPI_BUSY_PER_XCD": "(GRBM_SPI_BUSY / $num_xcd)",
"numActiveCUs": "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD)), \
0) / $max_waves_per_cu) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $max_waves_per_cu), 8)), $cu_per_gpu))",
"kernelBusyCycles": "ROUND(AVG((((End_Timestamp - Start_Timestamp) / 1000) * $max_sclk)), 0)",
"numActiveCUs": "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / \
$GRBM_GUI_ACTIVE_PER_XCD)), 0) / $max_waves_per_cu) * 8) + \
MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / \
$GRBM_GUI_ACTIVE_PER_XCD)), 0), $max_waves_per_cu), 8)), $cu_per_gpu))",
"kernelBusyCycles": "ROUND(AVG((((End_Timestamp - Start_Timestamp) / \
1000) * $max_sclk)), 0)",
"hbmBandwidth": "($max_mclk / 1000 * 32 * $num_hbm_channels)",
}
supported_call = {
# If the below has single arg, like(expr), it is a aggr, in which turn to a pd function.
# If it has args like list [], in which turn to a python function.
# If the below has a single arg, like(expr), it is an aggr,
# in which case it turns into a pandas function.
# If it has args like a list [], it turns into a Python function.
"MIN": "to_min",
"MAX": "to_max",
# simple aggr
@@ -243,11 +244,18 @@ class CodeTransformer(ast.NodeTransformer):
def visit_IfExp(self, node):
self.generic_visit(node)
# print("visit_IfExp", type(node.test), type(node.body), type(node.orelse), dir(node))
# print(
# "visit_IfExp",
# type(node.test),
# type(node.body),
# type(node.orelse),
# dir(node),
# )
if isinstance(node.body, ast.Num):
raise Exception(
"Don't support body of IF with number only! Has to be expr with df['column']."
"Don't support body of IF with number only! Has to be expr with "
"df['column']."
)
new_node = ast.Expr(
@@ -289,19 +297,58 @@ class CodeTransformer(ast.NodeTransformer):
def build_eval_string(equation, coll_level, config):
"""
Convert user defined equation string to eval executable string
Convert user defined equation string to eval executable string.
For example,
input: AVG(100 * SQ_ACTIVE_INST_SCA / ( GRBM_GUI_ACTIVE * $numCU ))
output: to_avg(100 * raw_pmc_df["pmc_perf"]["SQ_ACTIVE_INST_SCA"] / \
(raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] * numCU))
input: AVG(((TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31) if (TCC_EA_RDREQ_31 != 0) else (0)))
output: to_avg((raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]).where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] != 0, 0))
We can not handle the below for now,
input: AVG((0 if (TCC_EA_RDREQ_31 == 0) else (TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31)))
But potential workaound is,
output: to_avg(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"].where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] == 0, raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]))
input:
AVG(100 * SQ_ACTIVE_INST_SCA / ( GRBM_GUI_ACTIVE * $numCU ))
output:
to_avg(
100 * raw_pmc_df["pmc_perf"]["SQ_ACTIVE_INST_SCA"] /
(
raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] *
numCU
)
)
input:
AVG(
(
TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31
)
if (TCC_EA_RDREQ_31 != 0)
else (0)
)
output:
to_avg(
(
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] /
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]
).where(
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] != 0,
0
)
)
We can not handle the below for now:
input:
AVG(
(
0
if (TCC_EA_RDREQ_31 == 0)
else (
TCC_EA_RDREQ_LEVEL_31 /
TCC_EA_RDREQ_31
)
)
)
But potential workaround is:
output:
to_avg(
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"].where(
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] == 0,
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] /
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]
)
)
"""
if coll_level is None:
raise Exception("Error: coll_level can not be None.")
@@ -333,7 +380,7 @@ def build_eval_string(equation, coll_level, config):
# apply coll_level
if config.get("format_rocprof_output") == "rocpd":
# Replace SQ_ACCUM_PREV_HIRES with coll_level_ACCUM then ignore coll_level df
s = re.sub(f"SQ_ACCUM_PREV_HIRES", f"{coll_level}_ACCUM", s)
s = re.sub("SQ_ACCUM_PREV_HIRES", f"{coll_level}_ACCUM", s)
s = re.sub(
r"raw_pmc_df", "raw_pmc_df.get('" + schema.pmc_perf_file_prefix + "')", s
)
@@ -420,21 +467,24 @@ def gen_counter_list(formula):
.replace("$denom", "SQ_WAVES")
.replace(
"$numActiveCUs",
"TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD})), \
0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $maxWavesPerCU), 8)), $numCU))",
"TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / "
"$GRBM_GUI_ACTIVE_PER_XCD})), 0) / $maxWavesPerCU) * 8) + "
"MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / "
"$GRBM_GUI_ACTIVE_PER_XCD)), 0), $maxWavesPerCU), 8)), $numCU))",
)
.replace("$", "")
)
for node in ast.walk(tree):
if isinstance(node, ast.Name):
val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
val = (
str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
)
if val.isupper() and val not in function_filter:
counters.append(val)
visited = True
if val in built_in_counter:
visited = True
except:
except Exception:
pass
return visited, counters
@@ -645,10 +695,7 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
or (data_source_idx == "0") # no filter
or (data_source_idx in filter_metrics)
):
if (
"columnwise" in data_config
and data_config["columnwise"] == True
):
if "columnwise" in data_config and data_config["columnwise"]:
df = pd.DataFrame(
[data_config["source"]], columns=["from_csv_columnwise"]
)
@@ -726,79 +773,93 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
ammolite__se_per_gpu = int(sys_info.se_per_gpu)
if np.isnan(ammolite__se_per_gpu) or ammolite__se_per_gpu == 0:
console_warning(
"se_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"se_per_gpu is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__pipes_per_gpu = int(sys_info.pipes_per_gpu)
if np.isnan(ammolite__pipes_per_gpu) or ammolite__pipes_per_gpu == 0:
console_warning(
"pipes_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"pipes_per_gpu is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__cu_per_gpu = int(sys_info.cu_per_gpu)
if np.isnan(ammolite__cu_per_gpu) or ammolite__cu_per_gpu == 0:
console_warning(
"cu_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"cu_per_gpu is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__simd_per_cu = int(sys_info.simd_per_cu) # not used
if np.isnan(ammolite__simd_per_cu) or ammolite__simd_per_cu == 0:
console_warning(
"simd_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"simd_per_cu is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__sqc_per_gpu = int(sys_info.sqc_per_gpu)
if np.isnan(ammolite__sqc_per_gpu) or ammolite__sqc_per_gpu == 0:
console_warning(
"sqc_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"sqc_per_gpu is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__lds_banks_per_cu = int(sys_info.lds_banks_per_cu)
if np.isnan(ammolite__lds_banks_per_cu) or ammolite__lds_banks_per_cu == 0:
console_warning(
"lds_banks_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"lds_banks_per_cu is not available in sysinfo.csv, please provide the "
"correct value using --specs-correction"
)
ammolite__cur_sclk = float(sys_info.cur_sclk) # not used
if np.isnan(ammolite__cur_sclk) or ammolite__cur_sclk == 0:
console_warning(
"cur_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"cur_sclk is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__cur_mclk = float(sys_info.cur_mclk) # not used
if np.isnan(ammolite__cur_mclk) or ammolite__cur_mclk == 0:
console_warning(
"cur_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"cur_mclk is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__max_mclk = float(sys_info.max_mclk)
if np.isnan(ammolite__max_mclk) or ammolite__max_mclk == 0:
console_warning(
"max_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"max_mclk is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__max_sclk = float(sys_info.max_sclk)
if np.isnan(ammolite__max_sclk) or ammolite__max_sclk == 0:
console_warning(
"max_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"max_sclk is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__max_waves_per_cu = int(sys_info.max_waves_per_cu)
if np.isnan(ammolite__max_waves_per_cu) or ammolite__max_waves_per_cu == 0:
console_warning(
"max_waver_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"max_waver_per_cu is not available in sysinfo.csv, please provide the "
"correct value using --specs-correction"
)
ammolite__num_hbm_channels = float(sys_info.num_hbm_channels)
if np.isnan(ammolite__num_hbm_channels) or ammolite__num_hbm_channels == 0:
console_warning(
"num_hbm_channels is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"num_hbm_channels is not available in sysinfo.csv, please provide the "
"correct value using --specs-correction"
)
ammolite__total_l2_chan = calc_builtin_var("$total_l2_chan", sys_info)
if np.isnan(ammolite__total_l2_chan) or ammolite__total_l2_chan == 0:
console_warning(
"total_l2_chan is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"total_l2_chan is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__num_xcd = int(sys_info.num_xcd)
if np.isnan(ammolite__num_xcd) or ammolite__num_xcd == 0:
console_warning(
"num_xcd is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"num_xcd is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
ammolite__wave_size = int(sys_info.wave_size)
if np.isnan(ammolite__wave_size) or ammolite__wave_size == 0:
console_warning(
"wave_size is not available in sysinfo.csv, please provide the correct value using --specs-correction"
"wave_size is not available in sysinfo.csv, please provide the correct "
"value using --specs-correction"
)
# TODO: fix all $normUnit in Unit column or title
# build and eval all derived build-in global variables
@@ -817,9 +878,9 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
except AttributeError as ae:
if ae == "'NoneType' object has no attribute 'get'":
ammolite__build_in[key] = None
ammolite__GRBM_GUI_ACTIVE_PER_XCD = ammolite__build_in["GRBM_GUI_ACTIVE_PER_XCD"]
ammolite__GRBM_COUNT_PER_XCD = ammolite__build_in["GRBM_COUNT_PER_XCD"]
ammolite__GRBM_SPI_BUSY_PER_XCD = ammolite__build_in["GRBM_SPI_BUSY_PER_XCD"]
ammolite__GRBM_GUI_ACTIVE_PER_XCD = ammolite__build_in["GRBM_GUI_ACTIVE_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
ammolite__GRBM_COUNT_PER_XCD = ammolite__build_in["GRBM_COUNT_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
ammolite__GRBM_SPI_BUSY_PER_XCD = ammolite__build_in["GRBM_SPI_BUSY_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
for key, value in build_in_vars.items():
# next pass, we evaluate the builtins that depend on the per-XCD values
@@ -834,12 +895,16 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
except AttributeError as ae:
if ae == "'NoneType' object has no attribute 'get'":
ammolite__build_in[key] = None
ammolite__numActiveCUs = ammolite__build_in["numActiveCUs"]
ammolite__kernelBusyCycles = ammolite__build_in["kernelBusyCycles"]
ammolite__hbmBandwidth = ammolite__build_in["hbmBandwidth"]
ammolite__numActiveCUs = ammolite__build_in["numActiveCUs"] # noqa: F841 - Ruff: var utilized during runtime
ammolite__kernelBusyCycles = ammolite__build_in["kernelBusyCycles"] # noqa: F841 - Ruff: var utilized during runtime
ammolite__hbmBandwidth = ammolite__build_in["hbmBandwidth"] # noqa: F841 - Ruff: var utilized during runtime
# Hmmm... apply + lambda should just work
# df['Value'] = df['Value'].apply(lambda s: eval(compile(str(s), '<string>', 'eval')))
# df['Value'] = df['Value'].apply(
# lambda s: eval(
# compile(str(s), '<string>', 'eval')
# )
# )
for id, df in dfs.items():
if dfs_type[id] == "metric_table":
for idx, row in df.iterrows():
@@ -851,7 +916,9 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
print("~" * 40 + "\nExpression:")
print(expr, "=", row[expr])
print("Inputs:")
matched_vars = re.findall(r"ammolite__\w+", row[expr])
matched_vars = re.findall(
r"ammolite__\w+", row[expr]
)
if matched_vars:
for v in matched_vars:
print(
@@ -868,7 +935,7 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
m = re.match(
r"raw_pmc_df\['(\w+)'\]\['(\w+)'\]", c
)
t = raw_pmc_df[m.group(1)][
t = raw_pmc_df[m.group(1)][ # noqa: F841
m.group(2)
].to_list()
print(c)
@@ -890,19 +957,22 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
print("~" * 40)
except TypeError:
console_warning(
"Skipping entry. Encountered a missing counter\n{} has been assigned to None\n{}".format(
expr, np.nan
"Skipping entry. Encountered a missing "
"counter\n{} has been assigned to None\n{}"
.format(
expr,
np.nan,
)
)
except AttributeError as ae:
if (
str(ae)
== "'NoneType' object has no attribute 'get'"
== "'NoneType' object has no attribute "
"'get'"
):
console_warning(
"Skipping entry. Encountered a missing csv\n{}".format(
np.nan
)
"Skipping entry. Encountered a missing "
"csv\n{}".format(np.nan)
)
else:
console_error("analysis", str(ae))
@@ -967,14 +1037,16 @@ def apply_filters(workload, dir, is_gui, debug):
# We pick up kernel names from kernel ids first.
# Then filter valid entries with kernel names.
if workload.filter_kernel_ids:
if all(type(kid) == int for kid in workload.filter_kernel_ids):
if all(isinstance(kid, int) for kid in workload.filter_kernel_ids):
# Verify valid kernel filter
kernels_df = pd.read_csv(str(Path(dir).joinpath("pmc_kernel_top.csv")))
for kernel_id in workload.filter_kernel_ids:
if kernel_id >= len(kernels_df["Kernel_Name"]):
console_error(
"{} is an invalid kernel id. Please enter an id between 0-{}".format(
kernel_id, len(kernels_df["Kernel_Name"]) - 1
"{} is an invalid kernel id. Please enter an id between 0-{}"
.format(
kernel_id,
len(kernels_df["Kernel_Name"]) - 1,
)
)
kernels = []
@@ -992,7 +1064,7 @@ def apply_filters(workload, dir, is_gui, debug):
ret_df = ret_df.loc[
ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].isin(kernels)
]
elif all(type(kid) == str for kid in workload.filter_kernel_ids):
elif all(isinstance(kid, str) for kid in workload.filter_kernel_ids):
df_cleaned = ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].apply(
lambda x: x.strip() if isinstance(x, str) else x
)
@@ -1028,7 +1100,8 @@ def apply_filters(workload, dir, is_gui, debug):
def find_key_recursively(data, search_key):
"""
Recursively search for the search_key in the given data (which can be a dict or list).
Recursively search for the search_key in the given data
(which can be a dict or list).
If the key is found, returns the value as a DataFrame.
"""
if isinstance(data, dict):
@@ -1050,7 +1123,6 @@ def find_key_recursively(data, search_key):
def search_key_in_json(file_path, search_key):
# FIXME:
# Load the entire JSON into memory.
# Should not use for large file.
@@ -1081,14 +1153,18 @@ def search_pc_sampling_record(records):
"inst_index": None,
"stall_reason": {
"NONE": 0,
"NO_INSTRUCTION_AVAILABLE": 0, # No instruction available in the instruction cache.
# No instruction available in the instruction cache.
"NO_INSTRUCTION_AVAILABLE": 0,
"ALU_DEPENDENCY": 0, # ALU dependency not resolved.
"WAITCNT": 0,
"INTERNAL_INSTRUCTION": 0, # Wave executes an internal instruction.
"BARRIER_WAIT": 0,
"ARBITER_NOT_WIN": 0, # The instruction did not win the arbiter.
"ARBITER_WIN_EX_STALL": 0, # Arbiter issued an instruction, but the execution pipe pushed it back from execution.
"OTHER_WAIT": 0, # Other types of wait (e.g., wait for XNACK acknowledgment).
"ARBITER_WIN_EX_STALL": 0,
# Arbiter issued an instruction, but the execution pipe
# pushed it back from execution.
"OTHER_WAIT": 0,
# Other types of wait (e.g., wait for XNACK acknowledgment).
"SLEEP_WAIT": 0,
"LAST": 0,
},
@@ -1116,14 +1192,19 @@ def search_pc_sampling_record(records):
and inst_index is not None
):
grouped_data[code_object_id][code_object_offset]["count"] += 1
# NB: the write here could be duplicated. If there is perf issue, We might want to opt it.
# NB: the write here could be duplicated. If there is a perf issue,
# we might want to optimize it.
grouped_data[code_object_id][code_object_offset]["inst_index"] = inst_index
if len(snapshot):
if issued:
grouped_data[code_object_id][code_object_offset]["count_issued"] += 1
grouped_data[code_object_id][code_object_offset][
"count_issued"
] += 1
else:
grouped_data[code_object_id][code_object_offset]["count_stalled"] += 1
grouped_data[code_object_id][code_object_offset][
"count_stalled"
] += 1
grouped_data[code_object_id][code_object_offset]["stall_reason"][
snapshot.get("stall_reason")[rocp_inst_not_issued_prefix_len:]
] += 1
@@ -1138,7 +1219,8 @@ def search_pc_sampling_record(records):
# print(grouped_data)
# Convert to sorted list of tuples (code_object_id, inst_index, code_object_offset, count)
# Convert to sorted list of tuples:
# (code_object_id, inst_index, code_object_offset, count)
sorted_counts = sorted(
[
(
@@ -1148,7 +1230,8 @@ def search_pc_sampling_record(records):
info["count"],
info["count_issued"],
info["count_stalled"],
# For info["stall_reason"], remove the zero entries, sorting the remaining items by their values in descending order
# For info["stall_reason"], remove the zero entries,
# sorting the remaining items by their values in descending order
sorted(
((k, v) for k, v in info["stall_reason"].items() if v > 0),
key=lambda item: item[1],
@@ -1173,7 +1256,8 @@ def load_pc_sampling_data_per_kernel(
) -> pd.DataFrame:
"""
Load PC sampling raw data from json file with given method and kernel name,
count pc sampling and sort it in the order of compiled asm and associate with kernel source code if available,
count pc sampling and sort it in the order of compiled asm and associate with
kernel source code if available,
then return df.
:param method: "host_trap" or "stochastic".
@@ -1258,11 +1342,25 @@ def load_pc_sampling_data_per_kernel(
(df["code_object_id"] == kernel_info["code_object_id"])
& (df["offset"] > kernel_info["entry_byte_offset"])
& (df["offset"] < kernel_info["potential_end_offset"])
][["inst_index", "offset", "count", "count_issued", "count_stalled", "stall_reason"]]
][
[
"inst_index",
"offset",
"count",
"count_issued",
"count_stalled",
"stall_reason",
]
]
df["offset"] = df["offset"].apply(lambda x: hex(x))
# df["stall_reason"] = df["stall_reason"].apply(lambda x: ', '.join(f"{k}: {v}" for k, v in x))
# df["stall_reason"] = df["stall_reason"].apply(
# lambda x: ', '.join(
# f"{k}: {v}"
# for k, v in x
# )
# )
pc_sample_instructions = search_key_in_json(file_name, "pc_sample_instructions")
# print(pc_sample_instructions)
@@ -1334,7 +1432,9 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
# - The default file name is subject to changes from rocprofv3
# - Prioritize stochastic
# - Alternatively, we could check pc_sampling_method in json
csv_file_path = Path.joinpath(Path(dir), file_prefix + "_pc_sampling_stochastic.csv")
csv_file_path = Path.joinpath(
Path(dir), file_prefix + "_pc_sampling_stochastic.csv"
)
if csv_file_path.exists():
pc_sampling_method = "stochastic"
else:
@@ -1352,7 +1452,6 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
# No kernel filter, return grouped and sorted csv directly
if not workload.filter_kernel_ids:
df = pd.read_csv(csv_file_path)
# Group by 'Instruction_Comment' and count occurrences
grouped_counts = (
@@ -1379,7 +1478,8 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
elif len(workload.filter_kernel_ids) > 1:
console_error(
"PC sampling supports single kernel only! Please specify -k with single kernel."
"PC sampling supports single kernel only! Please specify -k with "
"single kernel."
)
return pd.DataFrame()
@@ -1409,7 +1509,8 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
@demarcate
def load_kernel_top(workload, dir, args):
# NB:
# - Do pmc_kernel_top.csv loading before eval_metric because we need the kernel names.
# - Do pmc_kernel_top.csv loading before eval_metric because we need the
# kernel names.
# - There might be a better way/timing to load raw_csv_table.
# FIXME:
@@ -1427,9 +1528,11 @@ def load_kernel_top(workload, dir, args):
tmp[id] = pd.read_csv(file)
else:
console_warning(
f"Couldn't load {file.name}. This may result in missing analysis data."
f"Couldn't load {file.name}. "
"This may result in missing analysis data."
)
# NB: Special case for sysinfo. Probably room for improvement in this whole function design
# NB: Special case for sysinfo. Probably room for improvement in this whole
# function design
elif "from_csv_columnwise" in df.columns and id == 101:
tmp[id] = workload.sys_info.transpose()
# All transposed columns should be marked with a general header
@@ -1447,7 +1550,8 @@ def load_kernel_top(workload, dir, args):
tmp[id].columns = ["Info"]
else:
console_warning(
f"Couldn't load {file.name}. This may result in missing analysis data."
f"Couldn't load {file.name}. "
"This may result in missing analysis data."
)
elif "from_pc_sampling" in df.columns:
tmp[id] = load_pc_sampling_data(
@@ -1513,7 +1617,8 @@ def correct_sys_info(mspec, specs_correction: dict):
if not hasattr(mspec, str(k)):
console_error(
"analyze",
f"Invalid specs correction '{k}'. Please use --specs option to peak valid specs",
f"Invalid specs correction '{k}'. Please use --specs option "
f"to peak valid specs",
)
setattr(mspec, str(k), v)
return mspec.get_class_members()
+16 -15
Просмотреть файл
@@ -4,7 +4,8 @@ from contextlib import closing
from utils.logger import console_error
# From schema definition in source/share/rocprofiler-sdk-rocpd/data_views.sql in rocprofiler-sdk repository
# From schema definition in source/share/rocprofiler-sdk-rocpd/data_views.sql
# in rocprofiler-sdk repository
COUNTERS_COLLECTION_QUERY = """
SELECT
agent_id as GPU_ID,
@@ -39,9 +40,9 @@ def convert_db_to_csv(
with closing(conn.execute(COUNTERS_COLLECTION_QUERY)) as cursor:
with open(csv_file_path, "w", newline="") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(
[description[0] for description in cursor.description]
)
writer.writerow([
description[0] for description in cursor.description
])
for row in cursor:
writer.writerow(row)
except (sqlite3.DatabaseError, IOError) as e:
@@ -50,22 +51,21 @@ def convert_db_to_csv(
def process_rocpd_csv(df):
"""
Merge counters across unique dispatches from the input dataframe and return processed dataframe.
Merge counters across unique dispatches from the
input dataframe and return processed dataframe.
"""
# Only import pandas if needed
import pandas as pd
data = list()
# Group by unique kernel and merge into a single row
for _, group_df in df.groupby(
[
"Dispatch_ID",
"Kernel_Name",
"Grid_Size",
"Workgroup_Size",
"LDS_Per_Workgroup",
]
):
for _, group_df in df.groupby([
"Dispatch_ID",
"Kernel_Name",
"Grid_Size",
"Workgroup_Size",
"LDS_Per_Workgroup",
]):
row = {
"GPU_ID": group_df["GPU_ID"].iloc[0],
"Grid_Size": group_df["Grid_Size"].iloc[0],
@@ -80,7 +80,8 @@ def process_rocpd_csv(df):
}
# Each counter will become its own column
row.update(dict(zip(group_df["Counter_Name"], group_df["Counter_Value"])))
# Replace end timestamp with median of durations of group, start timestamp is set to 0
# Replace end timestamp with median of durations of group,
# start timestamp is set to 0
row["End_Timestamp"] = (
group_df["End_Timestamp"] - group_df["Start_Timestamp"]
).median()
+34 -20
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import csv
from dataclasses import dataclass
from pathlib import Path
@@ -155,7 +154,8 @@ def get_color(catagory):
# Plot BW at each cache level
# -------------------------------------------------------------------------------------
def calc_ceilings(roofline_parameters, dtype, benchmark_data):
"""Given benchmarking data, calculate ceilings (or peak performance) for empirical roofline"""
"""Given benchmarking data, calculate ceilings
(or peak performance) for empirical roofline"""
# TODO: This is where filtering by memory level will need to occur for standalone
graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []}
@@ -186,7 +186,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
if dtype in PEAK_OPS_DATATYPES:
x2 = peakOps / peakBw
y2 = peakOps
y2 = peakOps # noqa: F841
# Plot MFMA lines (NOTE: Assuming MI200 soc)
x1_mfma = peakOps / peakBw
@@ -220,9 +220,9 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
graphPoints[cacheHierarchy[i].lower()].append([y1, peakY])
graphPoints[cacheHierarchy[i].lower()].append(peakBw)
# -------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------
# Plot computing roof
# -------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------
if dtype in PEAK_OPS_DATATYPES:
# Plot FMA roof
x0 = XMAX
@@ -255,7 +255,8 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
# -------------------------------------------------------------------------------------
# Calculate relevant metrics for ai calculation
def calc_ai(mspec, sort_type, ret_df):
"""Given counter data, calculate arithmetic intensity for each kernel in the application."""
"""Given counter data, calculate arithmetic intensity
for each kernel in the application."""
df = ret_df["pmc_perf"]
# Sort by top kernels or top dispatches?
df = df.sort_values(by=["Kernel_Name"])
@@ -442,7 +443,10 @@ def calc_ai(mspec, sort_type, ret_df):
* 64
)
+ (
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
(
df["TCC_EA0_WRREQ_sum"][idx]
- df["TCC_EA0_WRREQ_64B_sum"][idx]
)
* 32
)
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
@@ -459,7 +463,7 @@ def calc_ai(mspec, sort_type, ret_df):
calls += 1
if sort_type == "kernels" and (at_end == True or (kernelName != next_kernelName)):
if sort_type == "kernels" and (at_end or (kernelName != next_kernelName)):
myList.append(
AI_Data(
kernelName,
@@ -534,9 +538,8 @@ def calc_ai(mspec, sort_type, ret_df):
while i < TOP_N and i != len(myList):
if myList[i].total_flops == 0:
console_debug(
"No flops counted for {}, arithmetic intensities will not display on plots.".format(
myList[i].KernelName
)
"No flops counted for {}, arithmetic intensities will not "
"display on plots.".format(myList[i].KernelName)
)
kernelNames.append(myList[i].KernelName)
@@ -545,28 +548,40 @@ def calc_ai(mspec, sort_type, ret_df):
if myList[i].L1cache_data
else intensities["ai_l1"].append(0)
)
# print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null")
# print(
# "cur_ai_L1",
# myList[i].total_flops / myList[i].L1cache_data
# ) if myList[i].L1cache_data else print("null")
# print()
(
intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data)
if myList[i].L2cache_data
else intensities["ai_l2"].append(0)
)
# print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null")
# print(
# "cur_ai_L2",
# myList[i].total_flops / myList[i].L2cache_data
# ) if myList[i].L2cache_data else print("null")
# print()
(
intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data)
if myList[i].hbm_data
else intensities["ai_hbm"].append(0)
)
# print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null")
# print(
# "cur_ai_hbm",
# myList[i].total_flops / myList[i].hbm_data
# ) if myList[i].hbm_data else print("null")
# print()
(
curr_perf.append(myList[i].total_flops / myList[i].avgDuration)
if myList[i].avgDuration
else curr_perf.append(0)
)
# print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null")
# print(
# "cur_perf",
# myList[i].total_flops / myList[i].avgDuration
# ) if myList[i].avgDuration else print("null")
i += 1
@@ -575,7 +590,7 @@ def calc_ai(mspec, sort_type, ret_df):
for i in intensities:
values = intensities[i]
color = get_color(i)
color = get_color(i) # noqa: F841
x = []
y = []
for entryIndx in range(0, len(values)):
@@ -607,9 +622,8 @@ def constuct_roof(roofline_parameters, dtype):
# -----------------------------------------------------
# Initialize roofline data dictionary from roofline.csv
# -----------------------------------------------------
benchmark_data = (
{}
) # TODO: consider changing this to an ordered dict for consistency over py versions
# TODO: consider changing this to an ordered dict for consistency over py versions
benchmark_data = {}
headers = []
try:
with open(benchmark_results, "r") as csvfile:
@@ -627,7 +641,7 @@ def constuct_roof(roofline_parameters, dtype):
rowCount += 1
csvfile.close()
except:
except Exception:
graphPoints = {
"hbm": [None, None, None],
"l2": [None, None, None],
+1 -2
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
#
# Define all common data storage classes,
# predefined dict and global functions.
@@ -31,7 +30,7 @@
from collections import OrderedDict
from dataclasses import dataclass, field
from typing import Dict, Generator, List, Mapping
from typing import Dict, List
import pandas as pd
+135 -58
Просмотреть файл
@@ -22,16 +22,13 @@
# THE SOFTWARE.
##############################################################################
"""Get host/gpu specs."""
import importlib
import os
import re
import socket
import subprocess
import sys
from dataclasses import dataclass, field, fields
from datetime import datetime
from math import ceil
@@ -103,17 +100,19 @@ def kw_only(cls):
def generate_machine_specs(args, sysinfo: dict = None):
if not sysinfo is None:
if sysinfo is not None:
try:
sysinfo_ver = str(sysinfo["version"])
except KeyError:
console_error(
"Detected mismatch in sysinfo versioning. You need to reprofile to update data."
"Detected mismatch in sysinfo versioning. You need to reprofile "
"to update data."
)
version = get_version(config.rocprof_compute_home)["version"]
if sysinfo_ver != version[: version.find(".")]:
console_warning(
"Detected mismatch in sysinfo versioning. You need to reprofile to update data."
"Detected mismatch in sysinfo versioning. You need to reprofile "
"to update data."
)
return MachineSpecs(**sysinfo)
# read timestamp info
@@ -127,7 +126,8 @@ def generate_machine_specs(args, sysinfo: dict = None):
# set specs version
vData = get_version(config.rocprof_compute_home)
version = vData["version"]
# NB: Just taking major as specs version. May want to make this more specific in the future
# NB: Just taking major as specs version.
# May want to make this more specific in the future
specs_version = version[
: version.find(".")
] # version will always follow 'major.minor.patch' format
@@ -173,8 +173,8 @@ def generate_machine_specs(args, sysinfo: dict = None):
# Apply default compute partition if above fails
if compute_partition is None:
console_warning(f"Can not detect compute/accelerator partition from amd-smi.")
console_warning(f"Applying default compute partition: SPX")
console_warning("Can not detect compute/accelerator partition from amd-smi.")
console_warning("Applying default compute partition: SPX")
compute_partition = "SPX"
memory_partition = search(memory_partition_pattern, amd_smi_output)
@@ -218,14 +218,16 @@ def generate_machine_specs(args, sysinfo: dict = None):
# Load above SoC specs via module import
try:
soc_module = importlib.import_module("rocprof_compute_soc.soc_" + specs.gpu_arch)
soc_module = importlib.import_module(
"rocprof_compute_soc.soc_" + specs.gpu_arch
)
except ModuleNotFoundError as e:
console_error(
"Arch %s marked as supported, but couldn't find class implementation %s."
% (specs.gpu_arch, e)
)
soc_class = getattr(soc_module, specs.gpu_arch + "_soc")
soc_obj = soc_class(args, specs)
soc_obj = soc_class(args, specs) # noqa: F841
# Update arch specific specs
specs.gpu_model = mi_gpu_specs.get_gpu_model(specs.gpu_arch, specs.gpu_chip_id)
specs.num_xcd = mi_gpu_specs.get_num_xcds(
@@ -332,7 +334,10 @@ class MachineSpecs:
amd_gpu_kernel_version: str = field(
default=None,
metadata={
"doc": "[RESERVED] The version of the AMDGPU driver installed on the machine. Unimplemented.",
"doc": (
"[RESERVED] The version of the AMDGPU driver installed on the machine. "
"Unimplemented."
),
"name": "AMD GPU Kernel Version",
},
)
@@ -347,7 +352,10 @@ class MachineSpecs:
gpu_memory: str = field(
default=None,
metadata={
"doc": "[RESERVED] The total amount of memory available to accelerators/GPUs in the system. Unimplemented.",
"doc": (
"[RESERVED] The total amount of memory available to accelerators/GPUs "
"in the system. Unimplemented."
),
"unit": "KB",
"name": "GPU Memory",
},
@@ -369,14 +377,20 @@ class MachineSpecs:
compute_partition: str = field(
default=None,
metadata={
"doc": "The compute partitioning mode active on the accelerators/GPUs in the system (MI300 only).",
"doc": (
"The compute partitioning mode active on the accelerators/GPUs in the "
"system (MI300 only)."
),
"name": "Compute Partition",
},
)
memory_partition: str = field(
default=None,
metadata={
"doc": "The memory partitioning mode active on the accelerators/GPUs in the system (MI300 only).",
"doc": (
"The memory partitioning mode active on the accelerators/GPUs in the "
"system (MI300 only)."
),
"name": "Memory Partition",
},
)
@@ -417,7 +431,10 @@ class MachineSpecs:
gpu_l1: str = field(
default=None,
metadata={
"doc": "The size of the vL1D cache (per compute-unit) on the accelerators/GPUs.",
"doc": (
"The size of the vL1D cache (per compute-unit) on the "
"accelerators/GPUs."
),
"name": "GPU L1",
"unit": "KiB",
},
@@ -425,7 +442,10 @@ class MachineSpecs:
gpu_l2: str = field(
default=None,
metadata={
"doc": "The size of the vL1D cache (per compute-unit) on the accelerators/GPUs.",
"doc": (
"The size of the vL1D cache (per compute-unit) on the "
"accelerators/GPUs."
),
"name": "GPU L2",
"unit": "KiB",
},
@@ -433,52 +453,72 @@ class MachineSpecs:
cu_per_gpu: str = field(
default=None,
metadata={
"doc": "The total number of compute units per accelerator/GPU in the system. On systems with configurable\n"
"partitioning, (e.g., MI300) this is the total number of compute units in a partition.",
"doc": (
"The total number of compute units per accelerator/GPU in the system. "
"On systems with configurable partitioning, (e.g., MI300) this is "
"the total number of compute units in a partition."
),
"name": "CU per GPU",
},
)
simd_per_cu: str = field(
default=None,
metadata={
"doc": "The number of SIMD processors in a compute unit for the accelerators/GPUs in the system.",
"doc": (
"The number of SIMD processors in a compute unit for the "
"accelerators/GPUs in the system."
),
"name": "SIMD per CU",
},
)
se_per_gpu: str = field(
default=None,
metadata={
"doc": "The number of shader engines on the accelerators/GPUs in the system. On systems with configurable\n"
"partitioning, (e.g., MI300) this is the total number of shader engines in a partition.",
"doc": (
"The number of shader engines on the accelerators/GPUs in the system. "
"On systems with configurable partitioning, (e.g., MI300) this is "
"the total number of shader engines in a partition."
),
"name": "SE per GPU",
},
)
wave_size: str = field(
default=None,
metadata={
"doc": "The number work-items in a wavefront on the accelerators/GPUs in the system.",
"doc": (
"The number work-items in a wavefront on the accelerators/GPUs in "
"the system."
),
"name": "Wave Size",
},
)
workgroup_max_size: str = field(
default=None,
metadata={
"doc": "The maximum number of work-items in a workgroup on the accelerators/GPUs in the system.",
"doc": (
"The maximum number of work-items in a workgroup on the "
"accelerators/GPUs in the system."
),
"name": "Workgroup Max Size",
},
)
max_waves_per_cu: str = field(
default=None,
metadata={
"doc": "The maximum number of wavefronts that can be resident on a compute unit on the\n"
"accelerators/GPUs in the system",
"doc": (
"The maximum number of wavefronts that can be resident on a "
"compute unit on the accelerators/GPUs in the system"
),
"name": "Max Waves per CU",
},
)
max_sclk: str = field(
default=None,
metadata={
"doc": "The maximum engine (compute-unit) clock rate of the accelerators/GPUs in the system.",
"doc": (
"The maximum engine (compute-unit) clock rate of the "
"accelerators/GPUs in the system."
),
"name": "Max SCLK",
"unit": "MHz",
},
@@ -486,7 +526,9 @@ class MachineSpecs:
max_mclk: str = field(
default=None,
metadata={
"doc": "The maximum memory clock rate of the accelerators/GPUs in the system.",
"doc": (
"The maximum memory clock rate of the accelerators/GPUs in the system."
),
"name": "Max MCLK",
"unit": "MHz",
},
@@ -494,7 +536,10 @@ class MachineSpecs:
cur_sclk: str = field(
default=None,
metadata={
"doc": "[RESERVED] The current engine (compute unit) clock rate of the accelerators/GPUs in the system. Unused.",
"doc": (
"[RESERVED] The current engine (compute unit) clock rate of the "
"accelerators/GPUs in the system. Unused."
),
"name": "Cur SCLK",
"unit": "MHz",
},
@@ -502,54 +547,75 @@ class MachineSpecs:
cur_mclk: str = field(
default=None,
metadata={
"doc": "[RESERVED] The current memory clock rate of the accelerators/GPUs in the system. Unused.",
"doc": (
"[RESERVED] The current memory clock rate of the accelerators/GPUs "
"in the system. Unused."
),
"name": "Cur MCLK",
"unit": "MHz",
},
)
_l2_banks: str = None # NB: This only used in flatten_tcc_info_across_hbm_stacks()
_l2_banks: str = None # NB: Only used in flatten_tcc_info_across_hbm_stacks()
total_l2_chan: str = field(
default=None,
metadata={
"doc": "The maximum number of L2 cache channels on the accelerators/GPUs in the system. On systems with\n"
"configurable partitioning, (e.g., MI300) this is the total number of L2 cache channels in a partition.",
"doc": (
"The maximum number of L2 cache channels on the accelerators/GPUs "
"in the system. On systems with configurable partitioning, "
"(e.g., MI300) this is the total number of L2 cache channels "
"in a partition."
),
"name": "Total L2 Channels",
},
)
lds_banks_per_cu: str = field(
default=None,
metadata={
"doc": "The number of banks in the LDS for a compute unit on the accelerators/GPUs in the system.",
"doc": (
"The number of banks in the LDS for a compute unit on the "
"accelerators/GPUs in the system."
),
"name": "LDS Banks per CU",
},
)
sqc_per_gpu: str = field(
default=None,
metadata={
"doc": "The number of L1I/sL1D caches on the accelerators/GPUs in the system. On systems with\n"
"configurable partitioning, (e.g., MI300) this is the total number of L1I/sL1D caches in a partition.",
"doc": (
"The number of L1I/sL1D caches on the accelerators/GPUs in the "
"system. On systems with configurable partitioning, (e.g., MI300) "
"this is the total number of L1I/sL1D caches in a partition."
),
"name": "SQC per GPU",
},
)
pipes_per_gpu: str = field(
default=None,
metadata={
"doc": "The number of scheduler-pipes on the accelerators/GPUs in the system.",
"doc": (
"The number of scheduler-pipes on the accelerators/GPUs in the system."
),
"name": "Pipes per GPU",
},
)
num_xcd: str = field(
default=None,
metadata={
"doc": "The total number of accelerator complex dies in a compute partition on the accelerators/GPUs in the\n"
"system. For accelerators without partitioning (i.e., pre-MI300), this is considered to be one.",
"doc": (
"The total number of accelerator complex dies in a compute partition "
"on the accelerators/GPUs in the system. For accelerators without "
"partitioning (i.e., pre-MI300), this is considered to be one."
),
"name": "Num XCDs",
"unit": "XCDs",
},
)
num_hbm_channels: str = field(
default=None,
metadata={"doc": "Number of HBM channels", "name": "HBM channels"},
metadata={
"doc": "Number of HBM channels",
"name": "HBM channels",
},
)
def get_hbm_channels(self):
@@ -567,16 +633,16 @@ class MachineSpecs:
all_populated = True
data = {}
# dataclass uses an OrderedDict for member variables, ensuring order consistency
for field in fields(self):
name = field.name
for class_field in fields(self):
name = class_field.name
if not name.startswith("_"):
value = getattr(self, name)
if value is None:
# check if we've marked it optional
if (
field.metadata
and "optional" in field.metadata
and field.metadata["optional"]
class_field.metadata
and "optional" in class_field.metadata
and class_field.metadata["optional"]
):
pass
else:
@@ -592,27 +658,35 @@ class MachineSpecs:
return pd.DataFrame(data, index=[0])
def __repr__(self):
topstr = "Machine Specifications: describing the state of the machine that ROCm Compute Profiler data was collected on.\n"
topstr = (
"Machine Specifications: describing the state of the machine that "
"ROCm Compute Profiler data was collected on.\n"
)
data = []
for field in fields(self):
name = field.name
for class_field in fields(self):
name = class_field.name
if not name.startswith("_"):
_data = {}
value = getattr(self, name)
if field.metadata:
if class_field.metadata:
# check out of table before any re-naming for pretty-printing
if "intable" in field.metadata and not field.metadata["intable"]:
if (
"intable" in class_field.metadata
and not class_field.metadata["intable"]
):
if name == "version":
topstr += f"Output version: {value}\n"
else:
console_error(f"Unknown out of table printing field: {name}")
console_error(
f"Unknown out of table printing field: {name}"
)
continue
if "name" in field.metadata:
name = field.metadata["name"]
if "unit" in field.metadata:
_data["Unit"] = field.metadata["unit"]
if "doc" in field.metadata:
_data["Description"] = field.metadata["doc"]
if "name" in class_field.metadata:
name = class_field.metadata["name"]
if "unit" in class_field.metadata:
_data["Unit"] = class_field.metadata["unit"]
if "doc" in class_field.metadata:
_data["Description"] = class_field.metadata["doc"]
_data["Spec"] = name
_data["Value"] = value
data.append(_data)
@@ -660,7 +734,10 @@ def run(cmd, exit_on_error=False):
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except FileNotFoundError as e:
console_error(
f"Unable to parse specs. Can't find ROCm asset: {e.filename}\nTry passing a path to an existing workload results in 'analyze' mode."
(
f"Unable to parse specs. Can't find ROCm asset: {e.filename}\n"
"Try passing a path to an existing workload results in 'analyze' mode."
)
)
if exit_on_error:
+53 -35
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import copy
import textwrap
from pathlib import Path
@@ -98,8 +97,10 @@ def convert_time_columns(df, time_unit):
numeric_values = pd.to_numeric(
df_copy.loc[mask, col], errors="coerce"
)
df_copy.loc[mask, col] = numeric_values / config.TIME_UNITS[time_unit]
except:
df_copy.loc[mask, col] = (
numeric_values / config.TIME_UNITS[time_unit]
)
except Exception:
pass
# Update the Unit column
@@ -147,11 +148,11 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
for data_source in panel["data source"]:
for type, table_config in data_source.items():
# If block filtering was used during analysis, then dont use profiling config
# If block filtering was used in profiling config, only show those panels
# If block filtering not used in profiling config, show all panels
# Skip this table if table id or panel id is not present in block filters
# However, always show panel id <= 100
# If block filtering was used during analysis, then don't use profiling
# config. If block filtering was used in profiling config, only show
# those panels. If block filtering not used in profiling config, show
# all panels. Skip this table if table id or panel id is not present
# in block filters. However, always show panel id <= 100.
if (
not args.filter_metrics
and filter_panel_ids
@@ -165,13 +166,16 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
+ str(table_config["id"] % 100)
)
console_log(
f"Not showing table not selected during profiling: {table_id_str} {table_config['title']}"
f"Not showing table not selected during profiling: "
f"{table_id_str} "
f"{table_config['title']}"
)
continue
# Show roofline
# Check if we have filter_metrics for analyze stage:
# no filter_metrics = show all, filter_metrics containing "4" = user requesting roofline chart
# no filter_metrics = show all,
# filter_metrics containing "4" = user requesting roofline chart
if panel_id == 400 and (
not args.filter_metrics or "4" in args.filter_metrics
):
@@ -179,7 +183,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
continue
# Metrics baseline comparison mode
# We cannot guarantee that all runs have the same metrics. Only show common metrics.
# We cannot guarantee that all runs have the same metrics.
# Only show common metrics.
if (
type == "metric_table"
and "Metric" in table_config["header"].values()
@@ -191,7 +196,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
if not common_metrics:
common_metrics = set(data.dfs[table_config["id"]]["Metric"])
else:
common_metrics &= set(data.dfs[table_config["id"]]["Metric"])
common_metrics &= set(
data.dfs[table_config["id"]]["Metric"]
)
# Apply common metrics across all runs
# Reindex all runs based on first run
initial_index = None
@@ -217,7 +224,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
for header in list(base_df.keys()):
# For raw csv table, columns cannot be filtered
# If columns are filtered, then skip the headers not in filtered columns
# If columns are filtered, then skip the headers not in
# filtered columns
if (
type == "raw_csv_table"
or not args.cols
@@ -234,7 +242,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
)
and header == "Kernel_Name"
):
# NB: the width of kernel name might depend on the header of the table.
# NB: the width of kernel name might depend
# on the header of the table.
if table_config["source"] == "pmc_kernel_top.csv":
adjusted_name = base_df["Kernel_Name"].apply(
lambda x: string_multiple_lines(x, 40, 3)
@@ -255,10 +264,13 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
cur_df = data.dfs[table_config["id"]]
if args.time_unit and has_time_data(base_df):
cur_df = convert_time_columns(cur_df, args.time_unit)
cur_df = convert_time_columns(
cur_df, args.time_unit
)
if (type == "raw_csv_table") or (
type == "metric_table" and (not header in hidden_cols)
type == "metric_table"
and (not header in hidden_cols)
):
if run != base_run:
# calc percentage over the baseline
@@ -304,9 +316,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
+ "%)"
)
df = pd.concat([df, t_df], axis=1)
# DEBUG: When in a CI setting and flag is set,
# then verify metrics meet threshold requirement
# then verify metrics meet threshold
# requirement
if (
header in ["Value", "Count", "Avg"]
and t_df_pretty.abs()
@@ -319,14 +331,15 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
t_df_pretty.abs() > args.report_diff
]
console_warning(
"Dataframe diff exceeds %s threshold requirement\nSee metric %s"
"Dataframe diff exceeds %s "
"threshold requirement\n"
"See metric %s"
% (
str(args.report_diff) + "%",
violation_idx.to_numpy(),
)
)
console_warning(df)
else:
cur_df_copy = copy.deepcopy(cur_df)
cur_df_copy[header] = [
@@ -337,7 +350,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
)
for x in base_df[header]
]
df = pd.concat([df, cur_df_copy[header]], axis=1)
df = pd.concat(
[df, cur_df_copy[header]], axis=1
)
if not df.empty:
# subtitle for each table in a panel if existing
@@ -348,22 +363,23 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
)
# Check if any column in df is empty
is_empty_columns_exist = any(
[
df.columns[col_idx]
for col_idx in range(len(df.columns))
if df.replace("", None).iloc[:, col_idx].isnull().all()
]
)
is_empty_columns_exist = any([
df.columns[col_idx]
for col_idx in range(len(df.columns))
if df.replace("", None).iloc[:, col_idx].isnull().all()
])
# Do not print the table if any column is empty
if is_empty_columns_exist:
if "title" in table_config:
console_log(
f"Not showing table with empty column(s): {table_id_str} {table_config['title']}"
f"Not showing table with empty column(s): "
f"{table_id_str} "
f"{table_config['title']}"
)
else:
console_log(
f"Not showing table with empty column(s): {table_id_str}"
f"Not showing table with empty column(s): "
f"{table_id_str}"
)
if (
"title" in table_config
@@ -383,7 +399,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
p.joinpath(table_id_str.replace(" ", "_") + ".csv"),
index=False,
)
# Only show top N kernels (as specified in --max-kernel-num) in "Top Stats" section
# Only show top N kernels (as specified in --max-kernel-num)
# in "Top Stats" section
if type == "raw_csv_table" and (
table_config["source"] == "pmc_kernel_top.csv"
or table_config["source"] == "pmc_dispatch_info.csv"
@@ -398,17 +415,17 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
transpose = (
type != "raw_csv_table"
and "columnwise" in table_config
and table_config["columnwise"] == True
and table_config["columnwise"]
)
if not is_empty_columns_exist:
# enable mem_chart only with single run
if (
"cli_style" in table_config
and table_config["cli_style"] == "mem_chart"
and len(runs) == 1
):
# NB: to avoid broken test with arbitrary number with "--cols" option
# NB: to avoid broken test with
# arbitrary number with "--cols" option
if "Metric" in df.columns and "Value" in df.columns:
ss += mem_chart.plot_mem_chart(
"",
@@ -442,7 +459,8 @@ def show_roof_plot(roof_plot):
print(roof_plot)
else:
console_error(
"Cannot create roofline plot for CLI with incomplete/missing roofline profiling data.",
"Cannot create roofline plot for CLI with incomplete/missing "
"roofline profiling data.",
exit=False,
)
+91 -59
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import glob
import io
import json
@@ -74,7 +73,8 @@ def add_counter_extra_config_input_yaml(
"""
Add a new counter to the rocprofiler-sdk dictionary.
Initialize missing parts if data is empty or incomplete.
Enforces that 'architectures' and 'properties' are lists for correct YAML list serialization.
Enforces that 'architectures' and 'properties' are lists
for correct YAML list serialization.
Overwrites the counter if it already exists.
Args:
@@ -189,7 +189,7 @@ def get_version(rocprof_compute_home) -> dict:
found = True
versionDir = dir
break
except:
except Exception:
pass
if not found:
console_error("Cannot find VERSION file at {}".format(searchDirs))
@@ -204,7 +204,7 @@ def get_version(rocprof_compute_home) -> dict:
MODE = "dev"
else:
raise Exception(output)
except:
except Exception:
try:
shaFile = path(versionDir).joinpath("VERSION.sha").absolute().resolve()
with open(shaFile, "r") as file:
@@ -263,7 +263,10 @@ def detect_rocprof(args):
rocprof_path = shutil.which(rocprof_cmd)
if not rocprof_path:
console_error(
"Please verify installation or set ROCPROF environment variable with full path."
(
"Please verify installation or set ROCPROF environment variable "
"with full path."
)
)
else:
# Resolve any sym links in file path
@@ -271,7 +274,8 @@ def detect_rocprof(args):
console_debug("ROC Profiler: " + str(rocprof_path))
console_debug("rocprof_cmd is {}".format(str(rocprof_cmd)))
return rocprof_cmd # TODO: Do we still need to return this? It's not being used in the function call
# TODO: Do we still need to return this? It's not being used in the function call
return rocprof_cmd
def store_app_cmd(args):
@@ -364,14 +368,12 @@ def get_agent_dict(data):
# Returns a dictionary that maps agent ID to GPU ID
# starting at 0.
def get_gpuid_dict(data):
agents = data["rocprofiler-sdk-tool"][0]["agents"]
agent_list = []
# Get agent ID and node_id for GPU agents only
for agent in agents:
if agent["type"] == 2:
agent_id = agent["id"]["handle"]
node_id = agent["node_id"]
@@ -419,12 +421,13 @@ def v3_json_get_dispatches(data):
def v3_json_to_csv(json_file_path, csv_file_path):
f = open(json_file_path, "rt")
data = json.load(f)
dispatch_records = v3_json_get_dispatches(data)
dispatches = data["rocprofiler-sdk-tool"][0]["callback_records"]["counter_collection"]
dispatches = data["rocprofiler-sdk-tool"][0]["callback_records"][
"counter_collection"
]
kernel_symbols = data["rocprofiler-sdk-tool"][0]["kernel_symbols"]
agents = get_agent_dict(data)
pid = data["rocprofiler-sdk-tool"][0]["metadata"]["pid"]
@@ -434,33 +437,30 @@ def v3_json_to_csv(json_file_path, csv_file_path):
counter_info = v3_json_get_counters(data)
# CSV headers. If there are no dispatches we still end up with a valid CSV file.
csv_data = dict.fromkeys(
[
"Dispatch_ID",
"GPU_ID",
"Queue_ID",
"PID",
"TID",
"Grid_Size",
"Workgroup_Size",
"LDS_Per_Workgroup",
"Scratch_Per_Workitem",
"Arch_VGPR",
"Accum_VGPR",
"SGPR",
"Wave_Size",
"Kernel_Name",
"Start_Timestamp",
"End_Timestamp",
"Correlation_ID",
]
)
csv_data = dict.fromkeys([
"Dispatch_ID",
"GPU_ID",
"Queue_ID",
"PID",
"TID",
"Grid_Size",
"Workgroup_Size",
"LDS_Per_Workgroup",
"Scratch_Per_Workitem",
"Arch_VGPR",
"Accum_VGPR",
"SGPR",
"Wave_Size",
"Kernel_Name",
"Start_Timestamp",
"End_Timestamp",
"Correlation_ID",
])
for key in csv_data:
csv_data[key] = []
for d in dispatches:
dispatch_info = d["dispatch_data"]["dispatch_info"]
agent_id = dispatch_info["agent_id"]["handle"]
@@ -542,7 +542,8 @@ def v3_json_to_csv(json_file_path, csv_file_path):
def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_file):
"""
Convert the counter file of csv output for a certain csv from rocprofv3 format to rocprofv2 format.
Convert the counter file of csv output for a certain csv from rocprofv3 format
to rocprofv2 format.
This function is not meant for other csv output files, such as the kernel trace file.
"""
pd_counter_collections = pd.read_csv(counter_file)
@@ -576,7 +577,8 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
values="Counter_Value",
).reset_index()
# NB: Agent_Id is int in older rocprofv3, now switched to string with prefix "Agent ". We need to make sure to handle both cases.
# NB: Agent_Id is int in older rocprofv3, now switched to string with prefix
# "Agent ". We need to make sure to handle both cases.
console_debug(
"The type of Agent ID from counter csv file is {}".format(
result["Agent_Id"].dtype
@@ -592,9 +594,10 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
)
except Exception as e:
console_error(
'Parsing rocprofv3 csv output: Error of getting "Agent_Id", the error message "{}"'.format(
e
)
(
'Parsing rocprofv3 csv output: Error of getting "Agent_Id", '
'the error message "{}"'
).format(e)
)
# Grab the Wave_Front_Size column from agent info
@@ -762,7 +765,10 @@ def run_prof(
# Set counter definitions
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
console_debug(
f"Adding env var for counter definitions: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
(
"Adding env var for counter definitions: "
f"ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
)
)
# set required env var for >= mi300
@@ -836,7 +842,10 @@ def run_prof(
return
else:
console_error(
"rocpd output format is only supported with rocprofiler-sdk or rocprofv3."
(
"rocpd output format is only supported with "
"rocprofiler-sdk or rocprofv3."
)
)
elif rocprof_cmd.endswith("v2"):
# rocprofv2 has separate csv files for each process
@@ -863,12 +872,14 @@ def run_prof(
)
if rocprof_cmd == "rocprofiler-sdk":
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
# TODO: as rocprofv3 --kokkos-trace feature improves,
# rocprof-compute should make updates accordingly
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
process_hip_trace_output(workload_dir, fbase)
else:
if "--kokkos-trace" in options:
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
# TODO: as rocprofv3 --kokkos-trace feature improves,
# rocprof-compute should make updates accordingly
process_kokkos_trace_output(workload_dir, fbase)
elif "--hip-trace" in options:
process_hip_trace_output(workload_dir, fbase)
@@ -880,7 +891,10 @@ def run_prof(
)
else:
console_warning(
f"Cannot write results for {fbase}.csv due to no counter csv files generated."
(
f"Cannot write results for {fbase}.csv due to no counter "
"csv files generated."
)
)
return
@@ -976,7 +990,9 @@ def pc_sampling_prof(
for key, value in options.items():
new_env[key] = value
console_debug("pc sampling rocprof sdk env vars: {}".format(new_env))
console_debug("pc sampling rocprof sdk user provided command: {}".format(appcmd))
console_debug(
"pc sampling rocprof sdk user provided command: {}".format(appcmd)
)
success, output = capture_subprocess_output(
appcmd, new_env=new_env, profileMode=True
)
@@ -1011,7 +1027,8 @@ def pc_sampling_prof(
def process_rocprofv3_output(rocprof_output, workload_dir, is_timestamps):
"""
rocprofv3 specific output processing.
takes care of json or csv formats, for csv format, additional processing is performed.
takes care of json or csv formats, for csv format,
additional processing is performed.
"""
results_files_csv = {}
@@ -1059,12 +1076,15 @@ def process_rocprofv3_output(rocprof_output, workload_dir, is_timestamps):
results_files_csv = glob.glob(workload_dir + "/out/pmc_1/*/*_converted.csv")
elif is_timestamps:
# when the input is timestamps, we know counter csv file is not generated and will instead parse kernel trace file
# when the input is timestamps, we know counter csv file
# is not generated and will instead parse kernel trace file
results_files_csv = glob.glob(
workload_dir + "/out/pmc_1/*/*_kernel_trace.csv"
)
else:
# when the input is not for timestamps, and counter csv file is not generated, we assume failed rocprof run and will completely bypass the file generation and merging for current pmc
# when the input is not for timestamps, and counter csv file
# is not generated, we assume failed rocprof run and will completely
# bypass the file generation and merging for current pmc
results_files_csv = []
else:
console_error("The output file of rocprofv3 can only support json or csv!!!")
@@ -1121,7 +1141,6 @@ def process_hip_trace_output(workload_dir, fbase):
def replace_timestamps(workload_dir):
if not path(workload_dir, "timestamps.csv").is_file():
return
@@ -1173,7 +1192,9 @@ def detect_roofline(mspec):
if "ROOFLINE_BIN" in os.environ.keys():
rooflineBinary = os.environ["ROOFLINE_BIN"]
if path(rooflineBinary).exists():
msg = "Detected user-supplied binary --> ROOFLINE_BIN = %s\n" % rooflineBinary
msg = (
"Detected user-supplied binary --> ROOFLINE_BIN = %s\n" % rooflineBinary
)
console_warning("roofline", msg)
# distro stays marked as override and path value is substituted in
target_binary["path"] = rooflineBinary
@@ -1196,7 +1217,7 @@ def detect_roofline(mspec):
# Must be a valid SLES machine
elif (
(type(sles_distro) == str and len(sles_distro) >= 3)
(isinstance(sles_distro, str) and len(sles_distro) >= 3)
and sles_distro[:2] == "15" # confirm string and len
and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6
):
@@ -1208,7 +1229,9 @@ def detect_roofline(mspec):
distro = "22.04"
else:
console_error("roofline", "Cannot find a valid binary for your operating system")
console_error(
"roofline", "Cannot find a valid binary for your operating system"
)
# distro gets assigned, to follow default roofline bin location and nomenclature
target_binary["distro"] = distro
@@ -1250,14 +1273,16 @@ def mibench(args, mspec):
# Distro is valid but cannot find the ROCm version
found = False
for path in binary_paths:
if pathlib.Path(path).exists():
for binary_path in binary_paths:
if pathlib.Path(binary_path).exists():
found = True
path_to_binary = path
path_to_binary = binary_path
break
if not found:
console_error("roofline", "Unable to locate expected binary (%s)." % binary_paths)
console_error(
"roofline", "Unable to locate expected binary (%s)." % binary_paths
)
my_args = [
path_to_binary,
@@ -1303,7 +1328,7 @@ def flatten_tcc_info_across_xcds(file, xcds, tcc_channel_per_xcd):
# filter the channel index only
p = re.compile(r"\[(\d+)\]")
# pick up the 1st element only
r = (
r = ( # noqa: E731
lambda match: "["
+ str(int(match.group(1)) + i * tcc_channel_per_xcd)
+ "]"
@@ -1434,7 +1459,10 @@ def reverse_multi_index_df_pmc(final_df):
def merge_counters_spatial_multiplex(df_multi_index):
"""
For spatial multiplexing, this merges counter values for the same kernel that runs on different devices. For time stamp, start time stamp will use median while for end time stamp, it will be equal to the summation between median start stamp and median delta time.
For spatial multiplexing, this merges counter values for the same kernel that
runs on different devices. For time stamp, start time stamp will use median
while for end time stamp, it will be equal to the summation between median
start stamp and median delta time.
"""
non_counter_column_index = [
"Dispatch_ID",
@@ -1467,7 +1495,8 @@ def merge_counters_spatial_multiplex(df_multi_index):
result_dfs = []
# TODO: will need optimize to avoid this convertion to single index format and do merge directly on multi-index dataframe
# TODO: will need to optimize to avoid this conversion to single index format
# and do merge directly on multi-index dataframe
dfs, coll_levels = reverse_multi_index_df_pmc(df_multi_index)
for df in dfs:
@@ -1490,7 +1519,9 @@ def merge_counters_spatial_multiplex(df_multi_index):
# Process non-counter columns
for col in [
col for col in non_counter_column_index if col not in expired_column_index
col
for col in non_counter_column_index
if col not in expired_column_index
]:
if col == "Start_Timestamp":
# For Start_Timestamp, take the median
@@ -1504,7 +1535,8 @@ def merge_counters_spatial_multiplex(df_multi_index):
# For other non-counter columns, take the first occurrence (0th row)
merged_row[col] = group.iloc[0][col]
# Process counter columns (assumed to be all columns not in non_counter_column_index)
# Process counter columns (assumed to be all columns not in
# non_counter_column_index)
counter_columns = [
col for col in group.columns if col not in non_counter_column_index
]
+9 -3
Просмотреть файл
@@ -23,14 +23,15 @@
##############################################################################
import subprocess
from importlib.machinery import SourceFileLoader
from unittest.mock import patch
import pytest
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
rocprof_compute = SourceFileLoader(
"rocprof-compute", "src/rocprof-compute"
).load_module()
def pytest_addoption(parser):
@@ -52,7 +53,12 @@ def pytest_addoption(parser):
@pytest.fixture
def binary_handler_profile_rocprof_compute(request):
def _handler(
config, workload_dir, options=[], check_success=True, roof=False, app_name="app_1"
config,
workload_dir,
options=[],
check_success=True,
roof=False,
app_name="app_1",
):
if request.config.getoption("--rocprofiler-sdk-library-path"):
options.extend(
+10 -7
Просмотреть файл
@@ -23,11 +23,9 @@
##############################################################################
import argparse
import glob
import os
import sys
if __name__ == "__main__":
my_parser = argparse.ArgumentParser(description="create test_analyze_workloads.py")
@@ -52,11 +50,16 @@ if __name__ == "__main__":
+ arch
+ "():"
+ "\n\twith pytest.raises(SystemExit) as e:"
+ "\n\t\twith patch('sys.argv',['rocprof-compute', 'analyze', '--path', '"
+ workload
+ "/"
+ arch
+ "']):\n\t\t\trocprof_compute.main()"
+ (
"\n\t\twith patch("
"'sys.argv',"
"["
"'rocprof-compute', "
"'analyze', "
"'--path', "
"'" + workload + "/" + arch + "']"
"):\n\t\t\trocprof_compute.main()"
)
+ "\n\tassert e.value.code == 0"
)
f.write(test)
+12 -13
Просмотреть файл
@@ -23,23 +23,18 @@
##############################################################################
import csv
import inspect
import os
import re
import shutil
import subprocess
import sys
from importlib.machinery import SourceFileLoader
from pathlib import Path
from unittest.mock import patch
import pandas as pd
import pytest
import test_utils
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
rocprof_compute = SourceFileLoader(
"rocprof-compute", "src/rocprof-compute"
).load_module()
config = {}
config["vseq"] = ["./tests/vsequential_access"]
@@ -49,7 +44,6 @@ config["COUNTER_LOGGING"] = False
config["METRIC_COMPARE"] = False
config["METRIC_LOGGING"] = False
SUPPORTED_ARCHS = {
"gfx940": {"mi300": ["MI300A_A0"]},
"gfx941": {"mi300": ["MI300X_A0"]},
@@ -153,7 +147,6 @@ def test_L1_cache_counters(
base = Path(test_utils.get_output_dir())
for app_name in app_names:
workload_dir = str(base / app_name)
# 1. profile the app
@@ -168,9 +161,15 @@ def test_L1_cache_counters(
assert return_code == 0
# 2. analyze the results
return_code = binary_handler_analyze_rocprof_compute(
["analyze", "--path", workload_dir, "-b", "16.3", "--save-dfs", workload_dir]
)
return_code = binary_handler_analyze_rocprof_compute([
"analyze",
"--path",
workload_dir,
"-b",
"16.3",
"--save-dfs",
workload_dir,
])
assert return_code == 0
# 3. save results in local
Разница между файлами не показана из-за своего большого размера Загрузить разницу
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+3 -3
Просмотреть файл
@@ -11,6 +11,6 @@ def test_modification_time():
hash_map = yaml.safe_load(f)
for file, hash in hash_map.items():
file_hash = hashlib.sha256(Path(file).read_bytes()).hexdigest()
assert (
file_hash == hash
), f"Hash mismatch for {file}: expected {hash}, got {file_hash}"
assert file_hash == hash, (
f"Hash mismatch for {file}: expected {hash}, got {file_hash}"
)
+12 -12
Просмотреть файл
@@ -23,12 +23,12 @@
##############################################################################
import logging
from unittest.mock import MagicMock, Mock, patch
import pandas as pd
import pytest
from db_connector import DatabaseConnector
logging.TRACE = logging.DEBUG - 5
logging.addLevelName(logging.TRACE, "TRACE")
@@ -40,8 +40,6 @@ def trace_logger(message, *args, **kwargs):
setattr(logging, "trace", trace_logger)
from db_connector import DatabaseConnector
"""
Tests for the DatabaseConnector class covering most of its methods: initialization,
CSV import, database removal, and error handling.
@@ -109,9 +107,10 @@ class TestDatabaseConnector:
mock_path.return_value.joinpath.return_value = "/fake/path/sysinfo.csv"
mock_path.return_value.is_file.return_value = True
mock_sysinfo = pd.DataFrame(
{"gpu_model": ["MI100 "], "workload_name": [" test_workload"]}
)
mock_sysinfo = pd.DataFrame({
"gpu_model": ["MI100 "],
"workload_name": [" test_workload"],
})
mock_read_csv.return_value = mock_sysinfo
connector = DatabaseConnector(mock_args_import)
@@ -236,9 +235,9 @@ class TestDatabaseConnector:
connector.db_remove()
mock_client.drop_database.assert_called_once_with(mock_db_to_remove)
mock_names_col.delete_many.assert_called_once_with(
{"name": "rocprofiler-compute_test_team_workload_mi100"}
)
mock_names_col.delete_many.assert_called_once_with({
"name": "rocprofiler-compute_test_team_workload_mi100"
})
def test_pre_processing_no_action_specified(self, mock_args_import):
"""Test pre_processing when neither upload nor remove is specified"""
@@ -398,9 +397,10 @@ class TestDatabaseConnectorIntegration:
)
mock_path.return_value.is_file.return_value = True
mock_sysinfo = pd.DataFrame(
{"gpu_model": ["MI100"], "workload_name": ["device_filter"]}
)
mock_sysinfo = pd.DataFrame({
"gpu_model": ["MI100"],
"workload_name": ["device_filter"],
})
mock_read_csv.return_value = mock_sysinfo
connector = DatabaseConnector(args)
+20 -25
Просмотреть файл
@@ -23,24 +23,20 @@
##############################################################################
import os
import re
import subprocess
import sys
import tempfile
from importlib.machinery import SourceFileLoader
from pathlib import Path
from unittest.mock import MagicMock, mock_open, patch
from unittest.mock import patch
import pandas as pd
import pytest
import yaml
from src.utils.specs import generate_machine_specs
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
rocprof_compute = SourceFileLoader(
"rocprof-compute", "src/rocprof-compute"
).load_module()
# NOTE: Only testing gfx942 for now.
GFX942_CHIP_IDS_TO_NUM_XCDS = {
@@ -70,7 +66,7 @@ def parse_table_dict(output: str) -> dict:
"""
Parse an ASCII table into a dict mapping Spec -> Value.
"""
lines = [l for l in output.splitlines() if l.startswith("")]
lines = [line for line in output.splitlines() if line.startswith("")]
# locate header row (the one containing 'Spec' and 'Value')
header_idx = next(
(i for i, ln in enumerate(lines) if "Spec" in ln and "Value" in ln), None
@@ -132,7 +128,6 @@ def get_num_xcds():
def get_gpu_arch():
rocminfo = str(
# decode with utf-8 to account for rocm-smi changes in latest rocm
subprocess.run(
@@ -182,18 +177,18 @@ def test_num_xcds_cli_output():
stderr=subprocess.PIPE,
text=True,
)
assert (
proc.returncode == 0
), f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}"
assert proc.returncode == 0, (
f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}"
)
# 3. strip ANSI, parse table
clean = strip_ansi(proc.stdout)
return_dict = parse_table_dict(clean)
# 4. check results are expected
assert (
"Compute Partition" in return_dict
), "Spec 'Compute Partition' not found in table"
assert "Compute Partition" in return_dict, (
"Spec 'Compute Partition' not found in table"
)
assert "Num XCDs" in return_dict, "Spec 'Num XCDs' not found in table"
compute_partition_actual = return_dict["Compute Partition"]
@@ -257,7 +252,7 @@ def test_get_gpu_series_uninitialized():
with patch.object(MIGPUSpecs, "_gpu_series_dict", {}):
with pytest.raises(SystemExit):
result = MIGPUSpecs.get_gpu_series("gfx942")
result = MIGPUSpecs.get_gpu_series("gfx942") # noqa: F841
@pytest.mark.misc
@@ -315,7 +310,7 @@ def test_get_num_xcds_no_compute_partition_data():
mock_dict = {"gfx942": None}
with patch.object(MIGPUSpecs, "_gpu_arch_to_compute_partition_dict", mock_dict):
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx942")
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx942") # noqa: F841
@pytest.mark.misc
@@ -333,7 +328,7 @@ def test_get_num_xcds_unknown_gpu_model():
"""Test get_num_xcds with unknown gpu model - covers lines 319-321"""
from src.utils.mi_gpu_spec import MIGPUSpecs
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx950", gpu_model="UNKNOWN_MODEL")
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx950", gpu_model="UNKNOWN_MODEL") # noqa: F841
@pytest.mark.misc
@@ -341,7 +336,7 @@ def test_get_num_xcds_no_compute_partition():
"""Test get_num_xcds with no compute partition - covers lines 325-327"""
from src.utils.mi_gpu_spec import MIGPUSpecs
result = MIGPUSpecs.get_num_xcds(
result = MIGPUSpecs.get_num_xcds( # noqa: F841
gpu_arch="gfx950", gpu_model="MI350", compute_partition=""
)
@@ -351,7 +346,7 @@ def test_get_num_xcds_unknown_compute_partition():
"""Test get_num_xcds with unknown compute partition - covers lines 329-332"""
from src.utils.mi_gpu_spec import MIGPUSpecs
result = MIGPUSpecs.get_num_xcds(
result = MIGPUSpecs.get_num_xcds( # noqa: F841
gpu_arch="gfx950", gpu_model="MI350", compute_partition="UNKNOWN"
)
@@ -363,7 +358,7 @@ def test_get_num_xcds_none_partition_value():
mock_dict = {"mi350": {"spx": None}}
with patch.object(MIGPUSpecs, "_num_xcds_dict", mock_dict):
result = MIGPUSpecs.get_num_xcds(
result = MIGPUSpecs.get_num_xcds( # noqa: F841
gpu_arch="gfx950", gpu_model="MI350", compute_partition="spx"
)
@@ -373,7 +368,7 @@ def test_get_num_xcds_no_gpu_model():
"""Test get_num_xcds with no gpu model - covers line 342"""
from src.utils.mi_gpu_spec import MIGPUSpecs
result = MIGPUSpecs.get_num_xcds(
result = MIGPUSpecs.get_num_xcds( # noqa: F841
gpu_arch="gfx950", gpu_model="", compute_partition="spx"
)
@@ -385,7 +380,7 @@ def test_get_chip_id_dict_empty():
with patch.object(MIGPUSpecs, "_chip_id_dict", {}):
with patch("src.utils.mi_gpu_spec.console_error") as mock_error:
result = MIGPUSpecs.get_chip_id_dict()
result = MIGPUSpecs.get_chip_id_dict() # noqa: F841
mock_error.assert_called_once()
@@ -396,7 +391,7 @@ def test_get_num_xcds_dict_empty():
with patch.object(MIGPUSpecs, "_num_xcds_dict", {}):
with patch("src.utils.mi_gpu_spec.console_error") as mock_error:
result = MIGPUSpecs.get_num_xcds_dict()
result = MIGPUSpecs.get_num_xcds_dict() # noqa: F841
mock_error.assert_called_once()
+3 -4
Просмотреть файл
@@ -23,13 +23,12 @@
##############################################################################
from importlib.machinery import SourceFileLoader
from unittest.mock import patch
import pytest
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
rocprof_compute = SourceFileLoader(
"rocprof-compute", "src/rocprof-compute"
).load_module()
##################################################
## Generated tests ##
+121 -117
Просмотреть файл
@@ -23,7 +23,6 @@
##############################################################################
import inspect
import os
import re
@@ -54,7 +53,6 @@ CHIP_IDS = {
"30112": "MI350",
}
# --
# Runtime config options
# --
@@ -74,101 +72,91 @@ DEFAULT_ABS_DIFF = 15
DEFAULT_REL_DIFF = 50
MAX_REOCCURING_COUNT = 28
ALL_CSVS_MI100 = sorted(
[
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"sysinfo.csv",
]
)
ALL_CSVS_MI100 = sorted([
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"sysinfo.csv",
])
ALL_CSVS_MI200 = sorted(
[
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"sysinfo.csv",
"timestamps.csv",
]
)
ALL_CSVS_MI300 = sorted(
[
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"sysinfo.csv",
"timestamps.csv",
]
)
ALL_CSVS_MI350 = sorted(
[
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"pmc_perf_7.csv",
"pmc_perf_8.csv",
"pmc_perf_9.csv",
"pmc_perf_10.csv",
"pmc_perf_11.csv",
"pmc_perf_12.csv",
"pmc_perf_13.csv",
"pmc_perf_14.csv",
"sysinfo.csv",
]
)
ALL_CSVS_MI200 = sorted([
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"sysinfo.csv",
"timestamps.csv",
])
ALL_CSVS_MI300 = sorted([
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"sysinfo.csv",
"timestamps.csv",
])
ALL_CSVS_MI350 = sorted([
"SQ_IFETCH_LEVEL.csv",
"SQ_INST_LEVEL_LDS.csv",
"SQ_INST_LEVEL_SMEM.csv",
"SQ_INST_LEVEL_VMEM.csv",
"SQ_LEVEL_WAVES.csv",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"pmc_perf_3.csv",
"pmc_perf_4.csv",
"pmc_perf_5.csv",
"pmc_perf_6.csv",
"pmc_perf_7.csv",
"pmc_perf_8.csv",
"pmc_perf_9.csv",
"pmc_perf_10.csv",
"pmc_perf_11.csv",
"pmc_perf_12.csv",
"pmc_perf_13.csv",
"pmc_perf_14.csv",
"sysinfo.csv",
])
ROOF_ONLY_FILES = sorted(
[
"empirRoof_gpu-0_FP32.pdf",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"roofline.csv",
"sysinfo.csv",
"timestamps.csv",
]
)
ROOF_ONLY_FILES = sorted([
"empirRoof_gpu-0_FP32.pdf",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
"pmc_perf_2.csv",
"roofline.csv",
"sysinfo.csv",
"timestamps.csv",
])
METRIC_THRESHOLDS = {
"2.1.12": {"absolute": 0, "relative": 8},
@@ -292,7 +280,9 @@ def counter_compare(test_name, errors_pd, baseline_df, run_df, threshold=5):
# if 0 show absolute difference
diff = round(baseline_data - run_data, 2)
if diff > threshold:
print(str(idx_1) + "[" + pmc_counter + "] diff is :" + str(diff))
print(
str(idx_1) + "[" + pmc_counter + "] diff is :" + str(diff)
)
differences["kernel_name"] = [kernel_name]
differences["test_name"] = [test_name]
differences["gpu-id"] = [gpu_id]
@@ -429,7 +419,13 @@ def baseline_compare_metric(test_name, workload_dir, args=[]):
metric_info = re.findall(
r"(^"
+ metric
+ r")(?: *)([()0-9A-Za-z- ]+ )(?: *)([0-9.-]*)(?: *)([0-9.-]*)(?: *)\(([-0-9.]*)%\)(?: *)([-0-9.e]*)",
+ (
r")(?: *)([()0-9A-Za-z- ]+ )"
r"(?: *)([0-9.-]*)"
r"(?: *)([0-9.-]*)"
r"(?: *)\(([-0-9.]*)%\)"
r"(?: *)([-0-9.e]*)"
),
captured_output,
flags=re.MULTILINE,
)
@@ -489,20 +485,20 @@ def baseline_compare_metric(test_name, workload_dir, args=[]):
# print("logging...")
# print(metric_info)
new_error = pd.DataFrame.from_dict(
{
"Index": [metric_idx],
"Metric": [metric_name],
"Percent Difference": [relative_diff],
"Absolute Difference": [absolute_diff],
"Baseline": [baseline_val],
"Current": [current_val],
"Test Name": [test_name],
}
)
new_error = pd.DataFrame.from_dict({
"Index": [metric_idx],
"Metric": [metric_name],
"Percent Difference": [relative_diff],
"Absolute Difference": [absolute_diff],
"Baseline": [baseline_val],
"Current": [current_val],
"Test Name": [test_name],
})
error_df = pd.concat([error_df, new_error])
counts = error_df.groupby(["Index"]).cumcount()
reoccurring_metrics = error_df.loc[counts > MAX_REOCCURING_COUNT]
reoccurring_metrics = error_df.loc[
counts > MAX_REOCCURING_COUNT
]
reoccurring_metrics["counts"] = counts[
counts > MAX_REOCCURING_COUNT
]
@@ -843,7 +839,7 @@ def test_roofline_empty_kernel_names_handling(binary_handler_profile_rocprof_com
]
workload_dir = test_utils.get_output_dir()
returncode = binary_handler_profile_rocprof_compute(
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=False, roof=True
)
@@ -860,10 +856,16 @@ def test_roofline_unsupported_datatype_error(binary_handler_profile_rocprof_comp
pytest.skip("Skipping roofline test for MI100")
return
options = ["--device", "0", "--roof-only", "--roofline-data-type", "UNSUPPORTED_TYPE"]
options = [
"--device",
"0",
"--roof-only",
"--roofline-data-type",
"UNSUPPORTED_TYPE",
]
workload_dir = test_utils.get_output_dir()
returncode = binary_handler_profile_rocprof_compute(
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=False, roof=True
)
@@ -914,7 +916,7 @@ def test_roof_cli_plot_generation(binary_handler_profile_rocprof_compute):
return
try:
import plotext as plt
import plotext as plt # noqa: F401
cli_available = True
except ImportError:
@@ -924,7 +926,7 @@ def test_roof_cli_plot_generation(binary_handler_profile_rocprof_compute):
options = ["--device", "0", "--roof-only"]
workload_dir = test_utils.get_output_dir()
returncode = binary_handler_profile_rocprof_compute(
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=False, roof=True
)
@@ -946,7 +948,7 @@ def test_roof_error_handling(binary_handler_profile_rocprof_compute):
if os.path.exists(pmc_perf_path):
os.remove(pmc_perf_path)
returncode = binary_handler_profile_rocprof_compute(
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=False, roof=True
)
@@ -1072,7 +1074,7 @@ def test_roofline_ceiling_data_validation(binary_handler_profile_rocprof_compute
options = ["--device", "0", "--roof-only", "--mem-level", "INVALID_LEVEL"]
workload_dir = test_utils.get_output_dir()
returncode = binary_handler_profile_rocprof_compute(
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
config, workload_dir, options, check_success=False, roof=True
)
@@ -1523,7 +1525,9 @@ def test_instmix_memchart_section(binary_handler_profile_rocprof_compute):
assert test_utils.check_file_pattern(
"- '10'", f"{workload_dir}/profiling_config.yaml"
)
assert test_utils.check_file_pattern("- '3'", f"{workload_dir}/profiling_config.yaml")
assert test_utils.check_file_pattern(
"- '3'", f"{workload_dir}/profiling_config.yaml"
)
assert test_utils.check_file_pattern(
"TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv"
)
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+13 -6
Просмотреть файл
@@ -1,10 +1,12 @@
# NOTES
#
# Read utils/unified_config.yaml and split it into per gfx architecture per panel config files
# WARNING: This script will overwrite existing files under per gfx architecture folders under src/rocprof_compute_soc/analysis_configs
# Read utils/unified_config.yaml and split it into per gfx architecture per panel
# config files. WARNING: This script will overwrite existing files under per gfx
# architecture folders under src/rocprof_compute_soc/analysis_configs.
#
# Read utils/unified_config.yaml and split it into metric tables per documentation section
# WARNING: This script will overwrite existing docs/data/metrics_description.yaml
# Read utils/unified_config.yaml and split it into metric tables per documentation
# section.
# WARNING: This script will overwrite existing docs/data/metrics_description.yaml.
import copy
import hashlib
@@ -21,6 +23,10 @@ SETS_TARGET_DIR = ROOT_DIR.joinpath(
"src", "rocprof_compute_soc", "profile_configs", "sets"
)
DOC_TARGET_DIR = ROOT_DIR.joinpath("docs", "data")
AUTOGEN_TEXT = (
"# AUTOGENERATED FILE. Only edit for testing purposes, not for development. "
"Generated from utils/unified_config.yaml. Generated by utils/split_config.py\n"
)
HASH_FILE = ROOT_DIR.joinpath("utils", "autogen_hash.yaml")
HASH_FILE_MAP = {}
GFX_VERSIONS = ["gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx950"]
@@ -75,7 +81,6 @@ def update_analysis_config():
data_source_config["metric_table"]["metric"] = data_source_config[
"metric_table"
]["metric"][gfx_version]
build_metric_id_mapping(
panel_id_int,
data_source_index,
@@ -209,7 +214,9 @@ def update_documentation():
for metric_name in sorted(list(metric_names)):
metrics_info[metric_name] = {
"rst": panel_config["metrics_description"][metric_name]["rst"],
"unit": panel_config["metrics_description"][metric_name]["unit"],
"unit": panel_config["metrics_description"][metric_name][
"unit"
],
}
panel_metric_map[data_source["metric_table"]["id"]] = metrics_info
-1
Просмотреть файл
@@ -20,7 +20,6 @@ tag = args.tag
print("Current repository version = %s" % repoVer)
print("--> tagname = %s" % tag)
if repoCheck == tag:
print("OK: exact match")
exit(0)