Format source code to PEP8 using Ruff (#36)
* added ruff docs * style: Run ruff and black before yapf pass * yapf -r -i (23 fixes) * fixed conf.py and ran ruff format . * fixed conf.py 2 * formatted argparser.py * formatted src/rocprof_compute_analyze * formatted src/rocprof_compute_profile * formatted soc_base.py * formatted rocprof_compute_tui * formatted gui_components * formatted src/utils * formatted tests/ * format extra files * cleanup * fix test_utils.py * fixed typos * Update pyproject.toml * Update README.md * Update test_utils.py --------- Signed-off-by: jamessiddeley-amd <James.Siddeley@amd.com> Co-authored-by: James Siddeley <James.Siddeley@amd.com> Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
d3f9ab25eb
Коммит
58d2a016ce
+19
-7
@@ -25,14 +25,26 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install black isort
|
||||
python -m pip install ruff
|
||||
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
|
||||
- name: Run black formatter
|
||||
uses: psf/black@stable
|
||||
with:
|
||||
use_pyproject: true
|
||||
- name: Run isort formatter
|
||||
uses: isort/isort-action@master
|
||||
- name: Run Ruff Linter and Import Sorter
|
||||
run: |
|
||||
ruff check . --fix --exit-zero
|
||||
- name: Run Ruff Formatter
|
||||
run: |
|
||||
ruff format .
|
||||
- name: Check for formatting/linting changes
|
||||
run: |
|
||||
git config --global user.name 'github-actions'
|
||||
git config --global user.email 'github-actions@github.com'
|
||||
git add -A .
|
||||
if ! git diff --cached --quiet; then
|
||||
echo "::error::Files were modified by ruff. Please run 'ruff check . --fix && ruff format .' locally and commit the changes."
|
||||
git diff --cached --patch # Show the diff in the logs
|
||||
exit 1
|
||||
else
|
||||
echo "Ruff found no issues or all issues were fixed and files are clean."
|
||||
fi
|
||||
|
||||
cmake:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
@@ -7,13 +7,12 @@ repos:
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
# Python import sorting
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 6.0.1
|
||||
# Python linting, import sorting, and formatting (Ruff)
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility,
|
||||
# for the latest ruff version supported by the hook.
|
||||
rev: v0.12.7
|
||||
hooks:
|
||||
- id: isort
|
||||
# Python formatting (Using this mirror lets us use mypyc-compiled black, which is about 2x faster)
|
||||
- repo: https://github.com/psf/black-pre-commit-mirror
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
- id: ruff-check
|
||||
args: [--fix, --exit-non-zero-on-fix]
|
||||
- id: ruff-format
|
||||
@@ -104,3 +104,42 @@ style reference is provided below for convenience:
|
||||
url = {https://doi.org/10.5281/zenodo.7314631}
|
||||
}
|
||||
```
|
||||
|
||||
### Contribution Guidelines
|
||||
|
||||
To ensure code quality and consistency, we use **Ruff**, a fast Python linter and formatter. Before submitting a pull request, please ensure your code is formatted and linted correctly.
|
||||
|
||||
-----
|
||||
|
||||
### Installing and Running Ruff
|
||||
|
||||
Ruff is available on PyPI and can be installed using `pip`:
|
||||
|
||||
```bash
|
||||
pip install ruff
|
||||
```
|
||||
|
||||
Once installed, you can run Ruff from the command line. To check for linting errors and formatting issues, navigate to the project root and run:
|
||||
|
||||
```bash
|
||||
ruff check .
|
||||
ruff format --check .
|
||||
```
|
||||
|
||||
To automatically fix most of the issues detected, you can use the `--fix` flag with the `check` command and run the `format` command without the `--check` flag:
|
||||
|
||||
```bash
|
||||
ruff check --fix .
|
||||
ruff format .
|
||||
```
|
||||
|
||||
-----
|
||||
|
||||
### Disabling Formatting for Specific Sections
|
||||
|
||||
There may be instances where you need to disable Ruff's formatting on a specific block of code. You can do this using special comments:
|
||||
|
||||
* **`# fmt: off`** and **`# fmt: on`**: These comments can be used to disable and re-enable formatting for a block of code.
|
||||
* **`# fmt: skip`**: This comment, placed at the end of a line, will prevent Ruff from formatting that specific statement.
|
||||
|
||||
You can also disable specific linting rules for a line by using `# noqa: <rule_code>`.
|
||||
|
||||
@@ -13,10 +13,14 @@ import sys
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use str(Path(<rel_path>).absolute().resolve()) to make it absolute, like shown here.
|
||||
#
|
||||
# documentation root, use str(Path(<rel_path>).absolute().resolve())
|
||||
# to make it absolute, like shown here.
|
||||
from pathlib import Path
|
||||
|
||||
from pygments.styles import get_all_styles
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
sys.path.insert(0, str(Path("..").absolute().resolve()))
|
||||
|
||||
repo_version = "unknown"
|
||||
@@ -68,7 +72,6 @@ source_suffix = {
|
||||
".md": "markdown",
|
||||
}
|
||||
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
|
||||
source_parsers = {".md": CommonMarkParser}
|
||||
|
||||
@@ -94,7 +97,6 @@ pygments_style = None
|
||||
latex_engine = "lualatex"
|
||||
latex_show_urls = "footnote"
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
@@ -113,7 +115,6 @@ html_theme = "sphinx_rtd_theme"
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ["_static"]
|
||||
|
||||
|
||||
# -- Options for HTMLHelp output ---------------------------------------------
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
@@ -136,8 +137,6 @@ html_theme_options = {
|
||||
"titles_only": False,
|
||||
}
|
||||
|
||||
from pygments.styles import get_all_styles
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
styles = list(get_all_styles())
|
||||
preferences = ("emacs", "pastie", "colorful")
|
||||
@@ -146,8 +145,6 @@ for pref in preferences:
|
||||
pygments_style = pref
|
||||
break
|
||||
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
|
||||
# app setup hook
|
||||
def setup(app):
|
||||
|
||||
@@ -13,10 +13,14 @@ import sys
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use str(Path(<rel_path>).absolute().resolve()) to make it absolute, like shown here.
|
||||
#
|
||||
# documentation root, use str(Path(<rel_path>).absolute().resolve())
|
||||
# to make it absolute, like shown here.
|
||||
from pathlib import Path
|
||||
|
||||
from pygments.styles import get_all_styles
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
sys.path.insert(0, str(Path("..").absolute().resolve()))
|
||||
|
||||
repo_version = "unknown"
|
||||
@@ -56,12 +60,10 @@ extensions = [
|
||||
|
||||
show_authors = True
|
||||
|
||||
|
||||
myst_heading_anchors = 4
|
||||
# enable replacement of (tm) & friends
|
||||
myst_enable_extensions = ["replacements", "dollarmath"]
|
||||
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
|
||||
@@ -84,8 +86,6 @@ source_suffix = {
|
||||
# sphinxmark_text_spacing = 800
|
||||
# sphinxmark_text_opacity = 30
|
||||
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
|
||||
source_parsers = {".md": CommonMarkParser}
|
||||
|
||||
# The master toctree document.
|
||||
@@ -110,7 +110,6 @@ pygments_style = None
|
||||
latex_engine = "lualatex"
|
||||
latex_show_urls = "footnote"
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
@@ -133,7 +132,6 @@ latex_elements = {
|
||||
"sphinxsetup": "verbatimwrapslines=true, verbatimforcewraps=true",
|
||||
}
|
||||
|
||||
|
||||
# -- Options for HTMLHelp output ---------------------------------------------
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
@@ -157,8 +155,6 @@ html_theme_options = {
|
||||
"titles_only": False,
|
||||
}
|
||||
|
||||
from pygments.styles import get_all_styles
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
styles = list(get_all_styles())
|
||||
preferences = ("emacs", "pastie", "colorful")
|
||||
@@ -167,8 +163,6 @@ for pref in preferences:
|
||||
pygments_style = pref
|
||||
break
|
||||
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
|
||||
# app setup hook
|
||||
def setup(app):
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
|
||||
@@ -4,36 +4,50 @@ requires-python = ">=3.8"
|
||||
|
||||
[project.optional-dependencies]
|
||||
developer = [
|
||||
"black>=22.6.0",
|
||||
"isort>=5.12.0",
|
||||
"ruff>=0.12.7",
|
||||
"pre-commit",
|
||||
]
|
||||
|
||||
[tool.black]
|
||||
line-length = 90
|
||||
include = '\.py$'
|
||||
exclude = '''
|
||||
(
|
||||
/(
|
||||
\.eggs
|
||||
| \.git
|
||||
| \.github
|
||||
| \.tox
|
||||
| \.venv
|
||||
| \.misc
|
||||
| \.vscode
|
||||
| \.pyc
|
||||
| dist
|
||||
| external
|
||||
| .pytest_cache
|
||||
| build
|
||||
| build-rocprof_compute
|
||||
)/
|
||||
)
|
||||
'''
|
||||
[tool.ruff]
|
||||
line-length = 88
|
||||
|
||||
# Ruff's default excludes cover: .bzr, .direnv, .eggs, .git, .git-rewrite, .hg, .ipynb_checkpoints,
|
||||
# .mypy_cache, .nox, .pants.d, .pyenv, .pytest_cache, .pytype, .ruff_cache, .svn,
|
||||
# .tox, .venv, .vscode, __pypackages__, _build, buck-out, build, dist, node_modules,
|
||||
# site-packages, venv
|
||||
|
||||
extend-exclude = [
|
||||
".github",
|
||||
".misc",
|
||||
"external",
|
||||
"build-rocprof_compute",
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
# Enable Pyflakes (F), pycodestyle (E, W for PEP8), and isort (I) rules.
|
||||
select = ["E", "W", "F", "I"]
|
||||
ignore = ["E713", "E711"]
|
||||
fixable = ["ALL"]
|
||||
unfixable = []
|
||||
|
||||
[tool.ruff.format]
|
||||
preview = true
|
||||
|
||||
# Like Black, use double quotes for strings.
|
||||
quote-style = "double"
|
||||
|
||||
# Like Black, indent with spaces, rather than tabs.
|
||||
indent-style = "space"
|
||||
|
||||
# Like Black, respect magic trailing commas.
|
||||
skip-magic-trailing-comma = false
|
||||
|
||||
# Like Black, automatically detect the appropriate line ending.
|
||||
line-ending = "auto"
|
||||
|
||||
docstring-code-format = true
|
||||
docstring-code-line-length = "dynamic"
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = [
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
@@ -87,7 +86,8 @@ def omniarg_parser(
|
||||
help="Profile the target application",
|
||||
usage="""
|
||||
|
||||
rocprof-compute profile --name <workload_name> [profile options] [roofline options] -- <profile_cmd>
|
||||
`rocprof-compute profile --name <workload_name>
|
||||
[profile options] [roofline options] -- <profile_cmd>`
|
||||
|
||||
---------------------------------------------------------------------------------
|
||||
Examples:
|
||||
@@ -118,7 +118,9 @@ Examples:
|
||||
dest="name",
|
||||
help="\t\t\tAssign a name to workload.",
|
||||
)
|
||||
profile_group.add_argument("--target", type=str, default=None, help=argparse.SUPPRESS)
|
||||
profile_group.add_argument(
|
||||
"--target", type=str, default=None, help=argparse.SUPPRESS
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"-p",
|
||||
"--path",
|
||||
@@ -127,8 +129,9 @@ Examples:
|
||||
dest="path",
|
||||
default=str(Path(os.getcwd()).joinpath("workloads")),
|
||||
required=False,
|
||||
help="\t\t\tSpecify path to save workload.\n\t\t\t(DEFAULT: {}/workloads/<name>)".format(
|
||||
os.getcwd()
|
||||
help=(
|
||||
"\t\t\tSpecify path to save workload.\n\t\t\t"
|
||||
"(DEFAULT: {}/workloads/<name>)".format(os.getcwd())
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
@@ -138,7 +141,9 @@ Examples:
|
||||
dest="subpath",
|
||||
default="gpu",
|
||||
required=False,
|
||||
help="\t\t\tSpecify the type of subpath to save workload: node_name, gpu_model.",
|
||||
help=(
|
||||
"\t\t\tSpecify the type of subpath to save workload: node_name, gpu_model."
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--hip-trace",
|
||||
@@ -146,7 +151,10 @@ Examples:
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="\t\t\tHIP trace, execturion trace for the entire application at the HIP level.",
|
||||
help=(
|
||||
"\t\t\tHIP trace, execturion trace for the entire application at the HIP "
|
||||
"level."
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--kokkos-trace",
|
||||
@@ -195,7 +203,11 @@ Examples:
|
||||
nargs="+",
|
||||
required=False,
|
||||
default=[],
|
||||
help="""\t\t\tSpecify metric id(s) from --list-metrics for filtering (e.g. 12, 12.1, 12.1.1).\n\t\t\tCan provide multiple space separated arguments.""",
|
||||
help=(
|
||||
"\t\t\tSpecify metric id(s) from --list-metrics for filtering "
|
||||
"(e.g. 12, 12.1, 12.1.1).\n"
|
||||
"\t\t\tCan provide multiple space separated arguments."
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--list-metrics",
|
||||
@@ -232,7 +244,11 @@ Examples:
|
||||
required=False,
|
||||
choices=["kernel", "grid"],
|
||||
default="grid",
|
||||
help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. By unique kernel name + grid size dispatches)",
|
||||
help=(
|
||||
"\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n"
|
||||
"\t\t\t kernel (i.e. By unique kernel name dispatches)\n"
|
||||
"\t\t\t grid (i.e. By unique kernel name + grid size dispatches)"
|
||||
),
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--no-roof",
|
||||
@@ -274,7 +290,10 @@ Examples:
|
||||
metavar="",
|
||||
dest="pc_sampling_method",
|
||||
default="stochastic",
|
||||
help="\t\t\tSet the method of pc sampling, stochastic or host_trap. Support stochastic only >= MI300",
|
||||
help=(
|
||||
"\t\t\tSet the method of pc sampling, stochastic or host_trap. "
|
||||
"Support stochastic only >= MI300"
|
||||
),
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
@@ -283,7 +302,12 @@ Examples:
|
||||
metavar="",
|
||||
dest="pc_sampling_interval",
|
||||
default=1048576,
|
||||
help="\t\t\tSet the interval of pc sampling.\n\t\t\t For stochastic sampling, the interval is in cycles.\n\t\t\t For host_trap sampling, the interval is in microsecond (DEFAULT: 1048576).",
|
||||
help=(
|
||||
"\t\t\tSet the interval of pc sampling.\n"
|
||||
"\t\t\t For stochastic sampling, the interval is in cycles.\n"
|
||||
"\t\t\t For host_trap sampling, the interval is in microsecond "
|
||||
"(DEFAULT: 1048576)."
|
||||
)
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
@@ -299,7 +323,10 @@ Examples:
|
||||
required=False,
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="\t\t\tRetain the large raw rocpd database in workload directory.\n\t\t\tThis option requires --format-rocprof-output rocpd.",
|
||||
help=(
|
||||
"\t\t\tRetain the large raw rocpd database in workload directory.\n"
|
||||
"\t\t\tThis option requires --format-rocprof-output rocpd."
|
||||
),
|
||||
)
|
||||
|
||||
## Roofline Command Line Options
|
||||
@@ -317,7 +344,11 @@ Examples:
|
||||
type=str,
|
||||
default="kernels",
|
||||
choices=["kernels", "dispatches"],
|
||||
help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches",
|
||||
help=(
|
||||
"\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n"
|
||||
"\t\t\t kernels\n"
|
||||
"\t\t\t dispatches"
|
||||
),
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"-m",
|
||||
@@ -328,7 +359,13 @@ Examples:
|
||||
nargs="+",
|
||||
type=str,
|
||||
default="ALL",
|
||||
help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS",
|
||||
help=(
|
||||
"\t\t\tFilter by memory level: (DEFAULT: ALL)\n"
|
||||
"\t\t\t HBM\n"
|
||||
"\t\t\t L2\n"
|
||||
"\t\t\t vL1D\n"
|
||||
"\t\t\t LDS"
|
||||
),
|
||||
)
|
||||
roofline_group.add_argument(
|
||||
"--device",
|
||||
@@ -350,19 +387,58 @@ Examples:
|
||||
"-R",
|
||||
"--roofline-data-type",
|
||||
required=False,
|
||||
choices=["FP4", "FP6", "FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
|
||||
choices=[
|
||||
"FP4",
|
||||
"FP6",
|
||||
"FP8",
|
||||
"FP16",
|
||||
"BF16",
|
||||
"FP32",
|
||||
"FP64",
|
||||
"I8",
|
||||
"I32",
|
||||
"I64",
|
||||
],
|
||||
metavar="",
|
||||
nargs="+",
|
||||
type=str,
|
||||
default=["FP32"],
|
||||
help="\t\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n\t\t\t FP4\n\t\t\t FP6\n\t\t\t FP8\n\t\t\t FP16\n\t\t\t BF16\n\t\t\t FP32\n\t\t\t FP64\n\t\t\t I8\n\t\t\t I32\n\t\t\t I64\n\t\t\t ",
|
||||
help=(
|
||||
"\t\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n"
|
||||
"\t\t\t FP4\n"
|
||||
"\t\t\t FP6\n"
|
||||
"\t\t\t FP8\n"
|
||||
"\t\t\t FP16\n"
|
||||
"\t\t\t BF16\n"
|
||||
"\t\t\t FP32\n"
|
||||
"\t\t\t FP64\n"
|
||||
"\t\t\t I8\n"
|
||||
"\t\t\t I32\n"
|
||||
"\t\t\t I64\n"
|
||||
"\t\t\t "
|
||||
),
|
||||
)
|
||||
|
||||
# roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)")
|
||||
# roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)")
|
||||
# roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)")
|
||||
# roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)")
|
||||
# roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)")
|
||||
# roofline_group.add_argument(
|
||||
# '-w', '--workgroups', required=False, default=-1, type=int,
|
||||
# help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)"
|
||||
# )
|
||||
# roofline_group.add_argument(
|
||||
# '--wsize', required=False, default=-1, type=int,
|
||||
# help="\t\t\tWorkgroup size (DEFAULT: 256)"
|
||||
# )
|
||||
# roofline_group.add_argument(
|
||||
# '--dataset', required=False, default=-1, type=int,
|
||||
# help="\t\t\tDataset size (DEFAULT: 536M)"
|
||||
# )
|
||||
# roofline_group.add_argument(
|
||||
# '-e', '--experiments', required=False, default=-1, type=int,
|
||||
# help="\t\t\tNumber of experiments (DEFAULT: 100)"
|
||||
# )
|
||||
# roofline_group.add_argument(
|
||||
# '--iter', required=False, default=-1, type=int,
|
||||
# help="\t\t\tNumber of iterations (DEFAULT: 10)"
|
||||
# )
|
||||
|
||||
## Database Command Line Options
|
||||
## ----------------------------
|
||||
@@ -374,9 +450,11 @@ Examples:
|
||||
|
||||
\n\n-------------------------------------------------------------------------------
|
||||
\nExamples:
|
||||
\n\trocprof-compute database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/
|
||||
\n\trocprof-compute database --remove -H pavii1 -u temp -w rocprofiler-compute_asw_sample_mi200
|
||||
\n-------------------------------------------------------------------------------\n
|
||||
\n\trocprof-compute database --import -H pavii1 -u temp -t asw -w "
|
||||
"workloads/vcopy/mi200/"
|
||||
"\n\trocprof-compute database --remove -H pavii1 -u temp -w "
|
||||
"rocprofiler-compute_asw_sample_mi200"
|
||||
"\n-------------------------------------------------------------------------------\n"
|
||||
""",
|
||||
prog="tool",
|
||||
allow_abbrev=False,
|
||||
@@ -445,13 +523,20 @@ Examples:
|
||||
required=True,
|
||||
metavar="",
|
||||
dest="workload",
|
||||
help="\t\t\t\tSpecify name of workload (to remove) or path to workload (to import)",
|
||||
help=(
|
||||
"\t\t\t\tSpecify name of workload (to remove) or path to workload "
|
||||
"(to import)"
|
||||
),
|
||||
)
|
||||
connection_group.add_argument(
|
||||
"--kernel-verbose",
|
||||
required=False,
|
||||
metavar="",
|
||||
help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)",
|
||||
help=(
|
||||
"\t\tSpecify Kernel Name verbose level 1-5. "
|
||||
"Lower the level, shorter the kernel name. "
|
||||
"(DEFAULT: 5) (DISABLE: 5)"
|
||||
),
|
||||
default=5,
|
||||
type=int,
|
||||
)
|
||||
@@ -558,23 +643,50 @@ Examples:
|
||||
type=int,
|
||||
nargs="?",
|
||||
const=8050,
|
||||
help="\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n\t\tOptionally, specify port to launch application (DEFAULT: 8050)",
|
||||
help=(
|
||||
"\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n"
|
||||
"\t\tOptionally, specify port to launch application (DEFAULT: 8050)"
|
||||
),
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help="\t\tActivate a Textual User Interface (TUI) to interact with rocprofiler-compute metrics.",
|
||||
help="\t\tActivate a Textual User Interface (TUI) to "
|
||||
"interact with rocprofiler-compute metrics.",
|
||||
)
|
||||
analyze_group.add_argument(
|
||||
"-R",
|
||||
"--roofline-data-type",
|
||||
required=False,
|
||||
choices=["FP4", "FP6", "FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"],
|
||||
choices=[
|
||||
"FP4",
|
||||
"FP6",
|
||||
"FP8",
|
||||
"FP16",
|
||||
"BF16",
|
||||
"FP32",
|
||||
"FP64",
|
||||
"I8",
|
||||
"I32",
|
||||
"I64",
|
||||
],
|
||||
metavar="",
|
||||
nargs="+",
|
||||
type=str,
|
||||
default=["FP32"],
|
||||
help="\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n\t\t\t FP4\n\t\t\t FP6\n\t\t\t FP8\n\t\t\t FP16\n\t\t\t BF16\n\t\t\t FP32\n\t\t\t FP64\n\t\t\t I8\n\t\t\t I32\n\t\t\t I64\n\t\t\t ",
|
||||
help=(
|
||||
"\t\tChoose datatypes to view roofline PDFs for: (DEFAULT: FP32)\n"
|
||||
"\t\t\t FP4\n"
|
||||
"\t\t\t FP6\n"
|
||||
"\t\t\t FP8\n"
|
||||
"\t\t\t FP16\n"
|
||||
"\t\t\t BF16\n"
|
||||
"\t\t\t FP32\n"
|
||||
"\t\t\t FP64\n"
|
||||
"\t\t\t I8\n"
|
||||
"\t\t\t I32\n"
|
||||
"\t\t\t I64\n\t\t\t "
|
||||
),
|
||||
)
|
||||
|
||||
analyze_group.add_argument(
|
||||
@@ -584,13 +696,15 @@ Examples:
|
||||
dest="pc_sampling_sorting_type",
|
||||
default="offset",
|
||||
type=str,
|
||||
help="\t\tSet the sorting type of pc sampling: offset or count (DEFAULT: offset).",
|
||||
help="\t\tSet the sorting type of pc sampling: "
|
||||
"offset or count (DEFAULT: offset).",
|
||||
)
|
||||
|
||||
analyze_advanced_group.add_argument(
|
||||
"--random-port",
|
||||
action="store_true",
|
||||
help="\t\tRandomly generate a port to launch GUI application.\n\t\tRegistered Ports range inclusive (1024-49151).",
|
||||
help="\t\tRandomly generate a port to launch GUI application.\n"
|
||||
"\t\tRegistered Ports range inclusive (1024-49151).",
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"--max-stat-num",
|
||||
@@ -598,7 +712,8 @@ Examples:
|
||||
metavar="",
|
||||
type=int,
|
||||
default=10,
|
||||
help='\t\tSpecify the maximum number of stats shown in "Top Stats" tables (DEFAULT: 10)',
|
||||
help="\t\tSpecify the maximum number of stats shown in "
|
||||
'"Top Stats" tables (DEFAULT: 10)',
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"-n",
|
||||
@@ -607,7 +722,11 @@ Examples:
|
||||
metavar="",
|
||||
default="per_kernel",
|
||||
choices=["per_wave", "per_cycle", "per_second", "per_kernel"],
|
||||
help="\t\tSpecify the normalization unit: (DEFAULT: per_kernel)\n\t\t per_wave\n\t\t per_cycle\n\t\t per_second\n\t\t per_kernel",
|
||||
help="\t\tSpecify the normalization unit: (DEFAULT: per_kernel)\n"
|
||||
"\t\t per_wave\n"
|
||||
"\t\t per_cycle\n"
|
||||
"\t\t per_second\n"
|
||||
"\t\t per_kernel",
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"-t",
|
||||
@@ -616,7 +735,11 @@ Examples:
|
||||
metavar="",
|
||||
default="ns",
|
||||
choices=["s", "ms", "us", "ns"],
|
||||
help="\t\tSpecify display time unit: (DEFAULT: ns)\n\t\t s\n\t\t ms\n\t\t us\n\t\t ns",
|
||||
help="\t\tSpecify display time unit: (DEFAULT: ns)\n"
|
||||
"\t\t s\n"
|
||||
"\t\t ms\n"
|
||||
"\t\t us\n"
|
||||
"\t\t ns",
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"--decimal",
|
||||
@@ -644,7 +767,10 @@ Examples:
|
||||
dest="cols",
|
||||
metavar="",
|
||||
nargs="+",
|
||||
help="\t\tSpecify column indices to display.\n\t\tDefaults to display all columns.",
|
||||
help=(
|
||||
"\t\tSpecify column indices to display.\n"
|
||||
"\t\tDefaults to display all columns."
|
||||
),
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"--include-cols",
|
||||
@@ -653,7 +779,8 @@ Examples:
|
||||
nargs="+",
|
||||
help=(
|
||||
"\t\tSpecify which hidden column names should be included in cli output.\n"
|
||||
"\t\tFor example, to show 'Description' column which is hidden by default in cli output,\n"
|
||||
"\t\tFor example, to show 'Description' column which is hidden by "
|
||||
"default in cli output,\n"
|
||||
"\t\tuse the option --include-cols Description."
|
||||
),
|
||||
)
|
||||
@@ -669,7 +796,9 @@ Examples:
|
||||
"--kernel-verbose",
|
||||
required=False,
|
||||
metavar="",
|
||||
help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)",
|
||||
help="\t\tSpecify Kernel Name verbose level 1-5. "
|
||||
"Lower the level, shorter the kernel name. "
|
||||
"(DEFAULT: 5) (DISABLE: 5)",
|
||||
default=5,
|
||||
type=int,
|
||||
)
|
||||
@@ -680,7 +809,9 @@ Examples:
|
||||
"--specs-correction",
|
||||
type=str,
|
||||
metavar="",
|
||||
help="\t\tSpecify the specs to correct. e.g. --specs-correction='specname1:specvalue1,specname2:specvalue2'",
|
||||
help="\t\tSpecify the specs to correct. e.g. "
|
||||
"--specs-correction='specname1:specvalue1,"
|
||||
"specname2:specvalue2'",
|
||||
)
|
||||
analyze_advanced_group.add_argument(
|
||||
"--list-nodes",
|
||||
@@ -693,5 +824,8 @@ Examples:
|
||||
type=str,
|
||||
dest="nodes",
|
||||
nargs="*",
|
||||
help="\t\tMulti-node option: filter with node names. Enable it without node names means ALL.",
|
||||
help=(
|
||||
"\t\tMulti-node option: filter with node names. "
|
||||
"Enable it without node names means ALL."
|
||||
),
|
||||
)
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# NB: Creating a new module to share global vars across modules
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Main entry point for rocprof-compute"""
|
||||
|
||||
##############################################################################bl
|
||||
@@ -80,11 +79,10 @@ def verify_deps():
|
||||
dependencies are not available."""
|
||||
|
||||
# Check which version of python is being used
|
||||
if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 8):
|
||||
print(
|
||||
"[ERROR] Python 3.8 or higher is required to run rocprofiler-compute."
|
||||
f" The current version is {sys.version_info[0]}.{sys.version_info[1]}."
|
||||
)
|
||||
if sys.version_info[0] < 3 or (sys.version_info[0] == 3
|
||||
and sys.version_info[1] < 8):
|
||||
print("[ERROR] Python 3.8 or higher is required to run rocprofiler-compute."
|
||||
f" The current version is {sys.version_info[0]}.{sys.version_info[1]}.")
|
||||
sys.exit(1)
|
||||
|
||||
bindir = str(Path(__file__).resolve().parent)
|
||||
@@ -112,17 +110,14 @@ def verify_deps():
|
||||
localVersion = metadata.distribution(package).version
|
||||
except metadata.PackageNotFoundError:
|
||||
error = True
|
||||
print(
|
||||
f"[ERROR] The '{dependency}' package was not found "
|
||||
"in the current execution environment."
|
||||
)
|
||||
print(f"[ERROR] The '{dependency}' package was not found "
|
||||
"in the current execution environment.")
|
||||
|
||||
# check version requirement
|
||||
if not error:
|
||||
if desiredVersion:
|
||||
if not verify_deps_version(
|
||||
localVersion, desiredVersion, operator
|
||||
):
|
||||
if not verify_deps_version(localVersion, desiredVersion,
|
||||
operator):
|
||||
print(
|
||||
f"[ERROR] the '{dependency}' distribution does "
|
||||
"not meet version requirements to use rocprofiler-compute."
|
||||
@@ -132,10 +127,8 @@ def verify_deps():
|
||||
|
||||
if error:
|
||||
print("")
|
||||
print(
|
||||
"Please verify all of the python dependencies called out "
|
||||
"in the requirements file"
|
||||
)
|
||||
print("Please verify all of the python dependencies called out "
|
||||
"in the requirements file")
|
||||
print("are installed locally prior to running rocprofiler-compute.")
|
||||
print("")
|
||||
print(f"See: {checkFile}")
|
||||
|
||||
@@ -23,12 +23,10 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import copy
|
||||
import os
|
||||
import sys
|
||||
import textwrap
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import abstractmethod
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
@@ -78,7 +76,9 @@ class OmniAnalyze_Base:
|
||||
# TODO: filter_metrics should/might be one per arch
|
||||
# print(ac)
|
||||
|
||||
parser.build_dfs(archConfigs=ac, filter_metrics=filter_metrics, sys_info=sys_info)
|
||||
parser.build_dfs(
|
||||
archConfigs=ac, filter_metrics=filter_metrics, sys_info=sys_info
|
||||
)
|
||||
self._arch_configs[arch] = ac
|
||||
return self._arch_configs
|
||||
|
||||
@@ -192,7 +192,9 @@ class OmniAnalyze_Base:
|
||||
arch = w.sys_info.iloc[0]["gpu_arch"]
|
||||
mspec = self.get_socs()[arch]._mspec
|
||||
if self.__args.specs_correction:
|
||||
w.sys_info = parser.correct_sys_info(mspec, self.__args.specs_correction)
|
||||
w.sys_info = parser.correct_sys_info(
|
||||
mspec, self.__args.specs_correction
|
||||
)
|
||||
w.avail_ips = w.sys_info["ip_blocks"].item().split("|")
|
||||
w.dfs = copy.deepcopy(self._arch_configs[arch].dfs)
|
||||
w.dfs_type = self._arch_configs[arch].dfs_type
|
||||
@@ -222,7 +224,7 @@ class OmniAnalyze_Base:
|
||||
|
||||
# TODO: add more error checks
|
||||
if not (
|
||||
self.__args.nodes != None
|
||||
self.__args.nodes is not None
|
||||
or self.__args.list_nodes
|
||||
or self.__args.spatial_multiplexing
|
||||
):
|
||||
@@ -266,7 +268,9 @@ class OmniAnalyze_Base:
|
||||
console_log("analysis", "deriving rocprofiler-compute metrics...")
|
||||
# initialize output file
|
||||
self._output = (
|
||||
open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout
|
||||
open(self.__args.output_file, "w+")
|
||||
if self.__args.output_file
|
||||
else sys.stdout
|
||||
)
|
||||
|
||||
# Read profiling config
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from rocprof_compute_analyze.analysis_base import OmniAnalyze_Base
|
||||
from utils import file_io, parser, tty
|
||||
from utils.kernel_name_shortener import kernel_name_shortener
|
||||
@@ -41,7 +40,6 @@ class cli_analysis(OmniAnalyze_Base):
|
||||
if self.get_args().random_port:
|
||||
console_error("--gui flag is required to enable --random-port")
|
||||
for d in self.get_args().path:
|
||||
|
||||
# create 'mega dataframe'
|
||||
self._runs[d[0]].raw_pmc = file_io.create_df_pmc(
|
||||
d[0],
|
||||
|
||||
@@ -23,9 +23,7 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import copy
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
@@ -228,7 +226,10 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
for t_type, table_config in data_source.items():
|
||||
original_df = base_data[base_run].dfs[table_config["id"]]
|
||||
# The sys info table need to add index back
|
||||
if t_type == "raw_csv_table" and "Info" in original_df.keys():
|
||||
if (
|
||||
t_type == "raw_csv_table"
|
||||
and "Info" in original_df.keys()
|
||||
):
|
||||
original_df.reset_index(inplace=True)
|
||||
|
||||
content = determine_chart_type(
|
||||
@@ -279,7 +280,13 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
id="popup",
|
||||
children=[
|
||||
html.Div(
|
||||
children="To dive deeper, use the top drop down menus to isolate particular kernel(s) or dispatch(s). You will then see the web page update with additional low-level metrics specific to the filter you've applied.",
|
||||
children=(
|
||||
"To dive deeper, use the top drop down menus to "
|
||||
"isolate particular kernel(s) or dispatch(s). "
|
||||
"You will then see the web page update with "
|
||||
"additional low-level metrics specific to the "
|
||||
"filter you've applied."
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -308,7 +315,9 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
)
|
||||
|
||||
if self.get_args().spatial_multiplexing:
|
||||
self._runs[self.dest_dir].raw_pmc = self.spatial_multiplex_merge_counters(
|
||||
self._runs[
|
||||
self.dest_dir
|
||||
].raw_pmc = self.spatial_multiplex_merge_counters(
|
||||
self._runs[self.dest_dir].raw_pmc
|
||||
)
|
||||
|
||||
@@ -386,8 +395,7 @@ def determine_chart_type(
|
||||
d_figs = build_bar_chart(display_df, table_config, barchart_elements, norm_filt)
|
||||
# Smaller formatting if barchart yields several graphs
|
||||
if (
|
||||
len(d_figs)
|
||||
> 2
|
||||
len(d_figs) > 2
|
||||
# and not table_config["id"]
|
||||
# in barchart_elements["l2_cache_per_chan"]
|
||||
):
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import os
|
||||
@@ -159,7 +158,9 @@ class RocProfCompute:
|
||||
|
||||
def parse_args(self):
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Command line interface for AMD's GPU profiler, ROCm Compute Profiler",
|
||||
description=(
|
||||
"Command line interface for AMD's GPU profiler, ROCm Compute Profiler"
|
||||
),
|
||||
prog="tool",
|
||||
formatter_class=lambda prog: argparse.RawTextHelpFormatter(
|
||||
prog, max_help_position=30
|
||||
@@ -176,7 +177,11 @@ class RocProfCompute:
|
||||
and self.__args.format_rocprof_output != "rocpd"
|
||||
):
|
||||
console_warning(
|
||||
f"The option --format-rocprof-output currently set to {self.__args.format_rocprof_output} will default to rocpd in a future release."
|
||||
(
|
||||
f"The option --format-rocprof-output currently set to "
|
||||
f"{self.__args.format_rocprof_output} will default to rocpd "
|
||||
"in a future release."
|
||||
)
|
||||
)
|
||||
|
||||
if self.__args.mode == None:
|
||||
@@ -188,7 +193,6 @@ class RocProfCompute:
|
||||
"rocprof-compute requires you to pass a valid mode. Detected None."
|
||||
)
|
||||
elif self.__args.mode == "profile":
|
||||
|
||||
# FIXME:
|
||||
# Might want to get host name from detected spec
|
||||
if self.__args.subpath == "node_name":
|
||||
@@ -312,7 +316,9 @@ class RocProfCompute:
|
||||
# Update default path
|
||||
if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
|
||||
self.__args.path = str(
|
||||
Path(self.__args.path).joinpath(self.__args.name, self.__mspec.gpu_model)
|
||||
Path(self.__args.path).joinpath(
|
||||
self.__args.name, self.__mspec.gpu_model
|
||||
)
|
||||
)
|
||||
|
||||
# instantiate desired profiler
|
||||
@@ -376,7 +382,10 @@ class RocProfCompute:
|
||||
profiler.run_profiling(self.__version["ver"], config.PROJECT_NAME)
|
||||
time_end_prof = time.time()
|
||||
console_debug(
|
||||
'finished "run_profiling" and finished rocprof\'s workload, time taken was {} m {} sec'.format(
|
||||
(
|
||||
'finished "run_profiling" and finished rocprof\'s workload, '
|
||||
"time taken was {} m {} sec"
|
||||
).format(
|
||||
int((time_end_prof - time_start_prof) / 60),
|
||||
str((time_end_prof - time_start_prof) % 60),
|
||||
)
|
||||
@@ -385,8 +394,7 @@ class RocProfCompute:
|
||||
time_end_post = time.time()
|
||||
console_debug(
|
||||
'time taken for "post_processing" was {} seconds'.format(
|
||||
int((time_end_post - time_end_prof) / 60),
|
||||
str((time_end_post - time_end_prof) % 60),
|
||||
int(time_end_post - time_end_prof)
|
||||
)
|
||||
)
|
||||
self.__soc[self.__mspec.gpu_arch].post_profiling()
|
||||
@@ -398,7 +406,8 @@ class RocProfCompute:
|
||||
self.print_graphic()
|
||||
|
||||
console_warning(
|
||||
"Database update mode is deprecated and will be removed in a future release "
|
||||
"Database update mode is deprecated and will "
|
||||
"be removed in a future release "
|
||||
"and no fixes will be made for this mode."
|
||||
)
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import os
|
||||
@@ -96,7 +95,7 @@ class RocProfCompute_Base:
|
||||
return
|
||||
|
||||
# Set default output directory if not specified
|
||||
if type(self.__args.path) == str:
|
||||
if isinstance(self.__args.path, str):
|
||||
if out is None:
|
||||
out = self.__args.path + "/pmc_perf.csv"
|
||||
files = glob.glob(self.__args.path + "/" + "pmc_perf_*.csv")
|
||||
@@ -121,7 +120,7 @@ class RocProfCompute_Base:
|
||||
os.path.basename(f)
|
||||
)
|
||||
]
|
||||
elif type(self.__args.path) == list:
|
||||
elif isinstance(self.__args.path, list):
|
||||
files = self.__args.path
|
||||
else:
|
||||
console_error(
|
||||
@@ -130,7 +129,7 @@ class RocProfCompute_Base:
|
||||
|
||||
df = None
|
||||
for i, file in enumerate(files):
|
||||
_df = pd.read_csv(file) if type(self.__args.path) == str else file
|
||||
_df = pd.read_csv(file) if isinstance(self.__args.path, str) else file
|
||||
if self.__args.join_type == "kernel":
|
||||
key = _df.groupby("Kernel_Name").cumcount()
|
||||
_df["key"] = _df.Kernel_Name + " - " + key.astype(str)
|
||||
@@ -145,7 +144,8 @@ class RocProfCompute_Base:
|
||||
)
|
||||
else:
|
||||
console_error(
|
||||
"%s is an unrecognized option for --join-type" % self.__args.join_type
|
||||
"%s is an unrecognized option for --join-type"
|
||||
% self.__args.join_type
|
||||
)
|
||||
|
||||
if df is None:
|
||||
@@ -174,7 +174,9 @@ class RocProfCompute_Base:
|
||||
}
|
||||
# Check for vgpr counter in ROCm < 5.3
|
||||
if "vgpr" in df.columns:
|
||||
duplicate_cols["vgpr"] = [col for col in df.columns if col.startswith("vgpr")]
|
||||
duplicate_cols["vgpr"] = [
|
||||
col for col in df.columns if col.startswith("vgpr")
|
||||
]
|
||||
# Check for vgpr counter in ROCm >= 5.3
|
||||
else:
|
||||
duplicate_cols["Arch_VGPR"] = [
|
||||
@@ -235,7 +237,8 @@ class RocProfCompute_Base:
|
||||
)
|
||||
]
|
||||
]
|
||||
# B) any timestamps that are _not_ the duration, which is the one we care about
|
||||
# B) any timestamps that are _not_ the duration,
|
||||
# which is the one we care about
|
||||
df = df[
|
||||
[
|
||||
k
|
||||
@@ -275,8 +278,9 @@ class RocProfCompute_Base:
|
||||
df["End_Timestamp"] = endNs
|
||||
# finally, drop the join key
|
||||
df = df.drop(columns=["key"])
|
||||
# save to file and delete old file(s), skip if we're being called outside of rocprof-compute
|
||||
if type(self.__args.path) == str:
|
||||
# save to file and delete old file(s)
|
||||
# skip if we're being called outside of rocprof-compute
|
||||
if isinstance(self.__args.path, str):
|
||||
df.to_csv(out, index=False)
|
||||
if not self.__args.verbose:
|
||||
for file in files:
|
||||
@@ -322,7 +326,12 @@ class RocProfCompute_Base:
|
||||
self.__args.remaining = " ".join(self.__args.remaining)
|
||||
else:
|
||||
console_error(
|
||||
"Profiling command required. Pass application executable after -- at the end of options.\n\t\ti.e. rocprof-compute profile -n vcopy -- ./vcopy -n 1048576 -b 256"
|
||||
(
|
||||
"Profiling command required. Pass application executable after -- "
|
||||
"at the end of options.\n"
|
||||
"\t\ti.e. rocprof-compute profile -n vcopy -- "
|
||||
"./vcopy -n 1048576 -b 256"
|
||||
)
|
||||
)
|
||||
|
||||
gen_sysinfo(
|
||||
@@ -380,27 +389,28 @@ class RocProfCompute_Base:
|
||||
time_left_seconds = (total_runs - run_number) * avg_profiling_time
|
||||
time_left = format_time(time_left_seconds)
|
||||
console_log(
|
||||
f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]..."
|
||||
f"[Run {run_number}/{total_runs}]"
|
||||
f"[Approximate profiling time left: {time_left}]..."
|
||||
)
|
||||
else:
|
||||
console_log(
|
||||
f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]"
|
||||
f"[Run {run_number}/{total_runs}]"
|
||||
"[Approximate profiling time left: "
|
||||
"pending first measurement...]"
|
||||
)
|
||||
|
||||
# Kernel filtering (in-place replacement)
|
||||
if not self.__args.kernel == None:
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(kernel:).*%"
|
||||
+ "kernel: "
|
||||
+ ",".join(self.__args.kernel)
|
||||
+ "%g",
|
||||
fname,
|
||||
]
|
||||
)
|
||||
success, output = capture_subprocess_output([
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(kernel:).*%"
|
||||
+ "kernel: "
|
||||
+ ",".join(self.__args.kernel)
|
||||
+ "%g",
|
||||
fname,
|
||||
])
|
||||
# log output from profile filtering
|
||||
if not success:
|
||||
console_error(output)
|
||||
@@ -409,18 +419,16 @@ class RocProfCompute_Base:
|
||||
|
||||
# Dispatch filtering (inplace replacement)
|
||||
if not self.__args.dispatch == None:
|
||||
success, output = capture_subprocess_output(
|
||||
[
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(range:).*%"
|
||||
+ "range: "
|
||||
+ " ".join(self.__args.dispatch)
|
||||
+ "%g",
|
||||
fname,
|
||||
]
|
||||
)
|
||||
success, output = capture_subprocess_output([
|
||||
"sed",
|
||||
"-i",
|
||||
"-r",
|
||||
"s%^(range:).*%"
|
||||
+ "range: "
|
||||
+ " ".join(self.__args.dispatch)
|
||||
+ "%g",
|
||||
fname,
|
||||
])
|
||||
# log output from profile filtering
|
||||
if not success:
|
||||
console_error(output)
|
||||
@@ -462,7 +470,9 @@ class RocProfCompute_Base:
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
):
|
||||
console_log(f"[Run {total_runs+1}/{total_runs+1}][PC sampling profile run]")
|
||||
console_log(
|
||||
f"[Run {total_runs + 1}/{total_runs + 1}][PC sampling profile run]"
|
||||
)
|
||||
start_run_prof = time.time()
|
||||
pc_sampling_prof(
|
||||
method=self.get_args().pc_sampling_method,
|
||||
|
||||
+7
-11
@@ -23,8 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
@@ -67,15 +65,13 @@ class rocprof_v1_profiler(RocProfCompute_Base):
|
||||
'"' + app_cmd + '"',
|
||||
]
|
||||
# store original args for debug message
|
||||
store_app_cmd(
|
||||
[
|
||||
"--timestamp",
|
||||
"on",
|
||||
"-o",
|
||||
self.get_args().path + "/" + fbase + ".csv",
|
||||
app_cmd,
|
||||
]
|
||||
)
|
||||
store_app_cmd([
|
||||
"--timestamp",
|
||||
"on",
|
||||
"-o",
|
||||
self.get_args().path + "/" + fbase + ".csv",
|
||||
app_cmd,
|
||||
])
|
||||
return args
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
@@ -44,7 +42,6 @@ class rocprof_v2_profiler(RocProfCompute_Base):
|
||||
)
|
||||
|
||||
def get_profiler_options(self, fname, soc):
|
||||
fbase = Path(fname).stem
|
||||
app_cmd = shlex.split(self.get_args().remaining)
|
||||
|
||||
args = []
|
||||
|
||||
+3
-4
@@ -23,12 +23,9 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_profile.profiler_base import RocProfCompute_Base
|
||||
from utils.logger import console_error, console_log, demarcate
|
||||
|
||||
@@ -49,7 +46,9 @@ class rocprof_v3_profiler(RocProfCompute_Base):
|
||||
trace_option = "--kokkos-trace"
|
||||
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
|
||||
console_error(
|
||||
"The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
|
||||
"The option '--kokkos-trace' is not supported in the current "
|
||||
"version of rocprof-compute. This functionality is planned for a "
|
||||
"future release. Please adjust your profiling options accordingly."
|
||||
)
|
||||
if self.get_args().hip_trace:
|
||||
trace_option = "--hip-trace"
|
||||
|
||||
+3
-3
@@ -23,8 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
@@ -64,7 +62,9 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
|
||||
if self.get_args().kokkos_trace:
|
||||
# NOTE: --kokkos-trace feature is incomplete and is disabled for now.
|
||||
console_error(
|
||||
"The option '--kokkos-trace' is not supported in the current version of rocprof-compute. This functionality is planned for a future release. Please adjust your profiling options accordingly."
|
||||
"The option '--kokkos-trace' is not supported in the current "
|
||||
"version of rocprof-compute. This functionality is planned for a "
|
||||
"future release. Please adjust your profiling options accordingly."
|
||||
)
|
||||
if self.get_args().hip_trace:
|
||||
options["ROCPROF_HIP_COMPILER_API_TRACE"] = "1"
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import glob
|
||||
import json
|
||||
import math
|
||||
@@ -34,7 +33,6 @@ import sys
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import yaml
|
||||
|
||||
import config
|
||||
@@ -68,9 +66,8 @@ class OmniSoC_Base:
|
||||
self.__arch = None
|
||||
self._mspec = mspec
|
||||
self.__perfmon_dir = None
|
||||
self.__perfmon_config = (
|
||||
{}
|
||||
) # Per IP block max number of simulutaneous counters. GFX IP Blocks
|
||||
# Per IP block, max number of simultaneous counters. GFX IP Blocks.
|
||||
self.__perfmon_config = {}
|
||||
self.__soc_params = {} # SoC specifications
|
||||
self.__compatible_profilers = [] # Store profilers compatible with SoC
|
||||
self.populate_mspec()
|
||||
@@ -169,14 +166,20 @@ class OmniSoC_Base:
|
||||
)
|
||||
|
||||
# Parse json from amd-smi static --clock
|
||||
amd_smi_mclk = run(["amd-smi", "static", "--clock", "--json"], exit_on_error=True)
|
||||
amd_smi_mclk = run(
|
||||
["amd-smi", "static", "--clock", "--json"], exit_on_error=True
|
||||
)
|
||||
amd_smi_mclk = json.loads(amd_smi_mclk)
|
||||
|
||||
if isinstance(amd_smi_mclk, dict):
|
||||
# The output of `amd-smi static --clock --json` is a dict with amd-smi>=26.0.0.
|
||||
amd_smi_mclk = amd_smi_mclk["gpu_data"][0]["clock"]["mem"]["frequency_levels"]
|
||||
# The output of `amd-smi static --clock --json` is a dict with
|
||||
# amd-smi>=26.0.0.
|
||||
amd_smi_mclk = amd_smi_mclk["gpu_data"][0]["clock"]["mem"][
|
||||
"frequency_levels"
|
||||
]
|
||||
else:
|
||||
# For backward compatibility: the output of `amd-smi static --clock --json` used to be a list for amd-smi<26.0.0.
|
||||
# For backward compatibility: the output of `amd-smi static --clock --json`
|
||||
# used to be a list for amd-smi<26.0.0.
|
||||
amd_smi_mclk = amd_smi_mclk[0]["clock"]["mem"]["frequency_levels"]
|
||||
|
||||
# Choose the highest level of memory clock frequency
|
||||
@@ -186,8 +189,8 @@ class OmniSoC_Base:
|
||||
|
||||
console_debug("max mem clock is {}".format(self._mspec.max_mclk))
|
||||
|
||||
# these are just max's now, because the parsing was broken and this was inconsistent
|
||||
# with how we use the clocks elsewhere (all max, all the time)
|
||||
# These are just max values now, because the parsing was broken and this was
|
||||
# inconsistent with how we use the clocks elsewhere (all max, all the time)
|
||||
self._mspec.cur_sclk = self._mspec.max_sclk
|
||||
self._mspec.cur_mclk = self._mspec.max_mclk
|
||||
|
||||
@@ -202,7 +205,9 @@ class OmniSoC_Base:
|
||||
|
||||
self._mspec.num_xcd = str(
|
||||
mi_gpu_specs.get_num_xcds(
|
||||
self._mspec.gpu_arch, self._mspec.gpu_model, self._mspec.compute_partition
|
||||
self._mspec.gpu_arch,
|
||||
self._mspec.gpu_model,
|
||||
self._mspec.compute_partition,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -218,7 +223,8 @@ class OmniSoC_Base:
|
||||
# TODO: use amd-smi python api when available
|
||||
amd_smi_static = run(["amd-smi", "static", "--gpu=0"], exit_on_error=True)
|
||||
|
||||
# Purposely search for patterns without variants suffix to try and match a known GPU model.
|
||||
# Purposely search for patterns without variants suffix to try and match a known
|
||||
# GPU model.
|
||||
detection_methods = [
|
||||
{
|
||||
"name": "Market Name",
|
||||
@@ -228,7 +234,10 @@ class OmniSoC_Base:
|
||||
"name": "VBIOS Name",
|
||||
"pattern": r"NAME:\s*.*(mi|MI\d*[a-zA-Z]*)",
|
||||
},
|
||||
{"name": "Product Name", "pattern": r"PRODUCT_NAME:\s*.*(mi|MI\d*[a-zA-Z]*)"},
|
||||
{
|
||||
"name": "Product Name",
|
||||
"pattern": r"PRODUCT_NAME:\s*.*(mi|MI\d*[a-zA-Z]*)",
|
||||
},
|
||||
]
|
||||
|
||||
gpu_model = None
|
||||
@@ -267,7 +276,8 @@ class OmniSoC_Base:
|
||||
def detect_counters(self):
|
||||
"""
|
||||
Create a set of counters required for the selected report sections.
|
||||
Parse analysis report configuration files based on the selected report sections to be filtered.
|
||||
Parse analysis report configuration files based on the selected report
|
||||
sections to be filtered.
|
||||
"""
|
||||
# Read the analysis config files and filter
|
||||
config_root_dir = f"{self.get_args().config_dir}/{self.__arch}"
|
||||
@@ -306,7 +316,10 @@ class OmniSoC_Base:
|
||||
# File id filtering
|
||||
if file_id not in config_filename_dict:
|
||||
console_warning(
|
||||
f"Skipping {block_id}: file id {file_id} not found in {config_root_dir}"
|
||||
(
|
||||
f"Skipping {block_id}: file id {file_id} not found in "
|
||||
f"{config_root_dir}"
|
||||
)
|
||||
)
|
||||
continue
|
||||
with open(config_filename_dict[file_id], "r") as stream:
|
||||
@@ -324,7 +337,10 @@ class OmniSoC_Base:
|
||||
}
|
||||
if panel_id not in panel_dict:
|
||||
console_warning(
|
||||
f"Skipping {block_id}: metric table {panel_id} not found in {config_filename_dict[file_id]}"
|
||||
(
|
||||
f"Skipping {block_id}: metric table {panel_id} not found in "
|
||||
f"{config_filename_dict[file_id]}"
|
||||
)
|
||||
)
|
||||
continue
|
||||
if metric_id is None:
|
||||
@@ -338,14 +354,17 @@ class OmniSoC_Base:
|
||||
}
|
||||
if metric_id not in metric_dict:
|
||||
console_warning(
|
||||
f"Skipping {block_id}: metric id {metric_id} not found in panel id {panel_id}"
|
||||
(
|
||||
f"Skipping {block_id}: metric id {metric_id} not found in "
|
||||
f"panel id {panel_id}"
|
||||
)
|
||||
)
|
||||
continue
|
||||
texts.append(yaml.dump(metric_dict[metric_id], sort_keys=False))
|
||||
|
||||
counters = self.parse_counters("\n".join(texts))
|
||||
|
||||
# Handle TCC channel counters: if hw_counter_matches has elements ending with '['
|
||||
# Handle TCC channel counters: if hw_counter_matches has elems ending with '['
|
||||
# Expand and interleave the TCC channel counters
|
||||
# e.g. TCC_HIT[0] TCC_ATOMIC[0] ... TCC_HIT[1] TCC_ATOMIC[1] ...
|
||||
if using_v3():
|
||||
@@ -356,12 +375,10 @@ class OmniSoC_Base:
|
||||
if counter_name.startswith("TCC") and counter_name.endswith("["):
|
||||
counters.remove(counter_name)
|
||||
counter_name = counter_name.split("[")[0]
|
||||
counters = counters.union(
|
||||
{
|
||||
f"{counter_name}[{i}]"
|
||||
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
|
||||
}
|
||||
)
|
||||
counters = counters.union({
|
||||
f"{counter_name}[{i}]"
|
||||
for i in range(num_xcd_for_pmc_file * int(self._mspec._l2_banks))
|
||||
})
|
||||
|
||||
return counters
|
||||
|
||||
@@ -394,7 +411,10 @@ class OmniSoC_Base:
|
||||
|
||||
if not using_v3():
|
||||
# Counters not supported in rocprof v1 / v2
|
||||
counters = counters - {"SQ_INSTS_VALU_MFMA_F8", "SQ_INSTS_VALU_MFMA_MOPS_F8"}
|
||||
counters = counters - {
|
||||
"SQ_INSTS_VALU_MFMA_F8",
|
||||
"SQ_INSTS_VALU_MFMA_MOPS_F8",
|
||||
}
|
||||
|
||||
# TCP_TCP_LATENCY_sum not supported for MI300 (gfx940, gfx941, gfx942)
|
||||
if self.__arch in ("gfx940", "gfx941", "gfx942"):
|
||||
@@ -409,7 +429,8 @@ class OmniSoC_Base:
|
||||
@demarcate
|
||||
def parse_counters(self, config_text):
|
||||
"""
|
||||
Create a set of all hardware counters mentioned in the given config file content string
|
||||
Create a set of all hardware counters mentioned in the given config file
|
||||
content string.
|
||||
"""
|
||||
hw_counter_matches, variable_matches = self.parse_counters_text(config_text)
|
||||
|
||||
@@ -440,7 +461,8 @@ class OmniSoC_Base:
|
||||
def parse_counters_text(self, text):
|
||||
"""Parse out hardware counters and variables from given text"""
|
||||
# hw counter name should start with ip block name
|
||||
# hw counter name should have all capital letters or digits and should not end with underscore
|
||||
# hw counter name should have all capital letters or digits
|
||||
# and should not end with underscore
|
||||
# hw counter name can optionally end with either '[' or '_sum'
|
||||
hw_counter_regex = (
|
||||
r"(?:SQ|SQC|TA|TD|TCP|TCC|CPC|CPF|SPI|GRBM)_[0-9A-Z_]*[0-9A-Z](?:\[|_sum)*"
|
||||
@@ -458,11 +480,11 @@ class OmniSoC_Base:
|
||||
|
||||
if rocprof_cmd != "rocprofiler-sdk":
|
||||
console_warning(
|
||||
"rocprof v1 / v2 / v3 interfaces will be removed in favor of "
|
||||
"rocprofiler-sdk interface in a future release. To use rocprofiler-sdk "
|
||||
"interface, please set the environment variable ROCPROF to 'rocprofiler-sdk' "
|
||||
"and optionally provide the path to librocprofiler-sdk.so library via the "
|
||||
"--rocprofiler-sdk-library-path option."
|
||||
"rocprof v1/v2/v3 interfaces will be removed in favor of "
|
||||
"rocprofiler-sdk interface in a future release. To use "
|
||||
"rocprofiler-sdk, set ROCPROF to 'rocprofiler-sdk' and "
|
||||
"optionally provide the path to librocprofiler-sdk.so via "
|
||||
"--rocprofiler-sdk-library-path."
|
||||
)
|
||||
|
||||
rocprof_counters = set()
|
||||
@@ -473,7 +495,8 @@ class OmniSoC_Base:
|
||||
# return code should be 1 so success should be False
|
||||
if success:
|
||||
console_error(
|
||||
f"Failed to list rocprof supported counters using command: {command}"
|
||||
"Failed to list rocprof supported counters using command: %s"
|
||||
% command
|
||||
)
|
||||
for line in output.splitlines():
|
||||
if "gpu-agent" in line:
|
||||
@@ -485,7 +508,8 @@ class OmniSoC_Base:
|
||||
# return code should be 1 so success should be False
|
||||
if success:
|
||||
console_error(
|
||||
f"Failed to list rocprof supported counters using command: {command}"
|
||||
"Failed to list rocprof supported counters using command: %s"
|
||||
% command
|
||||
)
|
||||
for line in output.splitlines():
|
||||
if "gpu-agent" in line:
|
||||
@@ -498,13 +522,13 @@ class OmniSoC_Base:
|
||||
# return code should be 1 so success should be False
|
||||
if success:
|
||||
console_error(
|
||||
f"Failed to list rocprof supported counters using command: {command}"
|
||||
"Failed to list rocprof supported counters using command: %s"
|
||||
% command
|
||||
)
|
||||
for line in output.splitlines():
|
||||
if "gfx" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[2].strip())
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif (
|
||||
str(rocprof_cmd).endswith("rocprofv3")
|
||||
or str(rocprof_cmd) == "rocprofiler-sdk"
|
||||
@@ -550,7 +574,10 @@ class OmniSoC_Base:
|
||||
|
||||
@demarcate
|
||||
def perfmon_coalesce(self, counters):
|
||||
"""Sort and bucket all related performance counters to minimize required application passes"""
|
||||
"""
|
||||
Sort and bucket all related performance counters to minimize required
|
||||
application passes
|
||||
"""
|
||||
|
||||
# Create workload directory
|
||||
# In some cases (i.e. --specs) path will not be given
|
||||
@@ -577,20 +604,26 @@ class OmniSoC_Base:
|
||||
|
||||
# Sanity check whether counters are supported by underlying rocprof tool
|
||||
rocprof_counters = self.get_rocprof_supported_counters()
|
||||
# rocprof does not support TCC channel counters in the avail output, so remove channel suffix for comparison
|
||||
# rocprof does not support TCC channel counters in the avail output,
|
||||
# so remove channel suffix for comparison
|
||||
not_supported_counters = {
|
||||
counter.split("[")[0] if is_tcc_channel_counter(counter) else counter
|
||||
for counter in counters
|
||||
} - rocprof_counters
|
||||
if not_supported_counters:
|
||||
console_warning(
|
||||
f"Following counters might not be supported by rocprof: {', '.join(not_supported_counters)} "
|
||||
"Following counters might not be supported by rocprof: %s"
|
||||
% ", ".join(not_supported_counters)
|
||||
)
|
||||
# We might be providing definitions of unsupported counters, so still try to collect them
|
||||
# We might be providing definitions of unsupported counters, so still try to
|
||||
# collect them
|
||||
if not counters:
|
||||
console_error(
|
||||
"profiling",
|
||||
"No performance counters to collect, please check the provided profiling filters",
|
||||
(
|
||||
"No performance counters to collect, "
|
||||
"please check the provided profiling filters"
|
||||
),
|
||||
)
|
||||
else:
|
||||
console_debug(f"Collecting following counters: {', '.join(counters)} ")
|
||||
@@ -607,13 +640,15 @@ class OmniSoC_Base:
|
||||
and not is_tcc_channel_counter(counter)
|
||||
):
|
||||
counters.remove(counter)
|
||||
output_files.append(CounterFile(counter + ".txt", self.__perfmon_config))
|
||||
output_files.append(
|
||||
CounterFile(counter + ".txt", self.__perfmon_config)
|
||||
)
|
||||
output_files[-1].add(counter)
|
||||
if using_v3():
|
||||
# v3 does not support SQ_ACCUM_PREV_HIRES. Instead we defined our own
|
||||
# counters in counter_defs.yaml that use the accumulate() function. These
|
||||
# use the name of the accumulate counter with _ACCUM appended to them.
|
||||
output_files[-1].add(counter + "_ACCUM")
|
||||
# v3 does not support SQ_ACCUM_PREV_HIRES. Use custom counters
|
||||
# defined in counter_defs.yaml that utilize accumulate(),
|
||||
# with _ACCUM suffix.
|
||||
output_files[-1].add(f"{counter}_ACCUM")
|
||||
else:
|
||||
output_files[-1].add("SQ_ACCUM_PREV_HIRES")
|
||||
accu_file_count += 1
|
||||
@@ -635,7 +670,9 @@ class OmniSoC_Base:
|
||||
added = True
|
||||
# Store all channels for a TCC channel counter in the same file
|
||||
if is_tcc_channel_counter(ctr):
|
||||
tcc_channel_counter_file_map[ctr.split("[")[0]] = output_files[i]
|
||||
tcc_channel_counter_file_map[ctr.split("[")[0]] = output_files[
|
||||
i
|
||||
]
|
||||
break
|
||||
|
||||
# All files are full, create a new file
|
||||
@@ -652,7 +689,6 @@ class OmniSoC_Base:
|
||||
|
||||
# TODO: rewrite the above logic for spatial_multiplexing later
|
||||
if self.get_args().spatial_multiplexing:
|
||||
|
||||
# TODO: more error checking
|
||||
if len(self.get_args().spatial_multiplexing) != 3:
|
||||
console_error(
|
||||
@@ -676,9 +712,11 @@ class OmniSoC_Base:
|
||||
|
||||
console_debug(
|
||||
"profiling",
|
||||
"spatial_multiplexing node_idx %s, node_count %s, gpu_count: %s, old_group_num %s, "
|
||||
"new_bucket_count %s, groups_per_bucket %s, max_groups_per_node %s, "
|
||||
"group_start %s, group_end %s"
|
||||
(
|
||||
"spatial_multiplexing node_idx %s, node_count %s, gpu_count: %s,\n"
|
||||
"old_group_num %s, new_bucket_count %s, groups_per_bucket %s,\n"
|
||||
"max_groups_per_node %s, group_start %s, group_end %s"
|
||||
)
|
||||
% (
|
||||
node_idx,
|
||||
node_count,
|
||||
@@ -695,7 +733,12 @@ class OmniSoC_Base:
|
||||
for f_idx in range(groups_per_bucket):
|
||||
file_name = str(
|
||||
Path(workload_perfmon_dir).joinpath(
|
||||
"pmc_perf_" + "node_" + str(node_idx) + "_" + str(f_idx) + ".txt"
|
||||
"pmc_perf_"
|
||||
+ "node_"
|
||||
+ str(node_idx)
|
||||
+ "_"
|
||||
+ str(f_idx)
|
||||
+ ".txt"
|
||||
)
|
||||
)
|
||||
|
||||
@@ -719,7 +762,9 @@ class OmniSoC_Base:
|
||||
else:
|
||||
# Output to files
|
||||
for f in output_files:
|
||||
file_name_txt = str(Path(workload_perfmon_dir).joinpath(f.file_name_txt))
|
||||
file_name_txt = str(
|
||||
Path(workload_perfmon_dir).joinpath(f.file_name_txt)
|
||||
)
|
||||
file_name_yaml = str(
|
||||
Path(workload_perfmon_dir).joinpath(f.file_name_yaml)
|
||||
)
|
||||
@@ -739,12 +784,19 @@ class OmniSoC_Base:
|
||||
idx = int(ctr.split("[")[1].split("]")[0])
|
||||
xcd_idx = idx // int(self._mspec._l2_banks)
|
||||
channel_idx = idx % int(self._mspec._l2_banks)
|
||||
expression = f"select({counter_name},[DIMENSION_XCC=[{xcd_idx}], DIMENSION_INSTANCE=[{channel_idx}]])"
|
||||
discription = f"{counter_name} on {xcd_idx}th XCC and {channel_idx}th channel"
|
||||
expression = (
|
||||
f"select({counter_name},"
|
||||
f"[DIMENSION_XCC=[{xcd_idx}], "
|
||||
f"DIMENSION_INSTANCE=[{channel_idx}]])"
|
||||
)
|
||||
description = (
|
||||
f"{counter_name} on {xcd_idx}th XCC and "
|
||||
f"{channel_idx}th channel"
|
||||
)
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
ctr,
|
||||
discription,
|
||||
description,
|
||||
expression,
|
||||
[self.__arch],
|
||||
)
|
||||
|
||||
@@ -23,10 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from utils.logger import console_error, demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
@@ -49,9 +48,12 @@ class gfx90a_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx90a"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
|
||||
@@ -23,13 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_error, console_log, console_warning, demarcate
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
@@ -49,9 +48,12 @@ class gfx940_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx940"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
|
||||
@@ -23,13 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_error, console_log, console_warning, demarcate
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
@@ -49,9 +48,12 @@ class gfx941_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx941"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
|
||||
@@ -23,13 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_error, console_log, console_warning, demarcate
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
@@ -49,9 +48,12 @@ class gfx942_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(
|
||||
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
|
||||
)
|
||||
self.set_compatible_profilers([
|
||||
"rocprofv1",
|
||||
"rocprofv2",
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx942"))
|
||||
# Create roofline object if mode is provided; skip for --specs
|
||||
|
||||
@@ -23,13 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
from rocprof_compute_soc.soc_base import OmniSoC_Base
|
||||
from roofline import Roofline
|
||||
from utils.logger import console_error, console_log, console_warning, demarcate
|
||||
from utils.logger import console_log, console_warning, demarcate
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.utils import mibench
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import copy
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Configuration Module
|
||||
-------------------
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
ROCm Compute Profiler TUI - Main Application with Analysis Methods
|
||||
----------------------------------------------------------------
|
||||
@@ -168,7 +166,7 @@ class RocprofTUIApp(App):
|
||||
if opened := await self.push_screen_wait(SelectDirectory()):
|
||||
self.add_to_recent(str(opened))
|
||||
self.main_view.selected_path = opened
|
||||
dropdown = self.query_one(f"#file-dropdown", DropdownMenu)
|
||||
dropdown = self.query_one("#file-dropdown", DropdownMenu)
|
||||
dropdown.add_class("hidden")
|
||||
self.main_view.run_analysis()
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import config
|
||||
@@ -61,9 +60,12 @@ class Logger:
|
||||
if hasattr(self.output_area, "text"):
|
||||
current_text = self.output_area.text
|
||||
self.output_area.text = (
|
||||
f"{current_text}\n{formatted_msg}" if current_text else formatted_msg
|
||||
f"{current_text}\n{formatted_msg}"
|
||||
if current_text
|
||||
else formatted_msg
|
||||
)
|
||||
# HACK: moving curson to end of outpu (Is there a better way to achieve this?)
|
||||
# HACK: moving cursor to end of output
|
||||
# (Is there a better way to achieve this?)
|
||||
self.output_area.cursor_location = (999999, 0)
|
||||
|
||||
def info(self, message, update_ui=True):
|
||||
@@ -192,7 +194,9 @@ def apply_rounding_logic(df, decimal_precision):
|
||||
if df_copy[column].dtype == "object":
|
||||
df_copy[column] = df_copy[column].combine(
|
||||
rounded_series,
|
||||
lambda orig, rounded: rounded if pd.notna(rounded) else orig,
|
||||
lambda orig, rounded: (
|
||||
rounded if pd.notna(rounded) else orig
|
||||
),
|
||||
)
|
||||
else:
|
||||
df_copy[column] = rounded_series
|
||||
|
||||
@@ -77,7 +77,10 @@ class KernelView(Container):
|
||||
"""
|
||||
with VerticalScroll(id="top-container"):
|
||||
yield Label(
|
||||
"Open a workload directory to run analysis and view individual kernel analysis results.",
|
||||
(
|
||||
"Open a workload directory to run analysis and view individual "
|
||||
"kernel analysis results."
|
||||
),
|
||||
classes="placeholder",
|
||||
)
|
||||
|
||||
@@ -177,7 +180,7 @@ class KernelView(Container):
|
||||
bottom_container.remove_children()
|
||||
|
||||
bottom_container.mount(
|
||||
Label(f"Toggle kernel selection to view detailed analysis.")
|
||||
Label("Toggle kernel selection to view detailed analysis.")
|
||||
)
|
||||
|
||||
if self.current_selection and self.current_selection in self.dfs:
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Main View Module
|
||||
---------------
|
||||
@@ -59,9 +57,7 @@ class MainView(Horizontal):
|
||||
super().__init__(id="main-container")
|
||||
self.start_path = (
|
||||
# NOTE: is cwd the best choice?
|
||||
Path.cwd()
|
||||
if DEFAULT_START_PATH is None
|
||||
else Path(DEFAULT_START_PATH)
|
||||
Path.cwd() if DEFAULT_START_PATH is None else Path(DEFAULT_START_PATH)
|
||||
)
|
||||
|
||||
self.logger = Logger()
|
||||
@@ -157,7 +153,9 @@ class MainView(Horizontal):
|
||||
analyzer.sanitize()
|
||||
self.logger.info("Step 2: Analyzer sanitized successfully")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Step 2 failed - Error sanitizing analyzer: {str(e)}")
|
||||
self.logger.error(
|
||||
f"Step 2 failed - Error sanitizing analyzer: {str(e)}"
|
||||
)
|
||||
raise
|
||||
|
||||
# Step 3: Load sys_info
|
||||
@@ -172,7 +170,13 @@ class MainView(Horizontal):
|
||||
sys_info_df = file_io.load_sys_info(sysinfo_path)
|
||||
self.logger.info(f"Step 3: sys_info_df type = {type(sys_info_df)}")
|
||||
self.logger.info(
|
||||
f"Step 3: sys_info_df shape = {sys_info_df.shape if hasattr(sys_info_df, 'shape') else 'No shape attribute'}"
|
||||
f"Step 3: sys_info_df shape = {
|
||||
(
|
||||
sys_info_df.shape
|
||||
if hasattr(sys_info_df, 'shape')
|
||||
else 'No shape attribute'
|
||||
)
|
||||
}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -193,12 +197,16 @@ class MainView(Horizontal):
|
||||
# If it's already a dict
|
||||
sys_info = sys_info_df
|
||||
else:
|
||||
raise TypeError(f"Unexpected type for sys_info: {type(sys_info_df)}")
|
||||
raise TypeError(
|
||||
f"Unexpected type for sys_info: {type(sys_info_df)}"
|
||||
)
|
||||
|
||||
self.logger.info(f"Step 4: sys_info converted = {sys_info}")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Step 4 failed - Error converting sys_info: {str(e)}")
|
||||
self.logger.error(
|
||||
f"Step 4 failed - Error converting sys_info: {str(e)}"
|
||||
)
|
||||
raise
|
||||
|
||||
# Step 5: Load SoC specs
|
||||
@@ -236,7 +244,10 @@ class MainView(Horizontal):
|
||||
# TODO: add per kernel Roofline support when available
|
||||
|
||||
if not self.per_kernel_dfs or not self.top_kernels:
|
||||
warning_msg = "Step 8: Per Kernel Analysis completed but not all data was returned"
|
||||
warning_msg = (
|
||||
"Step 8: Per Kernel Analysis completed but not all data "
|
||||
"was returned"
|
||||
)
|
||||
self._update_view(warning_msg, LogLevel.WARNING)
|
||||
self.logger.warning(warning_msg)
|
||||
else:
|
||||
@@ -289,7 +300,7 @@ class MainView(Horizontal):
|
||||
return
|
||||
|
||||
kernel_view.update_results(self.per_kernel_dfs, self.top_kernels)
|
||||
self.logger.success(f"Results displayed successfully.")
|
||||
self.logger.success("Results displayed successfully.")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error refreshing results: {str(e)}")
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Panel Widget Modules
|
||||
-------------------
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
@@ -35,7 +34,6 @@ import pandas as pd
|
||||
import plotext as plt
|
||||
import plotly.express as px
|
||||
from textual.widgets import Static
|
||||
from textual_plotext import PlotextPlot
|
||||
|
||||
from utils.mem_chart import plot_mem_chart
|
||||
|
||||
@@ -58,7 +56,9 @@ def simple_bar(df, title=None):
|
||||
.to_dict()["Avg"]
|
||||
)
|
||||
else:
|
||||
raise NameError(f"simple_bar: No Metric or Avg in df columns: {str(df.columns)}")
|
||||
raise NameError(
|
||||
f"simple_bar: No Metric or Avg in df columns: {str(df.columns)}"
|
||||
)
|
||||
|
||||
plt.clear_figure()
|
||||
|
||||
@@ -389,7 +389,6 @@ class SimpleBar(Static):
|
||||
|
||||
|
||||
class SimpleBox(Static):
|
||||
|
||||
DEFAULT_CSS = """
|
||||
SimpleBox {
|
||||
padding: 0;
|
||||
|
||||
+23
-8
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
@@ -58,7 +57,10 @@ def load_config(config_path) -> Dict[str, Any]:
|
||||
return yaml.safe_load(file)
|
||||
except FileNotFoundError:
|
||||
raise FileNotFoundError(
|
||||
f"Configuration file {config_path} not found, \nplease populate the analysis_config.yaml file."
|
||||
(
|
||||
f"Configuration file {config_path} not found, \n"
|
||||
"please populate the analysis_config.yaml file."
|
||||
)
|
||||
)
|
||||
except yaml.YAMLError as e:
|
||||
raise ValueError(f"Error parsing YAML configuration: {e}")
|
||||
@@ -86,7 +88,7 @@ def get_tui_style_from_path(dfs: Dict[str, Any], path: List[str]) -> Optional[st
|
||||
|
||||
def create_widget_from_data(df: pd.DataFrame, tui_style: Optional[str] = None) -> Any:
|
||||
if df is not None and not df.empty:
|
||||
match tui_style:
|
||||
match tui_style: # noqa
|
||||
case None:
|
||||
return create_table(df)
|
||||
|
||||
@@ -141,7 +143,9 @@ def build_subsection(
|
||||
widgets = []
|
||||
if "header_label" in subsection_config:
|
||||
header_class = subsection_config.get("header_class", "")
|
||||
widgets.append(Label(subsection_config["header_label"], classes=header_class))
|
||||
widgets.append(
|
||||
Label(subsection_config["header_label"], classes=header_class)
|
||||
)
|
||||
|
||||
widgets.append(widget)
|
||||
|
||||
@@ -193,7 +197,10 @@ def build_kernel_sections(
|
||||
def create_safe_widget(subsection_name: str, data: dict, section_name: str):
|
||||
if not (isinstance(data, dict) and "df" in data):
|
||||
add_warning(
|
||||
f"Invalid data structure for '{subsection_name}' in section '{section_name}'"
|
||||
(
|
||||
f"Invalid data structure for '{subsection_name}' "
|
||||
f"in section '{section_name}'"
|
||||
)
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -240,7 +247,10 @@ def build_kernel_sections(
|
||||
kernel_children.append(collapsible)
|
||||
except Exception as e:
|
||||
add_warning(
|
||||
f"Error processing subsection '{subsection_name}' in section '{section_name}': {str(e)}"
|
||||
(
|
||||
f"Error processing subsection '{subsection_name}' "
|
||||
f"in section '{section_name}': {str(e)}"
|
||||
)
|
||||
)
|
||||
|
||||
if kernel_children:
|
||||
@@ -251,7 +261,10 @@ def build_kernel_sections(
|
||||
children.append(section_collapsible)
|
||||
except Exception as e:
|
||||
add_warning(
|
||||
f"Failed to create collapsible for section '{section_name}': {str(e)}"
|
||||
(
|
||||
"Failed to create collapsible for section "
|
||||
f"'{section_name}': {str(e)}"
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -289,7 +302,9 @@ def build_section_from_config(
|
||||
if subsection:
|
||||
children.append(subsection)
|
||||
except Exception as e:
|
||||
error_msg = f"{subsection_config.get('title', 'Unknown')} error: {str(e)}"
|
||||
error_msg = (
|
||||
f"{subsection_config.get('title', 'Unknown')} error: {str(e)}"
|
||||
)
|
||||
children.append(Label(error_msg, classes="warning"))
|
||||
else:
|
||||
children = [Label("No configuration provided for this section")]
|
||||
|
||||
+2
-2
@@ -1,4 +1,4 @@
|
||||
from textual import on, work
|
||||
from textual import on
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container, Horizontal
|
||||
from textual.reactive import reactive
|
||||
@@ -74,7 +74,7 @@ class MenuBar(Container):
|
||||
def on_recent_selected(selected_dir: str) -> None:
|
||||
if selected_dir:
|
||||
self.parent_main_view.selected_path = selected_dir
|
||||
dropdown = self.query_one(f"#file-dropdown", DropdownMenu)
|
||||
dropdown = self.query_one("#file-dropdown", DropdownMenu)
|
||||
dropdown.add_class("hidden")
|
||||
self.parent_main_view.run_analysis()
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from typing import List
|
||||
|
||||
from textual.app import ComposeResult
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Panel Widget Modules
|
||||
-------------------
|
||||
|
||||
@@ -22,15 +22,12 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Specialized Widget Modules
|
||||
-------------------------
|
||||
Contains custom widget implementations for the application.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from textual.events import MouseDown, MouseMove, MouseUp
|
||||
from textual.widgets import Static
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from textual.binding import Binding
|
||||
from textual.widgets import TabbedContent, Tabs
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
"""
|
||||
Panel Widget Modules
|
||||
-------------------
|
||||
|
||||
+3
-2
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
@@ -81,7 +80,9 @@ class Terimnal(Container):
|
||||
def update_prompt(self) -> None:
|
||||
"""Update the command prompt in the input field."""
|
||||
input_widget = self.query_one("#terminal-input")
|
||||
current_path = os.path.basename(self.current_directory) or self.current_directory
|
||||
current_path = (
|
||||
os.path.basename(self.current_directory) or self.current_directory
|
||||
)
|
||||
|
||||
if platform.system() != "Windows":
|
||||
prompt = f"{current_path} $ "
|
||||
|
||||
@@ -23,11 +23,10 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import textwrap
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import abstractmethod
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
||||
@@ -77,7 +76,8 @@ class Roofline:
|
||||
run_parameters
|
||||
if run_parameters
|
||||
else {
|
||||
"workload_dir": None, # in some cases (i.e. --specs) path will not be given
|
||||
"workload_dir": None, # in some cases (i.e. --specs),
|
||||
# path will not be given
|
||||
"device_id": 0,
|
||||
"sort_type": "kernels",
|
||||
"mem_level": "ALL",
|
||||
@@ -92,7 +92,7 @@ class Roofline:
|
||||
# Set roofline run parameters from args
|
||||
if hasattr(self.__args, "path") and not run_parameters:
|
||||
self.__run_parameters["workload_dir"] = self.__args.path
|
||||
if hasattr(self.__args, "no_roof") and self.__args.no_roof == False:
|
||||
if hasattr(self.__args, "no_roof") and not self.__args.no_roof:
|
||||
self.__run_parameters["is_standalone"] = True
|
||||
if hasattr(self.__args, "kernel_names") and self.__args.kernel_names:
|
||||
self.__run_parameters["include_kernel_names"] = True
|
||||
@@ -122,7 +122,8 @@ class Roofline:
|
||||
if isinstance(workload_dir_val, list):
|
||||
if not workload_dir_val or not workload_dir_val[0]:
|
||||
console_error(
|
||||
"Workload directory list is empty or invalid. Cannot perform setup.",
|
||||
"Workload directory list is empty or invalid. "
|
||||
"Cannot perform setup.",
|
||||
exit=False,
|
||||
)
|
||||
return
|
||||
@@ -139,7 +140,6 @@ class Roofline:
|
||||
base_path = Path(base_dir)
|
||||
|
||||
if base_path.name == "workloads" and base_path.parent == Path(os.getcwd()):
|
||||
|
||||
app_name = getattr(self.__args, "name", "default_app_name")
|
||||
gpu_model_name = getattr(self.__mspec, "gpu_model", "default_gpu_model")
|
||||
|
||||
@@ -169,14 +169,19 @@ class Roofline:
|
||||
self,
|
||||
ret_df,
|
||||
):
|
||||
"""Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data"""
|
||||
"""
|
||||
Generate a set of empirical roofline plots given a directory containing
|
||||
required profiling and benchmarking data.
|
||||
"""
|
||||
if (
|
||||
not isinstance(self.__run_parameters["workload_dir"], list)
|
||||
and self.__run_parameters["workload_dir"] != None
|
||||
):
|
||||
self.roof_setup()
|
||||
|
||||
console_debug("roofline", "Path: %s" % self.__run_parameters.get("workload_dir"))
|
||||
console_debug(
|
||||
"roofline", "Path: %s" % self.__run_parameters.get("workload_dir")
|
||||
)
|
||||
self.__ai_data = calc_ai(
|
||||
self.__mspec, self.__run_parameters.get("sort_type"), ret_df
|
||||
)
|
||||
@@ -197,8 +202,11 @@ class Roofline:
|
||||
or str(dt) not in SUPPORTED_DATATYPES[gpu_arch]
|
||||
):
|
||||
console_error(
|
||||
"{} is not a supported datatype for roofline profiling on {} (arch: {})".format(
|
||||
str(dt), getattr(self.__mspec, "gpu_model", "N/A"), gpu_arch
|
||||
"{} is not a supported datatype for roofline profiling on {} "
|
||||
"(arch: {})".format(
|
||||
str(dt),
|
||||
getattr(self.__mspec, "gpu_model", "N/A"),
|
||||
gpu_arch,
|
||||
),
|
||||
exit=False,
|
||||
)
|
||||
@@ -230,7 +238,8 @@ class Roofline:
|
||||
if self.__run_parameters.get("include_kernel_names", False):
|
||||
if self.__ai_data is None:
|
||||
console_error(
|
||||
"Roofline Error: self.__ai_data is not populated. Cannot generate kernel names info.",
|
||||
"Roofline Error: self.__ai_data is not populated. "
|
||||
"Cannot generate kernel names info.",
|
||||
exit=False,
|
||||
)
|
||||
original_kernel_names = []
|
||||
@@ -245,7 +254,8 @@ class Roofline:
|
||||
if num_kernels == 0:
|
||||
console_log(
|
||||
"roofline",
|
||||
"No kernel names found to generate 'Kernel Names and Markers' info.",
|
||||
"No kernel names found to generate "
|
||||
"'Kernel Names and Markers' info.",
|
||||
)
|
||||
self.__figure.add_annotation(
|
||||
text="No kernel names to display.",
|
||||
@@ -356,7 +366,8 @@ class Roofline:
|
||||
)
|
||||
|
||||
# Output will be different depending on interaction type:
|
||||
# Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output
|
||||
# Save PDFs if we're in "standalone roofline" mode,
|
||||
# otherwise return HTML to be used in GUI output
|
||||
if self.__run_parameters["is_standalone"]:
|
||||
dev_id = str(self.__run_parameters["device_id"])
|
||||
|
||||
@@ -497,7 +508,8 @@ class Roofline:
|
||||
)
|
||||
console_debug(
|
||||
"roofline",
|
||||
"Roofline analysis only supports AI for floating point calculations at this time",
|
||||
"Roofline analysis only supports AI for "
|
||||
"floating point calculations at this time",
|
||||
)
|
||||
|
||||
#######################
|
||||
@@ -515,15 +527,17 @@ class Roofline:
|
||||
|
||||
# Plot peak BW ceiling(s)
|
||||
for cache_level in cache_hierarchy:
|
||||
|
||||
if (
|
||||
not self.__ceiling_data
|
||||
or cache_level.lower() not in self.__ceiling_data
|
||||
or not isinstance(self.__ceiling_data[cache_level.lower()], (list, tuple))
|
||||
or not isinstance(
|
||||
self.__ceiling_data[cache_level.lower()], (list, tuple)
|
||||
)
|
||||
or len(self.__ceiling_data[cache_level.lower()]) < 3
|
||||
):
|
||||
console_error(
|
||||
f"Ceiling data for {cache_level} is missing or malformed for dtype {dtype}.",
|
||||
f"Ceiling data for {cache_level} is missing "
|
||||
f"or malformed for dtype {dtype}.",
|
||||
exit=False,
|
||||
)
|
||||
continue
|
||||
@@ -612,7 +626,8 @@ class Roofline:
|
||||
|
||||
:param dtype: The datatype to be profiled
|
||||
:type method: str
|
||||
:return: Build the current figure using plot.build(), or None if datatype is not valid for the architecture
|
||||
:return: Build the current figure using plot.build(),
|
||||
or None if datatype is not valid for the architecture
|
||||
:rtype: str or None
|
||||
"""
|
||||
console_debug("roofline", "Generating roofline plot for CLI")
|
||||
@@ -635,7 +650,8 @@ class Roofline:
|
||||
)
|
||||
return
|
||||
|
||||
# Extract base directory path regardless of whether workload_dir is list or string
|
||||
# Extract base directory path regardless of
|
||||
# whether workload_dir is list or string
|
||||
if isinstance(workload_dir, list):
|
||||
if not workload_dir or not workload_dir[0]:
|
||||
console_error(
|
||||
@@ -893,17 +909,20 @@ class Roofline:
|
||||
if not self.__args.remaining:
|
||||
console_error(
|
||||
"profiling"
|
||||
"An <app_cmd> is required to run.\rrocprof-compute profile -n test -- <app_cmd>"
|
||||
"An <app_cmd> is required to run.\r"
|
||||
"rocprof-compute profile -n test -- <app_cmd>"
|
||||
)
|
||||
# TODO: Add an equivelent of characterize_app() to run profiling directly out of this module
|
||||
# TODO: Add an equivalent of characterize_app() to run profiling
|
||||
# directly out of this module
|
||||
|
||||
elif self.__args.no_roof:
|
||||
console_log("roofline", "Skipping roofline.")
|
||||
else:
|
||||
mibench(self.__args, self.__mspec)
|
||||
|
||||
# NB: Currently the post_prossesing() method is the only one being used by rocprofiler-compute,
|
||||
# we include pre_processing() and profile() methods for those who wish to borrow the roofline module
|
||||
# NB: Currently the post_processing() method is the only one being used by
|
||||
# rocprofiler-compute, we include pre_processing() and profile() methods for
|
||||
# those who wish to borrow the roofline module
|
||||
@abstractmethod
|
||||
def post_processing(self):
|
||||
if self.__run_parameters["is_standalone"]:
|
||||
|
||||
@@ -23,10 +23,9 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import getpass
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
@@ -75,7 +74,8 @@ class DatabaseConnector:
|
||||
name = sys_info["workload_name"][0].strip()
|
||||
except KeyError as e:
|
||||
console_error(
|
||||
f"Outdated workload. Cannot find {e} field. Please reprofile to update."
|
||||
f"Outdated workload. "
|
||||
f"Cannot find {e} field. Please reprofile to update."
|
||||
)
|
||||
else:
|
||||
console_error(
|
||||
@@ -140,7 +140,6 @@ class DatabaseConnector:
|
||||
db_to_remove = self.client[self.connection_info["workload"]]
|
||||
|
||||
# check the collection names on the database
|
||||
col_list = db_to_remove.list_collection_names()
|
||||
self.client.drop_database(db_to_remove)
|
||||
db = self.client["workload_names"]
|
||||
col = db["names"]
|
||||
@@ -166,15 +165,17 @@ class DatabaseConnector:
|
||||
is_full_workload_name = self.args.workload.count("_") >= 3
|
||||
if not is_full_workload_name:
|
||||
console_error(
|
||||
"-w/--workload is not valid. Please use full workload name as seen in GUI when removing (i.e. rocprofiler-compute_asw_vcopy_mi200)"
|
||||
"-w/--workload is not valid. Please use full workload name "
|
||||
"as seen in GUI when removing (i.e. "
|
||||
"rocprofiler-compute_asw_vcopy_mi200)"
|
||||
)
|
||||
if (
|
||||
self.connection_info["host"] == None
|
||||
or self.connection_info["username"] == None
|
||||
):
|
||||
console_error(
|
||||
"-H/--host and -u/--username are required when interaction type is set to %s"
|
||||
% self.interaction_type
|
||||
"-H/--host and -u/--username are required when "
|
||||
"interaction type is set to %s" % self.interaction_type
|
||||
)
|
||||
if (
|
||||
self.connection_info["workload"] == "admin"
|
||||
@@ -192,7 +193,8 @@ class DatabaseConnector:
|
||||
or self.connection_info["workload"] == None
|
||||
):
|
||||
console_error(
|
||||
"-H/--host, -w/--workload, -u/--username, and -t/--team are all required when interaction type is set to %s"
|
||||
"-H/--host, -w/--workload, -u/--username, and -t/--team are all "
|
||||
"required when interaction type is set to %s"
|
||||
% self.interaction_type
|
||||
)
|
||||
|
||||
@@ -220,7 +222,7 @@ class DatabaseConnector:
|
||||
else:
|
||||
console_log("database", "Password received")
|
||||
else:
|
||||
password = self.connection_info["password"]
|
||||
pass
|
||||
|
||||
# Establish client connection
|
||||
connection_str = (
|
||||
@@ -239,5 +241,5 @@ class DatabaseConnector:
|
||||
)
|
||||
try:
|
||||
self.client.server_info()
|
||||
except:
|
||||
except Exception:
|
||||
console_error("database", "Unable to connect to the DB server.")
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
@@ -50,7 +49,9 @@ top_stats_build_in_config = {
|
||||
1: {
|
||||
"id": 1,
|
||||
"title": "Dispatch List",
|
||||
"data source": [{"raw_csv_table": {"id": 2, "source": "pmc_dispatch_info.csv"}}],
|
||||
"data source": [
|
||||
{"raw_csv_table": {"id": 2, "source": "pmc_dispatch_info.csv"}}
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -72,7 +73,8 @@ def load_panel_configs(dir):
|
||||
if f.endswith(".yaml"):
|
||||
with open(str(Path(root).joinpath(f))) as file:
|
||||
config = yaml.safe_load(file)
|
||||
# metric key can be None due to some metric tables not having any metrics
|
||||
# metric key can be None due to some metric tables
|
||||
# not having any metrics
|
||||
# metric key should be empty dict instead of None
|
||||
for data_source in config["Panel Config"]["data source"]:
|
||||
metric_table = data_source.get("metric_table")
|
||||
@@ -80,7 +82,8 @@ def load_panel_configs(dir):
|
||||
metric_table["metric"] = {}
|
||||
d[config["Panel Config"]["id"]] = config["Panel Config"]
|
||||
|
||||
# TODO: sort metrics as the header order in case they are not defined in the same order
|
||||
# TODO: sort metrics as the header order in case they
|
||||
# are not defined in the same order
|
||||
|
||||
od = OrderedDict(sorted(d.items()))
|
||||
# for key, value in od.items():
|
||||
@@ -157,9 +160,9 @@ def create_df_kernel_top_stats(
|
||||
axis=1,
|
||||
)
|
||||
|
||||
grouped = time_stats.groupby(by=["Kernel_Name"]).agg(
|
||||
{"ExeTime": ["count", "sum", "mean", "median"]}
|
||||
)
|
||||
grouped = time_stats.groupby(by=["Kernel_Name"]).agg({
|
||||
"ExeTime": ["count", "sum", "mean", "median"]
|
||||
})
|
||||
|
||||
time_unit_str = "(" + time_unit + ")"
|
||||
grouped.columns = [
|
||||
@@ -205,8 +208,8 @@ def create_df_pmc(
|
||||
dfs = []
|
||||
coll_levels = []
|
||||
|
||||
df = pd.DataFrame()
|
||||
new_df = pd.DataFrame()
|
||||
df = pd.DataFrame() # noqa: F841
|
||||
new_df = pd.DataFrame() # noqa: F841
|
||||
for root, dirs, files in os.walk(raw_data_dir):
|
||||
for f in files:
|
||||
# print("file ", f)
|
||||
@@ -250,7 +253,9 @@ def create_df_pmc(
|
||||
else:
|
||||
# regular single node case
|
||||
if nodes is None:
|
||||
return create_single_df_pmc(raw_data_root_dir, None, kernel_verbose, verbose)
|
||||
return create_single_df_pmc(
|
||||
raw_data_root_dir, None, kernel_verbose, verbose
|
||||
)
|
||||
|
||||
# "empty list" means all nodes
|
||||
elif not nodes:
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import colorlover
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
@@ -87,23 +86,21 @@ def discrete_background_color_bins(df, n_bins=5, columns="all"):
|
||||
color = "white" if i > len(bounds) / 2.0 else "inherit"
|
||||
|
||||
for column in df_numeric_columns:
|
||||
styles.append(
|
||||
{
|
||||
"if": {
|
||||
"filter_query": (
|
||||
"{{{column}}} >= {min_bound}"
|
||||
+ (
|
||||
" && {{{column}}} < {max_bound}"
|
||||
if (i < len(bounds) - 1)
|
||||
else ""
|
||||
)
|
||||
).format(column=column, min_bound=min_bound, max_bound=max_bound),
|
||||
"column_id": column,
|
||||
},
|
||||
"backgroundColor": backgroundColor,
|
||||
"color": color,
|
||||
}
|
||||
)
|
||||
styles.append({
|
||||
"if": {
|
||||
"filter_query": (
|
||||
"{{{column}}} >= {min_bound}"
|
||||
+ (
|
||||
" && {{{column}}} < {max_bound}"
|
||||
if (i < len(bounds) - 1)
|
||||
else ""
|
||||
)
|
||||
).format(column=column, min_bound=min_bound, max_bound=max_bound),
|
||||
"column_id": column,
|
||||
},
|
||||
"backgroundColor": backgroundColor,
|
||||
"color": color,
|
||||
})
|
||||
legend.append(
|
||||
html.Div(
|
||||
style={"display": "inline-block", "width": "60px"},
|
||||
@@ -203,7 +200,9 @@ def build_bar_chart(display_df, table_config, barchart_elements, norm_filt):
|
||||
|
||||
# Speed-of-light bar chart
|
||||
elif table_config["id"] in barchart_elements["sol"]:
|
||||
display_df["Avg"] = [float(x) if x != "" else float(0) for x in display_df["Avg"]]
|
||||
display_df["Avg"] = [
|
||||
float(x) if x != "" else float(0) for x in display_df["Avg"]
|
||||
]
|
||||
if table_config["id"] == 1701:
|
||||
# special layout for L2 Cache SOL
|
||||
d_figs.append(
|
||||
@@ -265,7 +264,9 @@ def build_bar_chart(display_df, table_config, barchart_elements, norm_filt):
|
||||
).update_xaxes(range=[0, 110])
|
||||
)
|
||||
else:
|
||||
console_error("Table id %s. Cannot determine barchart type." % table_config["id"])
|
||||
console_error(
|
||||
"Table id %s. Cannot determine barchart type." % table_config["id"]
|
||||
)
|
||||
|
||||
# update layout for each of the charts
|
||||
for fig in d_figs:
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import dash_bootstrap_components as dbc
|
||||
from dash import dcc, html
|
||||
|
||||
@@ -67,7 +66,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
children=[
|
||||
dbc.DropdownMenu(
|
||||
[
|
||||
dbc.DropdownMenuItem("Overview", header=True),
|
||||
dbc.DropdownMenuItem(
|
||||
"Overview", header=True
|
||||
),
|
||||
dbc.DropdownMenuItem(
|
||||
"Roofline",
|
||||
href="#roofline",
|
||||
@@ -88,7 +89,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
href="#system_speed-of-light",
|
||||
external_link=True,
|
||||
),
|
||||
dbc.DropdownMenuItem("Compute", header=True),
|
||||
dbc.DropdownMenuItem(
|
||||
"Compute", header=True
|
||||
),
|
||||
dbc.DropdownMenuItem(
|
||||
"Command Processor (CPF/CPC)",
|
||||
href="#command_processor_cpccpf",
|
||||
@@ -131,8 +134,14 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
external_link=True,
|
||||
),
|
||||
dbc.DropdownMenuItem(
|
||||
"Address Processing Unit and Data Return Path (TA/TD)",
|
||||
href="#address_processing_unit_and_data_return_path_tatd",
|
||||
(
|
||||
"Address Processing Unit and "
|
||||
"Data Return Path (TA/TD)"
|
||||
),
|
||||
href=(
|
||||
"#address_processing_unit_and"
|
||||
"_data_return_path_tatd"
|
||||
),
|
||||
external_link=True,
|
||||
),
|
||||
dbc.DropdownMenuItem(
|
||||
@@ -199,9 +208,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
), # list avail gcd ids
|
||||
id="gcd-filt",
|
||||
multi=True,
|
||||
value=input_filters[
|
||||
"gpu"
|
||||
], # default to any gpu filters passed as args
|
||||
# default to any gpu filters
|
||||
# passed as args
|
||||
value=input_filters["gpu"],
|
||||
placeholder="ALL",
|
||||
clearable=False,
|
||||
style={"width": "60px"},
|
||||
@@ -230,9 +239,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
),
|
||||
id="disp-filt",
|
||||
multi=True,
|
||||
value=input_filters[
|
||||
"dispatch"
|
||||
], # default to any dispatch filters passed as args
|
||||
# default to any dispatch
|
||||
# filters passed as args
|
||||
value=input_filters["dispatch"],
|
||||
placeholder="ALL",
|
||||
style={"width": "150px"},
|
||||
),
|
||||
@@ -254,7 +263,8 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
id="top-n-filt",
|
||||
value=input_filters[
|
||||
"top_n"
|
||||
], # default to any dispatch filters passed as args
|
||||
], # default to any dispatch filters
|
||||
# passed as args
|
||||
clearable=False,
|
||||
style={"width": "50px"},
|
||||
),
|
||||
@@ -287,7 +297,9 @@ def get_header(raw_pmc, input_filters, kernel_names):
|
||||
optionHeight=150,
|
||||
placeholder="ALL",
|
||||
style={
|
||||
"width": "600px", # TODO: Change these widths to % rather than fixed value
|
||||
"width": "600px",
|
||||
# TODO: Change these widths to
|
||||
# % rather than fixed value
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from dash import html
|
||||
from dash_svg import G, Path, Rect, Svg, Text
|
||||
|
||||
@@ -580,21 +579,31 @@ def get_memchart(mem_data, base_data):
|
||||
Rect(x="319", y="329.5", rx="3", ry="3"),
|
||||
Rect(x="319", y="382", rx="3", ry="3"),
|
||||
Rect(x="1367.69", y="271", rx="3", ry="3"),
|
||||
Rect(x="1367.69", y="296.5", rx="3", ry="3"),
|
||||
Rect(x="1367.69", y="322.5", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="1367.69", y="296.5", rx="3", ry="3"
|
||||
),
|
||||
Rect(
|
||||
x="1367.69", y="322.5", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="1078", y="199", rx="3", ry="3"),
|
||||
Rect(x="1078", y="224.5", rx="3", ry="3"),
|
||||
Rect(x="1078", y="250.5", rx="3", ry="3"),
|
||||
Rect(x="771.44", y="103", rx="3", ry="3"),
|
||||
Rect(x="770.44", y="358.75", rx="3", ry="3"),
|
||||
Rect(x="770.44", y="390.25", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="770.44", y="358.75", rx="3", ry="3"
|
||||
),
|
||||
Rect(
|
||||
x="770.44", y="390.25", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="769.44", y="477", rx="3", ry="3"),
|
||||
Rect(x="769.44", y="508.5", rx="3", ry="3"),
|
||||
Rect(x="1078", y="278", rx="3", ry="3"),
|
||||
Rect(x="1078", y="342.5", rx="3", ry="3"),
|
||||
Rect(x="1078", y="368.5", rx="3", ry="3"),
|
||||
Rect(x="772.44", y="179", rx="3", ry="3"),
|
||||
Rect(x="772.44", y="210.18", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="772.44", y="210.18", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="771.44", y="71.28", rx="3", ry="3"),
|
||||
Rect(x="772.44", y="242", rx="3", ry="3"),
|
||||
Rect(x="770.44", y="274.5", rx="3", ry="3"),
|
||||
@@ -617,21 +626,31 @@ def get_memchart(mem_data, base_data):
|
||||
className="val-3",
|
||||
children=[
|
||||
Rect(x="1410.69", y="271", rx="3", ry="3"),
|
||||
Rect(x="1410.69", y="296.5", rx="3", ry="3"),
|
||||
Rect(x="1410.69", y="322.5", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="1410.69", y="296.5", rx="3", ry="3"
|
||||
),
|
||||
Rect(
|
||||
x="1410.69", y="322.5", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="1121", y="199", rx="3", ry="3"),
|
||||
Rect(x="1121", y="224.5", rx="3", ry="3"),
|
||||
Rect(x="1121", y="250.5", rx="3", ry="3"),
|
||||
Rect(x="814.44", y="103", rx="3", ry="3"),
|
||||
Rect(x="813.44", y="358.75", rx="3", ry="3"),
|
||||
Rect(x="813.44", y="390.25", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="813.44", y="358.75", rx="3", ry="3"
|
||||
),
|
||||
Rect(
|
||||
x="813.44", y="390.25", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="812.44", y="477", rx="3", ry="3"),
|
||||
Rect(x="812.44", y="508.5", rx="3", ry="3"),
|
||||
Rect(x="1121", y="278", rx="3", ry="3"),
|
||||
Rect(x="1121", y="342.5", rx="3", ry="3"),
|
||||
Rect(x="1121", y="368.5", rx="3", ry="3"),
|
||||
Rect(x="815.44", y="179", rx="3", ry="3"),
|
||||
Rect(x="815.44", y="210.18", rx="3", ry="3"),
|
||||
Rect(
|
||||
x="815.44", y="210.18", rx="3", ry="3"
|
||||
),
|
||||
Rect(x="814.44", y="71.28", rx="3", ry="3"),
|
||||
Rect(x="815.44", y="242", rx="3", ry="3"),
|
||||
Rect(x="813.44", y="274.5", rx="3", ry="3"),
|
||||
@@ -694,7 +713,8 @@ def get_memchart(mem_data, base_data):
|
||||
children=[
|
||||
Path(
|
||||
id="p1",
|
||||
d="M 100 243.72 L 120 220.28 L 475 220.28 L 495 243.72 Z",
|
||||
d="M 100 243.72 L 120 220.28 L "
|
||||
"475 220.28 L 495 243.72 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -744,7 +764,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 62 L 411.89 65.51 L 413.63 62.01 L 411.88 58.51 Z",
|
||||
d="M 418.88 62 L 411.89 65.51 L "
|
||||
"413.63 62.01 L 411.88 58.51 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -754,7 +775,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 112 L 411.88 115.49 L 413.63 112 L 411.88 108.49 Z",
|
||||
d="M 418.88 112 L 411.88 115.49 L "
|
||||
"413.63 112 L 411.88 108.49 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -764,7 +786,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 162 L 411.88 165.51 L 413.63 162 L 411.88 158.51 Z",
|
||||
d="M 418.88 162 L 411.88 165.51 L "
|
||||
"413.63 162 L 411.88 158.51 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -774,7 +797,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 212 L 411.88 215.5 L 413.63 212 L 411.88 208.5 Z",
|
||||
d="M 418.88 212 L 411.88 215.5 "
|
||||
"L 413.63 212 L 411.88 208.5 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -784,7 +808,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 262 L 411.88 265.5 L 413.63 262 L 411.88 258.5 Z",
|
||||
d="M 418.88 262 L 411.88 265.5 L "
|
||||
"413.63 262 L 411.88 258.5 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -794,7 +819,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 312 L 411.89 315.51 L 413.63 312.01 L 411.88 308.51 Z",
|
||||
d="M 418.88 312 L 411.89 315.51 L "
|
||||
"413.63 312.01 L 411.88 308.51 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -804,7 +830,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 362 L 411.88 365.5 L 413.63 362 L 411.88 358.5 Z",
|
||||
d="M 418.88 362 L 411.88 365.5 L "
|
||||
"413.63 362 L 411.88 358.5 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -814,7 +841,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#ff8000",
|
||||
),
|
||||
Path(
|
||||
d="M 418.88 409 L 411.87 412.47 L 413.63 408.97 L 411.9 405.47 Z",
|
||||
d="M 418.88 409 L 411.87 412.47 L "
|
||||
"413.63 408.97 L 411.9 405.47 Z",
|
||||
fill="#ff8000",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -824,7 +852,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 647.12 207 L 654.12 203.5 L 652.37 207 L 654.12 210.5 Z",
|
||||
d="M 647.12 207 L 654.12 203.5 L "
|
||||
"652.37 207 L 654.12 210.5 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -834,17 +863,20 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 748.88 236.01 L 741.9 239.54 L 743.63 236.03 L 741.86 232.54 Z",
|
||||
d="M 748.88 236.01 L 741.9 239.54 L "
|
||||
"743.63 236.03 L 741.86 232.54 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 750 502 L 110 502 Q 100 502 100 492 L 100 468.37",
|
||||
d="M 750 502 L 110 502 Q 100 "
|
||||
"502 100 492 L 100 468.37",
|
||||
fill="none",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 100 463.12 L 103.5 470.12 L 100 468.37 L 96.5 470.12 Z",
|
||||
d="M 100 463.12 L 103.5 470.12 L "
|
||||
"100 468.37 L 96.5 470.12 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -854,7 +886,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 937.12 504 L 944.12 500.5 L 942.37 504 L 944.12 507.5 Z",
|
||||
d="M 937.12 504 L 944.12 500.5 L "
|
||||
"942.37 504 L 944.12 507.5 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -864,7 +897,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1528.81 242.93 L 1535.81 239.43 L 1534.06 242.93 L 1535.81 246.43 Z",
|
||||
d="M 1528.81 242.93 L 1535.81 239.43 L "
|
||||
"1534.06 242.93 L 1535.81 246.43 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -874,7 +908,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1596.57 274.64 L 1589.57 278.14 L 1591.32 274.64 L 1589.57 271.14 Z",
|
||||
d="M 1596.57 274.64 L 1589.57 278.14 L "
|
||||
"1591.32 274.64 L 1589.57 271.14 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -884,7 +919,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1413 127.12 L 1416.5 134.12 L 1413 132.37 L 1409.5 134.12 Z",
|
||||
d="M 1413 127.12 L 1416.5 134.12 L "
|
||||
"1413 132.37 L 1409.5 134.12 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -894,7 +930,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1443 194.88 L 1439.5 187.88 L 1443 189.63 L 1446.5 187.88 Z",
|
||||
d="M 1443 194.88 L 1439.5 187.88 L "
|
||||
"1443 189.63 L 1446.5 187.88 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -904,7 +941,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1413.01 372.12 L 1416.54 379.1 L 1413.03 377.37 L 1409.54 379.14 Z",
|
||||
d="M 1413.01 372.12 L 1416.54 379.1 L "
|
||||
"1413.03 377.37 L 1409.54 379.14 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -914,12 +952,14 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1443.35 437.38 L 1439.82 430.4 L 1443.33 432.13 L 1446.82 430.36 Z",
|
||||
d="M 1443.35 437.38 L 1439.82 430.4 L "
|
||||
"1443.33 432.13 L 1446.82 430.36 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1145.25 341.38 L 1141.75 334.38 L 1145.25 336.13 L 1148.75 334.38 Z",
|
||||
d="M 1145.25 341.38 L 1141.75 334.38 L "
|
||||
"1145.25 336.13 L 1148.75 334.38 Z",
|
||||
fill="rgb(0, 0, 0)",
|
||||
stroke="rgb(0, 0, 0)",
|
||||
),
|
||||
@@ -929,7 +969,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 647.12 82 L 654.12 78.5 L 652.37 82 L 654.12 85.5 Z",
|
||||
d="M 647.12 82 L 654.12 78.5 L 652.37 "
|
||||
"82 L 654.12 85.5 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -939,7 +980,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 651.12 386.75 L 658.12 383.25 L 656.37 386.75 L 658.12 390.25 Z",
|
||||
d="M 651.12 386.75 L 658.12 383.25 L "
|
||||
"656.37 386.75 L 658.12 390.25 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -949,12 +991,14 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 646.12 269 L 653.12 265.5 L 651.37 269 L 653.12 272.5 Z",
|
||||
d="M 646.12 269 L 653.12 265.5 L "
|
||||
"651.37 269 L 653.12 272.5 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 748.88 269 L 741.88 272.5 L 743.63 269 L 741.88 265.5 Z",
|
||||
d="M 748.88 269 L 741.88 272.5 L "
|
||||
"743.63 269 L 741.88 265.5 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -964,7 +1008,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 939.12 206.41 L 946.12 202.91 L 944.37 206.41 L 946.12 209.91 Z",
|
||||
d="M 939.12 206.41 L 946.12 202.91 L "
|
||||
"944.37 206.41 L 946.12 209.91 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -974,7 +1019,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1040.88 235.42 L 1033.9 238.95 L 1035.63 235.44 L 1033.86 231.95 Z",
|
||||
d="M 1040.88 235.42 L 1033.9 238.95 L "
|
||||
"1035.63 235.44 L 1033.86 231.95 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -984,12 +1030,14 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 938.12 268.41 L 945.12 264.91 L 943.37 268.41 L 945.12 271.91 Z",
|
||||
d="M 938.12 268.41 L 945.12 264.91 L "
|
||||
"943.37 268.41 L 945.12 271.91 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1040.88 268.41 L 1033.88 271.91 L 1035.63 268.41 L 1033.88 264.91 Z",
|
||||
d="M 1040.88 268.41 L 1033.88 271.91 L "
|
||||
"1035.63 268.41 L 1033.88 264.91 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -999,7 +1047,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 939.12 354.32 L 946.12 350.82 L 944.37 354.32 L 946.12 357.82 Z",
|
||||
d="M 939.12 354.32 L 946.12 350.82 L "
|
||||
"944.37 354.32 L 946.12 357.82 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -1009,7 +1058,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1040.88 383.33 L 1033.9 386.86 L 1035.63 383.35 L 1033.86 379.86 Z",
|
||||
d="M 1040.88 383.33 L 1033.9 386.86 L "
|
||||
"1035.63 383.35 L 1033.86 379.86 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -1019,12 +1069,14 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 938.12 416.32 L 945.12 412.82 L 943.37 416.32 L 945.12 419.82 Z",
|
||||
d="M 938.12 416.32 L 945.12 412.82 L "
|
||||
"943.37 416.32 L 945.12 419.82 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1040.88 416.32 L 1033.88 419.82 L 1035.63 416.32 L 1033.88 412.82 Z",
|
||||
d="M 1040.88 416.32 L 1033.88 419.82 L "
|
||||
"1035.63 416.32 L 1033.88 412.82 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -1034,7 +1086,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1252.12 245.75 L 1259.12 242.25 L 1257.37 245.75 L 1259.12 249.25 Z",
|
||||
d="M 1252.12 245.75 L 1259.12 242.25 L "
|
||||
"1257.37 245.75 L 1259.12 249.25 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -1044,7 +1097,8 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1353.88 274.76 L 1346.9 278.29 L 1348.63 274.78 L 1346.86 271.29 Z",
|
||||
d="M 1353.88 274.76 L 1346.9 278.29 L "
|
||||
"1348.63 274.78 L 1346.86 271.29 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
@@ -1054,18 +1108,21 @@ def get_memchart(mem_data, base_data):
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1251.12 307.75 L 1258.12 304.25 L 1256.37 307.75 L 1258.12 311.25 Z",
|
||||
d="M 1251.12 307.75 L 1258.12 304.25 L "
|
||||
"1256.37 307.75 L 1258.12 311.25 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
d="M 1353.88 307.75 L 1346.88 311.25 L 1348.63 307.75 L 1346.88 304.25 Z",
|
||||
d="M 1353.88 307.75 L 1346.88 311.25 L "
|
||||
"1348.63 307.75 L 1346.88 304.25 Z",
|
||||
fill="#00cccc",
|
||||
stroke="#00cccc",
|
||||
),
|
||||
Path(
|
||||
id="p2",
|
||||
d="M 235 67 L 245 57 L 265 57 L 275 67 Z",
|
||||
d="M 235 67 L 245 57 L "
|
||||
"265 57 L 275 67 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1091,7 +1148,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p3",
|
||||
d="M 235 117 L 245 107 L 265 107 L 275 117 Z",
|
||||
d="M 235 117 L 245 107 L 265 "
|
||||
"107 L 275 117 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1117,7 +1175,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p4",
|
||||
d="M 235 167 L 245 157 L 265 157 L 275 167 Z",
|
||||
d="M 235 167 L 245 157 L "
|
||||
"265 157 L 275 167 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1143,7 +1202,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p5",
|
||||
d="M 235 217 L 245 207 L 265 207 L 275 217 Z",
|
||||
d="M 235 217 L 245 207 L "
|
||||
"265 207 L 275 217 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1169,7 +1229,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p6",
|
||||
d="M 235 267 L 245 257 L 265 257 L 275 267 Z",
|
||||
d="M 235 267 L 245 257 L "
|
||||
"265 257 L 275 267 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1195,7 +1256,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p7",
|
||||
d="M 235 317 L 245 307 L 265 307 L 275 317 Z",
|
||||
d="M 235 317 L 245 307 L "
|
||||
"265 307 L 275 317 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1221,7 +1283,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p8",
|
||||
d="M 235 367 L 245 357 L 265 357 L 275 367 Z",
|
||||
d="M 235 367 L 245 357 L "
|
||||
"265 357 L 275 367 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1247,7 +1310,8 @@ def get_memchart(mem_data, base_data):
|
||||
),
|
||||
Path(
|
||||
id="p9",
|
||||
d="M 235 417 L 245 407 L 265 407 L 275 417 Z",
|
||||
d="M 235 417 L 245 407 L "
|
||||
"265 407 L 275 417 Z",
|
||||
fill="#ffffff",
|
||||
stroke="#ff8000",
|
||||
),
|
||||
@@ -1986,11 +2050,11 @@ def format_value_for_display(value, max_length=6):
|
||||
if isinstance(value, (int, float)):
|
||||
value = abs(value)
|
||||
if value >= 1000000000:
|
||||
value = f"{value/1000000000:.1f}B"
|
||||
value = f"{value / 1000000000:.1f}B"
|
||||
elif value >= 1000000:
|
||||
value = f"{value/1000000:.1f}M"
|
||||
value = f"{value / 1000000:.1f}M"
|
||||
elif value >= 1000:
|
||||
value = f"{value/1000:.1f}K"
|
||||
value = f"{value / 1000:.1f}K"
|
||||
elif value == int(value):
|
||||
value = str(int(value))
|
||||
else:
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
@@ -70,7 +69,8 @@ def kernel_name_shortener(df, level):
|
||||
r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?"
|
||||
)
|
||||
|
||||
# works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
|
||||
# works for name:
|
||||
# Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
|
||||
if names_and_args.search(demangled_name):
|
||||
matches = names_and_args.findall(demangled_name)
|
||||
else:
|
||||
@@ -82,7 +82,8 @@ def kernel_name_shortener(df, level):
|
||||
|
||||
current_level = 0
|
||||
for name in matches:
|
||||
##can cause errors if a function name or argument is equal to 'clone'
|
||||
# can cause errors if a function name-
|
||||
# or argument is equal to 'clone'
|
||||
if name[0] == "clone":
|
||||
continue
|
||||
if len(name) == 3:
|
||||
@@ -101,7 +102,8 @@ def kernel_name_shortener(df, level):
|
||||
current_level += name[1].count("<")
|
||||
|
||||
curr_index = 0
|
||||
# cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
|
||||
# cases include '>' '> >, ' have to go in depth here to-
|
||||
# not lose account of commas and current level
|
||||
while name[1].count(">") > 0 and curr_index < len(name[1]):
|
||||
if current_level < level:
|
||||
new_name += name[1][curr_index:]
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
@@ -97,7 +96,9 @@ class ColoredFormatter(logging.Formatter):
|
||||
def format(self, record):
|
||||
levelname = record.levelname
|
||||
if levelname in COLORS:
|
||||
levelname_color = COLOR_SEQ % (30 + COLORS[levelname]) + levelname + RESET_SEQ
|
||||
levelname_color = (
|
||||
COLOR_SEQ % (30 + COLORS[levelname]) + levelname + RESET_SEQ
|
||||
)
|
||||
record.levelname = levelname_color
|
||||
return logging.Formatter.format(self, record)
|
||||
|
||||
@@ -107,9 +108,12 @@ class ColoredFormatterAll(logging.Formatter):
|
||||
levelname = record.levelname
|
||||
if levelname in COLORS:
|
||||
if levelname == "INFO":
|
||||
log_fmt = f"%(message)s"
|
||||
log_fmt = "%(message)s"
|
||||
else:
|
||||
log_fmt = f"{COLOR_SEQ % (30 + COLORS[levelname])}%(levelname)s: %(message)s{RESET_SEQ}"
|
||||
log_fmt = (
|
||||
f"{COLOR_SEQ % (30 + COLORS[levelname])}"
|
||||
f"%(levelname)s: %(message)s{RESET_SEQ}"
|
||||
)
|
||||
formatter = logging.Formatter(log_fmt)
|
||||
return formatter.format(record)
|
||||
|
||||
@@ -171,7 +175,6 @@ def setup_file_handler(loglevel, workload_dir):
|
||||
|
||||
# Setup logger priority - called after argument parsing
|
||||
def setup_logging_priority(verbosity, quietmode, appmode, guimode):
|
||||
|
||||
# set loglevel based on selected verbosity and quietmode
|
||||
levels = [logging.INFO, logging.DEBUG, logging.TRACE]
|
||||
|
||||
@@ -197,7 +200,9 @@ def setup_logging_priority(verbosity, quietmode, appmode, guimode):
|
||||
elif loglevel in {"ERROR", "error"}:
|
||||
loglevel = logging.ERROR
|
||||
else:
|
||||
print("Ignoring unsupported ROCPROFCOMPUTE_LOGLEVEL setting (%s)" % loglevel)
|
||||
print(
|
||||
"Ignoring unsupported ROCPROFCOMPUTE_LOGLEVEL setting (%s)" % loglevel
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# update console loglevel based on command-line args/env settings
|
||||
|
||||
@@ -24,8 +24,7 @@
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from decimal import Decimal
|
||||
from types import SimpleNamespace as NS
|
||||
from typing import Dict, Generator, List, Mapping
|
||||
from typing import Dict
|
||||
|
||||
from plotille import Canvas
|
||||
|
||||
@@ -60,7 +59,8 @@ def make_format_spec(num, align=">"):
|
||||
|
||||
def is_value_valid(value):
|
||||
"""
|
||||
Check if a value is valid and display N/A if not(to be valid, it needs to be not None, and be int or float)
|
||||
Check if a value is valid and display N/A if not
|
||||
(to be valid, it needs to be not None, and be int or float)
|
||||
"""
|
||||
if value is None:
|
||||
return False
|
||||
@@ -82,7 +82,8 @@ def format_text(
|
||||
value_align=">",
|
||||
):
|
||||
"""
|
||||
Format a text string for canvas to display according to input key value pair and make proper aligment
|
||||
Format a text string for canvas to display according to
|
||||
input key value pair and make proper aligment
|
||||
For invalid value, it displays N/A
|
||||
All strings to be displayed on Canvas need to use this method
|
||||
"""
|
||||
@@ -102,12 +103,16 @@ def format_text(
|
||||
value_str = f"{'N/A':{align}{width}}"
|
||||
|
||||
key_format = (
|
||||
make_format_spec(key_step_prec_leftalign, key_align) if key is not None else None
|
||||
make_format_spec(key_step_prec_leftalign, key_align)
|
||||
if key is not None
|
||||
else None
|
||||
)
|
||||
key_str = (
|
||||
"{key:{key_format}}".format(key=key, key_format=key_format)
|
||||
if key and isinstance(key, (int, float))
|
||||
else str(key) if key else None
|
||||
else str(key)
|
||||
if key
|
||||
else None
|
||||
)
|
||||
|
||||
unit_string = post_description_with_space if not "N/A" in value_str else ""
|
||||
@@ -913,7 +918,9 @@ class Fabric(RectFrame):
|
||||
canvas.rect(self.x_min, self.y_min, self.x_max, self.y_max)
|
||||
canvas.text(self.x_min + 6.0, self.y_max - 2.0, " " + self.label)
|
||||
canvas.text(self.x_min + 2.0, self.y_max - 4.0, "Latency (cycles)")
|
||||
canvas.rect(self.x_min + 2.0, self.y_max - 9, self.x_max - 2.0, self.y_max - 4.5)
|
||||
canvas.rect(
|
||||
self.x_min + 2.0, self.y_max - 9, self.x_max - 2.0, self.y_max - 4.5
|
||||
)
|
||||
|
||||
i = 1
|
||||
for k, v in self.lat.items():
|
||||
@@ -960,7 +967,9 @@ class Wire_Fabric_HBM(RectFrame):
|
||||
value_step_prec_rightalign=4.0,
|
||||
),
|
||||
)
|
||||
canvas.text(self.x_min + self.text_x_offset - 2, self.y_max - 1.0, "<-----------")
|
||||
canvas.text(
|
||||
self.x_min + self.text_x_offset - 2, self.y_max - 1.0, "<-----------"
|
||||
)
|
||||
canvas.text(
|
||||
self.x_min + self.text_x_offset,
|
||||
self.y_max - 2.0,
|
||||
@@ -971,7 +980,9 @@ class Wire_Fabric_HBM(RectFrame):
|
||||
value_step_prec_rightalign=4.0,
|
||||
),
|
||||
)
|
||||
canvas.text(self.x_min + self.text_x_offset - 2, self.y_max - 3.0, "----------->")
|
||||
canvas.text(
|
||||
self.x_min + self.text_x_offset - 2, self.y_max - 3.0, "----------->"
|
||||
)
|
||||
|
||||
|
||||
# HBM
|
||||
@@ -1001,7 +1012,7 @@ class MemChart:
|
||||
# Fixme: this is temp solution to filter out non-numeric string
|
||||
for k, v in metric_dict.items():
|
||||
# print(k, type(v))
|
||||
metric_dict[k] = None if type(v) == str else v
|
||||
metric_dict[k] = None if isinstance(v, str) else v
|
||||
|
||||
# Typically, the drawing order would be: left->right, top->down
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
@@ -48,7 +47,6 @@ MI_CONSTANS = {
|
||||
MI350: "mi350",
|
||||
}
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Data Class handling to preserve the hierarchical gpu information
|
||||
# ----------------------------
|
||||
@@ -64,9 +62,9 @@ class MIGPUSpecs:
|
||||
_chip_id_dict = {} # key: chip_id (int)
|
||||
_perfmon_config = {} # key: gpu_arch
|
||||
|
||||
_gpu_arch_to_compute_partition_dict = (
|
||||
{}
|
||||
) # key: gpu_arch, used for gpu archs containing only one gpu model and thus one compute partition
|
||||
_gpu_arch_to_compute_partition_dict = {} # key: gpu_arch, used for gpu archs
|
||||
# containing only one gpu model and
|
||||
# thus one compute partition
|
||||
|
||||
_initialized = False
|
||||
|
||||
@@ -109,7 +107,8 @@ class MIGPUSpecs:
|
||||
console_error(f"Error parsing YAML file '{file_path}': {exc}")
|
||||
except Exception as e:
|
||||
console_error(
|
||||
f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
|
||||
f"An unexpected error occurred while loading YAML "
|
||||
f"file '{file_path}': {e}"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -152,9 +151,13 @@ class MIGPUSpecs:
|
||||
.get("num_xcds", {})
|
||||
)
|
||||
if "chip_ids" in models and "physical" in models["chip_ids"]:
|
||||
cls._chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model
|
||||
cls._chip_id_dict[models["chip_ids"]["physical"]] = (
|
||||
curr_gpu_model
|
||||
)
|
||||
if "chip_ids" in models and "virtual" in models["chip_ids"]:
|
||||
cls._chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model
|
||||
cls._chip_id_dict[models["chip_ids"]["virtual"]] = (
|
||||
curr_gpu_model
|
||||
)
|
||||
|
||||
# detect gpu arch to compute partition relationships
|
||||
cls._populate_gpu_arch_to_compute_partition_dict()
|
||||
@@ -171,10 +174,12 @@ class MIGPUSpecs:
|
||||
compute_partition = cls._num_xcds_dict.get(single_model)
|
||||
|
||||
if compute_partition is not None:
|
||||
cls._gpu_arch_to_compute_partition_dict[gpu_arch] = compute_partition
|
||||
cls._gpu_arch_to_compute_partition_dict[gpu_arch] = (
|
||||
compute_partition
|
||||
)
|
||||
console_debug(
|
||||
"[populate_single_arch_partition_dict] Single model arch found: "
|
||||
"%s -> %s (partition: %s)"
|
||||
"[populate_single_arch_partition_dict] Single model "
|
||||
"arch found: %s -> %s (partition: %s)"
|
||||
% (gpu_arch, single_model, compute_partition)
|
||||
)
|
||||
|
||||
@@ -254,14 +259,16 @@ class MIGPUSpecs:
|
||||
@classmethod
|
||||
def set_default_gpu_settings(self, gpu_arch, gpu_model, compute_partition):
|
||||
"""
|
||||
Set default GPU settings when model is unknown or cannot be determined.
|
||||
NOTE: This is a fallback to gfx942 settings - consider making this architecture-specific.
|
||||
Set default GPU settings when model is unknown or cannot be
|
||||
determined. NOTE: This is a fallback to gfx942 settings -
|
||||
consider making this architecture-specific.
|
||||
"""
|
||||
DEFAULT_COMPUTE_PARTITION = "SPX"
|
||||
DEFAULT_NUM_XCD = 8
|
||||
console_warning(
|
||||
f"Unable to determine xcd count from:\n\t"
|
||||
f"GPU arch: '{gpu_arch}', model: '{gpu_model}', partition: '{compute_partition}'"
|
||||
"Unable to determine xcd count from:\n\t"
|
||||
f"GPU arch: '{gpu_arch}', model: '{gpu_model}',\n\t"
|
||||
f"partition: '{compute_partition}'"
|
||||
)
|
||||
console_warning(
|
||||
f"Applying default gfx942 settings:\n"
|
||||
@@ -276,7 +283,8 @@ class MIGPUSpecs:
|
||||
cls, gpu_arch: str = None, gpu_model: str = None, compute_partition: str = None
|
||||
):
|
||||
"""
|
||||
Retrieve the number of XCDs based on GPU architecture, model, and compute partition.
|
||||
Retrieve the number of XCDs based on GPU architecture, model,
|
||||
and compute partition.
|
||||
|
||||
Priority order:
|
||||
1. Legacy GPU check (returns 1 XCD for older architectures/models)
|
||||
@@ -307,7 +315,8 @@ class MIGPUSpecs:
|
||||
return num_xcds
|
||||
else:
|
||||
console_warning(
|
||||
f"No compute partition data found for architecture '{gpu_arch.upper()}'"
|
||||
f"No compute partition data found for "
|
||||
f"architecture '{gpu_arch.upper()}'"
|
||||
)
|
||||
|
||||
# 3. Fall back to model + partition-based lookup
|
||||
@@ -315,7 +324,8 @@ class MIGPUSpecs:
|
||||
# Validate XCD dictionary is populated
|
||||
if not hasattr(cls, "_num_xcds_dict") or not cls._num_xcds_dict:
|
||||
console_error(
|
||||
"mi300_num_xcds_dict not populated. Did you run parse_mi_gpu_spec()?"
|
||||
"mi300_num_xcds_dict not populated. "
|
||||
"Did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
elif gpu_model_norm not in cls._num_xcds_dict:
|
||||
console_warning(
|
||||
@@ -330,7 +340,9 @@ class MIGPUSpecs:
|
||||
)
|
||||
elif partition_norm not in model_dict:
|
||||
console_warning(
|
||||
f"Unknown compute partition '{compute_partition}' for model '{gpu_model}'"
|
||||
f"Unknown compute partition "
|
||||
f"'{compute_partition}' for model "
|
||||
f"'{gpu_model}'"
|
||||
)
|
||||
else:
|
||||
num_xcds = model_dict[partition_norm]
|
||||
@@ -338,8 +350,10 @@ class MIGPUSpecs:
|
||||
return num_xcds
|
||||
else:
|
||||
console_warning(
|
||||
f"Unknown compute partition found for {compute_partition} / {gpu_model}"
|
||||
"Unknown compute partition found "
|
||||
f"for {compute_partition} / {gpu_model}"
|
||||
)
|
||||
|
||||
else:
|
||||
console_warning("No gpu model provided for num xcds lookup.")
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
@@ -66,10 +65,9 @@ pmc_kernel_top_table_id = 1
|
||||
# },
|
||||
# {
|
||||
# "case": { "$eq": [ $normUnit, "per Sec"]} ,
|
||||
# "then": {"$divide":[{"$subtract": ["&End_Timestamp", "&Start_Timestamp" ]}, 1000000000]}
|
||||
# }
|
||||
# ],
|
||||
# "default": 1
|
||||
# "then": {"$divide":[{"$subtract": ["&End_Timestamp",
|
||||
# "&Start_Timestamp" ]},
|
||||
# 1000000000]}
|
||||
# }
|
||||
# }
|
||||
supported_denom = {
|
||||
@@ -84,16 +82,19 @@ build_in_vars = {
|
||||
"GRBM_GUI_ACTIVE_PER_XCD": "(GRBM_GUI_ACTIVE / $num_xcd)",
|
||||
"GRBM_COUNT_PER_XCD": "(GRBM_COUNT / $num_xcd)",
|
||||
"GRBM_SPI_BUSY_PER_XCD": "(GRBM_SPI_BUSY / $num_xcd)",
|
||||
"numActiveCUs": "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD)), \
|
||||
0) / $max_waves_per_cu) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
|
||||
/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $max_waves_per_cu), 8)), $cu_per_gpu))",
|
||||
"kernelBusyCycles": "ROUND(AVG((((End_Timestamp - Start_Timestamp) / 1000) * $max_sclk)), 0)",
|
||||
"numActiveCUs": "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / \
|
||||
$GRBM_GUI_ACTIVE_PER_XCD)), 0) / $max_waves_per_cu) * 8) + \
|
||||
MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / \
|
||||
$GRBM_GUI_ACTIVE_PER_XCD)), 0), $max_waves_per_cu), 8)), $cu_per_gpu))",
|
||||
"kernelBusyCycles": "ROUND(AVG((((End_Timestamp - Start_Timestamp) / \
|
||||
1000) * $max_sclk)), 0)",
|
||||
"hbmBandwidth": "($max_mclk / 1000 * 32 * $num_hbm_channels)",
|
||||
}
|
||||
|
||||
supported_call = {
|
||||
# If the below has single arg, like(expr), it is a aggr, in which turn to a pd function.
|
||||
# If it has args like list [], in which turn to a python function.
|
||||
# If the below has a single arg, like(expr), it is an aggr,
|
||||
# in which case it turns into a pandas function.
|
||||
# If it has args like a list [], it turns into a Python function.
|
||||
"MIN": "to_min",
|
||||
"MAX": "to_max",
|
||||
# simple aggr
|
||||
@@ -243,11 +244,18 @@ class CodeTransformer(ast.NodeTransformer):
|
||||
|
||||
def visit_IfExp(self, node):
|
||||
self.generic_visit(node)
|
||||
# print("visit_IfExp", type(node.test), type(node.body), type(node.orelse), dir(node))
|
||||
# print(
|
||||
# "visit_IfExp",
|
||||
# type(node.test),
|
||||
# type(node.body),
|
||||
# type(node.orelse),
|
||||
# dir(node),
|
||||
# )
|
||||
|
||||
if isinstance(node.body, ast.Num):
|
||||
raise Exception(
|
||||
"Don't support body of IF with number only! Has to be expr with df['column']."
|
||||
"Don't support body of IF with number only! Has to be expr with "
|
||||
"df['column']."
|
||||
)
|
||||
|
||||
new_node = ast.Expr(
|
||||
@@ -289,19 +297,58 @@ class CodeTransformer(ast.NodeTransformer):
|
||||
|
||||
def build_eval_string(equation, coll_level, config):
|
||||
"""
|
||||
Convert user defined equation string to eval executable string
|
||||
Convert user defined equation string to eval executable string.
|
||||
For example,
|
||||
input: AVG(100 * SQ_ACTIVE_INST_SCA / ( GRBM_GUI_ACTIVE * $numCU ))
|
||||
output: to_avg(100 * raw_pmc_df["pmc_perf"]["SQ_ACTIVE_INST_SCA"] / \
|
||||
(raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] * numCU))
|
||||
input: AVG(((TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31) if (TCC_EA_RDREQ_31 != 0) else (0)))
|
||||
output: to_avg((raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]).where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] != 0, 0))
|
||||
We can not handle the below for now,
|
||||
input: AVG((0 if (TCC_EA_RDREQ_31 == 0) else (TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31)))
|
||||
But potential workaound is,
|
||||
output: to_avg(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"].where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] == 0, raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]))
|
||||
input:
|
||||
AVG(100 * SQ_ACTIVE_INST_SCA / ( GRBM_GUI_ACTIVE * $numCU ))
|
||||
output:
|
||||
to_avg(
|
||||
100 * raw_pmc_df["pmc_perf"]["SQ_ACTIVE_INST_SCA"] /
|
||||
(
|
||||
raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] *
|
||||
numCU
|
||||
)
|
||||
)
|
||||
input:
|
||||
AVG(
|
||||
(
|
||||
TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31
|
||||
)
|
||||
if (TCC_EA_RDREQ_31 != 0)
|
||||
else (0)
|
||||
)
|
||||
output:
|
||||
to_avg(
|
||||
(
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] /
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]
|
||||
).where(
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] != 0,
|
||||
0
|
||||
)
|
||||
)
|
||||
We can not handle the below for now:
|
||||
input:
|
||||
AVG(
|
||||
(
|
||||
0
|
||||
if (TCC_EA_RDREQ_31 == 0)
|
||||
else (
|
||||
TCC_EA_RDREQ_LEVEL_31 /
|
||||
TCC_EA_RDREQ_31
|
||||
)
|
||||
)
|
||||
)
|
||||
But potential workaround is:
|
||||
output:
|
||||
to_avg(
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"].where(
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] == 0,
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] /
|
||||
raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]
|
||||
)
|
||||
)
|
||||
"""
|
||||
|
||||
if coll_level is None:
|
||||
raise Exception("Error: coll_level can not be None.")
|
||||
|
||||
@@ -333,7 +380,7 @@ def build_eval_string(equation, coll_level, config):
|
||||
# apply coll_level
|
||||
if config.get("format_rocprof_output") == "rocpd":
|
||||
# Replace SQ_ACCUM_PREV_HIRES with coll_level_ACCUM then ignore coll_level df
|
||||
s = re.sub(f"SQ_ACCUM_PREV_HIRES", f"{coll_level}_ACCUM", s)
|
||||
s = re.sub("SQ_ACCUM_PREV_HIRES", f"{coll_level}_ACCUM", s)
|
||||
s = re.sub(
|
||||
r"raw_pmc_df", "raw_pmc_df.get('" + schema.pmc_perf_file_prefix + "')", s
|
||||
)
|
||||
@@ -420,21 +467,24 @@ def gen_counter_list(formula):
|
||||
.replace("$denom", "SQ_WAVES")
|
||||
.replace(
|
||||
"$numActiveCUs",
|
||||
"TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD})), \
|
||||
0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
|
||||
/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $maxWavesPerCU), 8)), $numCU))",
|
||||
"TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / "
|
||||
"$GRBM_GUI_ACTIVE_PER_XCD})), 0) / $maxWavesPerCU) * 8) + "
|
||||
"MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / "
|
||||
"$GRBM_GUI_ACTIVE_PER_XCD)), 0), $maxWavesPerCU), 8)), $numCU))",
|
||||
)
|
||||
.replace("$", "")
|
||||
)
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, ast.Name):
|
||||
val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
|
||||
val = (
|
||||
str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
|
||||
)
|
||||
if val.isupper() and val not in function_filter:
|
||||
counters.append(val)
|
||||
visited = True
|
||||
if val in built_in_counter:
|
||||
visited = True
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return visited, counters
|
||||
@@ -645,10 +695,7 @@ def build_dfs(archConfigs, filter_metrics, sys_info):
|
||||
or (data_source_idx == "0") # no filter
|
||||
or (data_source_idx in filter_metrics)
|
||||
):
|
||||
if (
|
||||
"columnwise" in data_config
|
||||
and data_config["columnwise"] == True
|
||||
):
|
||||
if "columnwise" in data_config and data_config["columnwise"]:
|
||||
df = pd.DataFrame(
|
||||
[data_config["source"]], columns=["from_csv_columnwise"]
|
||||
)
|
||||
@@ -726,79 +773,93 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
ammolite__se_per_gpu = int(sys_info.se_per_gpu)
|
||||
if np.isnan(ammolite__se_per_gpu) or ammolite__se_per_gpu == 0:
|
||||
console_warning(
|
||||
"se_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"se_per_gpu is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__pipes_per_gpu = int(sys_info.pipes_per_gpu)
|
||||
if np.isnan(ammolite__pipes_per_gpu) or ammolite__pipes_per_gpu == 0:
|
||||
console_warning(
|
||||
"pipes_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"pipes_per_gpu is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__cu_per_gpu = int(sys_info.cu_per_gpu)
|
||||
if np.isnan(ammolite__cu_per_gpu) or ammolite__cu_per_gpu == 0:
|
||||
console_warning(
|
||||
"cu_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"cu_per_gpu is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__simd_per_cu = int(sys_info.simd_per_cu) # not used
|
||||
if np.isnan(ammolite__simd_per_cu) or ammolite__simd_per_cu == 0:
|
||||
console_warning(
|
||||
"simd_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"simd_per_cu is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__sqc_per_gpu = int(sys_info.sqc_per_gpu)
|
||||
if np.isnan(ammolite__sqc_per_gpu) or ammolite__sqc_per_gpu == 0:
|
||||
console_warning(
|
||||
"sqc_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"sqc_per_gpu is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__lds_banks_per_cu = int(sys_info.lds_banks_per_cu)
|
||||
if np.isnan(ammolite__lds_banks_per_cu) or ammolite__lds_banks_per_cu == 0:
|
||||
console_warning(
|
||||
"lds_banks_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"lds_banks_per_cu is not available in sysinfo.csv, please provide the "
|
||||
"correct value using --specs-correction"
|
||||
)
|
||||
ammolite__cur_sclk = float(sys_info.cur_sclk) # not used
|
||||
if np.isnan(ammolite__cur_sclk) or ammolite__cur_sclk == 0:
|
||||
console_warning(
|
||||
"cur_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"cur_sclk is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__cur_mclk = float(sys_info.cur_mclk) # not used
|
||||
if np.isnan(ammolite__cur_mclk) or ammolite__cur_mclk == 0:
|
||||
console_warning(
|
||||
"cur_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"cur_mclk is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__max_mclk = float(sys_info.max_mclk)
|
||||
if np.isnan(ammolite__max_mclk) or ammolite__max_mclk == 0:
|
||||
console_warning(
|
||||
"max_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"max_mclk is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__max_sclk = float(sys_info.max_sclk)
|
||||
if np.isnan(ammolite__max_sclk) or ammolite__max_sclk == 0:
|
||||
console_warning(
|
||||
"max_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"max_sclk is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__max_waves_per_cu = int(sys_info.max_waves_per_cu)
|
||||
if np.isnan(ammolite__max_waves_per_cu) or ammolite__max_waves_per_cu == 0:
|
||||
console_warning(
|
||||
"max_waver_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"max_waver_per_cu is not available in sysinfo.csv, please provide the "
|
||||
"correct value using --specs-correction"
|
||||
)
|
||||
ammolite__num_hbm_channels = float(sys_info.num_hbm_channels)
|
||||
if np.isnan(ammolite__num_hbm_channels) or ammolite__num_hbm_channels == 0:
|
||||
console_warning(
|
||||
"num_hbm_channels is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"num_hbm_channels is not available in sysinfo.csv, please provide the "
|
||||
"correct value using --specs-correction"
|
||||
)
|
||||
ammolite__total_l2_chan = calc_builtin_var("$total_l2_chan", sys_info)
|
||||
if np.isnan(ammolite__total_l2_chan) or ammolite__total_l2_chan == 0:
|
||||
console_warning(
|
||||
"total_l2_chan is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"total_l2_chan is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__num_xcd = int(sys_info.num_xcd)
|
||||
if np.isnan(ammolite__num_xcd) or ammolite__num_xcd == 0:
|
||||
console_warning(
|
||||
"num_xcd is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"num_xcd is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
ammolite__wave_size = int(sys_info.wave_size)
|
||||
if np.isnan(ammolite__wave_size) or ammolite__wave_size == 0:
|
||||
console_warning(
|
||||
"wave_size is not available in sysinfo.csv, please provide the correct value using --specs-correction"
|
||||
"wave_size is not available in sysinfo.csv, please provide the correct "
|
||||
"value using --specs-correction"
|
||||
)
|
||||
|
||||
# TODO: fix all $normUnit in Unit column or title
|
||||
|
||||
# build and eval all derived build-in global variables
|
||||
@@ -817,9 +878,9 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
except AttributeError as ae:
|
||||
if ae == "'NoneType' object has no attribute 'get'":
|
||||
ammolite__build_in[key] = None
|
||||
ammolite__GRBM_GUI_ACTIVE_PER_XCD = ammolite__build_in["GRBM_GUI_ACTIVE_PER_XCD"]
|
||||
ammolite__GRBM_COUNT_PER_XCD = ammolite__build_in["GRBM_COUNT_PER_XCD"]
|
||||
ammolite__GRBM_SPI_BUSY_PER_XCD = ammolite__build_in["GRBM_SPI_BUSY_PER_XCD"]
|
||||
ammolite__GRBM_GUI_ACTIVE_PER_XCD = ammolite__build_in["GRBM_GUI_ACTIVE_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
ammolite__GRBM_COUNT_PER_XCD = ammolite__build_in["GRBM_COUNT_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
ammolite__GRBM_SPI_BUSY_PER_XCD = ammolite__build_in["GRBM_SPI_BUSY_PER_XCD"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
|
||||
for key, value in build_in_vars.items():
|
||||
# next pass, we evaluate the builtins that depend on the per-XCD values
|
||||
@@ -834,12 +895,16 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
except AttributeError as ae:
|
||||
if ae == "'NoneType' object has no attribute 'get'":
|
||||
ammolite__build_in[key] = None
|
||||
ammolite__numActiveCUs = ammolite__build_in["numActiveCUs"]
|
||||
ammolite__kernelBusyCycles = ammolite__build_in["kernelBusyCycles"]
|
||||
ammolite__hbmBandwidth = ammolite__build_in["hbmBandwidth"]
|
||||
ammolite__numActiveCUs = ammolite__build_in["numActiveCUs"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
ammolite__kernelBusyCycles = ammolite__build_in["kernelBusyCycles"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
ammolite__hbmBandwidth = ammolite__build_in["hbmBandwidth"] # noqa: F841 - Ruff: var utilized during runtime
|
||||
|
||||
# Hmmm... apply + lambda should just work
|
||||
# df['Value'] = df['Value'].apply(lambda s: eval(compile(str(s), '<string>', 'eval')))
|
||||
# df['Value'] = df['Value'].apply(
|
||||
# lambda s: eval(
|
||||
# compile(str(s), '<string>', 'eval')
|
||||
# )
|
||||
# )
|
||||
for id, df in dfs.items():
|
||||
if dfs_type[id] == "metric_table":
|
||||
for idx, row in df.iterrows():
|
||||
@@ -851,7 +916,9 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
print("~" * 40 + "\nExpression:")
|
||||
print(expr, "=", row[expr])
|
||||
print("Inputs:")
|
||||
matched_vars = re.findall(r"ammolite__\w+", row[expr])
|
||||
matched_vars = re.findall(
|
||||
r"ammolite__\w+", row[expr]
|
||||
)
|
||||
if matched_vars:
|
||||
for v in matched_vars:
|
||||
print(
|
||||
@@ -868,7 +935,7 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
m = re.match(
|
||||
r"raw_pmc_df\['(\w+)'\]\['(\w+)'\]", c
|
||||
)
|
||||
t = raw_pmc_df[m.group(1)][
|
||||
t = raw_pmc_df[m.group(1)][ # noqa: F841
|
||||
m.group(2)
|
||||
].to_list()
|
||||
print(c)
|
||||
@@ -890,19 +957,22 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug, config):
|
||||
print("~" * 40)
|
||||
except TypeError:
|
||||
console_warning(
|
||||
"Skipping entry. Encountered a missing counter\n{} has been assigned to None\n{}".format(
|
||||
expr, np.nan
|
||||
"Skipping entry. Encountered a missing "
|
||||
"counter\n{} has been assigned to None\n{}"
|
||||
.format(
|
||||
expr,
|
||||
np.nan,
|
||||
)
|
||||
)
|
||||
except AttributeError as ae:
|
||||
if (
|
||||
str(ae)
|
||||
== "'NoneType' object has no attribute 'get'"
|
||||
== "'NoneType' object has no attribute "
|
||||
"'get'"
|
||||
):
|
||||
console_warning(
|
||||
"Skipping entry. Encountered a missing csv\n{}".format(
|
||||
np.nan
|
||||
)
|
||||
"Skipping entry. Encountered a missing "
|
||||
"csv\n{}".format(np.nan)
|
||||
)
|
||||
else:
|
||||
console_error("analysis", str(ae))
|
||||
@@ -967,14 +1037,16 @@ def apply_filters(workload, dir, is_gui, debug):
|
||||
# We pick up kernel names from kernel ids first.
|
||||
# Then filter valid entries with kernel names.
|
||||
if workload.filter_kernel_ids:
|
||||
if all(type(kid) == int for kid in workload.filter_kernel_ids):
|
||||
if all(isinstance(kid, int) for kid in workload.filter_kernel_ids):
|
||||
# Verify valid kernel filter
|
||||
kernels_df = pd.read_csv(str(Path(dir).joinpath("pmc_kernel_top.csv")))
|
||||
for kernel_id in workload.filter_kernel_ids:
|
||||
if kernel_id >= len(kernels_df["Kernel_Name"]):
|
||||
console_error(
|
||||
"{} is an invalid kernel id. Please enter an id between 0-{}".format(
|
||||
kernel_id, len(kernels_df["Kernel_Name"]) - 1
|
||||
"{} is an invalid kernel id. Please enter an id between 0-{}"
|
||||
.format(
|
||||
kernel_id,
|
||||
len(kernels_df["Kernel_Name"]) - 1,
|
||||
)
|
||||
)
|
||||
kernels = []
|
||||
@@ -992,7 +1064,7 @@ def apply_filters(workload, dir, is_gui, debug):
|
||||
ret_df = ret_df.loc[
|
||||
ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].isin(kernels)
|
||||
]
|
||||
elif all(type(kid) == str for kid in workload.filter_kernel_ids):
|
||||
elif all(isinstance(kid, str) for kid in workload.filter_kernel_ids):
|
||||
df_cleaned = ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].apply(
|
||||
lambda x: x.strip() if isinstance(x, str) else x
|
||||
)
|
||||
@@ -1028,7 +1100,8 @@ def apply_filters(workload, dir, is_gui, debug):
|
||||
|
||||
def find_key_recursively(data, search_key):
|
||||
"""
|
||||
Recursively search for the search_key in the given data (which can be a dict or list).
|
||||
Recursively search for the search_key in the given data
|
||||
(which can be a dict or list).
|
||||
If the key is found, returns the value as a DataFrame.
|
||||
"""
|
||||
if isinstance(data, dict):
|
||||
@@ -1050,7 +1123,6 @@ def find_key_recursively(data, search_key):
|
||||
|
||||
|
||||
def search_key_in_json(file_path, search_key):
|
||||
|
||||
# FIXME:
|
||||
# Load the entire JSON into memory.
|
||||
# Should not use for large file.
|
||||
@@ -1081,14 +1153,18 @@ def search_pc_sampling_record(records):
|
||||
"inst_index": None,
|
||||
"stall_reason": {
|
||||
"NONE": 0,
|
||||
"NO_INSTRUCTION_AVAILABLE": 0, # No instruction available in the instruction cache.
|
||||
# No instruction available in the instruction cache.
|
||||
"NO_INSTRUCTION_AVAILABLE": 0,
|
||||
"ALU_DEPENDENCY": 0, # ALU dependency not resolved.
|
||||
"WAITCNT": 0,
|
||||
"INTERNAL_INSTRUCTION": 0, # Wave executes an internal instruction.
|
||||
"BARRIER_WAIT": 0,
|
||||
"ARBITER_NOT_WIN": 0, # The instruction did not win the arbiter.
|
||||
"ARBITER_WIN_EX_STALL": 0, # Arbiter issued an instruction, but the execution pipe pushed it back from execution.
|
||||
"OTHER_WAIT": 0, # Other types of wait (e.g., wait for XNACK acknowledgment).
|
||||
"ARBITER_WIN_EX_STALL": 0,
|
||||
# Arbiter issued an instruction, but the execution pipe
|
||||
# pushed it back from execution.
|
||||
"OTHER_WAIT": 0,
|
||||
# Other types of wait (e.g., wait for XNACK acknowledgment).
|
||||
"SLEEP_WAIT": 0,
|
||||
"LAST": 0,
|
||||
},
|
||||
@@ -1116,14 +1192,19 @@ def search_pc_sampling_record(records):
|
||||
and inst_index is not None
|
||||
):
|
||||
grouped_data[code_object_id][code_object_offset]["count"] += 1
|
||||
# NB: the write here could be duplicated. If there is perf issue, We might want to opt it.
|
||||
# NB: the write here could be duplicated. If there is perf issue,
|
||||
# We might want to opt it.
|
||||
grouped_data[code_object_id][code_object_offset]["inst_index"] = inst_index
|
||||
|
||||
if len(snapshot):
|
||||
if issued:
|
||||
grouped_data[code_object_id][code_object_offset]["count_issued"] += 1
|
||||
grouped_data[code_object_id][code_object_offset][
|
||||
"count_issued"
|
||||
] += 1
|
||||
else:
|
||||
grouped_data[code_object_id][code_object_offset]["count_stalled"] += 1
|
||||
grouped_data[code_object_id][code_object_offset][
|
||||
"count_stalled"
|
||||
] += 1
|
||||
grouped_data[code_object_id][code_object_offset]["stall_reason"][
|
||||
snapshot.get("stall_reason")[rocp_inst_not_issued_prefix_len:]
|
||||
] += 1
|
||||
@@ -1138,7 +1219,8 @@ def search_pc_sampling_record(records):
|
||||
|
||||
# print(grouped_data)
|
||||
|
||||
# Convert to sorted list of tuples (code_object_id, inst_index, code_object_offset, count)
|
||||
# Convert to sorted list of tuples:
|
||||
# (code_object_id, inst_index, code_object_offset, count)
|
||||
sorted_counts = sorted(
|
||||
[
|
||||
(
|
||||
@@ -1148,7 +1230,8 @@ def search_pc_sampling_record(records):
|
||||
info["count"],
|
||||
info["count_issued"],
|
||||
info["count_stalled"],
|
||||
# For info["stall_reason"], remove the zero entries, sorting the remaining items by their values in descending order
|
||||
# For info["stall_reason"], remove the zero entries,
|
||||
# sorting the remaining items by their values in descending order
|
||||
sorted(
|
||||
((k, v) for k, v in info["stall_reason"].items() if v > 0),
|
||||
key=lambda item: item[1],
|
||||
@@ -1173,7 +1256,8 @@ def load_pc_sampling_data_per_kernel(
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Load PC sampling raw data from json file with given method and kernel name,
|
||||
count pc sampling and sort it in the order of compiled asm and associate with kernel source code if available,
|
||||
count pc sampling and sort it in the order of compiled asm and associate with
|
||||
kernel source code if available,
|
||||
then return df.
|
||||
|
||||
:param method: "host_trap" or "stochastic".
|
||||
@@ -1258,11 +1342,25 @@ def load_pc_sampling_data_per_kernel(
|
||||
(df["code_object_id"] == kernel_info["code_object_id"])
|
||||
& (df["offset"] > kernel_info["entry_byte_offset"])
|
||||
& (df["offset"] < kernel_info["potential_end_offset"])
|
||||
][["inst_index", "offset", "count", "count_issued", "count_stalled", "stall_reason"]]
|
||||
][
|
||||
[
|
||||
"inst_index",
|
||||
"offset",
|
||||
"count",
|
||||
"count_issued",
|
||||
"count_stalled",
|
||||
"stall_reason",
|
||||
]
|
||||
]
|
||||
|
||||
df["offset"] = df["offset"].apply(lambda x: hex(x))
|
||||
|
||||
# df["stall_reason"] = df["stall_reason"].apply(lambda x: ', '.join(f"{k}: {v}" for k, v in x))
|
||||
# df["stall_reason"] = df["stall_reason"].apply(
|
||||
# lambda x: ', '.join(
|
||||
# f"{k}: {v}"
|
||||
# for k, v in x
|
||||
# )
|
||||
# )
|
||||
|
||||
pc_sample_instructions = search_key_in_json(file_name, "pc_sample_instructions")
|
||||
# print(pc_sample_instructions)
|
||||
@@ -1334,7 +1432,9 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
|
||||
# - The default file name is subject to changes from rocprofv3
|
||||
# - Prioritize stochastic
|
||||
# - Alternatively, we could check pc_sampling_method in json
|
||||
csv_file_path = Path.joinpath(Path(dir), file_prefix + "_pc_sampling_stochastic.csv")
|
||||
csv_file_path = Path.joinpath(
|
||||
Path(dir), file_prefix + "_pc_sampling_stochastic.csv"
|
||||
)
|
||||
if csv_file_path.exists():
|
||||
pc_sampling_method = "stochastic"
|
||||
else:
|
||||
@@ -1352,7 +1452,6 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
|
||||
|
||||
# No kernel filter, return grouped and sorted csv directly
|
||||
if not workload.filter_kernel_ids:
|
||||
|
||||
df = pd.read_csv(csv_file_path)
|
||||
# Group by 'Instruction_Comment' and count occurrences
|
||||
grouped_counts = (
|
||||
@@ -1379,7 +1478,8 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
|
||||
|
||||
elif len(workload.filter_kernel_ids) > 1:
|
||||
console_error(
|
||||
"PC sampling supports single kernel only! Please specify -k with single kernel."
|
||||
"PC sampling supports single kernel only! Please specify -k with "
|
||||
"single kernel."
|
||||
)
|
||||
return pd.DataFrame()
|
||||
|
||||
@@ -1409,7 +1509,8 @@ def load_pc_sampling_data(workload, dir, file_prefix, sorting_type):
|
||||
@demarcate
|
||||
def load_kernel_top(workload, dir, args):
|
||||
# NB:
|
||||
# - Do pmc_kernel_top.csv loading before eval_metric because we need the kernel names.
|
||||
# - Do pmc_kernel_top.csv loading before eval_metric because we need the
|
||||
# kernel names.
|
||||
# - There might be a better way/timing to load raw_csv_table.
|
||||
|
||||
# FIXME:
|
||||
@@ -1427,9 +1528,11 @@ def load_kernel_top(workload, dir, args):
|
||||
tmp[id] = pd.read_csv(file)
|
||||
else:
|
||||
console_warning(
|
||||
f"Couldn't load {file.name}. This may result in missing analysis data."
|
||||
f"Couldn't load {file.name}. "
|
||||
"This may result in missing analysis data."
|
||||
)
|
||||
# NB: Special case for sysinfo. Probably room for improvement in this whole function design
|
||||
# NB: Special case for sysinfo. Probably room for improvement in this whole
|
||||
# function design
|
||||
elif "from_csv_columnwise" in df.columns and id == 101:
|
||||
tmp[id] = workload.sys_info.transpose()
|
||||
# All transposed columns should be marked with a general header
|
||||
@@ -1447,7 +1550,8 @@ def load_kernel_top(workload, dir, args):
|
||||
tmp[id].columns = ["Info"]
|
||||
else:
|
||||
console_warning(
|
||||
f"Couldn't load {file.name}. This may result in missing analysis data."
|
||||
f"Couldn't load {file.name}. "
|
||||
"This may result in missing analysis data."
|
||||
)
|
||||
elif "from_pc_sampling" in df.columns:
|
||||
tmp[id] = load_pc_sampling_data(
|
||||
@@ -1513,7 +1617,8 @@ def correct_sys_info(mspec, specs_correction: dict):
|
||||
if not hasattr(mspec, str(k)):
|
||||
console_error(
|
||||
"analyze",
|
||||
f"Invalid specs correction '{k}'. Please use --specs option to peak valid specs",
|
||||
f"Invalid specs correction '{k}'. Please use --specs option "
|
||||
f"to peak valid specs",
|
||||
)
|
||||
setattr(mspec, str(k), v)
|
||||
return mspec.get_class_members()
|
||||
|
||||
@@ -4,7 +4,8 @@ from contextlib import closing
|
||||
|
||||
from utils.logger import console_error
|
||||
|
||||
# From schema definition in source/share/rocprofiler-sdk-rocpd/data_views.sql in rocprofiler-sdk repository
|
||||
# From schema definition in source/share/rocprofiler-sdk-rocpd/data_views.sql
|
||||
# in rocprofiler-sdk repository
|
||||
COUNTERS_COLLECTION_QUERY = """
|
||||
SELECT
|
||||
agent_id as GPU_ID,
|
||||
@@ -39,9 +40,9 @@ def convert_db_to_csv(
|
||||
with closing(conn.execute(COUNTERS_COLLECTION_QUERY)) as cursor:
|
||||
with open(csv_file_path, "w", newline="") as csvfile:
|
||||
writer = csv.writer(csvfile)
|
||||
writer.writerow(
|
||||
[description[0] for description in cursor.description]
|
||||
)
|
||||
writer.writerow([
|
||||
description[0] for description in cursor.description
|
||||
])
|
||||
for row in cursor:
|
||||
writer.writerow(row)
|
||||
except (sqlite3.DatabaseError, IOError) as e:
|
||||
@@ -50,22 +51,21 @@ def convert_db_to_csv(
|
||||
|
||||
def process_rocpd_csv(df):
|
||||
"""
|
||||
Merge counters across unique dispatches from the input dataframe and return processed dataframe.
|
||||
Merge counters across unique dispatches from the
|
||||
input dataframe and return processed dataframe.
|
||||
"""
|
||||
# Only import pandas if needed
|
||||
import pandas as pd
|
||||
|
||||
data = list()
|
||||
# Group by unique kernel and merge into a single row
|
||||
for _, group_df in df.groupby(
|
||||
[
|
||||
"Dispatch_ID",
|
||||
"Kernel_Name",
|
||||
"Grid_Size",
|
||||
"Workgroup_Size",
|
||||
"LDS_Per_Workgroup",
|
||||
]
|
||||
):
|
||||
for _, group_df in df.groupby([
|
||||
"Dispatch_ID",
|
||||
"Kernel_Name",
|
||||
"Grid_Size",
|
||||
"Workgroup_Size",
|
||||
"LDS_Per_Workgroup",
|
||||
]):
|
||||
row = {
|
||||
"GPU_ID": group_df["GPU_ID"].iloc[0],
|
||||
"Grid_Size": group_df["Grid_Size"].iloc[0],
|
||||
@@ -80,7 +80,8 @@ def process_rocpd_csv(df):
|
||||
}
|
||||
# Each counter will become its own column
|
||||
row.update(dict(zip(group_df["Counter_Name"], group_df["Counter_Value"])))
|
||||
# Replace end timestamp with median of durations of group, start timestamp is set to 0
|
||||
# Replace end timestamp with median of durations of group,
|
||||
# start timestamp is set to 0
|
||||
row["End_Timestamp"] = (
|
||||
group_df["End_Timestamp"] - group_df["Start_Timestamp"]
|
||||
).median()
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
@@ -155,7 +154,8 @@ def get_color(catagory):
|
||||
# Plot BW at each cache level
|
||||
# -------------------------------------------------------------------------------------
|
||||
def calc_ceilings(roofline_parameters, dtype, benchmark_data):
|
||||
"""Given benchmarking data, calculate ceilings (or peak performance) for empirical roofline"""
|
||||
"""Given benchmarking data, calculate ceilings
|
||||
(or peak performance) for empirical roofline"""
|
||||
# TODO: This is where filtering by memory level will need to occur for standalone
|
||||
graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []}
|
||||
|
||||
@@ -186,7 +186,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
|
||||
|
||||
if dtype in PEAK_OPS_DATATYPES:
|
||||
x2 = peakOps / peakBw
|
||||
y2 = peakOps
|
||||
y2 = peakOps # noqa: F841
|
||||
|
||||
# Plot MFMA lines (NOTE: Assuming MI200 soc)
|
||||
x1_mfma = peakOps / peakBw
|
||||
@@ -220,9 +220,9 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
|
||||
graphPoints[cacheHierarchy[i].lower()].append([y1, peakY])
|
||||
graphPoints[cacheHierarchy[i].lower()].append(peakBw)
|
||||
|
||||
# -------------------------------------------------------------------------------------
|
||||
# ---------------------------------------------------------------------------------
|
||||
# Plot computing roof
|
||||
# -------------------------------------------------------------------------------------
|
||||
# ---------------------------------------------------------------------------------
|
||||
if dtype in PEAK_OPS_DATATYPES:
|
||||
# Plot FMA roof
|
||||
x0 = XMAX
|
||||
@@ -255,7 +255,8 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Calculate relevant metrics for ai calculation
|
||||
def calc_ai(mspec, sort_type, ret_df):
|
||||
"""Given counter data, calculate arithmetic intensity for each kernel in the application."""
|
||||
"""Given counter data, calculate arithmetic intensity
|
||||
for each kernel in the application."""
|
||||
df = ret_df["pmc_perf"]
|
||||
# Sort by top kernels or top dispatches?
|
||||
df = df.sort_values(by=["Kernel_Name"])
|
||||
@@ -442,7 +443,10 @@ def calc_ai(mspec, sort_type, ret_df):
|
||||
* 64
|
||||
)
|
||||
+ (
|
||||
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
|
||||
(
|
||||
df["TCC_EA0_WRREQ_sum"][idx]
|
||||
- df["TCC_EA0_WRREQ_64B_sum"][idx]
|
||||
)
|
||||
* 32
|
||||
)
|
||||
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
|
||||
@@ -459,7 +463,7 @@ def calc_ai(mspec, sort_type, ret_df):
|
||||
|
||||
calls += 1
|
||||
|
||||
if sort_type == "kernels" and (at_end == True or (kernelName != next_kernelName)):
|
||||
if sort_type == "kernels" and (at_end or (kernelName != next_kernelName)):
|
||||
myList.append(
|
||||
AI_Data(
|
||||
kernelName,
|
||||
@@ -534,9 +538,8 @@ def calc_ai(mspec, sort_type, ret_df):
|
||||
while i < TOP_N and i != len(myList):
|
||||
if myList[i].total_flops == 0:
|
||||
console_debug(
|
||||
"No flops counted for {}, arithmetic intensities will not display on plots.".format(
|
||||
myList[i].KernelName
|
||||
)
|
||||
"No flops counted for {}, arithmetic intensities will not "
|
||||
"display on plots.".format(myList[i].KernelName)
|
||||
)
|
||||
|
||||
kernelNames.append(myList[i].KernelName)
|
||||
@@ -545,28 +548,40 @@ def calc_ai(mspec, sort_type, ret_df):
|
||||
if myList[i].L1cache_data
|
||||
else intensities["ai_l1"].append(0)
|
||||
)
|
||||
# print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null")
|
||||
# print(
|
||||
# "cur_ai_L1",
|
||||
# myList[i].total_flops / myList[i].L1cache_data
|
||||
# ) if myList[i].L1cache_data else print("null")
|
||||
# print()
|
||||
(
|
||||
intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data)
|
||||
if myList[i].L2cache_data
|
||||
else intensities["ai_l2"].append(0)
|
||||
)
|
||||
# print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null")
|
||||
# print(
|
||||
# "cur_ai_L2",
|
||||
# myList[i].total_flops / myList[i].L2cache_data
|
||||
# ) if myList[i].L2cache_data else print("null")
|
||||
# print()
|
||||
(
|
||||
intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data)
|
||||
if myList[i].hbm_data
|
||||
else intensities["ai_hbm"].append(0)
|
||||
)
|
||||
# print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null")
|
||||
# print(
|
||||
# "cur_ai_hbm",
|
||||
# myList[i].total_flops / myList[i].hbm_data
|
||||
# ) if myList[i].hbm_data else print("null")
|
||||
# print()
|
||||
(
|
||||
curr_perf.append(myList[i].total_flops / myList[i].avgDuration)
|
||||
if myList[i].avgDuration
|
||||
else curr_perf.append(0)
|
||||
)
|
||||
# print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null")
|
||||
# print(
|
||||
# "cur_perf",
|
||||
# myList[i].total_flops / myList[i].avgDuration
|
||||
# ) if myList[i].avgDuration else print("null")
|
||||
|
||||
i += 1
|
||||
|
||||
@@ -575,7 +590,7 @@ def calc_ai(mspec, sort_type, ret_df):
|
||||
for i in intensities:
|
||||
values = intensities[i]
|
||||
|
||||
color = get_color(i)
|
||||
color = get_color(i) # noqa: F841
|
||||
x = []
|
||||
y = []
|
||||
for entryIndx in range(0, len(values)):
|
||||
@@ -607,9 +622,8 @@ def constuct_roof(roofline_parameters, dtype):
|
||||
# -----------------------------------------------------
|
||||
# Initialize roofline data dictionary from roofline.csv
|
||||
# -----------------------------------------------------
|
||||
benchmark_data = (
|
||||
{}
|
||||
) # TODO: consider changing this to an ordered dict for consistency over py versions
|
||||
# TODO: consider changing this to an ordered dict for consistency over py versions
|
||||
benchmark_data = {}
|
||||
headers = []
|
||||
try:
|
||||
with open(benchmark_results, "r") as csvfile:
|
||||
@@ -627,7 +641,7 @@ def constuct_roof(roofline_parameters, dtype):
|
||||
|
||||
rowCount += 1
|
||||
csvfile.close()
|
||||
except:
|
||||
except Exception:
|
||||
graphPoints = {
|
||||
"hbm": [None, None, None],
|
||||
"l2": [None, None, None],
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
#
|
||||
# Define all common data storage classes,
|
||||
# predifned dict and global functions.
|
||||
@@ -31,7 +30,7 @@
|
||||
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Generator, List, Mapping
|
||||
from typing import Dict, List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@@ -22,16 +22,13 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
##############################################################################
|
||||
|
||||
"""Get host/gpu specs."""
|
||||
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass, field, fields
|
||||
from datetime import datetime
|
||||
from math import ceil
|
||||
@@ -103,17 +100,19 @@ def kw_only(cls):
|
||||
|
||||
|
||||
def generate_machine_specs(args, sysinfo: dict = None):
|
||||
if not sysinfo is None:
|
||||
if sysinfo is not None:
|
||||
try:
|
||||
sysinfo_ver = str(sysinfo["version"])
|
||||
except KeyError:
|
||||
console_error(
|
||||
"Detected mismatch in sysinfo versioning. You need to reprofile to update data."
|
||||
"Detected mismatch in sysinfo versioning. You need to reprofile "
|
||||
"to update data."
|
||||
)
|
||||
version = get_version(config.rocprof_compute_home)["version"]
|
||||
if sysinfo_ver != version[: version.find(".")]:
|
||||
console_warning(
|
||||
"Detected mismatch in sysinfo versioning. You need to reprofile to update data."
|
||||
"Detected mismatch in sysinfo versioning. You need to reprofile "
|
||||
"to update data."
|
||||
)
|
||||
return MachineSpecs(**sysinfo)
|
||||
# read timestamp info
|
||||
@@ -127,7 +126,8 @@ def generate_machine_specs(args, sysinfo: dict = None):
|
||||
# set specs version
|
||||
vData = get_version(config.rocprof_compute_home)
|
||||
version = vData["version"]
|
||||
# NB: Just taking major as specs version. May want to make this more specific in the future
|
||||
# NB: Just taking major as specs version.
|
||||
# May want to make this more specific in the future
|
||||
specs_version = version[
|
||||
: version.find(".")
|
||||
] # version will always follow 'major.minor.patch' format
|
||||
@@ -173,8 +173,8 @@ def generate_machine_specs(args, sysinfo: dict = None):
|
||||
|
||||
# Apply default compute partition is above fails
|
||||
if compute_partition is None:
|
||||
console_warning(f"Can not detect compute/accelerator partition from amd-smi.")
|
||||
console_warning(f"Applying default compute partition: SPX")
|
||||
console_warning("Can not detect compute/accelerator partition from amd-smi.")
|
||||
console_warning("Applying default compute partition: SPX")
|
||||
compute_partition = "SPX"
|
||||
|
||||
memory_partition = search(memory_partition_pattern, amd_smi_output)
|
||||
@@ -218,14 +218,16 @@ def generate_machine_specs(args, sysinfo: dict = None):
|
||||
|
||||
# Load above SoC specs via module import
|
||||
try:
|
||||
soc_module = importlib.import_module("rocprof_compute_soc.soc_" + specs.gpu_arch)
|
||||
soc_module = importlib.import_module(
|
||||
"rocprof_compute_soc.soc_" + specs.gpu_arch
|
||||
)
|
||||
except ModuleNotFoundError as e:
|
||||
console_error(
|
||||
"Arch %s marked as supported, but couldn't find class implementation %s."
|
||||
% (specs.gpu_arch, e)
|
||||
)
|
||||
soc_class = getattr(soc_module, specs.gpu_arch + "_soc")
|
||||
soc_obj = soc_class(args, specs)
|
||||
soc_obj = soc_class(args, specs) # noqa: F841
|
||||
# Update arch specific specs
|
||||
specs.gpu_model = mi_gpu_specs.get_gpu_model(specs.gpu_arch, specs.gpu_chip_id)
|
||||
specs.num_xcd = mi_gpu_specs.get_num_xcds(
|
||||
@@ -332,7 +334,10 @@ class MachineSpecs:
|
||||
amd_gpu_kernel_version: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "[RESERVED] The version of the AMDGPU driver installed on the machine. Unimplemented.",
|
||||
"doc": (
|
||||
"[RESERVED] The version of the AMDGPU driver installed on the machine. "
|
||||
"Unimplemented."
|
||||
),
|
||||
"name": "AMD GPU Kernel Version",
|
||||
},
|
||||
)
|
||||
@@ -347,7 +352,10 @@ class MachineSpecs:
|
||||
gpu_memory: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "[RESERVED] The total amount of memory available to accelerators/GPUs in the system. Unimplemented.",
|
||||
"doc": (
|
||||
"[RESERVED] The total amount of memory available to accelerators/GPUs "
|
||||
"in the system. Unimplemented."
|
||||
),
|
||||
"unit": "KB",
|
||||
"name": "GPU Memory",
|
||||
},
|
||||
@@ -369,14 +377,20 @@ class MachineSpecs:
|
||||
compute_partition: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The compute partitioning mode active on the accelerators/GPUs in the system (MI300 only).",
|
||||
"doc": (
|
||||
"The compute partitioning mode active on the accelerators/GPUs in the "
|
||||
"system (MI300 only)."
|
||||
),
|
||||
"name": "Compute Partition",
|
||||
},
|
||||
)
|
||||
memory_partition: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The memory partitioning mode active on the accelerators/GPUs in the system (MI300 only).",
|
||||
"doc": (
|
||||
"The memory partitioning mode active on the accelerators/GPUs in the "
|
||||
"system (MI300 only)."
|
||||
),
|
||||
"name": "Memory Partition",
|
||||
},
|
||||
)
|
||||
@@ -417,7 +431,10 @@ class MachineSpecs:
|
||||
gpu_l1: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The size of the vL1D cache (per compute-unit) on the accelerators/GPUs.",
|
||||
"doc": (
|
||||
"The size of the vL1D cache (per compute-unit) on the "
|
||||
"accelerators/GPUs."
|
||||
),
|
||||
"name": "GPU L1",
|
||||
"unit": "KiB",
|
||||
},
|
||||
@@ -425,7 +442,10 @@ class MachineSpecs:
|
||||
gpu_l2: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The size of the vL1D cache (per compute-unit) on the accelerators/GPUs.",
|
||||
"doc": (
|
||||
"The size of the vL1D cache (per compute-unit) on the "
|
||||
"accelerators/GPUs."
|
||||
),
|
||||
"name": "GPU L2",
|
||||
"unit": "KiB",
|
||||
},
|
||||
@@ -433,52 +453,72 @@ class MachineSpecs:
|
||||
cu_per_gpu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The total number of compute units per accelerator/GPU in the system. On systems with configurable\n"
|
||||
"partitioning, (e.g., MI300) this is the total number of compute units in a partition.",
|
||||
"doc": (
|
||||
"The total number of compute units per accelerator/GPU in the system. "
|
||||
"On systems with configurable partitioning, (e.g., MI300) this is "
|
||||
"the total number of compute units in a partition."
|
||||
),
|
||||
"name": "CU per GPU",
|
||||
},
|
||||
)
|
||||
simd_per_cu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number of SIMD processors in a compute unit for the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The number of SIMD processors in a compute unit for the "
|
||||
"accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "SIMD per CU",
|
||||
},
|
||||
)
|
||||
se_per_gpu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number of shader engines on the accelerators/GPUs in the system. On systems with configurable\n"
|
||||
"partitioning, (e.g., MI300) this is the total number of shader engines in a partition.",
|
||||
"doc": (
|
||||
"The number of shader engines on the accelerators/GPUs in the system. "
|
||||
"On systems with configurable partitioning, (e.g., MI300) this is "
|
||||
"the total number of shader engines in a partition."
|
||||
),
|
||||
"name": "SE per GPU",
|
||||
},
|
||||
)
|
||||
wave_size: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number work-items in a wavefront on the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The number work-items in a wavefront on the accelerators/GPUs in "
|
||||
"the system."
|
||||
),
|
||||
"name": "Wave Size",
|
||||
},
|
||||
)
|
||||
workgroup_max_size: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The maximum number of work-items in a workgroup on the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The maximum number of work-items in a workgroup on the "
|
||||
"accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "Workgroup Max Size",
|
||||
},
|
||||
)
|
||||
max_waves_per_cu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The maximum number of wavefronts that can be resident on a compute unit on the\n"
|
||||
"accelerators/GPUs in the system",
|
||||
"doc": (
|
||||
"The maximum number of wavefronts that can be resident on a "
|
||||
"compute unit on the accelerators/GPUs in the system"
|
||||
),
|
||||
"name": "Max Waves per CU",
|
||||
},
|
||||
)
|
||||
max_sclk: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The maximum engine (compute-unit) clock rate of the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The maximum engine (compute-unit) clock rate of the "
|
||||
"accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "Max SCLK",
|
||||
"unit": "MHz",
|
||||
},
|
||||
@@ -486,7 +526,9 @@ class MachineSpecs:
|
||||
max_mclk: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The maximum memory clock rate of the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The maximum memory clock rate of the accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "Max MCLK",
|
||||
"unit": "MHz",
|
||||
},
|
||||
@@ -494,7 +536,10 @@ class MachineSpecs:
|
||||
cur_sclk: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "[RESERVED] The current engine (compute unit) clock rate of the accelerators/GPUs in the system. Unused.",
|
||||
"doc": (
|
||||
"[RESERVED] The current engine (compute unit) clock rate of the "
|
||||
"accelerators/GPUs in the system. Unused."
|
||||
),
|
||||
"name": "Cur SCLK",
|
||||
"unit": "MHz",
|
||||
},
|
||||
@@ -502,54 +547,75 @@ class MachineSpecs:
|
||||
cur_mclk: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "[RESERVED] The current memory clock rate of the accelerators/GPUs in the system. Unused.",
|
||||
"doc": (
|
||||
"[RESERVED] The current memory clock rate of the accelerators/GPUs "
|
||||
"in the system. Unused."
|
||||
),
|
||||
"name": "Cur MCLK",
|
||||
"unit": "MHz",
|
||||
},
|
||||
)
|
||||
_l2_banks: str = None # NB: This only used in flatten_tcc_info_across_hbm_stacks()
|
||||
_l2_banks: str = None # NB: Only used in flatten_tcc_info_across_hbm_stacks()
|
||||
total_l2_chan: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The maximum number of L2 cache channels on the accelerators/GPUs in the system. On systems with\n"
|
||||
"configurable partitioning, (e.g., MI300) this is the total number of L2 cache channels in a partition.",
|
||||
"doc": (
|
||||
"The maximum number of L2 cache channels on the accelerators/GPUs "
|
||||
"in the system. On systems with configurable partitioning, "
|
||||
"(e.g., MI300) this is the total number of L2 cache channels "
|
||||
"in a partition."
|
||||
),
|
||||
"name": "Total L2 Channels",
|
||||
},
|
||||
)
|
||||
lds_banks_per_cu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number of banks in the LDS for a compute unit on the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The number of banks in the LDS for a compute unit on the "
|
||||
"accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "LDS Banks per CU",
|
||||
},
|
||||
)
|
||||
sqc_per_gpu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number of L1I/sL1D caches on the accelerators/GPUs in the system. On systems with\n"
|
||||
"configurable partitioning, (e.g., MI300) this is the total number of L1I/sL1D caches in a partition.",
|
||||
"doc": (
|
||||
"The number of L1I/sL1D caches on the accelerators/GPUs in the "
|
||||
"system. On systems with configurable partitioning, (e.g., MI300) "
|
||||
"this is the total number of L1I/sL1D caches in a partition."
|
||||
),
|
||||
"name": "SQC per GPU",
|
||||
},
|
||||
)
|
||||
pipes_per_gpu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The number of scheduler-pipes on the accelerators/GPUs in the system.",
|
||||
"doc": (
|
||||
"The number of scheduler-pipes on the accelerators/GPUs in the system."
|
||||
),
|
||||
"name": "Pipes per GPU",
|
||||
},
|
||||
)
|
||||
num_xcd: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "The total number of accelerator complex dies in a compute partition on the accelerators/GPUs in the\n"
|
||||
"system. For accelerators without partitioning (i.e., pre-MI300), this is considered to be one.",
|
||||
"doc": (
|
||||
"The total number of accelerator complex dies in a compute partition "
|
||||
"on the accelerators/GPUs in the system. For accelerators without "
|
||||
"partitioning (i.e., pre-MI300), this is considered to be one."
|
||||
),
|
||||
"name": "Num XCDs",
|
||||
"unit": "XCDs",
|
||||
},
|
||||
)
|
||||
num_hbm_channels: str = field(
|
||||
default=None,
|
||||
metadata={"doc": "Number of HBM channels", "name": "HBM channels"},
|
||||
metadata={
|
||||
"doc": "Number of HBM channels",
|
||||
"name": "HBM channels",
|
||||
},
|
||||
)
|
||||
|
||||
def get_hbm_channels(self):
|
||||
@@ -567,16 +633,16 @@ class MachineSpecs:
|
||||
all_populated = True
|
||||
data = {}
|
||||
# dataclass uses an OrderedDict for member variables, ensuring order consistency
|
||||
for field in fields(self):
|
||||
name = field.name
|
||||
for class_field in fields(self):
|
||||
name = class_field.name
|
||||
if not name.startswith("_"):
|
||||
value = getattr(self, name)
|
||||
if value is None:
|
||||
# check if we've marked it optional
|
||||
if (
|
||||
field.metadata
|
||||
and "optional" in field.metadata
|
||||
and field.metadata["optional"]
|
||||
class_field.metadata
|
||||
and "optional" in class_field.metadata
|
||||
and class_field.metadata["optional"]
|
||||
):
|
||||
pass
|
||||
else:
|
||||
@@ -592,27 +658,35 @@ class MachineSpecs:
|
||||
return pd.DataFrame(data, index=[0])
|
||||
|
||||
def __repr__(self):
|
||||
topstr = "Machine Specifications: describing the state of the machine that ROCm Compute Profiler data was collected on.\n"
|
||||
topstr = (
|
||||
"Machine Specifications: describing the state of the machine that "
|
||||
"ROCm Compute Profiler data was collected on.\n"
|
||||
)
|
||||
data = []
|
||||
for field in fields(self):
|
||||
name = field.name
|
||||
for class_field in fields(self):
|
||||
name = class_field.name
|
||||
if not name.startswith("_"):
|
||||
_data = {}
|
||||
value = getattr(self, name)
|
||||
if field.metadata:
|
||||
if class_field.metadata:
|
||||
# check out of table before any re-naming for pretty-printing
|
||||
if "intable" in field.metadata and not field.metadata["intable"]:
|
||||
if (
|
||||
"intable" in class_field.metadata
|
||||
and not class_field.metadata["intable"]
|
||||
):
|
||||
if name == "version":
|
||||
topstr += f"Output version: {value}\n"
|
||||
else:
|
||||
console_error(f"Unknown out of table printing field: {name}")
|
||||
console_error(
|
||||
f"Unknown out of table printing field: {name}"
|
||||
)
|
||||
continue
|
||||
if "name" in field.metadata:
|
||||
name = field.metadata["name"]
|
||||
if "unit" in field.metadata:
|
||||
_data["Unit"] = field.metadata["unit"]
|
||||
if "doc" in field.metadata:
|
||||
_data["Description"] = field.metadata["doc"]
|
||||
if "name" in class_field.metadata:
|
||||
name = class_field.metadata["name"]
|
||||
if "unit" in class_field.metadata:
|
||||
_data["Unit"] = class_field.metadata["unit"]
|
||||
if "doc" in class_field.metadata:
|
||||
_data["Description"] = class_field.metadata["doc"]
|
||||
_data["Spec"] = name
|
||||
_data["Value"] = value
|
||||
data.append(_data)
|
||||
@@ -660,7 +734,10 @@ def run(cmd, exit_on_error=False):
|
||||
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
except FileNotFoundError as e:
|
||||
console_error(
|
||||
f"Unable to parse specs. Can't find ROCm asset: {e.filename}\nTry passing a path to an existing workload results in 'analyze' mode."
|
||||
(
|
||||
f"Unable to parse specs. Can't find ROCm asset: {e.filename}\n"
|
||||
"Try passing a path to an existing workload results in 'analyze' mode."
|
||||
)
|
||||
)
|
||||
|
||||
if exit_on_error:
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import copy
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
@@ -98,8 +97,10 @@ def convert_time_columns(df, time_unit):
|
||||
numeric_values = pd.to_numeric(
|
||||
df_copy.loc[mask, col], errors="coerce"
|
||||
)
|
||||
df_copy.loc[mask, col] = numeric_values / config.TIME_UNITS[time_unit]
|
||||
except:
|
||||
df_copy.loc[mask, col] = (
|
||||
numeric_values / config.TIME_UNITS[time_unit]
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Update the Unit column
|
||||
@@ -147,11 +148,11 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
|
||||
for data_source in panel["data source"]:
|
||||
for type, table_config in data_source.items():
|
||||
# If block filtering was used during analysis, then dont use profiling config
|
||||
# If block filtering was used in profiling config, only show those panels
|
||||
# If block filtering not used in profiling config, show all panels
|
||||
# Skip this table if table id or panel id is not present in block filters
|
||||
# However, always show panel id <= 100
|
||||
# If block filtering was used during analysis, then don't use profiling
|
||||
# config. If block filtering was used in profiling config, only show
|
||||
# those panels. If block filtering not used in profiling config, show
|
||||
# all panels. Skip this table if table id or panel id is not present
|
||||
# in block filters. However, always show panel id <= 100.
|
||||
if (
|
||||
not args.filter_metrics
|
||||
and filter_panel_ids
|
||||
@@ -165,13 +166,16 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
+ str(table_config["id"] % 100)
|
||||
)
|
||||
console_log(
|
||||
f"Not showing table not selected during profiling: {table_id_str} {table_config['title']}"
|
||||
f"Not showing table not selected during profiling: "
|
||||
f"{table_id_str} "
|
||||
f"{table_config['title']}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Show roofline
|
||||
# Check if we have filter_metrics for analyze stage:
|
||||
# no filter_metrics = show all, filter_metrics containing "4" = user requesting roofline chart
|
||||
# no filter_metrics = show all,
|
||||
# filter_metrics containing "4" = user requesting roofline chart
|
||||
if panel_id == 400 and (
|
||||
not args.filter_metrics or "4" in args.filter_metrics
|
||||
):
|
||||
@@ -179,7 +183,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
continue
|
||||
|
||||
# Metrics baseline comparison mode
|
||||
# We cannot guarantee that all runs have the same metrics. Only show common metrics.
|
||||
# We cannot guarantee that all runs have the same metrics.
|
||||
# Only show common metrics.
|
||||
if (
|
||||
type == "metric_table"
|
||||
and "Metric" in table_config["header"].values()
|
||||
@@ -191,7 +196,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
if not common_metrics:
|
||||
common_metrics = set(data.dfs[table_config["id"]]["Metric"])
|
||||
else:
|
||||
common_metrics &= set(data.dfs[table_config["id"]]["Metric"])
|
||||
common_metrics &= set(
|
||||
data.dfs[table_config["id"]]["Metric"]
|
||||
)
|
||||
# Apply common metrics across all runs
|
||||
# Reindex all runs based on first run
|
||||
initial_index = None
|
||||
@@ -217,7 +224,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
|
||||
for header in list(base_df.keys()):
|
||||
# For raw csv table, columns cannot be filtered
|
||||
# If columns are filtered, then skip the headers not in filtered columns
|
||||
# If columns are filtered, then skip the headers not in
|
||||
# filtered columns
|
||||
if (
|
||||
type == "raw_csv_table"
|
||||
or not args.cols
|
||||
@@ -234,7 +242,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
)
|
||||
and header == "Kernel_Name"
|
||||
):
|
||||
# NB: the width of kernel name might depend on the header of the table.
|
||||
# NB: the width of kernel name might depend
|
||||
# on the header of the table.
|
||||
if table_config["source"] == "pmc_kernel_top.csv":
|
||||
adjusted_name = base_df["Kernel_Name"].apply(
|
||||
lambda x: string_multiple_lines(x, 40, 3)
|
||||
@@ -255,10 +264,13 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
cur_df = data.dfs[table_config["id"]]
|
||||
|
||||
if args.time_unit and has_time_data(base_df):
|
||||
cur_df = convert_time_columns(cur_df, args.time_unit)
|
||||
cur_df = convert_time_columns(
|
||||
cur_df, args.time_unit
|
||||
)
|
||||
|
||||
if (type == "raw_csv_table") or (
|
||||
type == "metric_table" and (not header in hidden_cols)
|
||||
type == "metric_table"
|
||||
and (not header in hidden_cols)
|
||||
):
|
||||
if run != base_run:
|
||||
# calc percentage over the baseline
|
||||
@@ -304,9 +316,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
+ "%)"
|
||||
)
|
||||
df = pd.concat([df, t_df], axis=1)
|
||||
|
||||
# DEBUG: When in a CI setting and flag is set,
|
||||
# then verify metrics meet threshold requirement
|
||||
# then verify metrics meet threshold
|
||||
# requirement
|
||||
if (
|
||||
header in ["Value", "Count", "Avg"]
|
||||
and t_df_pretty.abs()
|
||||
@@ -319,14 +331,15 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
t_df_pretty.abs() > args.report_diff
|
||||
]
|
||||
console_warning(
|
||||
"Dataframe diff exceeds %s threshold requirement\nSee metric %s"
|
||||
"Dataframe diff exceeds %s "
|
||||
"threshold requirement\n"
|
||||
"See metric %s"
|
||||
% (
|
||||
str(args.report_diff) + "%",
|
||||
violation_idx.to_numpy(),
|
||||
)
|
||||
)
|
||||
console_warning(df)
|
||||
|
||||
else:
|
||||
cur_df_copy = copy.deepcopy(cur_df)
|
||||
cur_df_copy[header] = [
|
||||
@@ -337,7 +350,9 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
)
|
||||
for x in base_df[header]
|
||||
]
|
||||
df = pd.concat([df, cur_df_copy[header]], axis=1)
|
||||
df = pd.concat(
|
||||
[df, cur_df_copy[header]], axis=1
|
||||
)
|
||||
|
||||
if not df.empty:
|
||||
# subtitle for each table in a panel if existing
|
||||
@@ -348,22 +363,23 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
)
|
||||
|
||||
# Check if any column in df is empty
|
||||
is_empty_columns_exist = any(
|
||||
[
|
||||
df.columns[col_idx]
|
||||
for col_idx in range(len(df.columns))
|
||||
if df.replace("", None).iloc[:, col_idx].isnull().all()
|
||||
]
|
||||
)
|
||||
is_empty_columns_exist = any([
|
||||
df.columns[col_idx]
|
||||
for col_idx in range(len(df.columns))
|
||||
if df.replace("", None).iloc[:, col_idx].isnull().all()
|
||||
])
|
||||
# Do not print the table if any column is empty
|
||||
if is_empty_columns_exist:
|
||||
if "title" in table_config:
|
||||
console_log(
|
||||
f"Not showing table with empty column(s): {table_id_str} {table_config['title']}"
|
||||
f"Not showing table with empty column(s): "
|
||||
f"{table_id_str} "
|
||||
f"{table_config['title']}"
|
||||
)
|
||||
else:
|
||||
console_log(
|
||||
f"Not showing table with empty column(s): {table_id_str}"
|
||||
f"Not showing table with empty column(s): "
|
||||
f"{table_id_str}"
|
||||
)
|
||||
if (
|
||||
"title" in table_config
|
||||
@@ -383,7 +399,8 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
p.joinpath(table_id_str.replace(" ", "_") + ".csv"),
|
||||
index=False,
|
||||
)
|
||||
# Only show top N kernels (as specified in --max-kernel-num) in "Top Stats" section
|
||||
# Only show top N kernels (as specified in --max-kernel-num)
|
||||
# in "Top Stats" section
|
||||
if type == "raw_csv_table" and (
|
||||
table_config["source"] == "pmc_kernel_top.csv"
|
||||
or table_config["source"] == "pmc_dispatch_info.csv"
|
||||
@@ -398,17 +415,17 @@ def show_all(args, runs, archConfigs, output, profiling_config, roof_plot=None):
|
||||
transpose = (
|
||||
type != "raw_csv_table"
|
||||
and "columnwise" in table_config
|
||||
and table_config["columnwise"] == True
|
||||
and table_config["columnwise"]
|
||||
)
|
||||
if not is_empty_columns_exist:
|
||||
|
||||
# enable mem_chart only with single run
|
||||
if (
|
||||
"cli_style" in table_config
|
||||
and table_config["cli_style"] == "mem_chart"
|
||||
and len(runs) == 1
|
||||
):
|
||||
# NB: to avoid broken test with arbitrary number with "--cols" option
|
||||
# NB: to avoid broken test with
|
||||
# arbitrary number with "--cols" option
|
||||
if "Metric" in df.columns and "Value" in df.columns:
|
||||
ss += mem_chart.plot_mem_chart(
|
||||
"",
|
||||
@@ -442,7 +459,8 @@ def show_roof_plot(roof_plot):
|
||||
print(roof_plot)
|
||||
else:
|
||||
console_error(
|
||||
"Cannot create roofline plot for CLI with incomplete/missing roofline profiling data.",
|
||||
"Cannot create roofline plot for CLI with incomplete/missing "
|
||||
"roofline profiling data.",
|
||||
exit=False,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import glob
|
||||
import io
|
||||
import json
|
||||
@@ -74,7 +73,8 @@ def add_counter_extra_config_input_yaml(
|
||||
"""
|
||||
Add a new counter to the rocprofiler-sdk dictionary.
|
||||
Initialize missing parts if data is empty or incomplete.
|
||||
Enforces that 'architectures' and 'properties' are lists for correct YAML list serialization.
|
||||
Enforces that 'architectures' and 'properties' are lists
|
||||
for correct YAML list serialization.
|
||||
Overwrites the counter if it already exists.
|
||||
|
||||
Args:
|
||||
@@ -189,7 +189,7 @@ def get_version(rocprof_compute_home) -> dict:
|
||||
found = True
|
||||
versionDir = dir
|
||||
break
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
if not found:
|
||||
console_error("Cannot find VERSION file at {}".format(searchDirs))
|
||||
@@ -204,7 +204,7 @@ def get_version(rocprof_compute_home) -> dict:
|
||||
MODE = "dev"
|
||||
else:
|
||||
raise Exception(output)
|
||||
except:
|
||||
except Exception:
|
||||
try:
|
||||
shaFile = path(versionDir).joinpath("VERSION.sha").absolute().resolve()
|
||||
with open(shaFile, "r") as file:
|
||||
@@ -263,7 +263,10 @@ def detect_rocprof(args):
|
||||
rocprof_path = shutil.which(rocprof_cmd)
|
||||
if not rocprof_path:
|
||||
console_error(
|
||||
"Please verify installation or set ROCPROF environment variable with full path."
|
||||
(
|
||||
"Please verify installation or set ROCPROF environment variable "
|
||||
"with full path."
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Resolve any sym links in file path
|
||||
@@ -271,7 +274,8 @@ def detect_rocprof(args):
|
||||
console_debug("ROC Profiler: " + str(rocprof_path))
|
||||
|
||||
console_debug("rocprof_cmd is {}".format(str(rocprof_cmd)))
|
||||
return rocprof_cmd # TODO: Do we still need to return this? It's not being used in the function call
|
||||
# TODO: Do we still need to return this? It's not being used in the function call
|
||||
return rocprof_cmd
|
||||
|
||||
|
||||
def store_app_cmd(args):
|
||||
@@ -364,14 +368,12 @@ def get_agent_dict(data):
|
||||
# Returns a dictionary that maps agent ID to GPU ID
|
||||
# starting at 0.
|
||||
def get_gpuid_dict(data):
|
||||
|
||||
agents = data["rocprofiler-sdk-tool"][0]["agents"]
|
||||
|
||||
agent_list = []
|
||||
|
||||
# Get agent ID and node_id for GPU agents only
|
||||
for agent in agents:
|
||||
|
||||
if agent["type"] == 2:
|
||||
agent_id = agent["id"]["handle"]
|
||||
node_id = agent["node_id"]
|
||||
@@ -419,12 +421,13 @@ def v3_json_get_dispatches(data):
|
||||
|
||||
|
||||
def v3_json_to_csv(json_file_path, csv_file_path):
|
||||
|
||||
f = open(json_file_path, "rt")
|
||||
data = json.load(f)
|
||||
|
||||
dispatch_records = v3_json_get_dispatches(data)
|
||||
dispatches = data["rocprofiler-sdk-tool"][0]["callback_records"]["counter_collection"]
|
||||
dispatches = data["rocprofiler-sdk-tool"][0]["callback_records"][
|
||||
"counter_collection"
|
||||
]
|
||||
kernel_symbols = data["rocprofiler-sdk-tool"][0]["kernel_symbols"]
|
||||
agents = get_agent_dict(data)
|
||||
pid = data["rocprofiler-sdk-tool"][0]["metadata"]["pid"]
|
||||
@@ -434,33 +437,30 @@ def v3_json_to_csv(json_file_path, csv_file_path):
|
||||
counter_info = v3_json_get_counters(data)
|
||||
|
||||
# CSV headers. If there are no dispatches we still end up with a valid CSV file.
|
||||
csv_data = dict.fromkeys(
|
||||
[
|
||||
"Dispatch_ID",
|
||||
"GPU_ID",
|
||||
"Queue_ID",
|
||||
"PID",
|
||||
"TID",
|
||||
"Grid_Size",
|
||||
"Workgroup_Size",
|
||||
"LDS_Per_Workgroup",
|
||||
"Scratch_Per_Workitem",
|
||||
"Arch_VGPR",
|
||||
"Accum_VGPR",
|
||||
"SGPR",
|
||||
"Wave_Size",
|
||||
"Kernel_Name",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
"Correlation_ID",
|
||||
]
|
||||
)
|
||||
csv_data = dict.fromkeys([
|
||||
"Dispatch_ID",
|
||||
"GPU_ID",
|
||||
"Queue_ID",
|
||||
"PID",
|
||||
"TID",
|
||||
"Grid_Size",
|
||||
"Workgroup_Size",
|
||||
"LDS_Per_Workgroup",
|
||||
"Scratch_Per_Workitem",
|
||||
"Arch_VGPR",
|
||||
"Accum_VGPR",
|
||||
"SGPR",
|
||||
"Wave_Size",
|
||||
"Kernel_Name",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
"Correlation_ID",
|
||||
])
|
||||
|
||||
for key in csv_data:
|
||||
csv_data[key] = []
|
||||
|
||||
for d in dispatches:
|
||||
|
||||
dispatch_info = d["dispatch_data"]["dispatch_info"]
|
||||
|
||||
agent_id = dispatch_info["agent_id"]["handle"]
|
||||
@@ -542,7 +542,8 @@ def v3_json_to_csv(json_file_path, csv_file_path):
|
||||
|
||||
def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_file):
|
||||
"""
|
||||
Convert the counter file of csv output for a certain csv from rocprofv3 format to rocprfv2 format.
|
||||
Convert the counter file of csv output for a certain csv from rocprofv3 format
|
||||
to rocprfv2 format.
|
||||
This function is not for use of other csv out file such as kernel trace file.
|
||||
"""
|
||||
pd_counter_collections = pd.read_csv(counter_file)
|
||||
@@ -576,7 +577,8 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
|
||||
values="Counter_Value",
|
||||
).reset_index()
|
||||
|
||||
# NB: Agent_Id is int in older rocporfv3, now switched to string with prefix "Agent ". We need to make sure handle both cases.
|
||||
# NB: Agent_Id is int in older rocporfv3, now switched to string with prefix
|
||||
# "Agent ". We need to make sure handle both cases.
|
||||
console_debug(
|
||||
"The type of Agent ID from counter csv file is {}".format(
|
||||
result["Agent_Id"].dtype
|
||||
@@ -592,9 +594,10 @@ def v3_counter_csv_to_v2_csv(counter_file, agent_info_filepath, converted_csv_fi
|
||||
)
|
||||
except Exception as e:
|
||||
console_error(
|
||||
'Parsing rocprofv3 csv output: Error of getting "Agent_Id", the error message "{}"'.format(
|
||||
e
|
||||
)
|
||||
(
|
||||
'Parsing rocprofv3 csv output: Error of getting "Agent_Id", '
|
||||
'the error message "{}"'
|
||||
).format(e)
|
||||
)
|
||||
|
||||
# Grab the Wave_Front_Size column from agent info
|
||||
@@ -762,7 +765,10 @@ def run_prof(
|
||||
# Set counter definitions
|
||||
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
|
||||
console_debug(
|
||||
f"Adding env var for counter definitions: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
(
|
||||
"Adding env var for counter definitions: "
|
||||
f"ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
)
|
||||
)
|
||||
|
||||
# set required env var for >= mi300
|
||||
@@ -836,7 +842,10 @@ def run_prof(
|
||||
return
|
||||
else:
|
||||
console_error(
|
||||
"rocpd output format is only supported with rocprofiler-sdk or rocprofv3."
|
||||
(
|
||||
"rocpd output format is only supported with "
|
||||
"rocprofiler-sdk or rocprofv3."
|
||||
)
|
||||
)
|
||||
elif rocprof_cmd.endswith("v2"):
|
||||
# rocprofv2 has separate csv files for each process
|
||||
@@ -863,12 +872,14 @@ def run_prof(
|
||||
)
|
||||
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves,
|
||||
# rocprof-compute should make updates accordingly
|
||||
if "ROCPROF_HIP_RUNTIME_API_TRACE" in options:
|
||||
process_hip_trace_output(workload_dir, fbase)
|
||||
else:
|
||||
if "--kokkos-trace" in options:
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves, rocprof-compute should make updates accordingly
|
||||
# TODO: as rocprofv3 --kokkos-trace feature improves,
|
||||
# rocprof-compute should make updates accordingly
|
||||
process_kokkos_trace_output(workload_dir, fbase)
|
||||
elif "--hip-trace" in options:
|
||||
process_hip_trace_output(workload_dir, fbase)
|
||||
@@ -880,7 +891,10 @@ def run_prof(
|
||||
)
|
||||
else:
|
||||
console_warning(
|
||||
f"Cannot write results for {fbase}.csv due to no counter csv files generated."
|
||||
(
|
||||
f"Cannot write results for {fbase}.csv due to no counter "
|
||||
"csv files generated."
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
@@ -976,7 +990,9 @@ def pc_sampling_prof(
|
||||
for key, value in options.items():
|
||||
new_env[key] = value
|
||||
console_debug("pc sampling rocprof sdk env vars: {}".format(new_env))
|
||||
console_debug("pc sampling rocprof sdk user provided command: {}".format(appcmd))
|
||||
console_debug(
|
||||
"pc sampling rocprof sdk user provided command: {}".format(appcmd)
|
||||
)
|
||||
success, output = capture_subprocess_output(
|
||||
appcmd, new_env=new_env, profileMode=True
|
||||
)
|
||||
@@ -1011,7 +1027,8 @@ def pc_sampling_prof(
|
||||
def process_rocprofv3_output(rocprof_output, workload_dir, is_timestamps):
|
||||
"""
|
||||
rocprofv3 specific output processing.
|
||||
takes care of json or csv formats, for csv format, additional processing is performed.
|
||||
takes care of json or csv formats, for csv format,
|
||||
additional processing is performed.
|
||||
"""
|
||||
results_files_csv = {}
|
||||
|
||||
@@ -1059,12 +1076,15 @@ def process_rocprofv3_output(rocprof_output, workload_dir, is_timestamps):
|
||||
|
||||
results_files_csv = glob.glob(workload_dir + "/out/pmc_1/*/*_converted.csv")
|
||||
elif is_timestamps:
|
||||
# when the input is timestamps, we know counter csv file is not generated and will instead parse kernel trace file
|
||||
# when the input is timestamps, we know counter csv file
|
||||
# is not generated and will instead parse kernel trace file
|
||||
results_files_csv = glob.glob(
|
||||
workload_dir + "/out/pmc_1/*/*_kernel_trace.csv"
|
||||
)
|
||||
else:
|
||||
# when the input is not for timestamps, and counter csv file is not generated, we assume failed rocprof run and will completely bypass the file generation and merging for current pmc
|
||||
# when the input is not for timestamps, and counter csv file
|
||||
# is not generated, we assume failed rocprof run and will completely
|
||||
# bypass the file generation and merging for current pmc
|
||||
results_files_csv = []
|
||||
else:
|
||||
console_error("The output file of rocprofv3 can only support json or csv!!!")
|
||||
@@ -1121,7 +1141,6 @@ def process_hip_trace_output(workload_dir, fbase):
|
||||
|
||||
|
||||
def replace_timestamps(workload_dir):
|
||||
|
||||
if not path(workload_dir, "timestamps.csv").is_file():
|
||||
return
|
||||
|
||||
@@ -1173,7 +1192,9 @@ def detect_roofline(mspec):
|
||||
if "ROOFLINE_BIN" in os.environ.keys():
|
||||
rooflineBinary = os.environ["ROOFLINE_BIN"]
|
||||
if path(rooflineBinary).exists():
|
||||
msg = "Detected user-supplied binary --> ROOFLINE_BIN = %s\n" % rooflineBinary
|
||||
msg = (
|
||||
"Detected user-supplied binary --> ROOFLINE_BIN = %s\n" % rooflineBinary
|
||||
)
|
||||
console_warning("roofline", msg)
|
||||
# distro stays marked as override and path value is substituted in
|
||||
target_binary["path"] = rooflineBinary
|
||||
@@ -1196,7 +1217,7 @@ def detect_roofline(mspec):
|
||||
|
||||
# Must be a valid SLES machine
|
||||
elif (
|
||||
(type(sles_distro) == str and len(sles_distro) >= 3)
|
||||
(isinstance(sles_distro, str) and len(sles_distro) >= 3)
|
||||
and sles_distro[:2] == "15" # confirm string and len
|
||||
and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6
|
||||
):
|
||||
@@ -1208,7 +1229,9 @@ def detect_roofline(mspec):
|
||||
distro = "22.04"
|
||||
|
||||
else:
|
||||
console_error("roofline", "Cannot find a valid binary for your operating system")
|
||||
console_error(
|
||||
"roofline", "Cannot find a valid binary for your operating system"
|
||||
)
|
||||
|
||||
# distro gets assigned, to follow default roofline bin location and nomenclature
|
||||
target_binary["distro"] = distro
|
||||
@@ -1250,14 +1273,16 @@ def mibench(args, mspec):
|
||||
|
||||
# Distro is valid but cant find rocm ver
|
||||
found = False
|
||||
for path in binary_paths:
|
||||
if pathlib.Path(path).exists():
|
||||
for binary_path in binary_paths:
|
||||
if pathlib.Path(binary_path).exists():
|
||||
found = True
|
||||
path_to_binary = path
|
||||
path_to_binary = binary_path
|
||||
break
|
||||
|
||||
if not found:
|
||||
console_error("roofline", "Unable to locate expected binary (%s)." % binary_paths)
|
||||
console_error(
|
||||
"roofline", "Unable to locate expected binary (%s)." % binary_paths
|
||||
)
|
||||
|
||||
my_args = [
|
||||
path_to_binary,
|
||||
@@ -1303,7 +1328,7 @@ def flatten_tcc_info_across_xcds(file, xcds, tcc_channel_per_xcd):
|
||||
# filter the channel index only
|
||||
p = re.compile(r"\[(\d+)\]")
|
||||
# pick up the 1st element only
|
||||
r = (
|
||||
r = ( # noqa: E731
|
||||
lambda match: "["
|
||||
+ str(int(match.group(1)) + i * tcc_channel_per_xcd)
|
||||
+ "]"
|
||||
@@ -1434,7 +1459,10 @@ def reverse_multi_index_df_pmc(final_df):
|
||||
|
||||
def merge_counters_spatial_multiplex(df_multi_index):
|
||||
"""
|
||||
For spatial multiplexing, this merges counter values for the same kernel that runs on different devices. For time stamp, start time stamp will use median while for end time stamp, it will be equal to the summation between median start stamp and median delta time.
|
||||
For spatial multiplexing, this merges counter values for the same kernel that
|
||||
runs on different devices. For time stamp, start time stamp will use median
|
||||
while for end time stamp, it will be equal to the summation between median
|
||||
start stamp and median delta time.
|
||||
"""
|
||||
non_counter_column_index = [
|
||||
"Dispatch_ID",
|
||||
@@ -1467,7 +1495,8 @@ def merge_counters_spatial_multiplex(df_multi_index):
|
||||
|
||||
result_dfs = []
|
||||
|
||||
# TODO: will need optimize to avoid this convertion to single index format and do merge directly on multi-index dataframe
|
||||
# TODO: will need to optimize to avoid this conversion to single index format
|
||||
# and do merge directly on multi-index dataframe
|
||||
dfs, coll_levels = reverse_multi_index_df_pmc(df_multi_index)
|
||||
|
||||
for df in dfs:
|
||||
@@ -1490,7 +1519,9 @@ def merge_counters_spatial_multiplex(df_multi_index):
|
||||
|
||||
# Process non-counter columns
|
||||
for col in [
|
||||
col for col in non_counter_column_index if col not in expired_column_index
|
||||
col
|
||||
for col in non_counter_column_index
|
||||
if col not in expired_column_index
|
||||
]:
|
||||
if col == "Start_Timestamp":
|
||||
# For Start_Timestamp, take the median
|
||||
@@ -1504,7 +1535,8 @@ def merge_counters_spatial_multiplex(df_multi_index):
|
||||
# For other non-counter columns, take the first occurrence (0th row)
|
||||
merged_row[col] = group.iloc[0][col]
|
||||
|
||||
# Process counter columns (assumed to be all columns not in non_counter_column_index)
|
||||
# Process counter columns (assumed to be all columns not in
|
||||
# non_counter_column_index)
|
||||
counter_columns = [
|
||||
col for col in group.columns if col not in non_counter_column_index
|
||||
]
|
||||
|
||||
@@ -23,14 +23,15 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import subprocess
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
|
||||
rocprof_compute = SourceFileLoader(
|
||||
"rocprof-compute", "src/rocprof-compute"
|
||||
).load_module()
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
@@ -52,7 +53,12 @@ def pytest_addoption(parser):
|
||||
@pytest.fixture
|
||||
def binary_handler_profile_rocprof_compute(request):
|
||||
def _handler(
|
||||
config, workload_dir, options=[], check_success=True, roof=False, app_name="app_1"
|
||||
config,
|
||||
workload_dir,
|
||||
options=[],
|
||||
check_success=True,
|
||||
roof=False,
|
||||
app_name="app_1",
|
||||
):
|
||||
if request.config.getoption("--rocprofiler-sdk-library-path"):
|
||||
options.extend(
|
||||
|
||||
@@ -23,11 +23,9 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
|
||||
if __name__ == "__main__":
|
||||
my_parser = argparse.ArgumentParser(description="create test_analyze_workloads.py")
|
||||
@@ -52,11 +50,16 @@ if __name__ == "__main__":
|
||||
+ arch
|
||||
+ "():"
|
||||
+ "\n\twith pytest.raises(SystemExit) as e:"
|
||||
+ "\n\t\twith patch('sys.argv',['rocprof-compute', 'analyze', '--path', '"
|
||||
+ workload
|
||||
+ "/"
|
||||
+ arch
|
||||
+ "']):\n\t\t\trocprof_compute.main()"
|
||||
+ (
|
||||
"\n\t\twith patch("
|
||||
"'sys.argv',"
|
||||
"["
|
||||
"'rocprof-compute', "
|
||||
"'analyze', "
|
||||
"'--path', "
|
||||
"'" + workload + "/" + arch + "']"
|
||||
"):\n\t\t\trocprof_compute.main()"
|
||||
)
|
||||
+ "\n\tassert e.value.code == 0"
|
||||
)
|
||||
f.write(test)
|
||||
|
||||
@@ -23,23 +23,18 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import csv
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import test_utils
|
||||
|
||||
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
|
||||
rocprof_compute = SourceFileLoader(
|
||||
"rocprof-compute", "src/rocprof-compute"
|
||||
).load_module()
|
||||
|
||||
config = {}
|
||||
config["vseq"] = ["./tests/vsequential_access"]
|
||||
@@ -49,7 +44,6 @@ config["COUNTER_LOGGING"] = False
|
||||
config["METRIC_COMPARE"] = False
|
||||
config["METRIC_LOGGING"] = False
|
||||
|
||||
|
||||
SUPPORTED_ARCHS = {
|
||||
"gfx940": {"mi300": ["MI300A_A0"]},
|
||||
"gfx941": {"mi300": ["MI300X_A0"]},
|
||||
@@ -153,7 +147,6 @@ def test_L1_cache_counters(
|
||||
base = Path(test_utils.get_output_dir())
|
||||
|
||||
for app_name in app_names:
|
||||
|
||||
workload_dir = str(base / app_name)
|
||||
|
||||
# 1. profile the app
|
||||
@@ -168,9 +161,15 @@ def test_L1_cache_counters(
|
||||
assert return_code == 0
|
||||
|
||||
# 2. analyze the results
|
||||
return_code = binary_handler_analyze_rocprof_compute(
|
||||
["analyze", "--path", workload_dir, "-b", "16.3", "--save-dfs", workload_dir]
|
||||
)
|
||||
return_code = binary_handler_analyze_rocprof_compute([
|
||||
"analyze",
|
||||
"--path",
|
||||
workload_dir,
|
||||
"-b",
|
||||
"16.3",
|
||||
"--save-dfs",
|
||||
workload_dir,
|
||||
])
|
||||
assert return_code == 0
|
||||
|
||||
# 3. save results in local
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -11,6 +11,6 @@ def test_modification_time():
|
||||
hash_map = yaml.safe_load(f)
|
||||
for file, hash in hash_map.items():
|
||||
file_hash = hashlib.sha256(Path(file).read_bytes()).hexdigest()
|
||||
assert (
|
||||
file_hash == hash
|
||||
), f"Hash mismatch for {file}: expected {hash}, got {file_hash}"
|
||||
assert file_hash == hash, (
|
||||
f"Hash mismatch for {file}: expected {hash}, got {file_hash}"
|
||||
)
|
||||
|
||||
@@ -23,12 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import logging
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from db_connector import DatabaseConnector
|
||||
|
||||
logging.TRACE = logging.DEBUG - 5
|
||||
logging.addLevelName(logging.TRACE, "TRACE")
|
||||
@@ -40,8 +40,6 @@ def trace_logger(message, *args, **kwargs):
|
||||
|
||||
setattr(logging, "trace", trace_logger)
|
||||
|
||||
from db_connector import DatabaseConnector
|
||||
|
||||
"""
|
||||
Tests for the DatabaseConnector class that tests almost methods with initialization,
|
||||
CSV import, database removal, and error handling.
|
||||
@@ -109,9 +107,10 @@ class TestDatabaseConnector:
|
||||
mock_path.return_value.joinpath.return_value = "/fake/path/sysinfo.csv"
|
||||
mock_path.return_value.is_file.return_value = True
|
||||
|
||||
mock_sysinfo = pd.DataFrame(
|
||||
{"gpu_model": ["MI100 "], "workload_name": [" test_workload"]}
|
||||
)
|
||||
mock_sysinfo = pd.DataFrame({
|
||||
"gpu_model": ["MI100 "],
|
||||
"workload_name": [" test_workload"],
|
||||
})
|
||||
mock_read_csv.return_value = mock_sysinfo
|
||||
|
||||
connector = DatabaseConnector(mock_args_import)
|
||||
@@ -236,9 +235,9 @@ class TestDatabaseConnector:
|
||||
connector.db_remove()
|
||||
|
||||
mock_client.drop_database.assert_called_once_with(mock_db_to_remove)
|
||||
mock_names_col.delete_many.assert_called_once_with(
|
||||
{"name": "rocprofiler-compute_test_team_workload_mi100"}
|
||||
)
|
||||
mock_names_col.delete_many.assert_called_once_with({
|
||||
"name": "rocprofiler-compute_test_team_workload_mi100"
|
||||
})
|
||||
|
||||
def test_pre_processing_no_action_specified(self, mock_args_import):
|
||||
"""Test pre_processing when neither upload nor remove is specified"""
|
||||
@@ -398,9 +397,10 @@ class TestDatabaseConnectorIntegration:
|
||||
)
|
||||
mock_path.return_value.is_file.return_value = True
|
||||
|
||||
mock_sysinfo = pd.DataFrame(
|
||||
{"gpu_model": ["MI100"], "workload_name": ["device_filter"]}
|
||||
)
|
||||
mock_sysinfo = pd.DataFrame({
|
||||
"gpu_model": ["MI100"],
|
||||
"workload_name": ["device_filter"],
|
||||
})
|
||||
mock_read_csv.return_value = mock_sysinfo
|
||||
|
||||
connector = DatabaseConnector(args)
|
||||
|
||||
@@ -23,24 +23,20 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, mock_open, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from src.utils.specs import generate_machine_specs
|
||||
|
||||
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
|
||||
|
||||
rocprof_compute = SourceFileLoader(
|
||||
"rocprof-compute", "src/rocprof-compute"
|
||||
).load_module()
|
||||
|
||||
# NOTE: Only testing gfx942 for now.
|
||||
GFX942_CHIP_IDS_TO_NUM_XCDS = {
|
||||
@@ -70,7 +66,7 @@ def parse_table_dict(output: str) -> dict:
|
||||
"""
|
||||
Parse an ASCII table into a dict mapping Spec -> Value.
|
||||
"""
|
||||
lines = [l for l in output.splitlines() if l.startswith("│")]
|
||||
lines = [line for line in output.splitlines() if line.startswith("│")]
|
||||
# locate header row (the one containing 'Spec' and 'Value')
|
||||
header_idx = next(
|
||||
(i for i, ln in enumerate(lines) if "Spec" in ln and "Value" in ln), None
|
||||
@@ -132,7 +128,6 @@ def get_num_xcds():
|
||||
|
||||
|
||||
def get_gpu_arch():
|
||||
|
||||
rocminfo = str(
|
||||
# decode with utf-8 to account for rocm-smi changes in latest rocm
|
||||
subprocess.run(
|
||||
@@ -182,18 +177,18 @@ def test_num_xcds_cli_output():
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
assert (
|
||||
proc.returncode == 0
|
||||
), f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}"
|
||||
assert proc.returncode == 0, (
|
||||
f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}"
|
||||
)
|
||||
|
||||
# 3. strip ANSI, parse table
|
||||
clean = strip_ansi(proc.stdout)
|
||||
return_dict = parse_table_dict(clean)
|
||||
|
||||
# 4. check results are expected
|
||||
assert (
|
||||
"Compute Partition" in return_dict
|
||||
), "Spec 'Compute Partition' not found in table"
|
||||
assert "Compute Partition" in return_dict, (
|
||||
"Spec 'Compute Partition' not found in table"
|
||||
)
|
||||
assert "Num XCDs" in return_dict, "Spec 'Num XCDs' not found in table"
|
||||
|
||||
compute_partition_actual = return_dict["Compute Partition"]
|
||||
@@ -257,7 +252,7 @@ def test_get_gpu_series_uninitialized():
|
||||
|
||||
with patch.object(MIGPUSpecs, "_gpu_series_dict", {}):
|
||||
with pytest.raises(SystemExit):
|
||||
result = MIGPUSpecs.get_gpu_series("gfx942")
|
||||
result = MIGPUSpecs.get_gpu_series("gfx942") # noqa: F841
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
@@ -315,7 +310,7 @@ def test_get_num_xcds_no_compute_partition_data():
|
||||
|
||||
mock_dict = {"gfx942": None}
|
||||
with patch.object(MIGPUSpecs, "_gpu_arch_to_compute_partition_dict", mock_dict):
|
||||
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx942")
|
||||
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx942") # noqa: F841
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
@@ -333,7 +328,7 @@ def test_get_num_xcds_unknown_gpu_model():
|
||||
"""Test get_num_xcds with unknown gpu model - covers lines 319-321"""
|
||||
from src.utils.mi_gpu_spec import MIGPUSpecs
|
||||
|
||||
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx950", gpu_model="UNKNOWN_MODEL")
|
||||
result = MIGPUSpecs.get_num_xcds(gpu_arch="gfx950", gpu_model="UNKNOWN_MODEL") # noqa: F841
|
||||
|
||||
|
||||
@pytest.mark.misc
|
||||
@@ -341,7 +336,7 @@ def test_get_num_xcds_no_compute_partition():
|
||||
"""Test get_num_xcds with no compute partition - covers lines 325-327"""
|
||||
from src.utils.mi_gpu_spec import MIGPUSpecs
|
||||
|
||||
result = MIGPUSpecs.get_num_xcds(
|
||||
result = MIGPUSpecs.get_num_xcds( # noqa: F841
|
||||
gpu_arch="gfx950", gpu_model="MI350", compute_partition=""
|
||||
)
|
||||
|
||||
@@ -351,7 +346,7 @@ def test_get_num_xcds_unknown_compute_partition():
|
||||
"""Test get_num_xcds with unknown compute partition - covers lines 329-332"""
|
||||
from src.utils.mi_gpu_spec import MIGPUSpecs
|
||||
|
||||
result = MIGPUSpecs.get_num_xcds(
|
||||
result = MIGPUSpecs.get_num_xcds( # noqa: F841
|
||||
gpu_arch="gfx950", gpu_model="MI350", compute_partition="UNKNOWN"
|
||||
)
|
||||
|
||||
@@ -363,7 +358,7 @@ def test_get_num_xcds_none_partition_value():
|
||||
|
||||
mock_dict = {"mi350": {"spx": None}}
|
||||
with patch.object(MIGPUSpecs, "_num_xcds_dict", mock_dict):
|
||||
result = MIGPUSpecs.get_num_xcds(
|
||||
result = MIGPUSpecs.get_num_xcds( # noqa: F841
|
||||
gpu_arch="gfx950", gpu_model="MI350", compute_partition="spx"
|
||||
)
|
||||
|
||||
@@ -373,7 +368,7 @@ def test_get_num_xcds_no_gpu_model():
|
||||
"""Test get_num_xcds with no gpu model - covers line 342"""
|
||||
from src.utils.mi_gpu_spec import MIGPUSpecs
|
||||
|
||||
result = MIGPUSpecs.get_num_xcds(
|
||||
result = MIGPUSpecs.get_num_xcds( # noqa: F841
|
||||
gpu_arch="gfx950", gpu_model="", compute_partition="spx"
|
||||
)
|
||||
|
||||
@@ -385,7 +380,7 @@ def test_get_chip_id_dict_empty():
|
||||
|
||||
with patch.object(MIGPUSpecs, "_chip_id_dict", {}):
|
||||
with patch("src.utils.mi_gpu_spec.console_error") as mock_error:
|
||||
result = MIGPUSpecs.get_chip_id_dict()
|
||||
result = MIGPUSpecs.get_chip_id_dict() # noqa: F841
|
||||
mock_error.assert_called_once()
|
||||
|
||||
|
||||
@@ -396,7 +391,7 @@ def test_get_num_xcds_dict_empty():
|
||||
|
||||
with patch.object(MIGPUSpecs, "_num_xcds_dict", {}):
|
||||
with patch("src.utils.mi_gpu_spec.console_error") as mock_error:
|
||||
result = MIGPUSpecs.get_num_xcds_dict()
|
||||
result = MIGPUSpecs.get_num_xcds_dict() # noqa: F841
|
||||
mock_error.assert_called_once()
|
||||
|
||||
|
||||
|
||||
@@ -23,13 +23,12 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
|
||||
rocprof_compute = SourceFileLoader(
|
||||
"rocprof-compute", "src/rocprof-compute"
|
||||
).load_module()
|
||||
|
||||
##################################################
|
||||
## Generated tests ##
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
##############################################################################
|
||||
|
||||
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
@@ -54,7 +53,6 @@ CHIP_IDS = {
|
||||
"30112": "MI350",
|
||||
}
|
||||
|
||||
|
||||
# --
|
||||
# Runtime config options
|
||||
# --
|
||||
@@ -74,101 +72,91 @@ DEFAULT_ABS_DIFF = 15
|
||||
DEFAULT_REL_DIFF = 50
|
||||
MAX_REOCCURING_COUNT = 28
|
||||
|
||||
ALL_CSVS_MI100 = sorted(
|
||||
[
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"sysinfo.csv",
|
||||
]
|
||||
)
|
||||
ALL_CSVS_MI100 = sorted([
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"sysinfo.csv",
|
||||
])
|
||||
|
||||
ALL_CSVS_MI200 = sorted(
|
||||
[
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
]
|
||||
)
|
||||
ALL_CSVS_MI300 = sorted(
|
||||
[
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
]
|
||||
)
|
||||
ALL_CSVS_MI350 = sorted(
|
||||
[
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"pmc_perf_7.csv",
|
||||
"pmc_perf_8.csv",
|
||||
"pmc_perf_9.csv",
|
||||
"pmc_perf_10.csv",
|
||||
"pmc_perf_11.csv",
|
||||
"pmc_perf_12.csv",
|
||||
"pmc_perf_13.csv",
|
||||
"pmc_perf_14.csv",
|
||||
"sysinfo.csv",
|
||||
]
|
||||
)
|
||||
ALL_CSVS_MI200 = sorted([
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
])
|
||||
ALL_CSVS_MI300 = sorted([
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
])
|
||||
ALL_CSVS_MI350 = sorted([
|
||||
"SQ_IFETCH_LEVEL.csv",
|
||||
"SQ_INST_LEVEL_LDS.csv",
|
||||
"SQ_INST_LEVEL_SMEM.csv",
|
||||
"SQ_INST_LEVEL_VMEM.csv",
|
||||
"SQ_LEVEL_WAVES.csv",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"pmc_perf_3.csv",
|
||||
"pmc_perf_4.csv",
|
||||
"pmc_perf_5.csv",
|
||||
"pmc_perf_6.csv",
|
||||
"pmc_perf_7.csv",
|
||||
"pmc_perf_8.csv",
|
||||
"pmc_perf_9.csv",
|
||||
"pmc_perf_10.csv",
|
||||
"pmc_perf_11.csv",
|
||||
"pmc_perf_12.csv",
|
||||
"pmc_perf_13.csv",
|
||||
"pmc_perf_14.csv",
|
||||
"sysinfo.csv",
|
||||
])
|
||||
|
||||
ROOF_ONLY_FILES = sorted(
|
||||
[
|
||||
"empirRoof_gpu-0_FP32.pdf",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"roofline.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
]
|
||||
)
|
||||
ROOF_ONLY_FILES = sorted([
|
||||
"empirRoof_gpu-0_FP32.pdf",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
"pmc_perf_2.csv",
|
||||
"roofline.csv",
|
||||
"sysinfo.csv",
|
||||
"timestamps.csv",
|
||||
])
|
||||
|
||||
METRIC_THRESHOLDS = {
|
||||
"2.1.12": {"absolute": 0, "relative": 8},
|
||||
@@ -292,7 +280,9 @@ def counter_compare(test_name, errors_pd, baseline_df, run_df, threshold=5):
|
||||
# if 0 show absolute difference
|
||||
diff = round(baseline_data - run_data, 2)
|
||||
if diff > threshold:
|
||||
print(str(idx_1) + "[" + pmc_counter + "] diff is :" + str(diff))
|
||||
print(
|
||||
str(idx_1) + "[" + pmc_counter + "] diff is :" + str(diff)
|
||||
)
|
||||
differences["kernel_name"] = [kernel_name]
|
||||
differences["test_name"] = [test_name]
|
||||
differences["gpu-id"] = [gpu_id]
|
||||
@@ -429,7 +419,13 @@ def baseline_compare_metric(test_name, workload_dir, args=[]):
|
||||
metric_info = re.findall(
|
||||
r"(^"
|
||||
+ metric
|
||||
+ r")(?: *)([()0-9A-Za-z- ]+ )(?: *)([0-9.-]*)(?: *)([0-9.-]*)(?: *)\(([-0-9.]*)%\)(?: *)([-0-9.e]*)",
|
||||
+ (
|
||||
r")(?: *)([()0-9A-Za-z- ]+ )"
|
||||
r"(?: *)([0-9.-]*)"
|
||||
r"(?: *)([0-9.-]*)"
|
||||
r"(?: *)\(([-0-9.]*)%\)"
|
||||
r"(?: *)([-0-9.e]*)"
|
||||
),
|
||||
captured_output,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
@@ -489,20 +485,20 @@ def baseline_compare_metric(test_name, workload_dir, args=[]):
|
||||
# print("logging...")
|
||||
# print(metric_info)
|
||||
|
||||
new_error = pd.DataFrame.from_dict(
|
||||
{
|
||||
"Index": [metric_idx],
|
||||
"Metric": [metric_name],
|
||||
"Percent Difference": [relative_diff],
|
||||
"Absolute Difference": [absolute_diff],
|
||||
"Baseline": [baseline_val],
|
||||
"Current": [current_val],
|
||||
"Test Name": [test_name],
|
||||
}
|
||||
)
|
||||
new_error = pd.DataFrame.from_dict({
|
||||
"Index": [metric_idx],
|
||||
"Metric": [metric_name],
|
||||
"Percent Difference": [relative_diff],
|
||||
"Absolute Difference": [absolute_diff],
|
||||
"Baseline": [baseline_val],
|
||||
"Current": [current_val],
|
||||
"Test Name": [test_name],
|
||||
})
|
||||
error_df = pd.concat([error_df, new_error])
|
||||
counts = error_df.groupby(["Index"]).cumcount()
|
||||
reoccurring_metrics = error_df.loc[counts > MAX_REOCCURING_COUNT]
|
||||
reoccurring_metrics = error_df.loc[
|
||||
counts > MAX_REOCCURING_COUNT
|
||||
]
|
||||
reoccurring_metrics["counts"] = counts[
|
||||
counts > MAX_REOCCURING_COUNT
|
||||
]
|
||||
@@ -843,7 +839,7 @@ def test_roofline_empty_kernel_names_handling(binary_handler_profile_rocprof_com
|
||||
]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute(
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
|
||||
@@ -860,10 +856,16 @@ def test_roofline_unsupported_datatype_error(binary_handler_profile_rocprof_comp
|
||||
pytest.skip("Skipping roofline test for MI100")
|
||||
return
|
||||
|
||||
options = ["--device", "0", "--roof-only", "--roofline-data-type", "UNSUPPORTED_TYPE"]
|
||||
options = [
|
||||
"--device",
|
||||
"0",
|
||||
"--roof-only",
|
||||
"--roofline-data-type",
|
||||
"UNSUPPORTED_TYPE",
|
||||
]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute(
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
|
||||
@@ -914,7 +916,7 @@ def test_roof_cli_plot_generation(binary_handler_profile_rocprof_compute):
|
||||
return
|
||||
|
||||
try:
|
||||
import plotext as plt
|
||||
import plotext as plt # noqa: F401
|
||||
|
||||
cli_available = True
|
||||
except ImportError:
|
||||
@@ -924,7 +926,7 @@ def test_roof_cli_plot_generation(binary_handler_profile_rocprof_compute):
|
||||
options = ["--device", "0", "--roof-only"]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute(
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
|
||||
@@ -946,7 +948,7 @@ def test_roof_error_handling(binary_handler_profile_rocprof_compute):
|
||||
if os.path.exists(pmc_perf_path):
|
||||
os.remove(pmc_perf_path)
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute(
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
|
||||
@@ -1072,7 +1074,7 @@ def test_roofline_ceiling_data_validation(binary_handler_profile_rocprof_compute
|
||||
options = ["--device", "0", "--roof-only", "--mem-level", "INVALID_LEVEL"]
|
||||
workload_dir = test_utils.get_output_dir()
|
||||
|
||||
returncode = binary_handler_profile_rocprof_compute(
|
||||
returncode = binary_handler_profile_rocprof_compute( # noqa: F841
|
||||
config, workload_dir, options, check_success=False, roof=True
|
||||
)
|
||||
|
||||
@@ -1523,7 +1525,9 @@ def test_instmix_memchart_section(binary_handler_profile_rocprof_compute):
|
||||
assert test_utils.check_file_pattern(
|
||||
"- '10'", f"{workload_dir}/profiling_config.yaml"
|
||||
)
|
||||
assert test_utils.check_file_pattern("- '3'", f"{workload_dir}/profiling_config.yaml")
|
||||
assert test_utils.check_file_pattern(
|
||||
"- '3'", f"{workload_dir}/profiling_config.yaml"
|
||||
)
|
||||
assert test_utils.check_file_pattern(
|
||||
"TA_FLAT_WAVEFRONTS", f"{workload_dir}/pmc_perf.csv"
|
||||
)
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -1,10 +1,12 @@
|
||||
# NOTES
|
||||
#
|
||||
# Read utils/unified_config.yaml and split it into per gfx architecture per panel config files
|
||||
# WARNING: This script will overwrite existing files under per gfx architecture folders under src/rocprof_compute_soc/analysis_configs
|
||||
# Read utils/unified_config.yaml and split it into per gfx architecture per panel
|
||||
# config files. WARNING: This script will overwrite existing files under per gfx
|
||||
# architecture folders under src/rocprof_compute_soc/analysis_configs.
|
||||
#
|
||||
# Read utils/unified_config.yaml and split it into metric tables per documentation section
|
||||
# WARNING: This script will overwrite existing docs/data/metrics_description.yaml
|
||||
# Read utils/unified_config.yaml and split it into metric tables per documentation
|
||||
# section.
|
||||
# WARNING: This script will overwrite existing docs/data/metrics_description.yaml.
|
||||
|
||||
import copy
|
||||
import hashlib
|
||||
@@ -21,6 +23,10 @@ SETS_TARGET_DIR = ROOT_DIR.joinpath(
|
||||
"src", "rocprof_compute_soc", "profile_configs", "sets"
|
||||
)
|
||||
DOC_TARGET_DIR = ROOT_DIR.joinpath("docs", "data")
|
||||
AUTOGEN_TEXT = (
|
||||
"# AUTOGENERATED FILE. Only edit for testing purposes, not for development. "
|
||||
"Generated from utils/unified_config.yaml. Generated by utils/split_config.py\n"
|
||||
)
|
||||
HASH_FILE = ROOT_DIR.joinpath("utils", "autogen_hash.yaml")
|
||||
HASH_FILE_MAP = {}
|
||||
GFX_VERSIONS = ["gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx950"]
|
||||
@@ -75,7 +81,6 @@ def update_analysis_config():
|
||||
data_source_config["metric_table"]["metric"] = data_source_config[
|
||||
"metric_table"
|
||||
]["metric"][gfx_version]
|
||||
|
||||
build_metric_id_mapping(
|
||||
panel_id_int,
|
||||
data_source_index,
|
||||
@@ -209,7 +214,9 @@ def update_documentation():
|
||||
for metric_name in sorted(list(metric_names)):
|
||||
metrics_info[metric_name] = {
|
||||
"rst": panel_config["metrics_description"][metric_name]["rst"],
|
||||
"unit": panel_config["metrics_description"][metric_name]["unit"],
|
||||
"unit": panel_config["metrics_description"][metric_name][
|
||||
"unit"
|
||||
],
|
||||
}
|
||||
panel_metric_map[data_source["metric_table"]["id"]] = metrics_info
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ tag = args.tag
|
||||
print("Current repository version = %s" % repoVer)
|
||||
print("--> tagname = %s" % tag)
|
||||
|
||||
|
||||
if repoCheck == tag:
|
||||
print("OK: exact match")
|
||||
exit(0)
|
||||
|
||||
Ссылка в новой задаче
Block a user