Merge branch 'develop' of github.com:ROCm/rocm-systems into develop
@@ -51,10 +51,6 @@ This table provides the current status of the migration of specific ROCm systems
|
||||
| `rocminfo` | 8/11 |
|
||||
| `rocr-runtime` | 8/11 |
|
||||
| `rocm-core` | 8/12 |
|
||||
| `clr` | 8/13 |
|
||||
| `hip` | 8/13 |
|
||||
| `hipother` | 8/13 |
|
||||
| `hip-tests` | 8/13 |
|
||||
|
||||
*Remaining schedule to be determined.
|
||||
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- AUTHORS
|
||||
- LICENSE
|
||||
- VERSION
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- AUTHORS
|
||||
- LICENSE
|
||||
- VERSION
|
||||
drafts: false
|
||||
|
||||
# For changes to mainline, only build & test against mainline ROCm
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-compute.yml@pipelines_repo
|
||||
parameters:
|
||||
jobMatrix:
|
||||
buildJobs:
|
||||
- gfx942-mainline:
|
||||
name: gfx942_mainline
|
||||
target: gfx942
|
||||
dependencySource: mainline
|
||||
- gfx90a-mainline:
|
||||
name: gfx90a_mainline
|
||||
target: gfx90a
|
||||
dependencySource: mainline
|
||||
testJobs:
|
||||
- gfx942-mainline:
|
||||
name: gfx942_mainline
|
||||
target: gfx942
|
||||
dependencySource: mainline
|
||||
- gfx90a-mainline:
|
||||
name: gfx90a_mainline
|
||||
target: gfx90a
|
||||
dependencySource: mainline
|
||||
@@ -0,0 +1,47 @@
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
- amd-staging
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- AUTHORS
|
||||
- LICENSE
|
||||
- VERSION
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
- amd-staging
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- AUTHORS
|
||||
- LICENSE
|
||||
- VERSION
|
||||
drafts: false
|
||||
|
||||
# For changes to develop and staging, build & test against both staging and mainline ROCm
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-compute.yml@pipelines_repo
|
||||
@@ -0,0 +1,65 @@
|
||||
parse:
|
||||
additional_commands: {}
|
||||
override_spec: {}
|
||||
vartags: []
|
||||
proptags: []
|
||||
format:
|
||||
disable: false
|
||||
line_width: 90
|
||||
tab_size: 4
|
||||
use_tabchars: false
|
||||
fractional_tab_policy: use-space
|
||||
max_subgroups_hwrap: 2
|
||||
max_pargs_hwrap: 6
|
||||
max_rows_cmdline: 2
|
||||
separate_ctrl_name_with_space: false
|
||||
separate_fn_name_with_space: false
|
||||
dangle_parens: false
|
||||
dangle_align: child
|
||||
min_prefix_chars: 4
|
||||
max_prefix_chars: 10
|
||||
max_lines_hwrap: 2
|
||||
line_ending: unix
|
||||
command_case: lower
|
||||
keyword_case: upper
|
||||
always_wrap: []
|
||||
enable_sort: true
|
||||
autosort: false
|
||||
require_valid_layout: false
|
||||
layout_passes: {}
|
||||
markup:
|
||||
bullet_char: '-'
|
||||
enum_char: '*'
|
||||
first_comment_is_literal: true
|
||||
literal_comment_pattern: ^#
|
||||
fence_pattern: ^\s*([`~]{3}[`~]*)(.*)$
|
||||
ruler_pattern: ^\s*[^\w\s]{3}.*[^\w\s]{3}$
|
||||
explicit_trailing_pattern: '#<'
|
||||
hashruler_min_length: 10
|
||||
canonicalize_hashrulers: true
|
||||
enable_markup: true
|
||||
lint:
|
||||
disabled_codes: []
|
||||
function_pattern: '[0-9a-z_]+'
|
||||
macro_pattern: '[0-9A-Z_]+'
|
||||
global_var_pattern: '[A-Z][0-9A-Z_]+'
|
||||
internal_var_pattern: _[A-Z][0-9A-Z_]+
|
||||
local_var_pattern: '[a-z][a-z0-9_]+'
|
||||
private_var_pattern: _[0-9a-z_]+
|
||||
public_var_pattern: '[A-Z][0-9A-Z_]+'
|
||||
argument_var_pattern: '[a-z][a-z0-9_]+'
|
||||
keyword_pattern: '[A-Z][0-9A-Z_]+'
|
||||
max_conditionals_custom_parser: 2
|
||||
min_statement_spacing: 1
|
||||
max_statement_spacing: 2
|
||||
max_returns: 6
|
||||
max_branches: 12
|
||||
max_arguments: 5
|
||||
max_localvars: 15
|
||||
max_statements: 50
|
||||
encode:
|
||||
emit_byteorder_mark: false
|
||||
input_encoding: utf-8
|
||||
output_encoding: utf-8
|
||||
misc:
|
||||
per_command: {}
|
||||
@@ -0,0 +1,13 @@
|
||||
* @coleramos425 @feizheng10 @vedithal-amd @xuchen-amd @cfallows-amd @ywang103-amd @jamessiddeley-amd
|
||||
|
||||
# Documentation files
|
||||
docs/ @ROCm/rocm-documentation @prbasyal-amd
|
||||
*.md @ROCm/rocm-documentation @prbasyal-amd
|
||||
*.rst @ROCm/rocm-documentation @prbasyal-amd
|
||||
.readthedocs.yaml @ROCm/rocm-documentation @prbasyal-amd
|
||||
|
||||
## Packaging
|
||||
# cmake/
|
||||
# tests/
|
||||
# CMakeLists.txt
|
||||
# utils/
|
||||
@@ -0,0 +1,131 @@
|
||||
name: Bug Report
|
||||
description: Report a bug you've encountered for further investigation
|
||||
title: "[Bug]: "
|
||||
labels: ["bug", "triage"]
|
||||
assignees:
|
||||
- njobypet
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please complete the following form.
|
||||
|
||||
- type: textarea
|
||||
id: bug-description
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: A clear and concise description of what the bug is.
|
||||
placeholder: e.g. I found the following error when trying to...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Development Environment
|
||||
|
||||
- type: input
|
||||
id: linux-distro
|
||||
attributes:
|
||||
label: Linux Distribution
|
||||
description: |
|
||||
What operating system are you using? Hint:
|
||||
```shell
|
||||
echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)"
|
||||
```
|
||||
placeholder: e.g. Ubuntu 22.04
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: rocprofiler-compute-version
|
||||
attributes:
|
||||
label: ROCm Compute Profiler Version
|
||||
description: |
|
||||
What version of ROCm Compute Profiler are you using? Hint:
|
||||
```shell
|
||||
rocprof-compute --version
|
||||
```
|
||||
placeholder: e.g. 2.1.0
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: gpu
|
||||
attributes:
|
||||
label: GPU
|
||||
description: |
|
||||
What GPU(s) did you encounter the issue on? Hint:
|
||||
```shell
|
||||
echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)"
|
||||
```
|
||||
placeholder: e.g. AMD MI250, AMD MI300X
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: rocm-version
|
||||
attributes:
|
||||
label: ROCm Version
|
||||
description: |
|
||||
What version(s) of ROCm did you encounter the issue on? Deduce from:
|
||||
```shell
|
||||
readlink -f $(which rocprof)
|
||||
```
|
||||
placeholder: e.g. ROCm 6.0.2
|
||||
|
||||
- type: input
|
||||
id: cluster
|
||||
attributes:
|
||||
label: Cluster name (if applicable)
|
||||
description: What is the name of the cluster you are using?
|
||||
placeholder: e.g. Frontier, El Capitan, etc.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## To Reproduce
|
||||
|
||||
- type: textarea
|
||||
id: reproducer
|
||||
attributes:
|
||||
label: Reproducer
|
||||
description: Steps to reproduce the behavior
|
||||
placeholder: |
|
||||
1. Run '...'
|
||||
2. Go to '...'
|
||||
3. Click on '....'
|
||||
4. See error
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected-behav
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: A clear and concise description of what you expected to happen.
|
||||
placeholder: e.g. I expected the following to happen...
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Other
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste rocprofiler-compute's `log.txt` file. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: If applicable, add screenshots to help explain your problem.
|
||||
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Additional Context
|
||||
description: Add any other context about the problem here.
|
||||
@@ -0,0 +1,45 @@
|
||||
name: Feature Request
|
||||
description: Suggest an idea for this project
|
||||
title: "[Req]: "
|
||||
labels: ["enhancement", "triage"]
|
||||
assignees:
|
||||
- njobypet
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please complete the following form.
|
||||
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: Is your feature request related to a problem?
|
||||
description: A clear and concise description of what the problem is.
|
||||
placeholder: e.g. I'm always frustrated when...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: solution
|
||||
attributes:
|
||||
label: Describe the solution you'd like
|
||||
description: A clear and concise description of what you want to happen.
|
||||
placeholder: e.g. I propose that...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Describe any alternatives you've considered
|
||||
description: Walk through your thought process and how you arrived at your solution.
|
||||
placeholder: |
|
||||
e.g. Some alternative approaches might be:
|
||||
1. ...
|
||||
2. ...
|
||||
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context or screenshots about the feature request here.
|
||||
@@ -0,0 +1,23 @@
|
||||
name: Question
|
||||
description: Clarifying questions and uncertainties
|
||||
labels: ["question"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please complete the following form.
|
||||
|
||||
- type: textarea
|
||||
id: question
|
||||
attributes:
|
||||
label: Describe your question
|
||||
description: A clear and concise description of your question and how it came up.
|
||||
placeholder: I was unsure how to ...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context or screenshots about the question here.
|
||||
@@ -0,0 +1,21 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/docs/sphinx" # Location of package manifests
|
||||
open-pull-requests-limit: 10
|
||||
schedule:
|
||||
interval: "daily"
|
||||
target-branch: "develop"
|
||||
labels:
|
||||
- "documentation"
|
||||
- "dependencies"
|
||||
reviewers:
|
||||
- "samjwu"
|
||||
- "feizheng10"
|
||||
- "coleramos425"
|
||||
- "vedithal-amd"
|
||||
@@ -0,0 +1,34 @@
|
||||
# rocprofiler-compute Pull Request
|
||||
|
||||
## Related Issue
|
||||
<!-- Please link to the issue(s) that this PR addresses. -->
|
||||
- [ ] Closes #<issue number or link>
|
||||
|
||||
## What type of PR is this? (check all that apply)
|
||||
|
||||
- [ ] Bug Fix
|
||||
- [ ] Cherry Pick
|
||||
- [ ] Continuous Integration
|
||||
- [ ] Documentation Update
|
||||
- [ ] Feature
|
||||
- [ ] Optimization
|
||||
- [ ] Refactor
|
||||
- [ ] Other (please specify)
|
||||
|
||||
## Technical Details
|
||||
<!-- Please explain the changes. -->
|
||||
|
||||
## Have you added or updated tests to validate functionality?
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No - does not apply to this PR
|
||||
|
||||
## Added / Updated documentation?
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No - does not apply to this PR
|
||||
|
||||
## Have you updated CHANGELOG?
|
||||
<!-- Needed for Release updates for a ROCm release. -->
|
||||
- [ ] Yes
|
||||
- [ ] No - does not apply to this PR
|
||||
@@ -0,0 +1,31 @@
|
||||
name: Sync Staging with Develop
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: 0 0 * * *
|
||||
|
||||
jobs:
|
||||
promote-dev-to-stg:
|
||||
if: github.repository == 'ROCm/rocprofiler-compute'
|
||||
runs-on: ubuntu-latest
|
||||
name: Promote Develop to Staging
|
||||
steps:
|
||||
- name: Generate a token
|
||||
id: generate-token
|
||||
uses: actions/create-github-app-token@v1
|
||||
with:
|
||||
app-id: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_APP_ID }}
|
||||
private-key: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: develop
|
||||
fetch-depth: '0'
|
||||
token: ${{ steps.generate-token.outputs.token }}
|
||||
|
||||
- name: Merge - Fast Forward Only
|
||||
run: |
|
||||
git checkout amd-staging
|
||||
git merge origin/develop --ff-only
|
||||
git push origin HEAD
|
||||
@@ -0,0 +1,62 @@
|
||||
name: Documentation
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline ]
|
||||
paths:
|
||||
- 'docs/archive/docs-2.x/**'
|
||||
- 'docs/archive/docs-1.x/**'
|
||||
- '.github/workflows/docs.yml'
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: "pages"
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# Build job
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: sphinxdoc/sphinx
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Additional python packages
|
||||
run: pip3 install -r docs/archive/requirements-doc.txt
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v4
|
||||
- name: Build 1.x docs
|
||||
run: |
|
||||
cd docs/archive/docs-1.x
|
||||
make html
|
||||
- name: Build 2.x docs
|
||||
run: |
|
||||
cd docs/archive/docs-2.x
|
||||
make html
|
||||
- name: Relocate 1.x docs
|
||||
run: |
|
||||
mv docs/archive/docs-1.x/_build/html docs/archive/_build/html/1.x
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: ./docs/archive/_build/html
|
||||
|
||||
# Deployment job
|
||||
deploy:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v4
|
||||
@@ -0,0 +1,75 @@
|
||||
|
||||
name: Formatting
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
pull_request:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
python:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python '3.x'
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install black isort
|
||||
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
|
||||
- name: Run black formatter
|
||||
uses: psf/black@stable
|
||||
with:
|
||||
use_pyproject: true
|
||||
- name: Run isort formatter
|
||||
uses: isort/isort-action@master
|
||||
|
||||
cmake:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3-pip
|
||||
python3 -m pip install cmake-format
|
||||
- name: cmake-format
|
||||
run: |
|
||||
set +e
|
||||
cmake-format -i $(find . -type f | egrep 'CMakeLists.txt|\.cmake$')
|
||||
if [ $(git diff | wc -l) -gt 0 ]; then
|
||||
echo -e "\nError! CMake code not formatted. Run cmake-format...\n"
|
||||
echo -e "\nFiles:\n"
|
||||
git diff --name-only
|
||||
echo -e "\nFull diff:\n"
|
||||
git diff
|
||||
exit 1
|
||||
fi
|
||||
|
||||
python-bytecode:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: find-bytecode
|
||||
run: |
|
||||
set +e
|
||||
FILES=$(find . -type f | egrep '__pycache__|\.pyc$')
|
||||
if [ -n "${FILES}" ]; then
|
||||
echo -e "\nError! Python bytecode included in commit\n"
|
||||
echo -e "### FILES: ###"
|
||||
echo -e "${FILES}"
|
||||
echo -e "##############"
|
||||
exit 1
|
||||
fi
|
||||
@@ -0,0 +1,116 @@
|
||||
name: mi-rhel9
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline, release/** ]
|
||||
|
||||
# Allows manual execution
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
checks: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
profile:
|
||||
strategy:
|
||||
matrix:
|
||||
version: [5.7.1, 6.0.2]
|
||||
hardware: [mi100, mi200]
|
||||
profiler: [default, rocprofv2]
|
||||
exclude:
|
||||
- profiler: rocprofv2
|
||||
hardware: mi100
|
||||
fail-fast: false
|
||||
runs-on: [mi100, rhel9]
|
||||
|
||||
env:
|
||||
PYTHONPATH: /home1/ciuser/rocprofiler-compute_deps
|
||||
CI_VISIBLE_DEVICES: 1
|
||||
name: ROCm v${{ matrix.version }} / ${{ matrix.hardware }} / ${{ matrix.profiler }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Python Path
|
||||
run: echo ${PYTHONPATH}
|
||||
- name: Setup hardware-specific run details
|
||||
run: |
|
||||
if [ ${{ matrix.hardware }} == "mi100" ];then
|
||||
echo "CI_QUEUE=ci" >> $GITHUB_ENV
|
||||
echo "CI_ARCH=gfx908" >> $GITHUB_ENV
|
||||
elif [ ${{ matrix.hardware }} == "mi200" ];then
|
||||
echo "CI_QUEUE=mi2104x" >> $GITHUB_ENV
|
||||
echo "CI_ARCH=gfx90a" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Unsupported hardware"
|
||||
exit 1
|
||||
fi
|
||||
- name: Setup profiling mode
|
||||
run: |
|
||||
if [ ${{ matrix.profiler }} == "rocprofv2" ];then
|
||||
echo "ROCPROF=rocprofv2" >> $GITHUB_ENV
|
||||
fi
|
||||
- name: Install Python collateral (build and test)
|
||||
run: |
|
||||
pip3 install -t ${PYTHONPATH} -r requirements.txt
|
||||
pip3 install -t ${PYTHONPATH} -r requirements-test.txt
|
||||
- name: Load ROCm ${{ matrix.version}}
|
||||
run: |
|
||||
module load cmake
|
||||
module load rocm/${{ matrix.version }}
|
||||
echo $PATH > $GITHUB_PATH
|
||||
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
|
||||
echo "ROCM_PATH=$ROCM_PATH" >> "$GITHUB_ENV"
|
||||
- name: Check Environment
|
||||
run: |
|
||||
echo "PATH=$PATH"
|
||||
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
|
||||
echo "ROCM_PATH=$ROCM_PATH"
|
||||
echo "CI_QUEUE=$CI_QUEUE"
|
||||
echo "CI_ARCH=$CI_ARCH"
|
||||
echo "CI_VISIBLE_DEVICES=$CI_VISIBLE_DEVICES"
|
||||
echo "ROCPROF=$ROCPROF"
|
||||
- name: Configure
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
ml cmake
|
||||
cmake -DENABLE_TESTS=ON -DCMAKE_HIP_ARCHITECTURES=$CI_ARCH -DENABLE_COVERAGE=ON -DPYTEST_NUMPROCS=8 ..
|
||||
- name: Build tests and Run [profile] mode
|
||||
run: |
|
||||
cd build
|
||||
make
|
||||
srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:20:00 ctest -j 4 --resource-spec-file ../tests/4gpus.json --verbose -L profile
|
||||
- name: Run [analyze workloads] mode
|
||||
if: '!cancelled()'
|
||||
run: |
|
||||
cd build
|
||||
srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:10:00 ctest --verbose -R test_analyze_workloads
|
||||
- name: Run [analyze commands] mode
|
||||
if: '!cancelled()'
|
||||
run: |
|
||||
cd build
|
||||
srun -N 1 -J rocprof-compute -p $CI_QUEUE -t 00:10:00 ctest --verbose -R test_analyze_commands
|
||||
- name: Publish Test Results
|
||||
uses: EnricoMi/publish-unit-test-result-action/linux@v2
|
||||
if: always()
|
||||
with:
|
||||
files: |
|
||||
tests/**/test_*.xml
|
||||
- name: Summarize code coverage
|
||||
if: always()
|
||||
run: coverage report
|
||||
- name: Upload code coverage
|
||||
uses: zgosalvez/github-actions-report-lcov@v4
|
||||
if: always()
|
||||
with:
|
||||
coverage-files: tests/coverage.info
|
||||
minimum-coverage: 35
|
||||
artifact-name: code-coverage-report-rocm${{ matrix.version }}-${{ matrix.hardware }}-${{ matrix.profiler }}
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
update-comment: true
|
||||
@@ -0,0 +1,64 @@
|
||||
name: packaging
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v[1-9].[0-9]+.[0-9]+*"
|
||||
- "rocm-[0-9]+.[0-9]+.[0-9]+*"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
distbuild:
|
||||
runs-on: ubuntu-latest
|
||||
name: Create release distribution
|
||||
env:
|
||||
INSTALL_DIR: /tmp
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- name: Verify VERSION file consistent with tag
|
||||
run: utils/ver_check.py --tag ${{github.ref_name}}
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Python dependency installs
|
||||
run: python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt
|
||||
- name: Configure
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DPYTHON_DEPS=${INSTALL_DIR}/python-libs ..
|
||||
- name: Release tarball
|
||||
run: |
|
||||
cd build
|
||||
make package_source
|
||||
- name: Rename tarball
|
||||
run: mv build/rocprofiler-compute-*.tar.gz build/rocprofiler-compute-${{github.ref_name}}.tar.gz
|
||||
- name: Archive tarball
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: rocprofiler-compute-${{github.ref_name}}.tar.gz
|
||||
path: build/rocprofiler-compute-${{github.ref_name}}.tar.gz
|
||||
- name: Set version
|
||||
run: echo "VERSION=$(cat VERSION)" >> $GITHUB_ENV
|
||||
- name: Determine release name
|
||||
run: |
|
||||
if [[ ${{github.ref_name}} == rocm-* ]]; then
|
||||
echo "RELEASE_NAME=rocprofiler-compute ${{ env.VERSION }} for ${{github.ref_name}}"
|
||||
else
|
||||
echo "RELEASE_NAME=rocprofiler-compute ${{ env.VERSION }}"
|
||||
fi >> $GITHUB_ENV
|
||||
- name: Upload tarball Release Asset
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/') && github.repository == 'ROCm/rocprofiler-compute'
|
||||
with:
|
||||
fail_on_unmatched: True
|
||||
generate_release_notes: True
|
||||
draft: False # toggle for debugging
|
||||
files: |
|
||||
build/rocprofiler-compute-${{github.ref_name}}.tar.gz
|
||||
name: ${{ env.RELEASE_NAME }}
|
||||
@@ -0,0 +1,71 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: RHEL 8/9
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- '.github/**/*.md'
|
||||
- 'docs/**'
|
||||
- 'docker/**'
|
||||
pull_request:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- '.github/**/*.md'
|
||||
- 'docs/**'
|
||||
- 'docker/**'
|
||||
|
||||
# Allows you to run this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: dgaliffiamd/rocprofiler-systems:ci-base-rhel-${{ matrix.os-release }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os-release: [ '8.10', '9.3']
|
||||
build-type: ['Release']
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
- name: Install baseline OS dependencies
|
||||
run: |
|
||||
yum clean all
|
||||
yum makecache
|
||||
yum -y install git
|
||||
yum -y install python39
|
||||
yum -y install cmake3
|
||||
yum -y install which
|
||||
yum -y install glibc-langpack-en
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Python prereqs
|
||||
run: |
|
||||
python3.9 -m pip install -r requirements.txt
|
||||
python3.9 -m pip install -r requirements-test.txt
|
||||
- name: Configure and install
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-compute -DPYTEST_NUMPROCS=4 ..
|
||||
make install
|
||||
- name: CTest- Analyze Commands
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_commands
|
||||
- name: CTest- Analyze Workloads
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_workloads
|
||||
@@ -0,0 +1,112 @@
|
||||
name: tarball
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline, release/** ]
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- '.github/**/*.md'
|
||||
- 'docs/**'
|
||||
- 'docker/**'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
distbuild:
|
||||
runs-on: ubuntu-latest
|
||||
name: Create distribution tarball
|
||||
env:
|
||||
INSTALL_DIR: /tmp/foo1
|
||||
steps:
|
||||
- name: Set git sha mode
|
||||
id: sha-mode
|
||||
run: |
|
||||
# GITHUB_EVENT_NAME is provided by the runner; no EVENT variable is defined for this job.
if [ "$GITHUB_EVENT_NAME" == 'pull_request' ]; then
|
||||
echo "sha=${{github.event.pull_request.head.sha}}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "sha=$GITHUB_SHA" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
# Step outputs written to $GITHUB_OUTPUT are read via steps.<id>.outputs.<name>.
ref: ${{ steps.sha-mode.outputs.sha }}
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Python dependency installs
|
||||
run: python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt
|
||||
- name: Configure
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DPYTHON_DEPS=${INSTALL_DIR}/python-libs ..
|
||||
- name: Release tarball
|
||||
run: |
|
||||
cd build
|
||||
make package_source
|
||||
- name: Archive tarball
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tarball-testing
|
||||
path: build/rocprofiler-compute-*.tar.gz
|
||||
retention-days: 3
|
||||
disttest:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [distbuild]
|
||||
name: Tarball tests
|
||||
env:
|
||||
INSTALL_DIR: /tmp/foo2
|
||||
steps:
|
||||
- name: Access tarball
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: tarball-testing
|
||||
- name: Expand
|
||||
run: tar xfz rocprofiler-compute-*.tar.gz; rm rocprofiler-compute-*.tar.gz
|
||||
- name: Python dependency installs
|
||||
run: |
|
||||
cd rocprofiler-compute-*
|
||||
python3 -m pip install -t${INSTALL_DIR}/python-libs -r requirements.txt
|
||||
- name: Configure
|
||||
run: |
|
||||
cd rocprofiler-compute-*
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/rocprofiler-compute \
|
||||
-DPYTHON_DEPS=${INSTALL_DIR}/python-libs ..
|
||||
- name: Install
|
||||
run: |
|
||||
cd rocprofiler-compute-*
|
||||
cd build
|
||||
make install
|
||||
- name: Verify expected paths
|
||||
run: |
|
||||
# find $INSTALL_DIR
|
||||
test -d $INSTALL_DIR/rocprofiler-compute
|
||||
test -x $INSTALL_DIR/rocprofiler-compute/bin/rocprof-compute
|
||||
test -s $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/VERSION
|
||||
test -s $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/VERSION.sha
|
||||
test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_analyze
|
||||
test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_profile
|
||||
test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/rocprof_compute_soc
|
||||
test -d $INSTALL_DIR/rocprofiler-compute/libexec/rocprofiler-compute/utils
|
||||
test -s $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/sample/vcopy.cpp
|
||||
test -d $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/modulefiles
|
||||
test -s $INSTALL_DIR/rocprofiler-compute/share/doc/rocprofiler-compute/LICENSE
|
||||
- name: Query version (setting PYTHONPATH by hand)
|
||||
run: |
|
||||
export PYTHONPATH=${INSTALL_DIR}/python-libs:$PYTHONPATH
|
||||
$INSTALL_DIR/rocprofiler-compute/bin/rocprof-compute --version
|
||||
- name: Install Lmod
|
||||
run: sudo apt-get install -y lmod
|
||||
- name: Access rocprofiler-compute using modulefile
|
||||
run: |
|
||||
. /etc/profile.d/lmod.sh
|
||||
module use $INSTALL_DIR/rocprofiler-compute/share/rocprofiler-compute/modulefiles
|
||||
module load rocprofiler-compute
|
||||
module list
|
||||
rocprof-compute --version
|
||||
@@ -0,0 +1,61 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Ubuntu 22.04
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- '.github/**/*.md'
|
||||
- 'docs/**'
|
||||
- 'docker/**'
|
||||
pull_request:
|
||||
branches: [ amd-mainline, amd-staging, release/**, develop ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- '.github/**/*.md'
|
||||
- 'docs/**'
|
||||
- 'docker/**'
|
||||
|
||||
# Allows you to run this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: dgaliffiamd/rocprofiler-systems:ci-base-ubuntu-22.04
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
- name: Install baseline OS dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y git
|
||||
apt-get install -y python3-pip
|
||||
apt-get install -y cmake
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Python prereqs
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install -r requirements-test.txt
|
||||
- name: Configure and install
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-compute -DPYTEST_NUMPROCS=4 ..
|
||||
make install
|
||||
- name: CTest- Analyze Commands
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_commands
|
||||
- name: CTest- Analyze Workloads
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -R test_analyze_workloads
|
||||
@@ -0,0 +1,31 @@
|
||||
name: Rebase liangdin-test on top of amd-mainline
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: 0 0 * * 1
|
||||
|
||||
jobs:
|
||||
promote-dev-to-stg:
|
||||
if: github.repository == 'ROCm/rocprofiler-compute'
|
||||
runs-on: ubuntu-latest
|
||||
# Job display name; keep in sync with the branch checked out in the Rebase step.
name: Rebase liangdin-test on top of amd-mainline
|
||||
steps:
|
||||
- name: Generate a token
|
||||
id: generate-token
|
||||
uses: actions/create-github-app-token@v1
|
||||
with:
|
||||
app-id: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_APP_ID }}
|
||||
private-key: ${{ secrets.ROCPROFILER_COMPUTE_RUNNER_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: develop
|
||||
fetch-depth: '0'
|
||||
token: ${{ steps.generate-token.outputs.token }}
|
||||
|
||||
- name: Rebase
|
||||
run: |
|
||||
git checkout liangdin-test
|
||||
git rebase origin/amd-mainline
|
||||
git push origin HEAD
|
||||
@@ -0,0 +1,25 @@
|
||||
# mongodb_connector files
|
||||
__pycache__
|
||||
|
||||
# edit files
|
||||
*~
|
||||
|
||||
# generated files/folders
|
||||
/dist
|
||||
/omniperf.spec
|
||||
/build*
|
||||
/.vscode
|
||||
/.cache
|
||||
/.venv
|
||||
/workloads
|
||||
.coverage
|
||||
saved_analysis
|
||||
pmc_kernel_top.csv
|
||||
VERSION.sha
|
||||
|
||||
# temp files
|
||||
/tests/Testing
|
||||
|
||||
# documentation artifacts
|
||||
/_build
|
||||
_toc.yml
|
||||
@@ -0,0 +1,19 @@
|
||||
default_stages: [pre-commit]
|
||||
fail_fast: true
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
# Python import sorting
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 6.0.1
|
||||
hooks:
|
||||
- id: isort
|
||||
# Python formatting (Using this mirror lets us use mypyc-compiled black, which is about 2x faster)
|
||||
- repo: https://github.com/psf/black-pre-commit-mirror
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
@@ -0,0 +1,16 @@
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
version: 2
|
||||
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.10"
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/sphinx/requirements.txt
|
||||
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"creators": [
|
||||
{
|
||||
"affiliation": "AMD",
|
||||
"name": "Xiaomin Lu"
|
||||
},
|
||||
{
|
||||
"affiliation": "AMD Research",
|
||||
"name": "Cole Ramos"
|
||||
},
|
||||
{
|
||||
"affiliation": "AMD",
|
||||
"name": "Fei Zheng"
|
||||
},
|
||||
{
|
||||
"affiliation": "AMD Research",
|
||||
"name": "Karl W. Schulz"
|
||||
},
|
||||
{
|
||||
"affiliation": "AMD Research",
|
||||
"name": "Jose Santos"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
# This is the list of ROCm Compute Profiler's significant contributors.
|
||||
#
|
||||
# This does not necessarily list everyone who has contributed code,
|
||||
# especially since many employees of one corporation may be contributing.
|
||||
# To see the full list of contributors, see the revision history in
|
||||
# source control.
|
||||
Xiaomin Lu
|
||||
Cole Ramos
|
||||
Karl Schulz
|
||||
Fei Zheng
|
||||
Nicholas Curtis
|
||||
Jose Santos
|
||||
@@ -0,0 +1,289 @@
|
||||
# Changelog for ROCm Compute Profiler
|
||||
|
||||
Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/).
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
|
||||
* Support Roofline plot on CLI (single run)
|
||||
|
||||
* Stochastic (hardware-based) PC sampling has been enabled for AMD Instinct MI300X series and later accelerators.
|
||||
|
||||
* Sorting of PC sampling by type: offset or count.
|
||||
|
||||
* Add rocprof-compute Text User Interface (TUI) support for analyze mode (beta version)
|
||||
* A command line based user interface to support interactive single-run analysis
|
||||
* launch with `--tui` option in analyze mode. i.e., `rocprof-compute analyze --tui`
|
||||
|
||||
* Add support for acquiring TCC counters from rocprofv3 for every channel on each XCD
|
||||
|
||||
* Add Docker files to package the application and dependencies into a single portable and executable standalone binary file
|
||||
|
||||
* Analysis report based filtering
|
||||
* -b option in profile mode now additionally accepts metric id(s) for analysis report based filtering
|
||||
* -b option in profile mode also accepts hardware IP blocks for filtering; however, this support will be deprecated soon
|
||||
* --list-metrics option added in profile mode to list possible metric id(s), similar to analyze mode
|
||||
|
||||
* Data type selection option for roofline profiling
|
||||
* --roofline-data-type / -R option added to specify which data types the user wants to capture in the roofline PDF plot outputs
|
||||
* Default is FP32, but user can specify as many types as desired to overlay on the same plot output
|
||||
|
||||
* Additional data types for roofline profiling
|
||||
* Now supports FP4, FP6, FP8, FP16, BF16, FP32, FP64, I8, I32, I64 (dependent on gpu architecture)
|
||||
|
||||
* Support host-trap PC Sampling on CLI (beta version)
|
||||
|
||||
* Support for AMD Instinct MI350 series GPUs with the addition of the following counters:
|
||||
* VALU co-issue (Two VALUs are issued instructions) efficiency
|
||||
* Stream Processor Instruction (SPI) Wave Occupancy
|
||||
* Scheduler-Pipe Wave Utilization
|
||||
* Scheduler FIFO Full Rate
|
||||
* CPC ADC Utilization
|
||||
* F6F4 data type metrics
|
||||
* Update formula for total FLOPs while taking into account F6F4 ops
|
||||
* LDS STORE, LDS LOAD, LDS ATOMIC instruction count metrics
|
||||
* LDS STORE, LDS LOAD, LDS ATOMIC bandwidth metrics
|
||||
* LDS FIFO full rate
|
||||
* Sequencer -> TA ADDR Stall rates
|
||||
* Sequencer -> TA CMD Stall rates
|
||||
* Sequencer -> TA DATA Stall rates
|
||||
* L1 latencies
|
||||
* L2 latencies
|
||||
* L2 to EA stalls
|
||||
* L2 to EA stalls per channel
|
||||
|
||||
* Roofline support for RHEL 10
|
||||
|
||||
* Roofline support for MI350 series architecture
|
||||
|
||||
* Interface to rocprofiler-sdk
|
||||
* Setting ROCPROF=rocprofiler-sdk environment variable will use rocprofiler-sdk C++ library instead of rocprofv3 python script
|
||||
* Add --rocprofiler-sdk-library-path runtime option to choose the path to rocprofiler-sdk library to be used
|
||||
* Using rocprof v1 / v2 / v3 interfaces will trigger a deprecation warning to use rocprofiler-sdk interface
|
||||
|
||||
* Support MEM chart on CLI (single run)
|
||||
|
||||
* Add deprecation warning for database update mode.
|
||||
|
||||
### Changed
|
||||
|
||||
* Change the default rocprof version to rocprofv3, this is used when environment variable "ROCPROF" is not set
|
||||
* Change the rocprof version for unit tests to rocprofv3 on all SoCs except MI100
|
||||
* Change normal_unit default to per_kernel
|
||||
* Change dependency from rocm-smi to amd-smi
|
||||
* Decrease profiling time by not collecting counters not used in post analysis
|
||||
* Update definition of following metrics for MI 350:
|
||||
* VGPR Writes
|
||||
* Total FLOPs (consider fp6 and fp4 ops)
|
||||
* Update Dash to >=3.0.0 (for web UI)
|
||||
* Change when Roofline PDFs are generated- during general profiling and --roof-only profiling (skip only when --no-roof option is present)
|
||||
* Update Roofline binaries
|
||||
* Rebuild using latest ROCm stack
|
||||
* OS distribution support minimum for roofline feature is now Ubuntu22.04, RHEL9, and SLES15SP6
|
||||
|
||||
### Optimized
|
||||
|
||||
* ROCm Compute Profiler CLI has been improved to better display the GPU architecture analytics
|
||||
|
||||
### Resolved issues
|
||||
|
||||
* Fixed MI 100 counters not being collected when rocprofv3 is used
|
||||
* Fixed option specs-correction
|
||||
* Fixed kernel name and kernel dispatch filtering when using rocprof v3
|
||||
* Fixed not collecting TCC channel counters in rocprof v3
|
||||
* Fixed peak FLOPS of F8 I8 F16 and BF16 on MI300
|
||||
|
||||
### Known issues
|
||||
|
||||
* On MI 100, accumulation counters will not be collected and the following metrics will not show up in analysis: Instruction Fetch Latency, Wavefront Occupancy, LDS Latency
|
||||
* As a workaround, set the ROCPROF=rocprof environment variable to use rocprofv1 for profiling on MI 100
|
||||
|
||||
* GPU id filtering is not supported when using rocprof v3
|
||||
|
||||
* Analysis of previously collected workload data will not work due to sysinfo.csv schema change
|
||||
* As a workaround, run the profiling operation again for the workload and interrupt the process after ten seconds.
|
||||
Followed by copying the `sysinfo.csv` file from the new data folder to the old one.
|
||||
This assumes your system specification hasn't changed since the creation of the previous workload data.
|
||||
|
||||
* Analysis of new workloads might require providing shader/memory clock speed using
|
||||
--specs-correction option if `amd-smi` or `rocminfo` do not provide clock speeds.
|
||||
|
||||
* Memory chart on CLI might look corrupted if CLI width is too narrow
|
||||
|
||||
### Removed
|
||||
|
||||
* Roofline support for Ubuntu 20.04 and SLES below 15.6
|
||||
* Usage of rocm-smi
|
||||
|
||||
## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0
|
||||
|
||||
### Added
|
||||
|
||||
* Roofline support for Ubuntu 24.04
|
||||
* Experimental support rocprofv3 (not enabled as default)
|
||||
|
||||
### Resolved issues
|
||||
|
||||
* Fixed PoP of VALU Active Threads
|
||||
* Workaround broken mclk for old version of rocm-smi
|
||||
|
||||
## ROCm Compute Profiler 3.0.0 for ROCm 6.3.0
|
||||
|
||||
### Changed
|
||||
|
||||
* Renamed Omniperf to ROCm Compute Profiler (#475)
|
||||
|
||||
## Omniperf 2.0.1 for ROCm 6.2.1
|
||||
|
||||
### Changed
|
||||
|
||||
* enable rocprofv1 for MI300 hardware (#391)
|
||||
* refactoring and updating documentation (#362, #394, #398, #414, #420)
|
||||
* branch renaming and workflow updates (#389, #404, #409)
|
||||
* bug fix for analysis output
|
||||
* add dependency checks on application launch (#393)
|
||||
* patch for profiling multi-process/multi-GPU applications (#376, #396)
|
||||
* packaging updates (#386)
|
||||
* rename CHANGES to CHANGELOG.md (#410)
|
||||
* rollback Grafana version in Dockerfile for Angular plugin compatibility (#416)
|
||||
* enable CI triggers for Azure CI (#426)
|
||||
* add GPU model distinction for MI300 systems (#423)
|
||||
* new MAINTAINERS.md guide for omniperf publishing procedures (#402)
|
||||
|
||||
### Optimized
|
||||
|
||||
* reduced running time of Omniperf when profiling (#384)
|
||||
* console logging improvements
|
||||
|
||||
## Omniperf 2.0.1 for ROCm 6.2.0
|
||||
|
||||
### Added
|
||||
|
||||
* new option to force hardware target via `OMNIPERF_ARCH_OVERRIDE` global (#370)
|
||||
* CI/CD support for MI300 hardware (#373)
|
||||
* support for MI308X hardware (#375)
|
||||
|
||||
### Optimized
|
||||
|
||||
* cmake build improvements (#374)
|
||||
|
||||
## Omniperf 2.0.0 (17 May 2024)
|
||||
|
||||
* improved logging that spans all modes (#177) (#317) (#335) (#341)
|
||||
* overhauled CI/CD that spans all modes (#179)
|
||||
* extensible SoC classes to better support adding new hardware configs (#180)
|
||||
* --kernel-verbose no longer overwrites kernel names (#193)
|
||||
* general cleanup and improved organization of source code (#200) (#210)
|
||||
* separate requirement files for docs and testing dependencies (#205) (#262) (#358)
|
||||
* add support for MI300 hardware (#231)
|
||||
* upgrade Grafana assets and build script to latest release (#235)
|
||||
* update minimum ROCm and Python requirements (#277)
|
||||
* sort rocprofiler input files prior to profiling (#304)
|
||||
* new --quiet option will suppress verbose output and show a progress bar (#308)
|
||||
* roofline support for Ubuntu 22.04 (#319)
|
||||
|
||||
## Omniperf 1.1.0-PR1 (13 Oct 2023)
|
||||
|
||||
* standardize headers to use 'avg' instead of 'mean'
|
||||
* add color code thresholds to standalone gui to match grafana
|
||||
* modify kernel name shortener to use cpp_filt (#168)
|
||||
* enable stochastic kernel dispatch selection (#183)
|
||||
* patch grafana plugin module to address a known issue in the latest version (#186)
|
||||
* enhanced communication between analyze mode kernel flags (#187)
|
||||
|
||||
## Omniperf 1.0.10 (22 Aug 2023)
|
||||
|
||||
* critical patch for detection of llvm in rocm installs on SLURM systems
|
||||
|
||||
## Omniperf 1.0.9 (17 Aug 2023)
|
||||
|
||||
* add units to L2 per-channel panel (#133)
|
||||
* new quickstart guide for Grafana setup in docs (#135)
|
||||
* more detail on kernel and dispatch filtering in docs (#136, #137)
|
||||
* patch manual join utility for ROCm >5.2.x (#139)
|
||||
* add % of peak values to low level speed-of-light panels (#140)
|
||||
* patch critical bug in Grafana by removing a deprecated plugin (#141)
|
||||
* enhancements to KernelName demangler (#142)
|
||||
* general metric updates and enhancements (#144, #155, #159)
|
||||
* add min/max/avg breakdown to instruction mix panel (#154)
|
||||
|
||||
## Omniperf 1.0.8 (30 May 2023)
|
||||
|
||||
* add `--kernel-names` option to toggle kernelName overlay in standalone roofline plot (#93)
|
||||
* remove unused python modules (#96)
|
||||
* fix empirical roofline calculation for single dispatch workloads (#97)
|
||||
* match color of arithmetic intensity points to corresponding bw lines
|
||||
|
||||
* ux improvements in standalone GUI (#101)
|
||||
* enhanced readability for filtering dropdowns in standalone GUI (#102)
|
||||
* new logfile to capture rocprofiler output (#106)
|
||||
* roofline support for sles15 sp4 and future service packs (#109)
|
||||
* adding dockerfiles for all supported Linux distros
|
||||
* new examples for `--roof-only` and `--kernel` options added to documentation
|
||||
|
||||
* enable cli analysis in Windows (#110)
|
||||
* optional random port number in standalone GUI (#111)
|
||||
* limit length of visible kernelName in `--kernel-names` option (#115)
|
||||
* adjust metric definitions (#117, #130)
|
||||
* manually merge rocprof runs, overriding default rocprofiler implementation (#125)
|
||||
* fixed compatibility issues with Python 3.11 (#131)
|
||||
|
||||
## Omniperf 1.0.8-PR2 (17 Apr 2023)
|
||||
|
||||
* ux improvements in standalone GUI (#101)
|
||||
* enhanced readability for filtering dropdowns in standalone GUI (#102)
|
||||
* new logfile to capture rocprofiler output (#106)
|
||||
* roofline support for sles15 sp4 and future service packs (#109)
|
||||
* adding dockerfiles for all supported Linux distros
|
||||
* new examples for `--roof-only` and `--kernel` options added to documentation
|
||||
|
||||
## Omniperf 1.0.8-PR1 (13 Mar 2023)
|
||||
|
||||
* add `--kernel-names` option to toggle kernelName overlay in standalone roofline plot (#93)
|
||||
* remove unused python modules (#96)
|
||||
* fix empirical roofline calculation for single dispatch workloads (#97)
|
||||
* match color of arithmetic intensity points to corresponding bw lines
|
||||
|
||||
## Omniperf 1.0.7 (21 Feb 2023)
|
||||
|
||||
* update documentation (#52, #64)
|
||||
* improved detection of invalid command line arguments (#58, #76)
|
||||
* enhancements to standalone roofline (#61)
|
||||
* enable Omniperf on systems with X-server (#62)
|
||||
* raise minimum version requirement for rocm (#64)
|
||||
* enable baseline comparison in CLI analysis (#65)
|
||||
* add multi-normalization to new metrics (#68, #81)
|
||||
* support alternative profilers (#70)
|
||||
* add MI100 configs to override rocprofiler's incomplete default (#75)
|
||||
* improve error message when no GPU(s) detected (#85)
|
||||
* separate CI tests by Linux distro and add status badges
|
||||
|
||||
## Omniperf 1.0.6 (21 Dec 2022)
|
||||
|
||||
* CI update: documentation now published via github action (#22)
|
||||
* better error detection for incomplete ROCm installs (#56)
|
||||
|
||||
## Omniperf 1.0.5 (13 Dec 2022)
|
||||
|
||||
* store application command-line parameters in profiling output (#27)
|
||||
* enable additional normalizations in CLI mode (#30)
|
||||
* add missing ubuntu 20.04 roofline binary to packaging (#34)
|
||||
* update L1 bandwidth metric calculations (#36)
|
||||
* add L1 <-> L2 bandwidth calculation (#37)
|
||||
* documentation updates (#38, #41)
|
||||
* enhanced subprocess logging to identify critical errors in rocprofiler (#50)
|
||||
* maintain git sha in production installs from tarball (#53)
|
||||
|
||||
## Omniperf 1.0.4 (11 Nov 2022)
|
||||
|
||||
* update python requirements.txt with minimum versions for numpy and pandas
|
||||
* addition of progress bar indicator in web-based GUI (#8)
|
||||
* reduced default content for web-based GUI to reduce load times (#9)
|
||||
* minor packaging and CI updates
|
||||
* variety of documentation updates
|
||||
* added an optional argument to vcopy.cpp workload example to specify device id
|
||||
|
||||
## Omniperf 1.0.3 (07 Nov 2022)
|
||||
|
||||
* initial Omniperf release
|
||||
@@ -0,0 +1,618 @@
|
||||
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
|
||||
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL
|
||||
CMAKE_SOURCE_DIR)
|
||||
set(MSG "")
|
||||
message(STATUS "Warning! Building from the source directory is not recommended")
|
||||
message(STATUS "If unintended, please remove 'CMakeCache.txt' and 'CMakeFiles'")
|
||||
message(STATUS "and build from a separate directory")
|
||||
message(FATAL_ERROR "In-source build")
|
||||
endif()
|
||||
|
||||
# System info
|
||||
cmake_host_system_information(RESULT LOCALHOST QUERY FQDN)
|
||||
message(STATUS "Hostname: ${LOCALHOST}")
|
||||
|
||||
# Versioning info derived from file
|
||||
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" FULL_VERSION_STRING LIMIT_COUNT 1)
|
||||
string(REGEX REPLACE "(\n|\r)" "" FULL_VERSION_STRING "${FULL_VERSION_STRING}")
|
||||
set(ROCPROFCOMPUTE_FULL_VERSION "${FULL_VERSION_STRING}")
|
||||
string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3"
|
||||
ROCPROFCOMPUTE_VERSION "${FULL_VERSION_STRING}")
|
||||
|
||||
# string(REGEX REPLACE "(${ROCPROFCOMPUTE_VERSION})(.*)" "\\2"
|
||||
# ROCPROFCOMPUTE_VERSION_TWEAK
|
||||
# "${FULL_VERSION_STRING}")
|
||||
# string(REGEX REPLACE "^\\." "" ROCPROFCOMPUTE_VERSION_TWEAK
|
||||
# "${ROCPROFCOMPUTE_VERSION_TWEAK}")
|
||||
|
||||
project(
|
||||
rocprofiler-compute
|
||||
VERSION ${ROCPROFCOMPUTE_VERSION}
|
||||
LANGUAGES C
|
||||
DESCRIPTION
|
||||
"A kernel-level profiling tool for machine learning/HPC workloads running on AMD MI GPUs"
|
||||
HOMEPAGE_URL "https://github.com/ROCm/rocprofiler-compute")
|
||||
|
||||
set(PACKAGE_NAME "rocprofiler-compute")
|
||||
set(PACKAGE_NAME_UNDERSCORE "rocprofiler_compute")
|
||||
set(EXECUTABLE_NAME "rocprof-compute")
|
||||
|
||||
include(ExternalProject)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# version control info
|
||||
find_package(Git)
|
||||
# Record the short git hash of the source tree when building from a git
# checkout. GIT_CLONE is consulted later to decide whether VERSION.sha is
# (re)generated from cmake/VERSION.sha.in.
if(Git_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
    # Use the git binary located by find_package(Git) and run it inside the
    # source tree: execute_process() otherwise runs in the build directory,
    # which need not be inside the repository.
    execute_process(
        COMMAND ${GIT_EXECUTABLE} log --pretty=format:%h -n 1
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        OUTPUT_VARIABLE ROCPROFCOMPUTE_GIT_REV
        OUTPUT_STRIP_TRAILING_WHITESPACE)
    message(STATUS "Git revision: ${ROCPROFCOMPUTE_GIT_REV}")
    set(GIT_CLONE TRUE)
else()
    # Was "FALSER": that only evaluated false because if() treated it as an
    # undefined variable name.
    set(GIT_CLONE FALSE)
endif()
|
||||
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
set(CMAKE_INSTALL_PREFIX
|
||||
"/opt/rocm"
|
||||
CACHE PATH "default install path" FORCE)
|
||||
endif()
|
||||
message(STATUS "Installation path: ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
option(CHECK_PYTHON_DEPS "Verify necessary python dependencies" ON)
|
||||
if(CHECK_PYTHON_DEPS)
|
||||
# Python 3 is required
|
||||
message(STATUS "Detecting Python interpreter...")
|
||||
find_package(
|
||||
Python3 3.8
|
||||
COMPONENTS Interpreter
|
||||
REQUIRED)
|
||||
|
||||
# Allow user-provided python search path
|
||||
if(DEFINED PYTHON_DEPS)
|
||||
set(ENV{PYTHONPATH} "${PYTHON_DEPS}")
|
||||
message(STATUS "Optional PYTHON_DEPS provided:")
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
message(STATUS "including ${PYTHON_DEPS} in search path")
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
endif()
|
||||
|
||||
# Check required Python packages
|
||||
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" pythonDeps)
|
||||
|
||||
message(STATUS "Checking for required Python package dependencies...")
|
||||
set_property(GLOBAL PROPERTY pythonDepsFlag "groovy")
|
||||
|
||||
# checkPythonPackage(<name>)
#
# Checks whether the requirement <name> can be imported with
# ${Python3_EXECUTABLE}. Prints "<name> = yes" or "<name> = missing"; on a
# failed import the GLOBAL property pythonDepsFlag is set to "missing", which
# the code after the dependency loop reads to decide whether to abort.
function(checkPythonPackage [package])
    # mapping for non-default package names
    set(PACKAGE ${ARGV0})
    if(${ARGV0} STREQUAL "pyyaml")
        set(PACKAGE "yaml")
    endif()
    # Skip check for textual-fspicker. The calling loop rewrites "-" to "_"
    # before invoking this function, so accept both spellings; comparing
    # only against the hyphenated name never matched.
    if("${ARGV0}" STREQUAL "textual_fspicker" OR "${ARGV0}" STREQUAL
                                                 "textual-fspicker")
        message(STATUS "Skipping check for textual-fspicker")
        return()
    endif()
    # A zero exit status from the interpreter means the import succeeded.
    execute_process(
        COMMAND ${Python3_EXECUTABLE} -c "import ${PACKAGE}"
        OUTPUT_QUIET ERROR_QUIET
        RESULT_VARIABLE EXIT_CODE)
    if(${EXIT_CODE} EQUAL 0)
        message(STATUS "${ARGV0} = yes")
    else()
        message(STATUS "${ARGV0} = missing")
        set_property(GLOBAL PROPERTY pythonDepsFlag "missing")
    endif()
endfunction()
|
||||
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
foreach(package IN LISTS pythonDeps)
|
||||
# Filter out any version requirements from requirements.txt
|
||||
string(REGEX REPLACE "[><=].*" "" package "${package}")
|
||||
string(REPLACE "-" "_" package "${package}")
|
||||
checkpythonpackage(${package})
|
||||
endforeach()
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
|
||||
get_property(pythonDepsInstalled GLOBAL PROPERTY pythonDepsFlag)
|
||||
if(${pythonDepsInstalled} STREQUAL "groovy")
|
||||
message(STATUS "OK: Python dependencies available in current environment.")
|
||||
else()
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"\nNecessary Python package dependencies not found. Please install required dependencies "
|
||||
"above using your favorite package manager. If using pip, consider running:\n"
|
||||
"python3 -m pip install -r requirements.txt\n"
|
||||
"at the top-level of this repository. If preparing a shared installation for "
|
||||
"multiple users, consider adding the -t <target-dir> option to install necessary dependencies "
|
||||
"into a shared directory, e.g.\n"
|
||||
"python3 -m pip install -t <shared-install-path> -r requirements.txt\n"
|
||||
"Note that the -DPYTHON_DEPS=<shared-install-path> can be used to provide an "
|
||||
"additional search path to cmake for python packages.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ----------------------
|
||||
# modulefile creation
|
||||
# ----------------------
|
||||
|
||||
set(MOD_INSTALL_PATH
|
||||
"${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/modulefiles/${PROJECT_NAME}"
|
||||
CACHE STRING "Install path for modulefile")
|
||||
message(STATUS "Modulefile install path: ${MOD_INSTALL_PATH}")
|
||||
|
||||
set(moduleFileTemplate "rocprofcompute.lua.in")
|
||||
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/${moduleFileTemplate}
|
||||
${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua @ONLY)
|
||||
|
||||
# Thera mods
|
||||
if(LOCALHOST MATCHES "TheraS01|.*\.thera\.amd\.com|thera-hn")
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
message(STATUS "Using thera-specific modulefile modification")
|
||||
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.thera.mod mod_additions)
|
||||
file(APPEND
|
||||
${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua
|
||||
${mod_additions})
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
endif()
|
||||
|
||||
# git versioning file
|
||||
if(${GIT_CLONE})
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/VERSION.sha.in
|
||||
${PROJECT_SOURCE_DIR}/VERSION.sha @ONLY)
|
||||
endif()
|
||||
|
||||
# Setup testing collateral
|
||||
|
||||
option(ENABLE_TESTS "Enable compilation of testing collateral" OFF)
|
||||
set(CMAKE_HIP_FLAGS_RELEASE "-O2")
|
||||
if(${ENABLE_TESTS})
|
||||
enable_language("C" "HIP")
|
||||
add_subdirectory(tests)
|
||||
|
||||
endif()
|
||||
message(STATUS "Enable tests compilation: ${ENABLE_TESTS}")
|
||||
|
||||
enable_testing()
|
||||
|
||||
option(ENABLE_COVERAGE "Enable code coverage" OFF)
|
||||
set(COV_OPTION "")
|
||||
if(${ENABLE_COVERAGE})
|
||||
set(COV_OPTION "--cov=src" "--cov-append" "--cov-report=term-missing"
|
||||
"--cov-report=lcov:tests/coverage.info")
|
||||
# "--cov-report=term-missing" "--cov-report=xml:tests/coverage.xml")
|
||||
endif()
|
||||
message(STATUS "Code coverage: ${ENABLE_COVERAGE}")
|
||||
|
||||
# CPU threads available for testing
|
||||
set(PYTEST_NUMPROCS
|
||||
"1"
|
||||
CACHE STRING "Number of parallel threads to use with CPU-oriented tests")
|
||||
message(STATUS "Pytest CPU threadcount: ${PYTEST_NUMPROCS}")
|
||||
|
||||
# CPU threads available for the analyze-command tests (test_analyze_commands); defaults to 4
|
||||
set(PYTEST_NUMPROCS_ANALYSIS
|
||||
"4"
|
||||
CACHE STRING "Number of parallel threads to use with CPU-oriented tests")
|
||||
message(STATUS "Pytest CPU threadcount: ${PYTEST_NUMPROCS_ANALYSIS}")
|
||||
|
||||
# ---------------------------
|
||||
# profile mode tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_profile_kernel_execution
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m kernel_execution
|
||||
--junitxml=tests/test_profile_kernel_execution.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_ipblocks
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m block --junitxml=tests/test_profile_blocks.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
set_property(TEST test_profile_ipblocks PROPERTY COST 11)
|
||||
|
||||
add_test(
|
||||
NAME test_profile_dispatch
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m dispatch
|
||||
--junitxml=tests/test_profile_dispatch.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
# Scheduling cost hint for the dispatch test defined just above. This
# previously named test_profile_ipblocks again, overriding its COST of 11.
set_property(TEST test_profile_dispatch PROPERTY COST 5)
|
||||
|
||||
add_test(
|
||||
NAME test_profile_mem
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest -m mem --junitxml=tests/test_profile_mem.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_join
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest -m join --junitxml=tests/test_profile_join.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_sort
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest -m sort --junitxml=tests/test_profile_sort.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_misc
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest -m misc --junitxml=tests/test_profile_misc.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_section
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m section
|
||||
--junitxml=tests/test_profile_section.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
set_tests_properties(
|
||||
test_profile_kernel_execution
|
||||
test_profile_ipblocks
|
||||
test_profile_dispatch
|
||||
test_profile_mem
|
||||
test_profile_join
|
||||
test_profile_sort
|
||||
test_profile_misc
|
||||
PROPERTIES LABELS "profile" RESOURCE_GROUPS gpus:1)
|
||||
|
||||
# ---------------------------
|
||||
# analysis command tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_analyze_commands
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS_ANALYSIS} --verbose
|
||||
--junitxml=tests/test_analyze_commands.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_analyze_commands.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# analyze workloads tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_analyze_workloads
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -n ${PYTEST_NUMPROCS}
|
||||
--junitxml=tests/test_analyze_workloads.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_analyze_workloads.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# TCP counter tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_L1_cache_counters
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m L1_cache
|
||||
--junitxml=tests/test_L1_cache_counters.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# Spec tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_num_xcds_spec_class
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m num_xcds_spec_class
|
||||
--junitxml=tests/test_num_xcds_spec_class.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_num_xcds_cli_output
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m num_xcds_cli_output
|
||||
--junitxml=tests/test_num_xcds_cli_output.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# DB Connector tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_db_connector
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest --junitxml=tests/test_db_connector.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_db_connector.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# Utils tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_utils
|
||||
COMMAND ${Python3_EXECUTABLE} -m pytest --junitxml=tests/test_utils.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_utils.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------
|
||||
# Install
|
||||
# ---------
|
||||
|
||||
# top-level rocprofiler-compute utility
|
||||
install(
|
||||
PROGRAMS src/${EXECUTABLE_NAME}
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main)
|
||||
# python dependency requirements
|
||||
install(
|
||||
FILES requirements.txt
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main)
|
||||
# support files and version info
|
||||
install(
|
||||
FILES src/argparser.py src/config.py src/rocprof_compute_base.py src/roofline.py
|
||||
VERSION VERSION.sha
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main)
|
||||
# src/rocprof_compute_analyze
|
||||
install(
|
||||
DIRECTORY src/rocprof_compute_analyze
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN src/rocprof_compute_analyze/tests EXCLUDE
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
# src/utils
|
||||
install(
|
||||
DIRECTORY src/utils
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN "rooflines*" EXCLUDE
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
# src/utils/rooflines
|
||||
file(GLOB rooflinebins src/utils/rooflines/roofline-*)
|
||||
install(
|
||||
PROGRAMS ${rooflinebins}
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
COMPONENT main)
|
||||
# src/rocprof_compute_soc
|
||||
install(
|
||||
DIRECTORY src/rocprof_compute_soc
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
# src/rocprof_compute_profile
|
||||
install(
|
||||
DIRECTORY src/rocprof_compute_profile
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
# src/rocprof_compute_tui
|
||||
install(
|
||||
DIRECTORY src/rocprof_compute_tui
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
# grafana assets
|
||||
install(
|
||||
DIRECTORY grafana
|
||||
DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}
|
||||
COMPONENT main)
|
||||
# samples
|
||||
install(
|
||||
DIRECTORY sample
|
||||
DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
FILES_MATCHING
|
||||
PATTERN "*.hip"
|
||||
PATTERN "*.h"
|
||||
PATTERN "*.cpp"
|
||||
PATTERN "workloads" EXCLUDE)
|
||||
# modulefile
|
||||
install(
|
||||
FILES ${PROJECT_BINARY_DIR}/${MOD_INSTALL_PATH}/${ROCPROFCOMPUTE_FULL_VERSION}.lua
|
||||
DESTINATION ${MOD_INSTALL_PATH}
|
||||
COMPONENT main)
|
||||
|
||||
# top-level symlink for bin/rocprof-compute
|
||||
install(
|
||||
CODE "execute_process(
|
||||
COMMAND bash -c \"set -e
|
||||
cd \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}
|
||||
ln -sf ../${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}/${EXECUTABLE_NAME} ${CMAKE_INSTALL_BINDIR}/${EXECUTABLE_NAME}
|
||||
\")"
|
||||
COMPONENT main)
|
||||
|
||||
# License header update(s)
|
||||
add_custom_target(
|
||||
license
|
||||
COMMAND
|
||||
${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR}/src
|
||||
--license ${PROJECT_SOURCE_DIR}/LICENSE --extension '.py'
|
||||
COMMAND
|
||||
${PROJECT_SOURCE_DIR}/utils/update_license.py --source ${PROJECT_SOURCE_DIR}
|
||||
--license ${PROJECT_SOURCE_DIR}/LICENSE --file
|
||||
"src/${PACKAGE_NAME},cmake/Dockerfile,cmake/rocm_install.sh,docker/docker-entrypoint.sh,src/rocprof_compute_analyze/convertor/mongodb/convert"
|
||||
)
|
||||
|
||||
# Standalone binary creation
|
||||
add_custom_target(
|
||||
standalonebinary
|
||||
# Change working directory to src
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src
|
||||
# Check nuitka
|
||||
COMMAND ${Python3_EXECUTABLE} -m pip list | grep -i nuitka > /dev/null 2>&1
|
||||
# Check patchelf
|
||||
COMMAND ${Python3_EXECUTABLE} -m pip list | grep -i patchelf > /dev/null 2>&1
|
||||
# Create VERSION.sha file
|
||||
COMMAND git -C ${PROJECT_SOURCE_DIR} rev-parse HEAD > VERSION.sha
|
||||
# Build standalone binary
|
||||
# NOTE: --no-deployment-flag=self-execution is used to avoid self-execution and fork
|
||||
# bombs as explained in
|
||||
# https://nuitka.net/user-documentation/common-issue-solutions.html#fork-bombs-self-execution
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m nuitka --mode=onefile --no-deployment-flag=self-execution
|
||||
--include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt
|
||||
--include-package=dash_svg --include-package-data=dash_svg
|
||||
--include-package=dash_bootstrap_components
|
||||
--include-package-data=dash_bootstrap_components --include-package=plotly
|
||||
--include-package-data=plotly --include-package=kaleido
|
||||
--include-package-data=kaleido --include-package=rocprof_compute_analyze
|
||||
--include-package-data=rocprof_compute_analyze
|
||||
--include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc
|
||||
--include-package=utils --include-package-data=utils rocprof-compute
|
||||
# Remove library rpath from executable
|
||||
COMMAND patchelf --remove-rpath rocprof-compute.bin
|
||||
# Move to build directory
|
||||
COMMAND mv rocprof-compute.bin ${CMAKE_BINARY_DIR})
|
||||
|
||||
install(
|
||||
FILES ${PROJECT_SOURCE_DIR}/LICENSE
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}
|
||||
COMPONENT main)
|
||||
|
||||
# TEST collateral
|
||||
# Opt-in switch: when ON, the install rules below add the test collateral.
option(INSTALL_TESTS "Install test suite" OFF)
|
||||
if(INSTALL_TESTS)
|
||||
install(
|
||||
DIRECTORY tests
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT tests
|
||||
FILES_MATCHING
|
||||
PATTERN "*.py"
|
||||
PATTERN "__pycache__" EXCLUDE)
|
||||
install(
|
||||
FILES requirements-test.txt
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT tests)
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake
|
||||
COMPONENT tests
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME})
|
||||
endif()
|
||||
message(STATUS "Install tests: ${INSTALL_TESTS}")
|
||||
|
||||
# ----------
|
||||
# Packaging
|
||||
# ----------
|
||||
|
||||
message(STATUS "Packaging config...")
|
||||
set(CPACK_GENERATOR
|
||||
"DEB" "RPM"
|
||||
CACHE STRING "")
|
||||
set(CPACK_PACKAGE_NAME
|
||||
"${PROJECT_NAME}"
|
||||
CACHE STRING "")
|
||||
set(CPACK_PACKAGE_CONTACT "https://github.com/ROCm/rocprofiler-compute")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
|
||||
"ROCm Compute Profiler: tool for GPU performance profiling")
|
||||
set(CPACK_RPM_PACKAGE_DESCRIPTION
|
||||
"ROCm Compute Profiler is a performance analysis tool for profiling
|
||||
machine learning/HPC workloads running on AMD GPUs.")
|
||||
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
|
||||
|
||||
# Package versioning
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
|
||||
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_VERSION
|
||||
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}"
|
||||
)
|
||||
|
||||
# RPM package specific variables
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
set(CPACK_RPM_SPEC_MORE_DEFINE "%undefine __brp_mangle_shebangs")
|
||||
|
||||
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}")
|
||||
endif()
|
||||
|
||||
# Debian package specific variables
|
||||
set(CPACK_DEBIAN_PACKAGE_LICENSE "MIT")
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
|
||||
# Dependencies
|
||||
set(PACKAGE_REQUIRES
|
||||
"rocprofiler"
|
||||
CACHE STRING "Package dependencies")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES ${PACKAGE_REQUIRES})
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS ${PACKAGE_REQUIRES})
|
||||
|
||||
# Handle the project rebranding from omniperf to rocprofiler-compute
|
||||
set(OMNIPERF_PACKAGE_NAME "omniperf")
|
||||
set(CPACK_RPM_PACKAGE_PROVIDES ${OMNIPERF_PACKAGE_NAME})
|
||||
set(CPACK_RPM_PACKAGE_OBSOLETES "${OMNIPERF_PACKAGE_NAME} < 3.0.0")
|
||||
set(CPACK_RPM_PACKAGE_CONFLICTS ${OMNIPERF_PACKAGE_NAME})
|
||||
|
||||
set(CPACK_DEBIAN_PACKAGE_PROVIDES ${OMNIPERF_PACKAGE_NAME})
|
||||
set(CPACK_DEBIAN_PACKAGE_REPLACES ${OMNIPERF_PACKAGE_NAME})
|
||||
set(CPACK_DEBIAN_PACKAGE_BREAKS ${OMNIPERF_PACKAGE_NAME})
|
||||
|
||||
# Disable automatic dependency generation
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV OFF)
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQ OFF)
|
||||
set(CPACK_RPM_PACKAGE_AUTOPROV OFF)
|
||||
|
||||
if(INSTALL_TESTS)
|
||||
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES ${CPACK_PACKAGE_NAME})
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS ${CPACK_PACKAGE_NAME})
|
||||
endif()
|
||||
|
||||
# ----- Check for packaging override -----
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
# Log package info
|
||||
message(STATUS " Package Name: ${CPACK_PACKAGE_NAME}")
|
||||
message(STATUS " Package Version: ${CPACK_PACKAGE_VERSION}")
|
||||
message(STATUS " RPM Package Release: ${CPACK_RPM_PACKAGE_RELEASE}")
|
||||
message(STATUS " Debian Package Release: ${CPACK_DEBIAN_PACKAGE_RELEASE}")
|
||||
message(STATUS " Packaging Install Prefix: ${CPACK_PACKAGING_INSTALL_PREFIX}")
|
||||
message(STATUS " Install Tests: ${INSTALL_TESTS}")
|
||||
message(STATUS " Package Dependencies: ${PACKAGE_REQUIRES}")
|
||||
message(STATUS " CPack Generator: ${CPACK_GENERATOR}")
|
||||
|
||||
# Source tarball
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${FULL_VERSION_STRING})
|
||||
set(CPACK_SOURCE_IGNORE_FILES
|
||||
".*~$"
|
||||
\.git/
|
||||
\.github
|
||||
\.gitmodules
|
||||
\.gitignore
|
||||
/tests
|
||||
/build)
|
||||
|
||||
include(CPack)
|
||||
@@ -0,0 +1,59 @@
|
||||
## How to fork from us
|
||||
|
||||
To keep our development fast and conflict free, we recommend that you [fork](https://github.com/ROCm/rocprofiler-compute/fork) our repository and start your work from our `develop` branch in your private repository.
|
||||
|
||||
Afterwards, clone your fork to your local machine. That is not all, though: to keep track of the original repository, add it as another remote.
|
||||
|
||||
```
|
||||
git remote add mainline https://github.com/ROCm/rocprofiler-compute.git
|
||||
git checkout develop
|
||||
```
|
||||
|
||||
As always in git, start a new branch with
|
||||
|
||||
```
|
||||
git checkout -b topic-<yourFeatureName>
|
||||
```
|
||||
|
||||
and apply your changes there. For more help, refer to GitHub's ['About Forking'](https://docs.github.com/en/get-started/exploring-projects-on-github/contributing-to-a-project) page.
|
||||
|
||||
## How to contribute to ROCm Compute Profiler
|
||||
|
||||
### Did you find a bug?
|
||||
|
||||
- Ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/ROCm/rocprofiler-compute/issues).
|
||||
|
||||
- If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/ROCm/rocprofiler-compute/issues/new).
|
||||
|
||||
### Did you write a patch that fixes a bug?
|
||||
|
||||
- Open a new GitHub [pull request](https://github.com/ROCm/rocprofiler-compute/compare) with the patch.
|
||||
|
||||
- Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it.
|
||||
|
||||
- Ensure the PR is based on the `develop` branch of the ROCm Compute Profiler GitHub repository.
|
||||
|
||||
> [!TIP]
|
||||
> To ensure you meet all formatting requirements before publishing, we recommend you utilize our included [*pre-commit hooks*](https://pre-commit.com/#introduction). For more information on how to use pre-commit hooks please see the [section below](#using-pre-commit-hooks).
|
||||
|
||||
## Using pre-commit hooks
|
||||
|
||||
Our project supports optional [*pre-commit hooks*](https://pre-commit.com/#introduction) which developers can leverage to verify formatting before publishing their code. Once enabled, any commits you propose to the repository will be automatically checked for formatting. Initial setup is as follows:
|
||||
|
||||
```console
|
||||
python3 -m pip install pre-commit
|
||||
cd rocprofiler-compute
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
Now, when you commit code to the repository you should see something like this:
|
||||
|
||||

|
||||
|
||||
Please see the [pre-commit documentation](https://pre-commit.com/#quick-start) for additional information.
|
||||
|
||||
## Coding guidelines
|
||||
|
||||
Below are some repository-specific guidelines which are followed throughout the repository.
|
||||
Any future contributions should adhere to these guidelines:
|
||||
* Use the `pathlib` library functions instead of `os.path` for manipulating the file paths.
|
||||
@@ -0,0 +1,44 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
This application uses the following dependencies and their usage is governed by their respective licenses
|
||||
Python 3 standard library: PSFL
|
||||
astunparse python library: PSFL
|
||||
colorlover python library: MIT
|
||||
dash python library: MIT
|
||||
dash-bootstrap-components python library: MIT
|
||||
dash-svg python library: MIT
|
||||
kaleido python library: MIT
|
||||
matplotlib python library: PSFL
|
||||
Nuitka specific runtime code: Apache 2.0 license
|
||||
numpy python library: BSD
|
||||
pandas python library: BSD
|
||||
plotext python library: MIT
|
||||
plotille python library: MIT
|
||||
pymongo python library: Apache 2.0 license
|
||||
pyyaml python library: MIT
|
||||
setuptools python library: MIT
|
||||
tabulate python library: MIT
|
||||
textual python library: MIT
|
||||
textual_plotext python library: MIT
|
||||
textual-fspicker python library: MIT
|
||||
tqdm python library: MIT
|
||||
@@ -0,0 +1,29 @@
|
||||
# Maintainers Guide to ROCm Compute Profiler
|
||||
|
||||
## Publishing a release
|
||||
|
||||
Before publishing a new ROCm Compute Profiler release, please review this checklist to ensure all prerequisites are met:
|
||||
|
||||
1) **Ensure [VERSION](VERSION) file is updated** to reflect your desired release version.
|
||||
2) **Sync `amd-mainline` with `amd-staging`**. Unless major changes were introduced, you should be able to merge using the fast-forward only strategy.
|
||||
3) **Update [CHANGES](CHANGES)** to reflect all major modifications to the codebase since the last release. When modifying [CHANGES](CHANGES) please ensure formatting is consistent with the rest of the ROCm software stack. See [this template](https://github.com/ROCm/hipTensor/blob/develop/CHANGELOG.md) for reference.
|
||||
4) **Confirm all CI tests are passing**. You can easily confirm this by peeking the passing status of all GitHub continuous integration tests.
|
||||
5) **Create a tag from `amd-mainline`**. More information on tagging can be found at [Git Docs - Tagging](https://git-scm.com/book/en/v2/Git-Basics-Tagging).
|
||||
|
||||
> [!NOTE]
|
||||
> A successful tag should trigger the [packaging action](.github/workflows/packaging.yml), which will produce a tarball artifact. **This artifact needs to be included as an asset in your release**. The [packaging action](.github/workflows/packaging.yml) will automatically upload the artifact and generate release notes for the corresponding tag.
|
||||
|
||||
Once you've completed the above checklist, you are ready to publish your release. Please ensure you follow formatting from [past ROCm Compute Profiler releases](https://github.com/ROCm/rocprofiler-compute/releases) for consistency. Some important aspects of our release formatting include:
|
||||
|
||||
- Date of release is included in "Release Title".
|
||||
- Updates are called out in "Release Description". Updates should mirror those listed in [CHANGES](CHANGES).
|
||||
- Links to documentation and associated release tarball are called out in "Release Description".
|
||||
- The tarball artifact from the corresponding tag is added to "Release Assets".
|
||||
|
||||
### Publishing a release for ROCm
|
||||
|
||||
If you are preparing for a new ROCm release, note that the [rocm-ci](https://github.com/rocm-ci) bot managed by DevOps will be triggering a tag automatically. This tag will follow the format `rocm-X.X.X`.
|
||||
|
||||
Traditionally, we will bump the ROCm Compute Profiler [VERSION](VERSION) with a new ROCm release. When we bump the version and reach the prerequisite step (5) above, try tagging with `vX.X.X` to validate the release tarball generated by the [packaging action](.github/workflows/packaging.yml).
|
||||
|
||||
In addition to the prerequisites mentioned above, please make sure that all changes have been merged from `amd-staging` -> `release/rocm-rel-X.X.X` to ensure that the "rocm-ci" bot will capture all your changes. It is easiest to file a single pull request ahead of the ROCm release.
|
||||
@@ -0,0 +1,106 @@
|
||||
[](https://github.com/ROCm/rocprofiler-compute/actions/workflows/ubuntu-jammy.yml)
|
||||
[](https://github.com/ROCm/rocprofiler-compute/actions/workflows/rhel-8.yml)
|
||||
[](https://github.com/ROCm/rocprofiler-compute/actions/workflows/mi-rhel9.yml)
|
||||
[](https://rocm.github.io/rocprofiler-compute/)
|
||||
[](https://zenodo.org/badge/latestdoi/561919887)
|
||||
|
||||
# ROCm Compute Profiler
|
||||
|
||||
## General
|
||||
|
||||
ROCm Compute Profiler is a system performance profiling tool for machine
|
||||
learning/HPC workloads running on AMD MI GPUs. The tool presently
|
||||
targets usage on MI100, MI200, and MI300 accelerators.
|
||||
|
||||
* For more information on available features, installation steps, and
|
||||
workload profiling and analysis, please refer to the online
|
||||
[documentation](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/).
|
||||
|
||||
* ROCm Compute Profiler is an AMD open source research project and is not supported
|
||||
as part of the ROCm software stack. We welcome contributions and
|
||||
feedback from the community. Please see the
|
||||
[CONTRIBUTING.md](CONTRIBUTING.md) file for additional details on our
|
||||
contribution process.
|
||||
|
||||
* Licensing information can be found in the [LICENSE](LICENSE) file.
|
||||
|
||||
## Development
|
||||
|
||||
ROCm Compute Profiler follows a
|
||||
[main-dev](https://nvie.com/posts/a-successful-git-branching-model/)
|
||||
branching model. As a result, our latest stable release is shipped
|
||||
from the `amd-mainline` branch, while new features are developed in our
|
||||
`develop` branch.
|
||||
|
||||
Users may checkout `amd-staging` to preview upcoming features.
|
||||
|
||||
## Testing
|
||||
|
||||
Populate the empty variables in `Dockerfile.customrocmtest` based on latest CI build information.
|
||||
|
||||
To quickly get the environment (bash shell) for building and testing, run the following commands:
|
||||
* `cd docker`
|
||||
* `docker compose -f docker-compose.customrocmtest.yml up --force-recreate -d && docker attach docker-customrocmtest-1`
|
||||
|
||||
Inside the docker container, clean, build and install the project with tests enabled:
|
||||
```
|
||||
rm -rf build install && cmake -B build -D CMAKE_INSTALL_PREFIX=install -D ENABLE_TESTS=ON -D INSTALL_TESTS=ON -DENABLE_COVERAGE=ON -S . && cmake --build build --target install --parallel 8
|
||||
```
|
||||
|
||||
Note that per the above command, build assets will be stored under `build` directory and installed assets will be stored under `install` directory.
|
||||
|
||||
Then, to run the automated test suite, run the following command:
|
||||
```
|
||||
ctest
|
||||
```
|
||||
|
||||
For manual testing, you can find the executable at `install/bin/rocprof-compute`
|
||||
|
||||
NOTE: This Dockerfile uses `ubuntu 22.04` as the base operating system image
|
||||
|
||||
## Standalone binary
|
||||
|
||||
To create a standalone binary, run the following commands:
|
||||
* `cd docker`
|
||||
* `docker compose -f docker-compose.standalone.yml up --force-recreate -d && docker attach docker-standalone-1`
|
||||
|
||||
You should find the rocprof-compute.bin standalone binary inside the `build` folder in the root directory of the project.
|
||||
|
||||
To build the binary we follow these steps:
|
||||
* Use RHEL 8 image used to build ROCm as the base image
|
||||
* Install python3.8
|
||||
* Install dependencies for runtime and for making standalone binary
|
||||
* Call the make target which uses Nuitka to build the standalone binary
|
||||
|
||||
NOTE: Since RHEL 8 ships with glibc version 2.28, this standalone binary can only be run in environments with glibc version 2.28 or newer.
|
||||
glibc version can be checked using `ldd --version` command.
|
||||
|
||||
NOTE: libnss3.so shared library is required when using --roof-only option which generates roofline data in PDF format
|
||||
|
||||
To test the standalone binary provide the `--call-binary` option to pytest.
|
||||
|
||||
## How to Cite
|
||||
|
||||
This software can be cited using a Zenodo
|
||||
[DOI](https://doi.org/10.5281/zenodo.7314631) reference. A BibTeX
|
||||
style reference is provided below for convenience:
|
||||
|
||||
```
|
||||
@software{xiaomin_lu_2022_7314631,
|
||||
author = {Xiaomin Lu and
|
||||
Cole Ramos and
|
||||
Fei Zheng and
|
||||
Karl W. Schulz and
|
||||
Jose Santos and
|
||||
Keith Lowery and
|
||||
Nicholas Curtis and
|
||||
Cristian Di Pietrantonio},
|
||||
title = {ROCm/rocprofiler-compute: v3.1.0 (12 February 2025)},
|
||||
month = feb,
|
||||
year = 2025,
|
||||
publisher = {Zenodo},
|
||||
version = {v3.1.0},
|
||||
doi = {10.5281/zenodo.7314631},
|
||||
url = {https://doi.org/10.5281/zenodo.7314631}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1 @@
|
||||
3.2.0
|
||||
@@ -0,0 +1,68 @@
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
FROM ubuntu:20.04
|
||||
|
||||
USER root
|
||||
|
||||
COPY rocm_install.sh /rocprofiler-compute/rocm_install.sh
|
||||
|
||||
ENV PATH="/rocprofiler-compute:${PATH}"
|
||||
# tzdata has no "US/Chicago" zone; America/Chicago is the matching zone name.
ENV TZ="America/Chicago"
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
#pyenv dependencies
|
||||
RUN apt update && \
|
||||
apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl
|
||||
ENV HOME="/rocprofiler-compute"
|
||||
WORKDIR $HOME
|
||||
ENV PYENV_ROOT="$HOME/.pyenv"
|
||||
ENV PATH="$PYENV_ROOT/bin:$PATH"
|
||||
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
|
||||
apt update && \
|
||||
apt-get install -y cmake wget git python3-dev rpm python3-venv software-properties-common &&\
|
||||
add-apt-repository ppa:deadsnakes/ppa -y &&\
|
||||
apt install python3.7 -y libpython3.7-dev python3.7-venv libnuma-dev &&\
|
||||
curl https://pyenv.run | bash
|
||||
|
||||
# Register pyenv in ~/.bashrc, build Python 3.8.12 with a shared library,
# select it as the global pyenv version, and install the distro pip.
# The eval lines are single-quoted so that "$(pyenv ...)" is written to
# ~/.bashrc verbatim and evaluated at shell start-up, rather than being
# expanded once while the image is built.
RUN echo "export PATH=$HOME/.pyenv/bin:$PATH" >> ~/.bashrc &&\
    echo 'eval "$(pyenv init -)"' >> ~/.bashrc &&\
    echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc &&\
    CPPFLAGS=-I/usr/bin/openssl \
    LDFLAGS=-L/usr/lib64 \
    CONFIGURE_OPTS=--enable-shared pyenv install -v 3.8.12 &&\
    pyenv global 3.8.12 &&\
    apt-get install -y python3-pip
|
||||
#clang?
|
||||
|
||||
RUN python3 -m pip install astunparse==1.6.2 colorlover dash matplotlib numpy pandas pymongo pyyaml tabulate tqdm dash-svg pyinstaller dash-bootstrap-components &&\
|
||||
python3 -m pip install 'cmake==3.21.4' && \
|
||||
./rocm_install.sh &&\
|
||||
#wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
|
||||
#echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION}/ ${ROCM_REPO_DIST} main" | tee /etc/apt/sources.list.d/rocm.list && \
|
||||
apt-get update && \
|
||||
apt-get dist-upgrade -y && \
|
||||
#apt-get install -y rocm-dev rocm-utils rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev hip-base hsa-amd-aqlprofile hsa-rocr-dev hsakmt-roct-dev ${EXTRA_PACKAGES} && \
|
||||
apt-get autoclean
|
||||
@@ -0,0 +1 @@
|
||||
@ROCPROFCOMPUTE_GIT_REV@
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Crusher-specific additions
|
||||
depends_on "cray-python"
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","5.2.0"))
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Thera-specific additions
|
||||
depends_on "python"
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","5.2.0"))
|
||||
local home = os.getenv("HOME")
|
||||
setenv("MPLCONFIGDIR",pathJoin(home,".matplotlib"))
|
||||
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################################bl
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
##############################################################################el
|
||||
|
||||
declare -a rocm_versions=("4.3.1" "4.5.2" "5.0.2" "5.1.3" "5.2.3")
|
||||
wget https://repo.radeon.com/amdgpu-install/22.10/ubuntu/focal/amdgpu-install_22.10.50100-1_all.deb
|
||||
apt-get install -y ./amdgpu-install_22.10.50100-1_all.deb
|
||||
# For every release listed in rocm_versions: point apt at that release's
# repository, refresh the package index, then run amdgpu-install for it.
# Expansions are quoted so each array element stays a single word.
for rocm_version in "${rocm_versions[@]}"; do
    echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${rocm_version} ubuntu main" | tee /etc/apt/sources.list.d/rocm.list
    apt update
    amdgpu-install -y --usecase=rocm --rocmrelease="${rocm_version}" --no-dkms
done
|
||||
@@ -0,0 +1,31 @@
|
||||
local help_message = [[
|
||||
|
||||
ROCm Compute Profiler is an open-source performance analysis tool for profiling
|
||||
machine learning/HPC workloads running on AMD MI GPUs.
|
||||
|
||||
Version @ROCPROFCOMPUTE_FULL_VERSION@
|
||||
]]
|
||||
|
||||
help(help_message,"\n")
|
||||
|
||||
whatis("Name: @PROJECT_NAME@")
|
||||
whatis("Version: @ROCPROFCOMPUTE_FULL_VERSION@")
|
||||
whatis("Keywords: Profiling, Performance, GPU")
|
||||
whatis("Description: tool for GPU performance profiling")
|
||||
whatis("URL: https://github.com/ROCm/rocprofiler-compute")
|
||||
|
||||
-- Export environmental variables
|
||||
local topDir="@CMAKE_INSTALL_PREFIX@"
|
||||
local binDir="@CMAKE_INSTALL_FULL_BINDIR@"
|
||||
local shareDir="@CMAKE_INSTALL_FULL_DATADIR@"
|
||||
local pythonDeps="@PYTHON_DEPS@"
|
||||
|
||||
setenv("ROCPROFCOMPUTE_DIR",topDir)
|
||||
setenv("ROCPROFCOMPUTE_BIN",binDir)
|
||||
setenv("ROCPROFCOMPUTE_SHARE",shareDir)
|
||||
|
||||
-- Update relevant PATH variables
|
||||
prepend_path("PATH",binDir)
|
||||
if ( pythonDeps ~= "" ) then
|
||||
prepend_path("PYTHONPATH",pythonDeps)
|
||||
end
|
||||
@@ -0,0 +1,49 @@
|
||||
# Use a base image
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Update package list and install prerequisites
|
||||
RUN apt-get update && apt-get install -y \
|
||||
software-properties-common cmake locales git curl \
|
||||
&& add-apt-repository ppa:deadsnakes/ppa \
|
||||
&& apt-get update
|
||||
|
||||
# Allows running git commands in /app
|
||||
RUN git config --global --add safe.directory /app
|
||||
|
||||
# Generate the desired locale
|
||||
RUN locale-gen en_US.UTF-8
|
||||
|
||||
# Install Python 3.10 and pip
|
||||
RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip libsqlite3-dev
|
||||
|
||||
# Update pip
|
||||
RUN apt-get remove -y python3-wheel
|
||||
# -f makes an HTTP error fail the build instead of saving the error page as get-pip.py.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
|
||||
RUN python3.10 get-pip.py
|
||||
RUN python3.10 -m pip install --upgrade pip setuptools wheel
|
||||
|
||||
# Set Python 3.10 as the default python3
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
|
||||
|
||||
# Remove blinker python package
|
||||
RUN apt-get remove -y python3-blinker
|
||||
|
||||
# Install rocm
|
||||
# Define custom version
|
||||
ARG DEB_FILE=""
|
||||
ARG AMDGPU_BUILD=""
|
||||
ARG ROCM_BUILD=""
|
||||
# -f makes an HTTP error fail the build instead of saving the error page as the .deb.
RUN curl -fsSLO "https://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/${DEB_FILE}"
|
||||
RUN apt-get install -y "./${DEB_FILE}"
|
||||
RUN amdgpu-repo --amdgpu-build="${AMDGPU_BUILD}" --rocm-build="compute-rocm-dkms-no-npi-hipclang/${ROCM_BUILD}"
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ="America/Toronto" amdgpu-install --yes --usecase=rocm
|
||||
|
||||
# Install any dependencies specified in requirements.txt
|
||||
# Run interactive bash shell
|
||||
CMD ["/bin/bash", "-c", "\
|
||||
python3.10 -m pip install -r requirements.txt -r requirements-test.txt \
|
||||
&& exec /bin/bash \
|
||||
"]
|
||||
@@ -0,0 +1,27 @@
|
||||
# Use a base image
|
||||
FROM rocm/dev-ubuntu-22.04
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Update package list and install prerequisites
|
||||
RUN apt-get update && apt-get install -y \
|
||||
software-properties-common cmake locales git \
|
||||
&& add-apt-repository ppa:deadsnakes/ppa \
|
||||
&& apt-get update
|
||||
|
||||
# Allows running git commands in /app
|
||||
RUN git config --global --add safe.directory /app
|
||||
|
||||
# Install Python 3.10 and pip
|
||||
# Refresh the package index in the same layer as the install, so a cached earlier layer
# cannot leave this step resolving packages against a stale index.
RUN apt-get update && apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip
|
||||
|
||||
# Set Python 3.10 as the default python3
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
|
||||
|
||||
# Install any dependencies specified in requirements.txt
|
||||
# Run interactive bash shell
|
||||
CMD ["/bin/bash", "-c", "\
|
||||
python3 -m pip install -r docs/sphinx/requirements.txt \
|
||||
&& exec /bin/bash \
|
||||
"]
|
||||
@@ -0,0 +1,54 @@
|
||||
ARG DISTRO=opensuse/leap
|
||||
ARG VERSION=15.3
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ENV PATH /usr/local/bin:${PATH}
|
||||
ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64
|
||||
|
||||
RUN set +e; \
|
||||
zypper --non-interactive -i --gpg-auto-import-keys refresh; \
|
||||
zypper --non-interactive -i patch; \
|
||||
zypper --non-interactive -i patch; \
|
||||
zypper --non-interactive -i --gpg-auto-import-keys refresh; \
|
||||
exit 0
|
||||
|
||||
RUN zypper --non-interactive update -y && \
|
||||
zypper --non-interactive dist-upgrade -y && \
|
||||
zypper --non-interactive install -y -t pattern devel_basis && \
|
||||
zypper --non-interactive install -y python3-pip gcc-c++ git dpkg-devel rpm-build wget curl binutils-gold && \
|
||||
python3 -m pip install 'cmake==3.28.4'
|
||||
|
||||
ARG ROCM_VERSION=0.0
|
||||
ARG AMDGPU_RPM=latest/sle/15/amdgpu-install-21.50.50000-1.noarch.rpm
|
||||
ARG PERL_REPO=SLE_15
|
||||
|
||||
RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
zypper --non-interactive addrepo https://mirrorcache-us.opensuse.org/repositories/devel:/languages:/perl/${PERL_REPO}/devel:languages:perl.repo && \
|
||||
zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
zypper --non-interactive --gpg-auto-import-keys refresh && \
|
||||
zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \
|
||||
zypper --non-interactive clean --all; \
|
||||
fi
|
||||
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
|
||||
RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
conda init
|
||||
|
||||
WORKDIR /home
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
@@ -0,0 +1,48 @@
|
||||
ARG DISTRO=opensuse/leap
|
||||
ARG VERSION=15.3
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ENV PATH /usr/local/bin:${PATH}
|
||||
|
||||
ARG EXTRA_PACKAGES=""
|
||||
# ARG ELFUTILS_DOWNLOAD_VERSION="0.186"
|
||||
# ARG NJOBS="12"
|
||||
|
||||
RUN set +e; \
|
||||
zypper --non-interactive -i --gpg-auto-import-keys refresh; \
|
||||
zypper --non-interactive -i patch; \
|
||||
zypper --non-interactive -i patch; \
|
||||
zypper --non-interactive -i --gpg-auto-import-keys refresh; \
|
||||
exit 0
|
||||
|
||||
RUN zypper --non-interactive update -y && \
|
||||
zypper --non-interactive dist-upgrade -y && \
|
||||
zypper --non-interactive install -y -t pattern devel_basis && \
|
||||
zypper --non-interactive install -y python3-pip gcc-c++ git dpkg-devel rpm-build curl wget binutils-gold && \
|
||||
python3 -m pip install 'cmake==3.28.4' && \
|
||||
zypper --non-interactive clean --all
|
||||
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
|
||||
RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy pandas dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
cd /tmp && \
|
||||
shopt -s dotglob extglob && \
|
||||
rm -rf *
|
||||
|
||||
WORKDIR /home
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
@@ -0,0 +1,52 @@
|
||||
ARG DISTRO=rockylinux
|
||||
ARG VERSION=8
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ENV PATH /usr/local/bin:${PATH}
|
||||
ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64
|
||||
|
||||
RUN yum groupinstall -y "Development Tools" && \
|
||||
yum install -y epel-release && \
|
||||
yum install -y --allowerasing curl dpkg-devel python3-pip wget zlib-devel which && \
|
||||
yum clean all && \
|
||||
python3 -m pip install 'cmake==3.28.4'
|
||||
|
||||
ARG ROCM_VERSION=0.0
|
||||
ARG AMDGPU_RPM=5.4/rhel/8.7/amdgpu-install-5.4.50400-1.el8.noarch.rpm
|
||||
|
||||
RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
OS_VERSION_MAJOR=$(cat /etc/os-release | grep 'VERSION_ID' | sed 's/=/ /1' | awk '{print $NF}' | sed 's/"//g' | sed 's/\./ /g' | awk '{print $1}') && \
|
||||
if [ "${OS_VERSION_MAJOR}" -eq 8 ]; then PERL_REPO=powertools; else PERL_REPO=crb; fi && \
|
||||
dnf -y --enablerepo=${PERL_REPO} install perl-File-BaseDir && \
|
||||
yum install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \
|
||||
yum install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev libpciaccess && \
|
||||
yum clean all; \
|
||||
fi
|
||||
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
|
||||
RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
conda init
|
||||
|
||||
RUN if [ "${ROCM_VERSION}" != "0.0" ]; then ln -sf /opt/rocm-${ROCM_VERSION}* /opt/rocm; fi
|
||||
|
||||
WORKDIR /home
|
||||
ENV LC_ALL C.UTF-8
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
COPY ./entrypoint-rhel.sh /docker-entrypoint.sh
|
||||
ENTRYPOINT [ "/docker-entrypoint.sh" ]
|
||||
@@ -0,0 +1,42 @@
|
||||
|
||||
ARG DISTRO=rockylinux
|
||||
ARG VERSION=8
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ENV PATH /usr/local/bin:${PATH}
|
||||
|
||||
ARG EXTRA_PACKAGES=""
|
||||
# ARG ELFUTILS_DOWNLOAD_VERSION="0.186"
|
||||
# ARG NJOBS="12"
|
||||
|
||||
RUN yum groupinstall -y "Development Tools" && \
|
||||
yum install -y epel-release && \
|
||||
yum install -y --allowerasing curl dpkg-devel python3-pip wget zlib-devel which git && \
|
||||
yum clean all && \
|
||||
python3 -m pip install --upgrade pip && \
|
||||
python3 -m pip install 'cmake==3.28.4'
|
||||
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
|
||||
RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy pandas dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
cd /tmp && \
|
||||
shopt -s dotglob extglob && \
|
||||
rm -rf *
|
||||
|
||||
WORKDIR /home
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
@@ -0,0 +1,22 @@
|
||||
FROM redhat/ubi8:8.10-1184
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN yum install -y curl gcc cmake git
|
||||
|
||||
# Allows running git commands in /app
|
||||
RUN git config --global --add safe.directory /app
|
||||
|
||||
RUN yum install -y python38 python38-devel && \
|
||||
yum clean all && \
|
||||
rm -rf /var/cache/yum && \
|
||||
curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
|
||||
python3.8 get-pip.py
|
||||
|
||||
CMD ["/bin/bash", "-c", "\
|
||||
python3.8 -m pip install -r requirements.txt \
|
||||
&& python3.8 -m pip install nuitka patchelf \
|
||||
&& rm -rf build \
|
||||
&& cmake -B build -S . \
|
||||
&& make -C build standalonebinary \
|
||||
"]
|
||||
@@ -0,0 +1,57 @@
|
||||
ARG DISTRO
|
||||
ARG VERSION
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US
|
||||
ENV LC_ALL C
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ARG EXTRA_PACKAGES=""
|
||||
ARG ROCM_REPO_VERSION="debian"
|
||||
ARG ROCM_VERSION="0.0"
|
||||
ARG ROCM_REPO_DIST="ubuntu"
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
ENV PATH ${HOME}/.local/bin:${PATH}
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get dist-upgrade -y && \
|
||||
apt-get install -y build-essential cmake libnuma1 wget gnupg2 m4 bash-completion git-core autoconf libtool autotools-dev python3-pip lsb-release libpapi-dev libpfm4-dev libudev1 libopenmpi-dev rpm librpm-dev curl apt-utils && \
|
||||
python3 -m pip install 'cmake==3.28.4'
|
||||
|
||||
RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \
|
||||
if [ -d /etc/apt/trusted.gpg.d ]; then \
|
||||
wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/trusted.gpg.d/rocm.gpg; \
|
||||
else \
|
||||
wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -; \
|
||||
fi && \
|
||||
echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION}/ ${ROCM_REPO_DIST} main" | tee /etc/apt/sources.list.d/rocm.list && \
|
||||
apt-get update && \
|
||||
apt-get dist-upgrade -y && \
|
||||
apt-get install -y hsa-amd-aqlprofile hsa-rocr-dev hsakmt-roct-dev && \
|
||||
apt-get install -y hip-base hip-runtime-amd hip-dev && \
|
||||
apt-get install -y rocm-llvm rocm-core rocm-smi-lib rocm-device-libs && \
|
||||
apt-get install -y roctracer-dev rocprofiler-dev rccl-dev ${EXTRA_PACKAGES} && \
|
||||
if [ "$(echo ${ROCM_VERSION} | awk -F '.' '{print $1}')" -lt "5" ]; then apt-get install -y rocm-dev; fi && \
|
||||
apt-get autoclean; \
|
||||
fi
|
||||
|
||||
RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
conda init
|
||||
|
||||
ENV LC_ALL C.UTF-8
|
||||
WORKDIR /home
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
@@ -0,0 +1,48 @@
|
||||
|
||||
ARG DISTRO
|
||||
ARG VERSION
|
||||
FROM ${DISTRO}:${VERSION}
|
||||
|
||||
ENV HOME /root
|
||||
ENV LANG C.UTF-8
|
||||
ENV SHELL /bin/bash
|
||||
ENV BASH_ENV /etc/bash.bashrc
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
WORKDIR /tmp
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
ARG EXTRA_PACKAGES=""
|
||||
# ARG ELFUTILS_DOWNLOAD_VERSION="0.186"
|
||||
# ARG BOOST_DOWNLOAD_VERSION="1.79.0"
|
||||
# ARG NJOBS="12"
|
||||
ARG PYTHON_VERSIONS="6 7 8 9 10 11 12"
|
||||
|
||||
ENV PATH /usr/local/bin:${PATH}
|
||||
ENV LIBRARY_PATH /usr/local/lib:/usr/local/lib64:${LIBRARY_PATH}
|
||||
ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}
|
||||
ENV CMAKE_PREFIX_PATH /usr/local:${CMAKE_PREFIX_PATH}
|
||||
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get dist-upgrade -y && \
|
||||
apt-get install -y autoconf autotools-dev bash-completion build-essential bzip2 cmake curl environment-modules git-core gnupg2 gzip libtool locales lsb-release m4 python3-pip unzip wget zip zlib1g-dev && \
|
||||
python3 -m pip install 'cmake==3.28.4' && \
|
||||
apt-get autoclean && \
|
||||
locale -a && \
|
||||
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
|
||||
bash miniconda.sh -b -p /opt/conda && \
|
||||
export PATH="/opt/conda/bin:${PATH}" && \
|
||||
conda config --set always_yes yes --set changeps1 no && \
|
||||
conda update -c defaults -n base conda && \
|
||||
for i in ${PYTHON_VERSIONS}; do conda create -n py3.${i} -c defaults -c conda-forge python=3.${i} pip numpy; done && \
|
||||
for i in ${PYTHON_VERSIONS}; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy pandas dataclasses; done && \
|
||||
conda clean -a -y && \
|
||||
cd /tmp && \
|
||||
shopt -s dotglob extglob && \
|
||||
rm -rf *
|
||||
|
||||
|
||||
ENV LC_ALL C.UTF-8
|
||||
WORKDIR /home
|
||||
SHELL [ "/bin/bash", "--login", "-c" ]
|
||||
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
: ${USER:=$(whoami)}
|
||||
: ${DISTRO:=ubuntu}
|
||||
: ${VERSIONS:=20.04}
|
||||
# : ${NJOBS=$(nproc)}
|
||||
# : ${ELFUTILS_VERSION:=0.186}
|
||||
# : ${BOOST_VERSION:=1.79.0}
|
||||
: ${PYTHON_VERSIONS:="6 7 8 9 10 11 12"}
|
||||
: ${PUSH:=0}
|
||||
: ${PULL:=--pull}
|
||||
|
||||
# Echo a command line, then run it.
#
# All arguments are joined into one string and handed to eval, so callers may embed
# escaped quotes (e.g. \"...\") that take effect when the command is evaluated.
verbose-run()
{
    echo -e "\n### Executing \"${@}\"... ###\n"
    # Quote the expansion so the arguments reach eval exactly as passed, without an extra
    # round of word splitting and pathname expansion beforehand.
    eval "${@}"
}
|
||||
|
||||
tolower()
|
||||
{
|
||||
echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}';
|
||||
}
|
||||
|
||||
toupper()
|
||||
{
|
||||
echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}';
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
print_option() { printf " --%-20s %-24s %s\n" "${1}" "${2}" "${3}"; }
|
||||
echo "Options:"
|
||||
print_option "help -h" "" "This message"
|
||||
print_option "push" "" "Push the container to DockerHub when completed"
|
||||
print_option "no-pull" "" "Do not pull down most recent base container"
|
||||
|
||||
echo ""
|
||||
print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; }
|
||||
print_default_option distro "[ubuntu|opensuse|rhel]" "OS distribution" "${DISTRO}"
|
||||
print_default_option versions "[VERSION] [VERSION...]" "Ubuntu, OpenSUSE, or RHEL release" "${VERSIONS}"
|
||||
print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}"
|
||||
# print_default_option "jobs -j" "[N]" "parallel build jobs" "${NJOBS}"
|
||||
# print_default_option elfutils-version "[0.183..0.186]" "ElfUtils version" "${ELFUTILS_VERSION}"
|
||||
# print_default_option boost-version "[1.67.0..1.79.0]" "Boost version" "${BOOST_VERSION}"
|
||||
print_default_option user "[USERNAME]" "DockerHub username" "${USER}"
|
||||
}
|
||||
|
||||
send-error()
|
||||
{
|
||||
usage
|
||||
echo -e "\nError: ${@}"
|
||||
exit 1
|
||||
}
|
||||
|
||||
reset-last()
|
||||
{
|
||||
last() { send-error "Unsupported argument :: ${1}"; }
|
||||
}
|
||||
|
||||
reset-last
|
||||
|
||||
n=0
|
||||
while [[ $# -gt 0 ]]
|
||||
do
|
||||
case "${1}" in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
"--distro")
|
||||
shift
|
||||
DISTRO=${1}
|
||||
last() { DISTRO="${DISTRO} ${1}"; }
|
||||
;;
|
||||
"--versions")
|
||||
shift
|
||||
VERSIONS=${1}
|
||||
last() { VERSIONS="${VERSIONS} ${1}"; }
|
||||
;;
|
||||
"--python-versions")
|
||||
shift
|
||||
PYTHON_VERSIONS=${1}
|
||||
last() { PYTHON_VERSIONS="${PYTHON_VERSIONS} ${1}"; }
|
||||
;;
|
||||
--user|-u)
|
||||
shift
|
||||
USER=${1}
|
||||
reset-last
|
||||
;;
|
||||
"--push")
|
||||
PUSH=1
|
||||
reset-last
|
||||
;;
|
||||
"--no-pull")
|
||||
PULL=""
|
||||
reset-last
|
||||
;;
|
||||
--*)
|
||||
reset-last
|
||||
last ${1}
|
||||
;;
|
||||
*)
|
||||
last ${1}
|
||||
;;
|
||||
esac
|
||||
n=$((${n} + 1))
|
||||
shift
|
||||
done
|
||||
|
||||
DOCKER_FILE=Dockerfile.${DISTRO}.ci
|
||||
|
||||
if [ ! -f ${DOCKER_FILE} ]; then cd docker; fi
|
||||
|
||||
if [ ! -f ${DOCKER_FILE} ]; then
|
||||
echo "Error! Execute script from source directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# verbose-run rm -rf ./dyninst-source
|
||||
# verbose-run cp -r ../external/dyninst ./dyninst-source
|
||||
# verbose-run rm -rf ./dyninst-source/{build,install}*
|
||||
|
||||
set -e
|
||||
|
||||
if [ "${DISTRO}" = "opensuse" ]; then
|
||||
DISTRO_IMAGE="opensuse/leap"
|
||||
elif [ "${DISTRO}" = "rhel" ]; then
|
||||
DISTRO_IMAGE="rockylinux"
|
||||
else
|
||||
DISTRO_IMAGE=${DISTRO}
|
||||
fi
|
||||
|
||||
for VERSION in ${VERSIONS}
|
||||
do
|
||||
verbose-run docker build . \
|
||||
${PULL} \
|
||||
-f ${DOCKER_FILE} \
|
||||
--tag ${USER}/rocprofiler-compute:ci-base-${DISTRO}-${VERSION} \
|
||||
--build-arg DISTRO=${DISTRO_IMAGE} \
|
||||
--build-arg VERSION=${VERSION} \
|
||||
--build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\"
|
||||
# --build-arg NJOBS=${NJOBS} \
|
||||
# --build-arg ELFUTILS_DOWNLOAD_VERSION=${ELFUTILS_VERSION} \
|
||||
# --build-arg BOOST_DOWNLOAD_VERSION=${BOOST_VERSION}
|
||||
done
|
||||
|
||||
if [ "${PUSH}" -gt 0 ]; then
|
||||
for VERSION in ${VERSIONS}
|
||||
do
|
||||
verbose-run docker push ${USER}/rocprofiler-compute:ci-base-${DISTRO}-${VERSION}
|
||||
done
|
||||
fi
|
||||
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
: ${USER:=$(whoami)}
|
||||
: ${ROCM_VERSIONS:="5.0"}
|
||||
: ${DISTRO:=ubuntu}
|
||||
: ${VERSIONS:=20.04}
|
||||
: ${PYTHON_VERSIONS:="6 7 8 9 10 11 12"}
|
||||
: ${BUILD_CI:=""}
|
||||
: ${PUSH:=0}
|
||||
: ${PULL:=--pull}
|
||||
: ${RETRY:=3}
|
||||
|
||||
set -e
|
||||
|
||||
tolower()
|
||||
{
|
||||
echo "$@" | awk -F '\\|~\\|' '{print tolower($1)}';
|
||||
}
|
||||
|
||||
toupper()
|
||||
{
|
||||
echo "$@" | awk -F '\\|~\\|' '{print toupper($1)}';
|
||||
}
|
||||
|
||||
# Print the command-line help text to stdout.
#
# Value-less switches are listed first. Options that take a value follow, each shown with
# the current content of its backing variable (DISTRO, VERSIONS, ROCM_VERSIONS,
# PYTHON_VERSIONS, USER, RETRY), lower-cased through tolower.
usage()
{
    print_option() { printf " --%-20s %-24s %s\n" "${1}" "${2}" "${3}"; }
    echo "Options:"
    print_option "help -h" "" "This message"
    print_option "no-pull" "" "Do not pull down most recent base container"
    # --push takes no value, so it belongs with the plain switches; printing it through
    # print_default_option produced an always-empty "(default: )" suffix.
    print_option "push" "" "Push the image to Dockerhub"

    echo ""
    print_default_option() { printf " --%-20s %-24s %s (default: %s)\n" "${1}" "${2}" "${3}" "$(tolower ${4})"; }
    print_default_option distro "[ubuntu|opensuse|rhel]" "OS distribution" "${DISTRO}"
    print_default_option versions "[VERSION] [VERSION...]" "Ubuntu, OpenSUSE, or RHEL release" "${VERSIONS}"
    print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}"
    print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}"
    print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}"
    print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}"
    #print_default_option lto "[on|off]" "Enable LTO" "${LTO}"
}
|
||||
|
||||
send-error()
|
||||
{
|
||||
usage
|
||||
echo -e "\nError: ${@}"
|
||||
exit 1
|
||||
}
|
||||
|
||||
verbose-run()
|
||||
{
|
||||
echo -e "\n### Executing \"${@}\"... ###\n"
|
||||
eval "${@}"
|
||||
}
|
||||
|
||||
verbose-build()
|
||||
{
|
||||
echo -e "\n### Executing \"${@}\" a maximum of ${RETRY} times... ###\n"
|
||||
for i in $(seq 1 1 ${RETRY})
|
||||
do
|
||||
set +e
|
||||
eval "${@}"
|
||||
local RETC=$?
|
||||
set -e
|
||||
if [ "${RETC}" -eq 0 ]; then
|
||||
break
|
||||
else
|
||||
echo -en "\n### Command failed with error code ${RETC}... "
|
||||
if [ "${i}" -ne "${RETRY}" ]; then
|
||||
echo -e "Retrying... ###\n"
|
||||
sleep 3
|
||||
else
|
||||
echo -e "Exiting... ###\n"
|
||||
exit ${RETC}
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
reset-last()
|
||||
{
|
||||
last() { send-error "Unsupported argument :: ${1}"; }
|
||||
}
|
||||
|
||||
reset-last
|
||||
|
||||
n=0
|
||||
while [[ $# -gt 0 ]]
|
||||
do
|
||||
case "${1}" in
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
"--distro")
|
||||
shift
|
||||
DISTRO=${1}
|
||||
last() { DISTRO="${DISTRO} ${1}"; }
|
||||
;;
|
||||
"--versions")
|
||||
shift
|
||||
VERSIONS=${1}
|
||||
last() { VERSIONS="${VERSIONS} ${1}"; }
|
||||
;;
|
||||
"--rocm-versions")
|
||||
shift
|
||||
ROCM_VERSIONS=${1}
|
||||
last() { ROCM_VERSIONS="${ROCM_VERSIONS} ${1}"; }
|
||||
;;
|
||||
"--python-versions")
|
||||
shift
|
||||
PYTHON_VERSIONS=${1}
|
||||
last() { PYTHON_VERSIONS="${PYTHON_VERSIONS} ${1}"; }
|
||||
;;
|
||||
--user|-u)
|
||||
shift
|
||||
USER=${1}
|
||||
reset-last
|
||||
;;
|
||||
--push)
|
||||
PUSH=1
|
||||
reset-last
|
||||
;;
|
||||
--no-pull)
|
||||
PULL=""
|
||||
reset-last
|
||||
;;
|
||||
--retry|-r)
|
||||
shift
|
||||
RETRY=${1}
|
||||
reset-last
|
||||
;;
|
||||
# The pattern must be unquoted to act as a glob. Quoted, it only matched the literal
# string "--*", so unknown long options fell through to the catch-all arm below and were
# handed to last(), which could append them to the previous list option.
--*)
    send-error "Unsupported argument at position $((${n} + 1)) :: ${1}"
    ;;
|
||||
*)
|
||||
last ${1}
|
||||
;;
|
||||
esac
|
||||
n=$((${n} + 1))
|
||||
shift
|
||||
done
|
||||
|
||||
DOCKER_FILE="Dockerfile.${DISTRO}"
|
||||
|
||||
if [ "${RETRY}" -lt 1 ]; then
|
||||
RETRY=1
|
||||
fi
|
||||
|
||||
if [ -n "${BUILD_CI}" ]; then DOCKER_FILE="${DOCKER_FILE}.ci"; fi
|
||||
if [ ! -f ${DOCKER_FILE} ]; then cd docker; fi
|
||||
if [ ! -f ${DOCKER_FILE} ]; then send-error "File \"${DOCKER_FILE}\" not found"; fi
|
||||
|
||||
for VERSION in ${VERSIONS}
|
||||
do
|
||||
VERSION_MAJOR=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $1}')
|
||||
VERSION_MINOR=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $2}')
|
||||
VERSION_PATCH=$(echo ${VERSION} | sed 's/\./ /g' | awk '{print $3}')
|
||||
for ROCM_VERSION in ${ROCM_VERSIONS}
|
||||
do
|
||||
CONTAINER=${USER}/rocprofiler-compute:release-base-${DISTRO}-${VERSION}-rocm-${ROCM_VERSION}
|
||||
ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}')
|
||||
ROCM_MINOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $2}')
|
||||
ROCM_PATCH=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $3}')
|
||||
if [ -n "${ROCM_PATCH}" ]; then
|
||||
ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100)+(${ROCM_PATCH}) ))
|
||||
ROCM_SEP="."
|
||||
else
|
||||
ROCM_VERSN=$(( (${ROCM_MAJOR}*10000)+(${ROCM_MINOR}*100) ))
|
||||
ROCM_SEP=""
|
||||
fi
|
||||
if [ "${DISTRO}" = "ubuntu" ]; then
|
||||
ROCM_REPO_DIST="ubuntu"
|
||||
ROCM_REPO_VERSION=${ROCM_VERSION}
|
||||
case "${ROCM_VERSION}" in
|
||||
4.1* | 4.0*)
|
||||
ROCM_REPO_DIST="xenial"
|
||||
;;
|
||||
5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.*)
|
||||
case "${VERSION}" in
|
||||
24.04)
|
||||
ROCM_REPO_DIST="noble"
|
||||
;;
|
||||
22.04)
|
||||
ROCM_REPO_DIST="jammy"
|
||||
;;
|
||||
20.04)
|
||||
ROCM_REPO_DIST="focal"
|
||||
;;
|
||||
18.04)
|
||||
ROCM_REPO_DIST="bionic"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
echo
|
||||
verbose-build docker build . ${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg ROCM_REPO_VERSION=${ROCM_REPO_VERSION} --build-arg ROCM_REPO_DIST=${ROCM_REPO_DIST} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\"
|
||||
elif [ "${DISTRO}" = "rhel" ]; then
|
||||
if [ -z "${VERSION_MINOR}" ]; then
|
||||
send-error "Please provide a major and minor version of the OS. Supported: >= 8.7, <= 9.3"
|
||||
fi
|
||||
|
||||
# Components used to create the sub-URL below
|
||||
# set <OS-VERSION> in amdgpu-install/<ROCM-VERSION>/rhel/<OS-VERSION>
|
||||
RPM_PATH=${VERSION_MAJOR}.${VERSION_MINOR}
|
||||
RPM_TAG=".el${VERSION_MAJOR}"
|
||||
|
||||
# set the sub-URL in https://repo.radeon.com/amdgpu-install/<sub-URL>
|
||||
case "${ROCM_VERSION}" in
|
||||
5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.0 | 6.0.*)
|
||||
ROCM_RPM=${ROCM_VERSION}/rhel/${RPM_PATH}/amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1${RPM_TAG}.noarch.rpm
|
||||
;;
|
||||
# ROCm releases older than 5.3 are rejected for RHEL builds. The upper bound in the
# message mirrors the accepting arm above, which takes versions through 6.0.x.
5.2 | 5.2.* | 5.1 | 5.1.* | 5.0 | 5.0.* | 4.*)
    send-error "Invalid ROCm version ${ROCM_VERSION}. Supported: >= 5.3.0, <= 6.0.x"
    ;;
|
||||
0.0)
|
||||
;;
|
||||
*)
|
||||
send-error "Unsupported combination :: ${DISTRO}-${VERSION} + ROCm ${ROCM_VERSION}"
|
||||
;;
|
||||
esac
|
||||
|
||||
# use Rocky Linux as a base image for RHEL builds
|
||||
DISTRO_BASE_IMAGE=rockylinux
|
||||
|
||||
verbose-build docker build . ${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_BASE_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\"
|
||||
elif [ "${DISTRO}" = "opensuse" ]; then
|
||||
case "${VERSION}" in
|
||||
15.*)
|
||||
DISTRO_IMAGE="opensuse/leap"
|
||||
echo "DISTRO_IMAGE: ${DISTRO_IMAGE}"
|
||||
;;
|
||||
*)
|
||||
send-error "Invalid opensuse version ${VERSION}. Supported: 15.x"
|
||||
;;
|
||||
esac
|
||||
case "${ROCM_VERSION}" in
|
||||
5.3 | 5.3.* | 5.4 | 5.4.* | 5.5 | 5.5.* | 5.6 | 5.6.* | 5.7 | 5.7.* | 6.0 | 6.0.*)
|
||||
ROCM_RPM=${ROCM_VERSION}/sle/${VERSION}/amdgpu-install-${ROCM_MAJOR}.${ROCM_MINOR}.${ROCM_VERSN}-1.noarch.rpm
|
||||
;;
|
||||
5.2 | 5.2.*)
|
||||
ROCM_RPM=22.20${ROCM_SEP}${ROCM_PATCH}/sle/${VERSION}/amdgpu-install-22.20.${ROCM_VERSN}-1.noarch.rpm
|
||||
;;
|
||||
5.1 | 5.1.*)
|
||||
ROCM_RPM=22.10${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-22.10${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm
|
||||
;;
|
||||
5.0 | 5.0.*)
|
||||
ROCM_RPM=21.50${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-21.50${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm
|
||||
;;
|
||||
4.5 | 4.5.*)
|
||||
ROCM_RPM=21.40${ROCM_SEP}${ROCM_PATCH}/sle/15/amdgpu-install-21.40${ROCM_SEP}${ROCM_PATCH}.${ROCM_VERSN}-1.noarch.rpm
|
||||
;;
|
||||
0.0)
|
||||
;;
|
||||
*)
|
||||
send-error "Unsupported combination :: ${DISTRO}-${VERSION} + ROCm ${ROCM_VERSION}"
|
||||
;;
|
||||
esac
|
||||
if [[ "${VERSION_MAJOR}" -le 15 && "${VERSION_MINOR}" -le 5 ]]; then
|
||||
PERL_REPO="15.6"
|
||||
else
|
||||
PERL_REPO="${VERSION_MAJOR}.${VERSION_MINOR}"
|
||||
fi
|
||||
verbose-build docker build . ${PULL} --progress plain -f ${DOCKER_FILE} --tag ${CONTAINER} --build-arg DISTRO=${DISTRO_IMAGE} --build-arg VERSION=${VERSION} --build-arg ROCM_VERSION=${ROCM_VERSION} --build-arg AMDGPU_RPM=${ROCM_RPM} --build-arg PERL_REPO=${PERL_REPO} --build-arg PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\"
|
||||
fi
|
||||
if [ "${PUSH}" -ne 0 ]; then
|
||||
docker push ${CONTAINER}
|
||||
fi
|
||||
done
|
||||
done
|
||||
@@ -0,0 +1,17 @@
|
||||
# Compose definition for the custom-ROCm test container.
services:
  customrocmtest: # service name
    build:
      # Build from the directory above this compose file.
      context: ../
      dockerfile: docker/Dockerfile.customrocmtest
    # Pass the host's /dev/kfd and /dev/dri device nodes into the container.
    devices:
      - /dev/kfd
      - /dev/dri
    security_opt:
      - seccomp:unconfined
    volumes:
      # Mount the directory above this compose file at /app.
      - ../:/app
      - ../../rocprofiler-sdk:/rocprofiler-sdk
    ports:
      # Quoted so every YAML parser reads the host:container mapping as a string.
      - "8050:8050"
    # Keep an interactive terminal attached.
    tty: true
    stdin_open: true
|
||||
@@ -0,0 +1,14 @@
|
||||
# Compose definition for the documentation test container.
services:
  doctest: # service name
    build:
      # Build from the directory above this compose file.
      context: '../'
      dockerfile: 'docker/Dockerfile.doctest'
    # Pass the host's /dev/kfd and /dev/dri device nodes into the container.
    devices:
      - '/dev/kfd'
      - '/dev/dri'
    security_opt:
      - 'seccomp:unconfined'
    volumes:
      # Mount the directory above this compose file at /app.
      - '../:/app'
    # Keep an interactive terminal attached.
    tty: true
    stdin_open: true
|
||||
@@ -0,0 +1,12 @@
|
||||
# Compose definition for the standalone-binary build container.
services:
  standalone:
    build:
      # Build from the directory above this compose file.
      context: '../'
      dockerfile: 'docker/Dockerfile.standalone'
    # Pass the host's /dev/kfd and /dev/dri device nodes into the container.
    devices:
      - '/dev/kfd'
      - '/dev/dri'
    security_opt:
      - 'seccomp:unconfined'
    volumes:
      # Mount the directory above this compose file at /app.
      - '../:/app'
|
||||
@@ -0,0 +1,5 @@
|
||||
/build*
|
||||
/_build
|
||||
/_doxygen
|
||||
/.gitinfo
|
||||
/omniperf.dox
|
||||
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -0,0 +1,6 @@
|
||||
This subdirectory houses the input markup for Omniperf documentation using
|
||||
Sphinx. Changes committed here on the main branch will automatically be built
|
||||
and pushed live using a GitHub Action.
|
||||
|
||||
You can build a local copy of the documentation in this directory using
|
||||
"make html" assuming you have the necessary sphinx dependencies installed.
|
||||
@@ -0,0 +1 @@
|
||||
1.0.10
|
||||
@@ -0,0 +1,770 @@
|
||||
# Analyze Mode
|
||||
|
||||
```eval_rst
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 4
|
||||
```
|
||||
Omniperf offers several ways to interact with the metrics it generates from profiling. The option you choose will likely be influenced by your familiarity with the profiled application, computing environment, and experience with Omniperf.
|
||||
|
||||
While analyzing with the CLI offers quick and straightforward access to Omniperf metrics from the terminal, the GUI adds an extra layer of styling and interactivity that some users may prefer.
|
||||
|
||||
See sections below for more information on each.
|
||||
|
||||
## CLI Analysis
|
||||
> Profiling results from the [aforementioned vcopy workload](https://rocm.github.io/omniperf/profiling.html#workload-compilation) will be used in the following sections to demonstrate the use of Omniperf in MI GPU performance analysis. Unless otherwise noted, the performance analysis is done on the MI200 platform.
|
||||
|
||||
### Features
|
||||
|
||||
- All Omniperf built-in metrics.
|
||||
- Baseline comparison across multiple runs.
|
||||
- Metrics customization: pick a subset of built-in metrics or build your own profiling configuration.
|
||||
- Kernel, gpu-id, dispatch-id filters.
|
||||
|
||||
Run `omniperf analyze -h` for more details.
|
||||
|
||||
### Recommended workflow
|
||||
|
||||
1) To begin, generate a comprehensive analysis report with Omniperf CLI.
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/
|
||||
|
||||
--------
|
||||
Analyze
|
||||
--------
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
0. Top Stat
|
||||
╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
|
||||
│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │
|
||||
╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
|
||||
│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │
|
||||
│ │ int) [clone .kd] │ │ │ │ │ │
|
||||
╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
1. System Info
|
||||
╒══════════════════╤═══════════════════════════════════════════════╕
|
||||
│ │ Info │
|
||||
╞══════════════════╪═══════════════════════════════════════════════╡
|
||||
│ workload_name │ vcopy │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ command │ /home/colramos/vcopy 1048576 256 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ host_name │ sv-pdp-2 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ host_cpu │ AMD EPYC 7282 16-Core Processor │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ host_distro │ Ubuntu 20.04.3 LTS │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ host_kernel │ 5.15.0-43-generic │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ host_rocmver │ 5.2.1-79 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ date │ Fri Jan 20 11:22:20 2023 (CST) │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ gpu_soc │ gfx90a │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ numSE │ 8 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ numCU │ 104 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ numSIMD │ 4 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ waveSize │ 64 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ maxWavesPerCU │ 32 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ maxWorkgroupSize │ 1024 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ L1 │ 16 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ L2 │ 8192 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ sclk │ 1700 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ mclk │ 1600 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ cur_sclk │ 800 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ cur_mclk │ 1600 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ L2Banks │ 32 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ name │ mi200 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ numSQC │ 56 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ hbmBW │ 1638.4 │
|
||||
├──────────────────┼───────────────────────────────────────────────┤
|
||||
│ ip_blocks │ roofline|SQ|LDS|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF │
|
||||
╘══════════════════╧═══════════════════════════════════════════════╛
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
2. System Speed-of-Light
|
||||
....
|
||||
```
|
||||
2. Use `--list-metrics` to generate a list of available metrics for inspection.
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
|
||||
╒═════════╤═════════════════════════════╕
|
||||
│ │ Metric │
|
||||
╞═════════╪═════════════════════════════╡
|
||||
│ 0 │ Top Stat │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 1 │ System Info │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.0 │ VALU_FLOPs │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.1 │ VALU_IOPs │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.2 │ MFMA_FLOPs_(BF16) │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.3 │ MFMA_FLOPs_(F16) │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.4 │ MFMA_FLOPs_(F32) │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.5 │ MFMA_FLOPs_(F64) │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.6 │ MFMA_IOPs_(Int8) │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.7 │ Active_CUs │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.8 │ SALU_Util │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.9 │ VALU_Util │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.10 │ MFMA_Util │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.11 │ VALU_Active_Threads/Wave │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.12 │ IPC_-_Issue │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.13 │ LDS_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.14 │ LDS_Bank_Conflict │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.15 │ Instr_Cache_Hit_Rate │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.16 │ Instr_Cache_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.17 │ Scalar_L1D_Cache_Hit_Rate │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.18 │ Scalar_L1D_Cache_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.19 │ Vector_L1D_Cache_Hit_Rate │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.20 │ Vector_L1D_Cache_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.21 │ L2_Cache_Hit_Rate │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.22 │ L2-Fabric_Read_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.23 │ L2-Fabric_Write_BW │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.24 │ L2-Fabric_Read_Latency │
|
||||
├─────────┼─────────────────────────────┤
|
||||
│ 2.1.25 │ L2-Fabric_Write_Latency │
|
||||
├─────────┼─────────────────────────────┤
|
||||
...
|
||||
```
|
||||
2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/ROCm/omniperf/blob/amd-mainline/src/rocprof_compute_analyze/configs/panel_config_template.yaml). The example below shows how to generate a report containing only metric 2 (a.k.a. System Speed-of-Light).
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ -b 2
|
||||
--------
|
||||
Analyze
|
||||
--------
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
0. Top Stat
|
||||
╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╕
|
||||
│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │
|
||||
╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╡
|
||||
│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20000.00 │ 20000.00 │ 20000.00 │ 100.00 │
|
||||
│ │ int) [clone .kd] │ │ │ │ │ │
|
||||
╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╛
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
2. System Speed-of-Light
|
||||
╒═════════╤═══════════════════════════╤═══════════════════════╤══════════════════╤════════════════════╤════════════════════════╕
|
||||
│ Index │ Metric │ Value │ Unit │ Peak │ PoP │
|
||||
╞═════════╪═══════════════════════════╪═══════════════════════╪══════════════════╪════════════════════╪════════════════════════╡
|
||||
│ 2.1.0 │ VALU FLOPs │ 0.0 │ Gflop │ 22630.4 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.1 │ VALU IOPs │ 367.0016 │ Giop │ 22630.4 │ 1.6217194570135745 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.2 │ MFMA FLOPs (BF16) │ 0.0 │ Gflop │ 90521.6 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.3 │ MFMA FLOPs (F16) │ 0.0 │ Gflop │ 181043.2 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.4 │ MFMA FLOPs (F32) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.5 │ MFMA FLOPs (F64) │ 0.0 │ Gflop │ 45260.8 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.6 │ MFMA IOPs (Int8) │ 0.0 │ Giop │ 181043.2 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.7 │ Active CUs │ 74 │ Cus │ 104 │ 71.15384615384616 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.8 │ SALU Util │ 4.016057506716307 │ Pct │ 100 │ 4.016057506716307 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.9 │ VALU Util │ 5.737225009594725 │ Pct │ 100 │ 5.737225009594725 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.10 │ MFMA Util │ 0.0 │ Pct │ 100 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.11 │ VALU Active Threads/Wave │ 64.0 │ Threads │ 64 │ 100.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.12 │ IPC - Issue │ 1.0 │ Instr/cycle │ 5 │ 20.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.13 │ LDS BW │ 0.0 │ Gb/sec │ 22630.4 │ 0.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.14 │ LDS Bank Conflict │ │ Conflicts/access │ 32 │ │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.15 │ Instr Cache Hit Rate │ 99.91306912556854 │ Pct │ 100 │ 99.91306912556854 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.16 │ Instr Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.17 │ Scalar L1D Cache Hit Rate │ 99.81986908342313 │ Pct │ 100 │ 99.81986908342313 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.18 │ Scalar L1D Cache BW │ 209.7152 │ Gb/s │ 6092.8 │ 3.442016806722689 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.19 │ Vector L1D Cache Hit Rate │ 50.0 │ Pct │ 100 │ 50.0 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.20 │ Vector L1D Cache BW │ 1677.7216 │ Gb/s │ 11315.199999999999 │ 14.82714932126697 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.21 │ L2 Cache Hit Rate │ 35.55067615693325 │ Pct │ 100 │ 35.55067615693325 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.22 │ L2-Fabric Read BW │ 419.8496 │ Gb/s │ 1638.4 │ 25.6255859375 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.23 │ L2-Fabric Write BW │ 293.9456 │ Gb/s │ 1638.4 │ 17.941015625 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.24 │ L2-Fabric Read Latency │ 256.6482321288385 │ Cycles │ │ │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.25 │ L2-Fabric Write Latency │ 317.2264255699014 │ Cycles │ │ │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.26 │ Wave Occupancy │ 1821.723057333852 │ Wavefronts │ 3328 │ 54.73927455931046 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.27 │ Instr Fetch BW │ 4.174722306564298e-08 │ Gb/s │ 3046.4 │ 1.3703789084047721e-09 │
|
||||
├─────────┼───────────────────────────┼───────────────────────┼──────────────────┼────────────────────┼────────────────────────┤
|
||||
│ 2.1.28 │ Instr Fetch Latency │ 21.729248046875 │ Cycles │ │ │
|
||||
╘═════════╧═══════════════════════════╧═══════════════════════╧══════════════════╧════════════════════╧════════════════════════╛
|
||||
```
|
||||
> **Note:** Some cells may be blank, indicating a missing/unavailable hardware counter or a NULL value.
|
||||
|
||||
3. Optimize the application, iterate, and re-profile to inspect performance changes.
|
||||
4. Redo a comprehensive analysis with Omniperf CLI at any milestone or at the end.
|
||||
|
||||
### Demo
|
||||
|
||||
- Single run
|
||||
```shell
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/
|
||||
```
|
||||
|
||||
- List top kernels
|
||||
```shell
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels
|
||||
```
|
||||
|
||||
- List metrics
|
||||
|
||||
```shell
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
|
||||
```
|
||||
|
||||
- Customized profiling "System Speed-of-Light" and "CS_Busy" only
|
||||
|
||||
```shell
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ -b 2 5.1.0
|
||||
```
|
||||
|
||||
> Note: Users can filter a single metric or a whole IP block by its id. In this case, 2 is the id for "System Speed-of-Light" and 5.1.0 is the id for the metric "GPU Busy Cycles".
|
||||
|
||||
- Filter kernels
|
||||
|
||||
First, list the top kernels in your application using `--list-kernels`.
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels
|
||||
|
||||
--------
|
||||
Analyze
|
||||
--------
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
Detected Kernels
|
||||
╒════╤══════════════════════════════════════════════════════════╕
|
||||
│ │ KernelName │
|
||||
╞════╪══════════════════════════════════════════════════════════╡
|
||||
│ 0 │ vecCopy(double*, double*, double*, int, int) [clone .kd] │
|
||||
╘════╧══════════════════════════════════════════════════════════╛
|
||||
|
||||
```
|
||||
|
||||
Second, select the index of the kernel you'd like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__). Then, use this index to apply the filter via `-k/--kernels`.
|
||||
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ -k 0
|
||||
|
||||
--------
|
||||
Analyze
|
||||
--------
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
0. Top Stat
|
||||
╒════╤══════════════════════════════════════════╤═════════╤═══════════╤════════════╤══════════════╤════════╤═════╕
|
||||
│ │ KernelName │ Count │ Sum(ns) │ Mean(ns) │ Median(ns) │ Pct │ S │
|
||||
╞════╪══════════════════════════════════════════╪═════════╪═══════════╪════════════╪══════════════╪════════╪═════╡
|
||||
│ 0 │ vecCopy(double*, double*, double*, int, │ 1 │ 20800.00 │ 20800.00 │ 20800.00 │ 100.00 │ * │
|
||||
│ │ int) [clone .kd] │ │ │ │ │ │ │
|
||||
╘════╧══════════════════════════════════════════╧═════════╧═══════════╧════════════╧══════════════╧════════╧═════╛
|
||||
... ...
|
||||
```
|
||||
|
||||
> Note: You'll see your filtered kernel(s) indicated by an asterisk in the Top Stats table.
|
||||
|
||||
|
||||
- Baseline comparison
|
||||
|
||||
```shell
|
||||
omniperf analyze -p workload1/path/ -p workload2/path/
|
||||
```
|
||||
> Note: You can also apply different filters to each workload.
|
||||
|
||||
OR
|
||||
```shell
|
||||
omniperf analyze -p workload1/path/ -k 0 -p workload2/path/ -k 1
|
||||
```
|
||||
|
||||
## GUI Analysis
|
||||
|
||||
### Web-based GUI
|
||||
|
||||
#### Features
|
||||
|
||||
Omniperf's standalone GUI analyzer is a lightweight web page that can
|
||||
be generated directly from the command-line. This option is provided
|
||||
as an alternative for users wanting to explore profiling results
|
||||
graphically, but without the additional setup requirements or
|
||||
server-side overhead of Omniperf's detailed [Grafana
|
||||
interface](https://rocm.github.io/omniperf/analysis.html#grafana-based-gui)
|
||||
option. The standalone GUI analyzer is provided as a simple
|
||||
[Flask](https://flask.palletsprojects.com/en/2.2.x/) application
|
||||
allowing users to view results from within a web browser.
|
||||
|
||||
```{admonition} Port forwarding
|
||||
|
||||
Note that the standalone GUI analyzer publishes a web interface on port 8050 by default.
|
||||
On production HPC systems where profiling jobs run
|
||||
under the auspices of a resource manager, additional SSH tunneling
|
||||
between the desired web browser host (e.g. login node or remote workstation) and compute host may be
|
||||
required. Alternatively, users may find it more convenient to download
|
||||
profiled workloads to perform analysis on their local system.
|
||||
|
||||
See [FAQ](https://rocm.github.io/omniperf/faq.html) for more details on SSH tunneling.
|
||||
```
|
||||
|
||||
#### Usage
|
||||
|
||||
To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example:
|
||||
|
||||
```shell-session
|
||||
$ omniperf analyze -p workloads/vcopy/mi200/ --gui
|
||||
|
||||
--------
|
||||
Analyze
|
||||
--------
|
||||
|
||||
Dash is running on http://0.0.0.0:8050/
|
||||
|
||||
* Serving Flask app 'rocprof_compute_analyze.rocprof_compute_analyze' (lazy loading)
|
||||
* Environment: production
|
||||
WARNING: This is a development server. Do not use it in a production deployment.
|
||||
Use a production WSGI server instead.
|
||||
* Debug mode: off
|
||||
* Running on all addresses (0.0.0.0)
|
||||
WARNING: This is a development server. Do not use it in a production deployment.
|
||||
* Running on http://127.0.0.1:8050
|
||||
* Running on http://10.228.32.139:8050 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
At this point, users can then launch their web browser of choice and
|
||||
go to http://localhost:8050/ to see an analysis page.
|
||||
|
||||
|
||||
|
||||

|
||||
|
||||
```{tip}
|
||||
To launch the web application on a port other than 8050, include an optional port argument:
|
||||
`--gui <desired port>`
|
||||
```
|
||||
|
||||
When no filters are applied, users will see five basic sections derived from their application's profiling data:
|
||||
|
||||
1. Memory Chart Analysis
|
||||
2. Empirical Roofline Analysis
|
||||
3. Top Stats (Top Kernel Statistics)
|
||||
4. System Info
|
||||
5. System Speed-of-Light
|
||||
|
||||
To dive deeper, use the top drop down menus to isolate particular
|
||||
kernel(s) or dispatch(es). You will then see the web page update with
|
||||
metrics specific to the filter you've applied.
|
||||
|
||||
Once you have applied a filter, you will also see several additional
|
||||
sections become available with detailed metrics specific to that area
|
||||
of AMD hardware. These detailed sections mirror the data displayed in
|
||||
Omniperf's [Grafana
|
||||
interface](https://rocm.github.io/omniperf/analysis.html#grafana-based-gui).
|
||||
|
||||
### Grafana-based GUI
|
||||
|
||||
#### Features
|
||||
The Omniperf Grafana GUI Analyzer supports the following features to facilitate MI GPU performance profiling and analysis:
|
||||
|
||||
- System and IP-Block Speed-of-Light (SOL)
|
||||
- Multiple normalization options, including per-cycle, per-wave, per-kernel and per-second.
|
||||
- Baseline comparisons
|
||||
- Regex based Dispatch ID filtering
|
||||
- Roofline Analysis
|
||||
- Detailed per IP Block performance counters and metrics
|
||||
- CPC/CPF
|
||||
- SPI
|
||||
- SQ
|
||||
- SQC
|
||||
- TA/TD
|
||||
- TCP
|
||||
- TCC (both aggregated and per-channel perf info)
|
||||
|
||||
##### Speed-of-Light
|
||||
Speed-of-light panels are provided at both the system and per IP block level to help diagnose performance bottlenecks. The performance numbers of the workload under test are compared to the theoretical maximum (e.g. floating point operations, bandwidth, cache hit rate, etc.) to indicate the available room to further utilize the hardware capability.
|
||||
|
||||
##### Multi Normalization
|
||||
|
||||
Multiple performance number normalizations are provided to allow performance inspection within both HW and SW context. The following normalizations are permitted:
|
||||
- per cycle
|
||||
- per wave
|
||||
- per kernel
|
||||
- per second
|
||||
|
||||
##### Baseline Comparison
|
||||
Omniperf enables baseline comparison to allow checking A/B effect. The current release limits the baseline comparison to the same SoC. Cross comparison between SoCs is in development.
|
||||
|
||||
For both the Current Workload and the Baseline Workload, one can independently set up the following filters to allow fine-grained comparisons:
|
||||
- Workload Name
|
||||
- GPU ID filtering (multi selection)
|
||||
- Kernel Name filtering (multi selection)
|
||||
- Dispatch ID filtering (Regex filtering)
|
||||
- Omniperf Panels (multi selection)
|
||||
|
||||
##### Regex based Dispatch ID filtering
|
||||
This release enables regex based dispatch ID filtering to flexibly choose the kernel invocations. One may refer to the [Regex Numeric Range Generator](https://3widgets.com/) to generate typical number ranges.
|
||||
|
||||
For example, if one wants to inspect Dispatch Range from 17 to 48, inclusive, the corresponding regex is: **(1[7-9]|[23]\d|4[0-8])**. The generated expression can be copied over for filtering.
|
||||
|
||||
##### Incremental Profiling
|
||||
Omniperf supports incremental profiling to significantly speed up performance analysis.
|
||||
|
||||
> Refer to [*IP Block profiling*](https://rocm.github.io/omniperf/profiling.html#ip-block-profiling) section for this command.
|
||||
|
||||
By default, the entire application is profiled to collect perfmon counters for all IP blocks, giving a system-level view of where the workload stands in terms of performance optimization opportunities and bottlenecks.
|
||||
|
||||
After that one may focus on only a few IP blocks, (e.g., L1 Cache or LDS) to closely check the effect of software optimizations, without performing application replay for all other IP Blocks. This saves lots of compute time. In addition, the prior profiling results for other IP blocks are not overwritten. Instead, they can be merged during the import to piece together the system view.
|
||||
|
||||
##### Color Coding
|
||||
A uniform color coding is applied to most visualizations (bars, tables, diagrams, etc.). Typically, yellow means over 50%, while red means over 90%, for easy inspection.
|
||||
|
||||
##### Global Variables and Configurations
|
||||
|
||||

|
||||
|
||||
#### Grafana GUI Import
|
||||
The omniperf database `--import` option imports the raw profiling data to Grafana's backend MongoDB database. This step is only required for Grafana GUI based performance analysis.
|
||||
|
||||
Default username and password for MongoDB (to be used in database mode) are as follows:
|
||||
|
||||
- Username: **temp**
|
||||
- Password: **temp123**
|
||||
|
||||
Each workload is imported to a separate database with the following naming convention:
|
||||
|
||||
omniperf_<team>_<database>_<soc>
|
||||
|
||||
e.g., omniperf_asw_vcopy_mi200.
|
||||
|
||||
When using database mode, be sure to tailor the connection options to the machine hosting your [server-side instance](./installation.md). Below is a sample command to import the *vcopy* profiling data, assuming our host machine is called "dummybox".
|
||||
|
||||
```shell-session
|
||||
$ omniperf database --help
|
||||
ROC Profiler: /usr/bin/rocprof
|
||||
|
||||
usage:
|
||||
|
||||
omniperf database <interaction type> [connection options]
|
||||
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
Examples:
|
||||
|
||||
omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/
|
||||
|
||||
omniperf database --remove -H pavii1 -u temp -w omniperf_asw_sample_mi200
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
Help:
|
||||
-h, --help show this help message and exit
|
||||
|
||||
General Options:
|
||||
-v, --version show program's version number and exit
|
||||
-V, --verbose Increase output verbosity
|
||||
|
||||
Interaction Type:
|
||||
-i, --import Import workload to Omniperf DB
|
||||
-r, --remove Remove a workload from Omniperf DB
|
||||
|
||||
Connection Options:
|
||||
-H , --host Name or IP address of the server host.
|
||||
-P , --port TCP/IP Port. (DEFAULT: 27018)
|
||||
-u , --username Username for authentication.
|
||||
-p , --password The user's password. (will be requested later if it's not set)
|
||||
-t , --team Specify Team prefix.
|
||||
-w , --workload Specify name of workload (to remove) or path to workload (to import)
|
||||
-k , --kernelVerbose Specify Kernel Name verbose level 1-5.
|
||||
Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)
|
||||
```
|
||||
|
||||
**omniperf import for vcopy:**
|
||||
```shell-session
|
||||
$ omniperf database --import -H dummybox -u temp -t asw -w workloads/vcopy/mi200/
|
||||
ROC Profiler: /usr/bin/rocprof
|
||||
|
||||
--------
|
||||
Import Profiling Results
|
||||
--------
|
||||
|
||||
Pulling data from /home/amd/xlu/test/workloads/vcopy/mi200
|
||||
The directory exists
|
||||
Found sysinfo file
|
||||
KernelName shortening enabled
|
||||
Kernel name verbose level: 2
|
||||
Password:
|
||||
Password recieved
|
||||
-- Conversion & Upload in Progress --
|
||||
0%| | 0/11 [00:00<?, ?it/s]/home/amd/xlu/test/workloads/vcopy/mi200/SQ_IFETCH_LEVEL.csv
|
||||
9%|█████████████████▉ | 1/11 [00:00<00:01, 8.53it/s]/home/amd/xlu/test/workloads/vcopy/mi200/pmc_perf.csv
|
||||
18%|███████████████████████████████████▊ | 2/11 [00:00<00:01, 6.99it/s]/home/amd/xlu/test/workloads/vcopy/mi200/SQ_INST_LEVEL_SMEM.csv
|
||||
27%|█████████████████████████████████████████████████████▋ | 3/11 [00:00<00:01, 7.90it/s]/home/amd/xlu/test/workloads/vcopy/mi200/SQ_LEVEL_WAVES.csv
|
||||
36%|███████████████████████████████████████████████████████████████████████▋ | 4/11 [00:00<00:00, 8.56it/s]/home/amd/xlu/test/workloads/vcopy/mi200/SQ_INST_LEVEL_LDS.csv
|
||||
45%|█████████████████████████████████████████████████████████████████████████████████████████▌ | 5/11 [00:00<00:00, 9.00it/s]/home/amd/xlu/test/workloads/vcopy/mi200/SQ_INST_LEVEL_VMEM.csv
|
||||
55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6/11 [00:00<00:00, 9.24it/s]/home/amd/xlu/test/workloads/vcopy/mi200/sysinfo.csv
|
||||
64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7/11 [00:00<00:00, 9.37it/s]/home/amd/xlu/test/workloads/vcopy/mi200/roofline.csv
|
||||
82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9/11 [00:00<00:00, 12.60it/s]/home/amd/xlu/test/workloads/vcopy/mi200/timestamps.csv
|
||||
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 11.05it/s]
|
||||
9 collections added.
|
||||
Workload name uploaded
|
||||
-- Complete! --
|
||||
```
|
||||
|
||||
#### Omniperf Panels
|
||||
|
||||
##### Overview
|
||||
|
||||
There are currently 18 main panel categories available for analyzing the compute workload performance. Each category contains several panels for close inspection of the system performance.
|
||||
|
||||
- Kernel Statistics
|
||||
- Kernel time histogram
|
||||
- Top Ten bottleneck kernels
|
||||
- System Speed-of-Light
|
||||
- Speed-of-Light
|
||||
- System Info table
|
||||
- Memory Chart Analysis
|
||||
- Roofline Analysis
|
||||
- FP32/FP64
|
||||
- FP16/INT8
|
||||
- Command Processor
|
||||
- Command Processor - Fetch (CPF)
|
||||
- Command Processor - Controller (CPC)
|
||||
- Shader Processing Input (SPI)
|
||||
- SPI Stats
|
||||
- SPI Resource Allocations
|
||||
- Wavefront Launch
|
||||
- Wavefront Launch Stats
|
||||
- Wavefront runtime stats
|
||||
- per-SE Wavefront Scheduling performance
|
||||
- Wavefront Lifetime
|
||||
- Wavefront lifetime breakdown
|
||||
- per-SE wavefront life (average)
|
||||
- per-SE wavefront life (histogram)
|
||||
- Wavefront Occupancy
|
||||
- per-SE wavefront occupancy
|
||||
- per-CU wavefront occupancy
|
||||
- Compute Unit - Instruction Mix
|
||||
- per-wave Instruction mix
|
||||
- per-wave VALU Arithmetic instruction mix
|
||||
- per-wave MFMA Arithmetic instruction mix
|
||||
- Compute Unit - Compute Pipeline
|
||||
- Speed-of-Light: Compute Pipeline
|
||||
- Arithmetic OPs count
|
||||
- Compute pipeline stats
|
||||
- Memory latencies
|
||||
- Local Data Share (LDS)
|
||||
- Speed-of-Light: LDS
|
||||
- LDS stats
|
||||
- Instruction Cache
|
||||
- Speed-of-Light: Instruction Cache
|
||||
- Instruction Cache Accesses
|
||||
- Constant Cache
|
||||
- Speed-of-Light: Constant Cache
|
||||
- Constant Cache Accesses
|
||||
- Constant Cache - L2 Interface stats
|
||||
- Texture Address and Texture Data
|
||||
- Texture Address (TA)
|
||||
- Texture Data (TD)
|
||||
- L1 Cache
|
||||
- Speed-of-Light: L1 Cache
|
||||
- L1 Cache Accesses
|
||||
- L1 Cache Stalls
|
||||
- L1 - L2 Transactions
|
||||
- L1 - UTCL1 Interface stats
|
||||
- L2 Cache
|
||||
- Speed-of-Light: L2 Cache
|
||||
- L2 Cache Accesses
|
||||
- L2 - EA Transactions
|
||||
- L2 - EA Stalls
|
||||
- L2 Cache Per Channel Performance
|
||||
- Per-channel L2 Hit rate
|
||||
- Per-channel L1-L2 Read requests
|
||||
- Per-channel L1-L2 Write Requests
|
||||
- Per-channel L1-L2 Atomic Requests
|
||||
- Per-channel L2-EA Read requests
|
||||
- Per-channel L2-EA Write requests
|
||||
- Per-channel L2-EA Atomic requests
|
||||
- Per-channel L2-EA Read latency
|
||||
- Per-channel L2-EA Write latency
|
||||
- Per-channel L2-EA Atomic latency
|
||||
- Per-channel L2-EA Read stall (I/O, GMI, HBM)
|
||||
- Per-channel L2-EA Write stall (I/O, GMI, HBM, Starve)
|
||||
|
||||
Most panels are designed around a specific IP block to thoroughly understand its behavior. Additional panels, including custom panels, could also be added to aid the performance analysis.
|
||||
|
||||
##### System Info Panel
|
||||

|
||||
##### Kernel Statistics
|
||||
|
||||
###### Kernel Time Histogram
|
||||

|
||||
###### Top Bottleneck Kernels
|
||||

|
||||
###### Top Bottleneck Dispatches
|
||||

|
||||
###### Current and Baseline Dispatch IDs (Filtered)
|
||||

|
||||
|
||||
##### System Speed-of-Light
|
||||

|
||||
|
||||
##### Memory Chart Analysis
|
||||
> Note: The Memory Chart Analysis support multiple normalizations. Due to the space limit, all transactions, when normalized to per-sec, default to unit of Billion transactions per second.
|
||||
|
||||

|
||||
|
||||
##### Roofline Analysis
|
||||

|
||||
##### Command Processor
|
||||

|
||||
##### Shader Processing Input (SPI)
|
||||

|
||||
##### Wavefront Launch
|
||||

|
||||
|
||||
##### Compute Unit - Instruction Mix
|
||||
###### Instruction Mix
|
||||

|
||||
###### VALU Arithmetic Instruction Mix
|
||||

|
||||
###### MFMA Arithmetic Instruction Mix
|
||||

|
||||
###### VMEM Arithmetic Instruction Mix
|
||||

|
||||
|
||||
##### Compute Unit - Compute Pipeline
|
||||
###### Speed-of-Light
|
||||

|
||||
###### Compute Pipeline Stats
|
||||

|
||||
###### Arithmetic Operations
|
||||

|
||||
###### Memory Latencies
|
||||

|
||||
|
||||
##### Local Data Share (LDS)
|
||||
###### Speed-of-Light
|
||||

|
||||
###### LDS Stats
|
||||

|
||||
|
||||
##### Instruction Cache
|
||||
###### Speed-of-Light
|
||||

|
||||
###### Instruction Cache Stats
|
||||

|
||||
|
||||
##### Scalar L1D Cache
|
||||
###### Speed-of-Light
|
||||

|
||||
###### Constant Cache Stats
|
||||

|
||||
###### Constant Cache - L2 Interface
|
||||

|
||||
|
||||
##### Texture Address and Texture Data
|
||||
###### Texture Address (TA)
|
||||

|
||||
###### Texture Data (TD)
|
||||

|
||||
|
||||
##### Vector L1D Cache
|
||||
###### Speed-of-Light
|
||||

|
||||
###### Vector L1D Cache Accesses
|
||||

|
||||
###### L1 Cache Stalls
|
||||

|
||||
###### L1 - L2 Transactions
|
||||

|
||||
###### L1 - UTCL1 Interface Stats
|
||||

|
||||
|
||||
##### L2 Cache
|
||||
###### Speed-of-Light
|
||||

|
||||
###### L2 Cache Accesses
|
||||

|
||||
###### L2 - EA Transactions
|
||||

|
||||
###### L2 - EA Stalls
|
||||

|
||||
|
||||
##### L2 Cache Per Channel Performance
|
||||
###### L1-L2 Transactions
|
||||

|
||||
###### L2-EA Transactions
|
||||

|
||||
###### L2-EA Latencies
|
||||

|
||||
###### L2-EA Stalls
|
||||

|
||||
###### L2-EA Write Stalls
|
||||

|
||||
###### L2-EA Write Starvation
|
||||

|
||||
@@ -0,0 +1,178 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file does only contain a selection of the most common options. For a
|
||||
# full list see the documentation:
|
||||
# http://www.sphinx-doc.org/en/master/config
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
import subprocess as sp
|
||||
import sys
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use str(Path(<rel_path>).absolute().resolve()) to make it absolute, like shown here.
|
||||
#
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path("..").absolute().resolve()))
|
||||
|
||||
repo_version = "unknown"
|
||||
# Determine short version by file in repo
|
||||
if Path("./VERSION").is_file():
|
||||
with open("./VERSION") as f:
|
||||
repo_version = f.readline().strip()
|
||||
|
||||
|
||||
def install(package):
    """Install ``package`` with pip, using the interpreter running this file.

    Runs ``<sys.executable> -m pip install <package>`` as a subprocess.
    The exit status returned by pip is not inspected, so a non-zero pip
    exit code is silently ignored.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    sp.call(pip_command)
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = "Omniperf"
|
||||
copyright = "2022, Audacious Software Group"
|
||||
author = "Audacious Software Group"
|
||||
|
||||
# The short X.Y version
|
||||
version = repo_version
|
||||
# The full version, including alpha/beta/rc tags
|
||||
release = ""
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
install("sphinx_rtd_theme")
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
"sphinx.ext.githubpages",
|
||||
"myst_parser",
|
||||
]
|
||||
|
||||
myst_heading_anchors = 2
|
||||
# enable replacement of (tm) & friends
|
||||
myst_enable_extensions = ["replacements"]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
".txt": "markdown",
|
||||
".md": "markdown",
|
||||
}
|
||||
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
|
||||
source_parsers = {".md": CommonMarkParser}
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = "index"
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#
|
||||
# This is also used if you do content translation via gettext catalogs.
|
||||
# Usually you set "language" from the command line for these cases.
|
||||
language = "en"
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = None
|
||||
|
||||
# options for latex output
|
||||
latex_engine = "lualatex"
|
||||
latex_show_urls = "footnote"
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ["_static"]
|
||||
|
||||
|
||||
# -- Options for HTMLHelp output ---------------------------------------------
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = "Omniperfdoc"
|
||||
|
||||
html_theme_options = {
|
||||
"analytics_id": "G-C5DYLCE9ED", # Provided by Google in your dashboard
|
||||
"analytics_anonymize_ip": False,
|
||||
"logo_only": False,
|
||||
"display_version": True,
|
||||
"prev_next_buttons_location": "bottom",
|
||||
"style_external_links": False,
|
||||
"vcs_pageview_mode": "",
|
||||
# 'style_nav_header_background': 'white',
|
||||
# Toc options
|
||||
"collapse_navigation": True,
|
||||
"sticky_navigation": True,
|
||||
"navigation_depth": 4,
|
||||
"includehidden": True,
|
||||
"titles_only": False,
|
||||
}
|
||||
|
||||
from pygments.styles import get_all_styles
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
styles = list(get_all_styles())
|
||||
preferences = ("emacs", "pastie", "colorful")
|
||||
for pref in preferences:
|
||||
if pref in styles:
|
||||
pygments_style = pref
|
||||
break
|
||||
|
||||
from recommonmark.transform import AutoStructify
|
||||
|
||||
|
||||
# app setup hook
|
||||
def setup(app):
    """App setup hook: register config values, a transform and an event handler.

    In order, this:
      1. registers the ``recommonmark_config`` config value with its defaults,
      2. adds the ``AutoStructify`` transform,
      3. registers the ``docstring_replacements`` config value (default ``{}``),
      4. connects ``replaceString`` to the ``source-read`` event.
    """
    recommonmark_defaults = {
        "auto_toc_tree_section": "Contents",
        "enable_eval_rst": True,
        "enable_auto_doc_ref": False,
    }
    # Third positional argument is passed as True, exactly as before
    # (the "rebuild" flag of Sphinx's add_config_value).
    app.add_config_value("recommonmark_config", recommonmark_defaults, True)
    app.add_transform(AutoStructify)

    app.add_config_value("docstring_replacements", {}, True)
    app.connect("source-read", replaceString)
|
||||
|
||||
|
||||
# function to replace version string throughout documentation
|
||||
|
||||
|
||||
def replaceString(app, docname, source):
    """Apply the configured placeholder substitutions to a document's text.

    Args:
        app: object exposing ``config.docstring_replacements``, a mapping of
            placeholder string -> replacement string.
        docname: name of the document being read (unused here).
        source: list whose first element is the document text; it is
            updated in place.

    Returns:
        None. The substituted text is written back to ``source[0]``.
    """
    text = source[0]
    for placeholder, substitute in app.config.docstring_replacements.items():
        text = text.replace(placeholder, substitute)
    source[0] = text
|
||||
|
||||
|
||||
docstring_replacements = {"{__VERSION__}": version}
|
||||
@@ -0,0 +1,55 @@
|
||||
# FAQ
|
||||
|
||||
```eval_rst
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 4
|
||||
```
|
||||
|
||||
**1. How do I export profiling data I've already generated using Omniperf?**
|
||||
|
||||
In order to interact with the Grafana GUI you must sync data with the MongoDB backend. This interaction is done through ***database*** mode.
|
||||
|
||||
Simply pass the directory of your desired workload like so,
|
||||
```shell
|
||||
$ omniperf database --import -w <path-to-results> -H <hostname> -u <username> -t <team-name>
|
||||
```
|
||||
**2. python ast error: 'Constant' object has no attribute 'kind'**
|
||||
|
||||
This comes from a bug in the default astunparse 1.6.3 with python 3.8. Seems good with python 3.7 and 3.9.
|
||||
|
||||
Workaround:
|
||||
```shell
|
||||
$ pip3 uninstall astunparse
|
||||
$ pip3 install astunparse
|
||||
```
|
||||
|
||||
**3. tabulate doesn't print properly**
|
||||
Workaround:
|
||||
```shell
|
||||
$ export LC_ALL=C.UTF-8
|
||||
$ export LANG=C.UTF-8
|
||||
```
|
||||
|
||||
**4. How can I SSH Tunnel in MobaXterm?**
|
||||
|
||||
1. Open MobaXterm
|
||||
2. In the top ribbon, select `Tunneling`
|
||||

|
||||
This pop up will appear
|
||||

|
||||
3. Press `New SSH tunnel`
|
||||

|
||||
4. Configure tunnel accordingly
|
||||
|
||||
Local clients
|
||||
- Forwarded Port: [PORT]
|
||||
|
||||
Remote Server
|
||||
- Remote Server: localhost
|
||||
- Remote Port: [PORT]
|
||||
|
||||
SSH Server
|
||||
- SSH server: Name of the server one is connecting to
|
||||
- SSH login: Username to login to the server
|
||||
- SSH port: 22
|
||||
@@ -0,0 +1,93 @@
|
||||
# Getting Started
|
||||
|
||||
```eval_rst
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 4
|
||||
```
|
||||
|
||||
## Quickstart
|
||||
|
||||
1. **Launch & Profile the target application with the command line profiler**
|
||||
|
||||
The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or IP blocks. If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable.
|
||||
|
||||
To collect the default set of data for all kernels in the target application, launch, e.g.:
|
||||
```shell
|
||||
$ omniperf profile -n vcopy_data -- ./vcopy 1048576 256
|
||||
```
|
||||
The app runs, each kernel is launched, and profiling results are generated. By default, results are written to (e.g.,) ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times.
|
||||
|
||||
2. **Customize data collection**
|
||||
|
||||
Options are available to specify for which kernels/metrics data should be collected.
|
||||
Note that filtering can be applied in either the profiling or the analysis stage; however, filtering during profiling will often speed up your overall profiling run time.
|
||||
|
||||
Some common filters include:
|
||||
|
||||
- `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID
|
||||
- `-b`/`--ipblocks` collects metrics for only the specified (one or more) IP Blocks.
|
||||
|
||||
To view a list of all available metrics organized by IP Block, use the `--list-metrics` argument.
|
||||
```shell
|
||||
$ omniperf analyze --list-metrics <sys_arch>
|
||||
```
|
||||
|
||||
3. **Analyze at the command line**
|
||||
|
||||
After generating a local output folder (./workloads/\<name>), the command line tool can also be used to quickly interface with profiling results. View different metrics derived from your profiled results and get immediate access to all metrics organized by IP block.
|
||||
|
||||
If no kernel, dispatch, or ipblock filters are applied at this stage, analysis will be reflective of the entirety of the profiling data.
|
||||
|
||||
To interact with profiling results from a different session, users just provide the workload path. `-p`/`--path` enables users to analyze existing profiling data in the Omniperf CLI.
|
||||
|
||||
4. **Analyze in the Grafana GUI**
|
||||
|
||||
To conduct a more in-depth analysis of profiling results we recommend users utilize the Omniperf Grafana GUI. To interact with profiling results, users must import their data to the MongoDB instance included in the Omniperf dockerfile.
|
||||
|
||||
To interact with Grafana GUI data, stored in the Omniperf DB, users can enter ***database*** mode. For example:
|
||||
```shell
|
||||
$ omniperf database --import [CONNECTION OPTIONS]
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Modes
|
||||
Modes change the fundamental behavior of the Omniperf command line tool. Depending on which mode is chosen, different command line options become available.
|
||||
|
||||
- **Profile**: Target application is launched on the local system utilizing AMD’s [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). Depending on the profiling options chosen, selected kernels, dispatches, and/or IP Blocks in the application are profiled and results are stored locally in an output folder (./workloads/\<name>).
|
||||
|
||||
```shell
|
||||
$ omniperf profile --help
|
||||
```
|
||||
|
||||
- **Analyze**: Profiling data from `-p`/`--path` directory is loaded into the Omniperf CLI analyzer where users have immediate access to profiling results and generated metrics. Metrics are quickly generated from the entirety of your profiled application or a subset you’ve identified through the Omniperf CLI analysis filters.
|
||||
|
||||
To generate a lightweight GUI interface, users can add the `--gui` flag to their analysis command.
|
||||
|
||||
This mode is designed to be a middle ground to the highly detailed Omniperf Grafana GUI and is great for users who want immediate access to an IP Block they’re already familiar with.
|
||||
|
||||
```shell
|
||||
$ omniperf analyze --help
|
||||
```
|
||||
|
||||
- **Database**: Our detailed Grafana GUI is built on a MongoDB database. `--import` profiling results to the DB to interact with the workload in Grafana or `--remove` the workload from the DB.
|
||||
|
||||
Connection options will need to be specified. See the [*Grafana
|
||||
Analysis*](https://rocm.github.io/omniperf/analysis.html#grafana-gui-import) import section
|
||||
for more details on this.
|
||||
|
||||
```shell
|
||||
$ omniperf database --help
|
||||
```
|
||||
|
||||
## Basic Operations
|
||||
|
||||
Operation | Mode | Required Arguments
|
||||
:--|:--|:--
|
||||
Profile a workload | profile | `--name`, `-- <profile_cmd>`
|
||||
Standalone roofline analysis | profile | `--name`, `--roof-only`, `-- <profile_cmd>`
|
||||
Import a workload to database | database | `--import`, `--host`, `--username`, `--workload`, `--team`
|
||||
Remove a workload from database | database | `--remove`, `--host`, `--username`, `--workload`, `--team`
|
||||
Launch standalone GUI from CLI | analyze | `--path`, `--gui`
|
||||
Interact with profiling results from CLI | analyze | `--path`
|
||||
@@ -0,0 +1,20 @@
|
||||
# High Level Design
|
||||
|
||||
```eval_rst
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 4
|
||||
```
|
||||
|
||||
The [Omniperf](https://github.com/ROCm/omniperf) Tool is architecturally composed of three major components, as shown in the following figure.
|
||||
|
||||
- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [rocProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/latest/rocprof.html). The counters are stored in comma-separated value (CSV) files for further analysis. A set of MI200-specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators.
|
||||
|
||||
- **Omniperf Grafana Analyzer**:
|
||||
- *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed.
|
||||
- *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database. It also creates the relevant performance metrics and visualization.
|
||||
- **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database.
|
||||
|
||||

|
||||
|
||||
> Note: To learn more about the client vs. server model of Omniperf and our install process please see the [Deployment section](./installation.md) of the docs.
|
||||
|
After Width: | Height: | Ukuran: 28 KiB |
|
After Width: | Height: | Ukuran: 79 KiB |
|
After Width: | Height: | Ukuran: 16 KiB |
|
After Width: | Height: | Ukuran: 23 KiB |
|
After Width: | Height: | Ukuran: 16 KiB |
|
After Width: | Height: | Ukuran: 48 KiB |
|
After Width: | Height: | Ukuran: 99 KiB |
|
After Width: | Height: | Ukuran: 22 KiB |
|
After Width: | Height: | Ukuran: 22 KiB |
|
After Width: | Height: | Ukuran: 59 KiB |
|
After Width: | Height: | Ukuran: 29 KiB |
|
After Width: | Height: | Ukuran: 51 KiB |
|
After Width: | Height: | Ukuran: 30 KiB |
|
After Width: | Height: | Ukuran: 11 KiB |
|
After Width: | Height: | Ukuran: 81 KiB |
|
After Width: | Height: | Ukuran: 24 KiB |
|
After Width: | Height: | Ukuran: 77 KiB |
|
After Width: | Height: | Ukuran: 30 KiB |
|
After Width: | Height: | Ukuran: 62 KiB |
|
After Width: | Height: | Ukuran: 11 KiB |
|
After Width: | Height: | Ukuran: 61 KiB |
|
After Width: | Height: | Ukuran: 53 KiB |
|
After Width: | Height: | Ukuran: 55 KiB |
|
After Width: | Height: | Ukuran: 47 KiB |
|
After Width: | Height: | Ukuran: 18 KiB |
|
After Width: | Height: | Ukuran: 11 KiB |
|
After Width: | Height: | Ukuran: 44 KiB |
|
After Width: | Height: | Ukuran: 11 KiB |
|
After Width: | Height: | Ukuran: 79 KiB |
|
After Width: | Height: | Ukuran: 20 KiB |
|
After Width: | Height: | Ukuran: 99 KiB |
|
After Width: | Height: | Ukuran: 70 KiB |
|
After Width: | Height: | Ukuran: 50 KiB |
|
After Width: | Height: | Ukuran: 199 KiB |
|
After Width: | Height: | Ukuran: 54 KiB |
|
After Width: | Height: | Ukuran: 26 KiB |
|
After Width: | Height: | Ukuran: 49 KiB |
|
After Width: | Height: | Ukuran: 24 KiB |
|
After Width: | Height: | Ukuran: 43 KiB |