Initial Commit
Contributors:
Ammar ELWazir <aelwazir@amd.com>
AravindanC <aravindan.cheruvally@amd.com>
Benjamin Welton <bewelton@amd.com>
Ma, Bing <Bing.Ma@amd.com>
Chun Yang <chun.yang@amd.com>
Cole Nelson <cole.nelson@amd.com>
Ethan Stewart <ethan.stewart@amd.com>
Evgeny <evgeny.shcherbakov@amd.com>
Freddy Paul <Freddy.paul@amd.com>
Giovanni Baraldi <gbaraldi@amd.com>
Gopesh Bhardwaj <Gopesh.Bhardwaj@amd.com>
Icarus Sparry <icarus.sparry@amd.com>
itrowbri <Ian.Trowbridge@amd.com>
James Edwards <JamesAdrian.Edwards@amd.com>
jatang <jatang@amd.com>
Jeremy Newton <Jeremy.Newton@amd.com>
Jonathan Kim <jonathan.kim@amd.com>
Kent Russell <kent.russell@amd.com>
Kiumars Sabeti <kiumars.sabeti@amd.com>
Lang Yu <lang.yu@amd.com>
Laurent Morichetti <laurent.morichetti@amd.com>
Mallya, Ameya Keshava <AmeyaKeshava.Mallya@amd.com>
Manjunath Jakaraddi <manjunath.jakaraddi@amd.com>
Mark Laws <markdavid.laws@amd.com>
Mohan Kumar Mithur <Mohan.KumarMithur@amd.com>
Nicholas Curtis <nicurtis@amd.com>
Nirmal Unnikrishnan <Nirmal.Unnikrishnan@amd.com>
Parag Bhandari <parag.bhandari@amd.com>
Ranjith Ramakrishnan <Ranjith.Ramakrishnan@amd.com>
Robert Gregory <Robert.Gregory@amd.com>
Saravanan Solaiyappan <saravanan.solaiyappan@amd.com>
Saurabh Verma <saurabh.verma@amd.com>
Srihari Uttanur <srihari.u@amd.com>
Srinivasan Subramanian <srinivasan.subramanian@amd.com>
Sriraksha Nagaraj <Sriraksha.Nagaraj@amd.com>
Sushma Vaddireddy <svaddire@amd.com>
Xianwei Zhang <Xianwei.Zhang@amd.com>
[ROCm/aqlprofile commit: 1ed169e30c]
This commit is contained in:
@@ -0,0 +1,205 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveMacros: None
|
||||
AlignConsecutiveAssignments: None
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignEscapedNewlines: Left
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: true
|
||||
SplitEmptyRecord: true
|
||||
SplitEmptyNamespace: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeConceptDeclarations: true
|
||||
BreakBeforeBraces: Attach
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BreakConstructorInitializers: BeforeColon
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 100
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: true
|
||||
DisableFormat: false
|
||||
EmptyLineBeforeAccessModifier: LogicalBlock
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: true
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<ext/.*\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*\.h>'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseLabels: true
|
||||
IndentCaseBlocks: false
|
||||
IndentGotoLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentExternBlock: AfterExternBlock
|
||||
IndentRequires: false
|
||||
IndentWidth: 2
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBinPackProtocolList: Never
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
RawStringFormats:
|
||||
- Language: Cpp
|
||||
Delimiters:
|
||||
- cc
|
||||
- CC
|
||||
- cpp
|
||||
- Cpp
|
||||
- CPP
|
||||
- 'c++'
|
||||
- 'C++'
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
- Language: TextProto
|
||||
Delimiters:
|
||||
- pb
|
||||
- PB
|
||||
- proto
|
||||
- PROTO
|
||||
EnclosingFunctions:
|
||||
- EqualsProto
|
||||
- EquivToProto
|
||||
- PARSE_PARTIAL_TEXT_PROTO
|
||||
- PARSE_TEST_PROTO
|
||||
- PARSE_TEXT_PROTO
|
||||
- ParseTextOrDie
|
||||
- ParseTextProtoOrDie
|
||||
- ParseTestProto
|
||||
- ParsePartialTestProto
|
||||
CanonicalDelimiter: pb
|
||||
BasedOnStyle: google
|
||||
ReflowComments: true
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: false
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
BitFieldColonSpacing: Both
|
||||
Standard: Auto
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 8
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
...
|
||||
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions" # See documentation for possible values
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/docs/sphinx" # Location of package manifests
|
||||
open-pull-requests-limit: 10
|
||||
schedule:
|
||||
interval: "daily"
|
||||
versioning-strategy: increase
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
disabled: false
|
||||
scmId: gh-emu-rocm
|
||||
branchesToScan:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
@@ -0,0 +1,91 @@
|
||||
name: "CodeQL Advanced"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "amd-staging" ]
|
||||
pull_request:
|
||||
branches: [ "amd-staging" ]
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
env:
|
||||
EXCLUDED_PATHS: ""
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||
# - https://gh.io/supported-runners-and-hardware-resources
|
||||
# - https://gh.io/using-larger-runners (GitHub.com only)
|
||||
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
||||
runs-on: gpuless-emu-runner-set
|
||||
permissions:
|
||||
# required for all workflows
|
||||
security-events: write
|
||||
|
||||
# required to fetch internal or private CodeQL packs
|
||||
packages: read
|
||||
|
||||
# only required for workflows in private repositories
|
||||
actions: read
|
||||
contents: read
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- language: c-cpp
|
||||
build-mode: manual
|
||||
- language: python
|
||||
build-mode: none
|
||||
- language: actions
|
||||
build-mode: none
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- if: matrix.build-mode == 'manual'
|
||||
name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
apt-get update
|
||||
apt-get install -y build-essential cmake g++-11 g++-12 python3-pip libdw-dev rocm-llvm-dev
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: ${{ matrix.build-mode }}
|
||||
queries: security-extended
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
|
||||
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||
# queries: security-extended,security-and-quality
|
||||
|
||||
# If the analyze step fails for one of the languages you are analyzing with
|
||||
# "We were unable to automatically build your code", modify the matrix above
|
||||
# to set the build mode to "manual" for that language. Then modify this step
|
||||
# to build your code.
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
- if: matrix.build-mode == 'manual'
|
||||
name: Configure and Build
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run: |
|
||||
cmake -B /tmp/build -DGPU_TARGETS='gfx906,gfx90a,gfx942,gfx1101,gfx1201' -DCMAKE_PREFIX_PATH=/opt/rocm
|
||||
cmake --build /tmp/build --target all --parallel 16
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
@@ -0,0 +1,146 @@
|
||||
name: Continuous Integration
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ amd-staging, amd-mainline, amd-npi ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'source/docs/**'
|
||||
- 'CODEOWNERS'
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'source/docs/**'
|
||||
- 'CODEOWNERS'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
|
||||
PATH: "/usr/bin:$PATH"
|
||||
navi3_EXCLUDE_TESTS_REGEX: ""
|
||||
vega20_EXCLUDE_TESTS_REGEX: ""
|
||||
mi200_EXCLUDE_TESTS_REGEX: ""
|
||||
mi300_EXCLUDE_TESTS_REGEX: ""
|
||||
mi300a_EXCLUDE_TESTS_REGEX: ""
|
||||
mi325_EXCLUDE_TESTS_REGEX: ""
|
||||
navi4_EXCLUDE_TESTS_REGEX: ""
|
||||
navi3_EXCLUDE_LABEL_REGEX: ""
|
||||
vega20_EXCLUDE_LABEL_REGEX: ""
|
||||
mi200_EXCLUDE_LABEL_REGEX: ""
|
||||
mi300_EXCLUDE_LABEL_REGEX: ""
|
||||
mi300a_EXCLUDE_LABEL_REGEX: ""
|
||||
mi325_EXCLUDE_LABEL_REGEX: ""
|
||||
navi4_EXCLUDE_LABEL_REGEX: ""
|
||||
|
||||
jobs:
|
||||
core-deb:
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ['navi4', 'mi300a', 'mi200', 'navi3']
|
||||
os: ['ubuntu-22.04']
|
||||
build-type: ['RelWithDebInfo']
|
||||
|
||||
runs-on: ${{ matrix.runner }}${{ github.ref == 'refs/heads/amd-npi' && '-npi' || '' }}-emu-runner-set
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
# define this for containers
|
||||
env:
|
||||
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
|
||||
CORE_EXT_RUNNER: mi300a
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
apt-get update
|
||||
apt-get install -y build-essential cmake g++-11 g++-12 python3-pip
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12
|
||||
|
||||
- name: List Files
|
||||
shell: bash
|
||||
run: |
|
||||
echo "PATH: ${PATH}"
|
||||
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
|
||||
which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
|
||||
for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done
|
||||
cat /opt/rocm/.info/version
|
||||
ls -la
|
||||
pwd
|
||||
|
||||
- name: Configure, Build, and Test
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run:
|
||||
LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH ctest --output-on-failure -V -DCTEST_SOURCE_DIRECTORY="$(pwd)"
|
||||
-DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
|
||||
-DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.runner }}-core
|
||||
-DCMAKE_CTEST_ARGUMENTS=""
|
||||
-DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
|
||||
-S ./dashboard.cmake
|
||||
|
||||
core-rpm:
|
||||
if: github.ref != 'refs/heads/amd-npi'
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ['mi300']
|
||||
os: ['rhel-emu', 'sles-emu']
|
||||
build-type: ['RelWithDebInfo']
|
||||
ci-flags: ['--linter clang-tidy']
|
||||
|
||||
runs-on: ${{ matrix.os }}-runner-set
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
# define this for containers
|
||||
env:
|
||||
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
|
||||
CORE_EXT_RUNNER: mi300
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
|
||||
- name: List Files
|
||||
shell: bash
|
||||
run: |
|
||||
echo "PATH: ${PATH}"
|
||||
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
|
||||
which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
|
||||
for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done
|
||||
cat /opt/rocm/.info/version
|
||||
ls -la
|
||||
pwd
|
||||
|
||||
- name: Configure, Build, and Test
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run:
|
||||
sudo LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH ctest --output-on-failure -V -DCTEST_SOURCE_DIRECTORY="$(pwd)"
|
||||
-DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
|
||||
-DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.runner }}-core
|
||||
-DCMAKE_CTEST_ARGUMENTS=""
|
||||
-DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
|
||||
-S ./dashboard.cmake
|
||||
@@ -0,0 +1,15 @@
|
||||
name: Rocm Validation Suite KWS
|
||||
on:
|
||||
push:
|
||||
branches: [amd-staging]
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
kws:
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{github.event.pull_request.number}}
|
||||
base_branch: ${{github.base_ref}}
|
||||
@@ -0,0 +1,25 @@
|
||||
name: ROCm CI Caller
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, amd-npi, release/rocm-rel-*]
|
||||
types: [opened, reopened, synchronize]
|
||||
push:
|
||||
branches: [amd-mainline]
|
||||
workflow_dispatch:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
call-workflow:
|
||||
if: ${{ github.event_name != 'issue_comment' || github.event.comment.body == '!verify' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
input_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
input_pr_num: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 0 }}
|
||||
input_pr_url: ${{ github.event_name == 'pull_request' && github.event.pull_request.html_url || '' }}
|
||||
input_pr_title: ${{ github.event_name == 'pull_request' && github.event.pull_request.title || '' }}
|
||||
repository_name: ${{ github.repository }}
|
||||
base_ref: ${{ github.event_name == 'pull_request' && github.base_ref || github.ref }}
|
||||
trigger_event_type: ${{ github.event_name }}
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-mainline to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/aqlprofile.git"
|
||||
source_branch: "amd-mainline"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/aqlprofile.git"
|
||||
destination_branch: "amd-mainline"
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-staging to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-staging ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/aqlprofile.git"
|
||||
source_branch: "amd-staging"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/aqlprofile.git"
|
||||
destination_branch: "amd-staging"
|
||||
@@ -0,0 +1,2 @@
|
||||
build
|
||||
.cache
|
||||
@@ -0,0 +1,188 @@
|
||||
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.0)
|
||||
|
||||
## Set module name and project name.
|
||||
set ( AQLPROFILE_NAME "hsa-amd-aqlprofile" )
|
||||
set ( AQLPROFILE_TARGET "${AQLPROFILE_NAME}64" )
|
||||
set ( AQLPROFILE_LIBRARY "lib${AQLPROFILE_TARGET}" )
|
||||
project ( ${AQLPROFILE_NAME} )
|
||||
|
||||
## Default the library install directory to "lib". This has to be cached BEFORE
## GNUInstallDirs is included: GNUInstallDirs caches its own platform default
## (for example "lib64"), and a later non-FORCE cache set() does not replace an
## existing cache entry. A value given with -DCMAKE_INSTALL_LIBDIR=... still wins.
set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory")
include(GNUInstallDirs)

## Adding default path cmake modules
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
## Include common cmake modules
include ( utils )
## Set build environment
include ( env )
|
||||
|
||||
## Setup the package version.
|
||||
get_version ( "1.0.0" )
|
||||
message ( "-- BUILD-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
|
||||
|
||||
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
|
||||
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
|
||||
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
|
||||
set ( LIB_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( LIB_VERSION_MINOR ${VERSION_MINOR} )
|
||||
if ( ${ROCM_PATCH_VERSION} )
|
||||
set ( LIB_VERSION_PATCH ${ROCM_PATCH_VERSION} )
|
||||
else()
|
||||
set ( LIB_VERSION_PATCH ${VERSION_PATCH} )
|
||||
endif()
|
||||
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
|
||||
|
||||
## Set target and root/lib/test directory
|
||||
set ( TARGET_NAME "${AQLPROFILE_TARGET}" )
|
||||
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
|
||||
set ( LIB_DIR "${ROOT_DIR}/src" )
|
||||
set ( TEST_DIR "${ROOT_DIR}/test" )
|
||||
|
||||
## Build library
|
||||
include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${AQLPROFILE_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *aqlprofile*.so )
|
||||
endif ()
|
||||
|
||||
## Build tests
|
||||
enable_testing()
|
||||
include(CTest)
|
||||
set ( TEST_BINARY_DIR ${PROJECT_BINARY_DIR}/test )
|
||||
add_subdirectory ( ${TEST_DIR} ${TEST_BINARY_DIR} )
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
set ( DEST_NAME ${AQLPROFILE_NAME} )
|
||||
install ( TARGETS ${AQLPROFILE_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime )
|
||||
install ( TARGETS ${AQLPROFILE_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan )
|
||||
|
||||
option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" OFF)
|
||||
|
||||
if(FILE_REORG_BACKWARD_COMPATIBILITY)
|
||||
include(aqlprof-backward-compat.cmake)
|
||||
endif()
|
||||
|
||||
## Add the packaging directives for the runtime library.
|
||||
if ( ENABLE_ASAN_PACKAGING )
|
||||
set ( CPACK_PACKAGE_NAME ${AQLPROFILE_NAME}-asan )
|
||||
# ASAN Package requires only asan component with libraries and license file
|
||||
set ( CPACK_COMPONENTS_ALL asan )
|
||||
else()
|
||||
set ( CPACK_PACKAGE_NAME ${AQLPROFILE_NAME} )
|
||||
set ( CPACK_COMPONENTS_ALL runtime tests )
|
||||
endif()
|
||||
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
|
||||
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
set ( CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.rocm-profiler.support@amd.com>" )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}" )
|
||||
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AQLPROFILE library for AMD HSA runtime API extension support" )
|
||||
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
|
||||
|
||||
|
||||
set(CPACK_STGZ_HEADER_FILE ${PROJECT_SOURCE_DIR}/cmake_modules/CPack.STGZ_Header.sh.in)
|
||||
set(CPACK_STGZ_INCLUDE_SUBDIR OFF)
|
||||
|
||||
if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
|
||||
endif()
|
||||
|
||||
## Debian package specific variables
|
||||
## Homepage advertised in the Debian package metadata (public aqlprofile repository)
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/ROCm/aqlprofile" )
|
||||
## Process the Debian install/remove scripts to update the CPACK variables
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
|
||||
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "DEBIAN/postinst;DEBIAN/prerm" )
|
||||
|
||||
## Process the Rpm install/remove scripts to update the CPACK variables
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
|
||||
## RPM package specific variables
|
||||
## RPM/post registers the library directory with ldconfig, so it has to run
## after the package files are installed (post-install, not pre-install).
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
|
||||
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )
|
||||
|
||||
if ( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
|
||||
set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
|
||||
else()
|
||||
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
|
||||
endif()
|
||||
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
|
||||
|
||||
## RPM package specific variables
|
||||
if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
|
||||
set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
|
||||
else()
|
||||
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
|
||||
endif()
|
||||
## Keep the RPM license tag in sync with the LICENSE file shipped in the repository
set( CPACK_RPM_PACKAGE_LICENSE "MIT" )
|
||||
|
||||
# Disable build-id links for this package, as they cause RPM transaction errors
|
||||
set ( CPACK_RPM_SPEC_MORE_DEFINE "%define _build_id_links none
|
||||
%global __strip ${CPACK_STRIP_EXECUTABLE}
|
||||
%global __objdump ${CPACK_OBJDUMP_EXECUTABLE}
|
||||
%global __objcopy ${CPACK_OBJCOPY_EXECUTABLE}
|
||||
%global __readelf ${CPACK_READELF_EXECUTABLE}")
|
||||
|
||||
## 'dist' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process( COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE )
|
||||
|
||||
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
|
||||
string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
|
||||
endif()
|
||||
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
|
||||
if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX )
|
||||
set ( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}" )
|
||||
endif ( )
|
||||
|
||||
# Enable Component Mode & install settings.
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${AQLPROFILE_NAME}")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${AQLPROFILE_NAME}-asan")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${AQLPROFILE_NAME}-tests")
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${AQLPROFILE_NAME}")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_NAME "${AQLPROFILE_NAME}-asan")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_NAME "${AQLPROFILE_NAME}-tests")
|
||||
# Add dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON
|
||||
if(ROCM_DEP_ROCMCORE)
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "rocm-core")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "rocm-core")
|
||||
endif()
|
||||
|
||||
include ( CPack )
|
||||
|
||||
cpack_add_component(
|
||||
runtime
|
||||
DISPLAY_NAME "Runtime"
|
||||
DESCRIPTION "Dynamic libraries for the AQLProfile")
|
||||
|
||||
## ASAN component: address-sanitizer builds of the library.
## It has no dependency on other components and must not list itself under DEPENDS.
cpack_add_component(
  asan
  DISPLAY_NAME "ASAN"
  DESCRIPTION "ASAN libraries for the AQLProfile")
|
||||
|
||||
cpack_add_component(
|
||||
tests
|
||||
DISPLAY_NAME "Tests"
|
||||
DESCRIPTION "Tests for the AQLProfile"
|
||||
DEPENDS runtime)
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
do_ldconfig() {
|
||||
if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
|
||||
echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
|
||||
ldconfig
|
||||
fi
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
( configure )
|
||||
do_ldconfig
|
||||
;;
|
||||
( abort-upgrade | abort-remove | abort-deconfigure )
|
||||
echo "$1"
|
||||
;;
|
||||
( * )
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
rm_ldconfig() {
|
||||
if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
|
||||
rm -f /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
|
||||
ldconfig
|
||||
fi
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
( remove | upgrade )
|
||||
rm_ldconfig
|
||||
;;
|
||||
( purge )
|
||||
;;
|
||||
( * )
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,5 @@
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
|
||||
echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
|
||||
ldconfig
|
||||
fi
|
||||
@@ -0,0 +1,6 @@
|
||||
# second term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
if [ $1 -le 1 ] && [ "@ENABLE_LDCONFIG@" == "ON" ]; then
|
||||
# perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
|
||||
rm -f /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
|
||||
ldconfig
|
||||
fi
|
||||
@@ -0,0 +1,67 @@
|
||||
HSA extension AMD AQL profile library.
|
||||
Provides AQL packets helper methods for perfcounters (PMC) and SQ threadtraces (SQTT).
|
||||
|
||||
Library supports GFX9 APIs.
|
||||
The library source tree:
|
||||
- doc - Documentation, the API specification and the presentation
|
||||
- <hsa-runtime>/inc/hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API
|
||||
- src - AMD AQL profile library sources
|
||||
- core - AQL API sources
|
||||
- pm4 - cmd/pmc/sqtt pm4 builders
|
||||
- def - Generated GFXIP definition headers
|
||||
- test - library test suite
|
||||
- ctrl - Test control
|
||||
- util - Test utils
|
||||
- simple_convolution - Simple convolution test kernel
|
||||
|
||||
Build environment:
|
||||
|
||||
$ export CMAKE_PREFIX_PATH=<path to hsa-runtime includes>:<path to hsa-runtime library>
|
||||
$ export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
$ export CMAKE_DEBUG_TRACE=1 # 1 to enable debug tracing
|
||||
|
||||
To build with the current installed ROCM:
|
||||
|
||||
$ export CMAKE_PREFIX_PATH=/opt/rocm/lib:/opt/rocm/include/hsa
|
||||
|
||||
$ cd .../aqlprofile
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake ..
|
||||
$ make
|
||||
|
||||
To regenerate src/def headers:
|
||||
|
||||
Need to use 'clang' compiler:
|
||||
$ export CXX=/usr/bin/clang++
|
||||
$ export CC=/usr/bin/clang
|
||||
|
||||
'mygen' make target to regenerate the headers from full set of gfxip headers:
|
||||
$ make mygen
|
||||
|
||||
To reset the generated headers:
|
||||
$ make mygenreset
|
||||
|
||||
To run the test:
|
||||
|
||||
$ cd ../aqlprofile/build
|
||||
$ export LD_LIBRARY_PATH=$PWD
|
||||
$ run.sh
|
||||
|
||||
To enable logging of error messages to '/tmp/aql_profile_log.txt':
|
||||
|
||||
$ export HSA_VEN_AMD_AQLPROFILE_LOG=1
|
||||
|
||||
To enable verbose tracing:
|
||||
|
||||
$ export AQLPROFILE_TRACE=1
|
||||
|
||||
To recompile kernel object:
|
||||
|
||||
$ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/opencl-c.h -nogpulib -Xclang -mlink-bitcode-file -Xclang /opt/rocm/amdgcn/bitcode/opencl.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang /opt/rocm/amdgcn/bitcode/ockl.amdgcn.bc -target amdgcn-amd-amdhsa -mcpu=gfx906 vector_add_kernel.cl -o vector_add_kernel.so
|
||||
|
||||
With newer device-libs layout, use this recompile command:
|
||||
$ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/opencl-c.h --hip-device-lib-path=/opt/rocm/amdgcn/bitcode -target amdgcn-amd-amdhsa -mcpu=gfx906 vector_add_kernel.cl -o vector_add_kernel.so
|
||||
|
||||
### ROCm 5.7
|
||||
Added support for GFX10/GFX11
|
||||
@@ -0,0 +1,27 @@
|
||||
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.8)
|
||||
|
||||
set(AQLPROF_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(AQLPROF_WRAPPER_DIR ${AQLPROF_BUILD_DIR}/wrapper_dir)
|
||||
set(AQLPROF_WRAPPER_LIB_DIR ${AQLPROF_WRAPPER_DIR}/lib)
|
||||
|
||||
#function to create symlink to libraries
|
||||
function(create_library_symlink)
|
||||
file(MAKE_DIRECTORY ${AQLPROF_WRAPPER_LIB_DIR})
|
||||
set(LIB_AQLPROF "${AQLPROFILE_LIBRARY}.so")
|
||||
set(MAJ_VERSION "${LIB_VERSION_MAJOR}")
|
||||
set(SO_VERSION "${LIB_VERSION_STRING}")
|
||||
set(library_files "${LIB_AQLPROF}" "${LIB_AQLPROF}.${MAJ_VERSION}" "${LIB_AQLPROF}.${SO_VERSION}")
|
||||
|
||||
foreach(file_name ${library_files})
|
||||
add_custom_target(link_${file_name} ALL
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${AQLPROF_WRAPPER_LIB_DIR}/${file_name})
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
# Create symlink to library files
|
||||
create_library_symlink()
|
||||
install(DIRECTORY ${AQLPROF_WRAPPER_LIB_DIR} DESTINATION ${AQLPROFILE_NAME} COMPONENT ${AQLPROFILE_LIBRARY})
|
||||
Executable
+82
@@ -0,0 +1,82 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
|
||||
|
||||
SRC_DIR=$(dirname "$0")
|
||||
COMPONENT="aqlprofile"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
usage() {
|
||||
echo -e "AQLProfile Build Script Usage:"
|
||||
echo -e "\nTo run ./build.sh PARAMs, PARAMs can be the following:"
|
||||
echo -e "-h | --help For showing this message"
|
||||
echo -e "-b | --build For compiling"
|
||||
echo -e "-cb | --clean-build For full clean build"
|
||||
exit 1
|
||||
}
|
||||
|
||||
while [ 1 ] ; do
|
||||
if [[ "$1" = "-h" || "$1" = "--help" ]] ; then
|
||||
usage
|
||||
exit 1
|
||||
elif [[ "$1" = "-b" || "$1" = "--build" ]] ; then
|
||||
TO_CLEAN=no
|
||||
shift
|
||||
elif [[ "$1" = "-cb" || "$1" = "--clean-build" ]] ; then
|
||||
TO_CLEAN=yes
|
||||
shift
|
||||
elif [[ "$1" = "-"* || "$1" = "--"* ]] ; then
|
||||
echo -e "Wrong option \"$1\", Please use the following options:\n"
|
||||
usage
|
||||
exit 1
|
||||
else
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
umask 022
|
||||
|
||||
if [ -z "$AQLPROFILE_ROOT" ]; then AQLPROFILE_ROOT=$SRC_DIR; fi
|
||||
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=build; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="RelWithDebInfo"; fi
|
||||
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
|
||||
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151"; fi
|
||||
|
||||
AQLPROFILE_ROOT=$(cd $AQLPROFILE_ROOT && echo $PWD)
|
||||
|
||||
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf $BUILD_DIR; fi
|
||||
mkdir -p $BUILD_DIR
|
||||
pushd $BUILD_DIR
|
||||
|
||||
cmake \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-'RelWithDebInfo'} \
|
||||
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
|
||||
-DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCPACK_GENERATOR=${CPACKGEN:-'DEB;RPM'} \
|
||||
-DCMAKE_INSTALL_RPATH=${ROCM_RPATH} \
|
||||
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
|
||||
-DCPACK_GENERATOR="STGZ" \
|
||||
-DGPU_TARGETS="$GPU_LIST" \
|
||||
-DCPACK_OBJCOPY_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objcopy" \
|
||||
-DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
|
||||
-DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
|
||||
-DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
|
||||
$AQLPROFILE_ROOT
|
||||
|
||||
popd
|
||||
|
||||
# "cmake --build $BUILD_DIR" already runs the native build tool inside the
# build tree, so no "-C <dir>" is passed here: the previous
# "-C $AQLPROFILE_ROOT/$BUILD_DIR" pointed make at the wrong (or a missing)
# directory whenever the script was started outside the source root or
# BUILD_DIR was an absolute path.
MAKE_OPTS="-j"
|
||||
|
||||
cmake --build "$BUILD_DIR" -- $MAKE_OPTS all mytest
|
||||
cmake --build "$BUILD_DIR" -- $MAKE_OPTS test
|
||||
cmake --build "$BUILD_DIR" -- $MAKE_OPTS package
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/bin/sh
|
||||
# Display usage
|
||||
cpack_usage()
|
||||
{
|
||||
cat <<EOF
|
||||
Usage: $0 [options]
|
||||
Options: [defaults in brackets after descriptions]
|
||||
--help print this message
|
||||
--prefix=dir directory in which to install
|
||||
EOF
|
||||
exit 1
|
||||
}
|
||||
cpack_echo_exit()
|
||||
{
|
||||
echo $1
|
||||
exit 1
|
||||
}
|
||||
# Display version
|
||||
cpack_version()
|
||||
{
|
||||
echo "@CPACK_PACKAGE_NAME@ Installer Version: @CPACK_PACKAGE_VERSION@, Copyright (c) @CPACK_PACKAGE_VENDOR@"
|
||||
}
|
||||
# Helper function to fix windows paths.
|
||||
cpack_fix_slashes ()
|
||||
{
|
||||
echo "$1" | sed 's/\\/\//g'
|
||||
}
|
||||
interactive=TRUE
|
||||
cpack_skip_license=FALSE
|
||||
cpack_include_subdir=FALSE
|
||||
for a in "$@CPACK_AT_SIGN@"; do
|
||||
if echo $a | grep "^--prefix=" > /dev/null 2> /dev/null; then
|
||||
cpack_prefix_dir=`echo $a | sed "s/^--prefix=//"`
|
||||
cpack_prefix_dir=`cpack_fix_slashes "${cpack_prefix_dir}"`
|
||||
fi
|
||||
if echo $a | grep "^--help" > /dev/null 2> /dev/null; then
|
||||
cpack_usage
|
||||
fi
|
||||
if echo $a | grep "^--version" > /dev/null 2> /dev/null; then
|
||||
cpack_version
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
if [ "x${cpack_include_subdir}x" != "xx" -o "x${cpack_skip_license}x" = "xTRUEx" ]
|
||||
then
|
||||
interactive=FALSE
|
||||
fi
|
||||
cpack_version
|
||||
echo "This is a self-extracting archive."
|
||||
toplevel="`pwd`"
|
||||
if [ "x${cpack_prefix_dir}x" != "xx" ]
|
||||
then
|
||||
toplevel="${cpack_prefix_dir}"
|
||||
fi
|
||||
echo "The archive will be extracted to: ${toplevel}"
|
||||
if [ "x${interactive}x" = "xTRUEx" ]
|
||||
then
|
||||
echo ""
|
||||
echo "If you want to stop extracting, please press <ctrl-C>."
|
||||
if [ "x${cpack_skip_license}x" != "xTRUEx" ]
|
||||
then
|
||||
more << '____cpack__here_doc____'
|
||||
@CPACK_RESOURCE_FILE_LICENSE_CONTENT@
|
||||
____cpack__here_doc____
|
||||
echo
|
||||
echo "Do you accept the license? [yN]: "
|
||||
read line leftover
|
||||
case ${line} in
|
||||
y* | Y*)
|
||||
cpack_license_accepted=TRUE;;
|
||||
*)
|
||||
echo "License not accepted. Exiting ..."
|
||||
exit 1;;
|
||||
esac
|
||||
fi
|
||||
if [ "x${cpack_include_subdir}x" = "xx" ]
|
||||
then
|
||||
echo "By default the @CPACK_PACKAGE_NAME@ will be installed in:"
|
||||
echo " \"${toplevel}/@CPACK_PACKAGE_FILE_NAME@\""
|
||||
echo "Do you want to include the subdirectory @CPACK_PACKAGE_FILE_NAME@?"
|
||||
echo "Saying no will install in: \"${toplevel}\" [Yn]: "
|
||||
read line leftover
|
||||
cpack_include_subdir=TRUE
|
||||
case ${line} in
|
||||
n* | N*)
|
||||
cpack_include_subdir=FALSE
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
if [ "x${cpack_include_subdir}x" = "xTRUEx" ]
|
||||
then
|
||||
toplevel="${toplevel}/@CPACK_PACKAGE_FILE_NAME@"
|
||||
mkdir -p "${toplevel}"
|
||||
fi
|
||||
echo
|
||||
echo "Using target directory: ${toplevel}"
|
||||
echo "Extracting, please wait..."
|
||||
echo ""
|
||||
# take the archive portion of this file and pipe it to tar
|
||||
# the NUMERIC parameter in this command should be one more
|
||||
# than the number of lines in this header file
|
||||
# there are tails which don't understand the "-n" argument, e.g. on SunOS
|
||||
# OTOH there are tails which complain when not using the "-n" argument (e.g. GNU)
|
||||
# so at first try to tail some file to see if tail fails if used with "-n"
|
||||
# if so, don't use "-n"
|
||||
use_new_tail_syntax="-n"
|
||||
tail $use_new_tail_syntax +1 "$0" > /dev/null 2> /dev/null || use_new_tail_syntax=""
|
||||
tail $use_new_tail_syntax +###CPACK_HEADER_LENGTH### "$0" | gunzip | (cd "${toplevel}" && tar xf -) || cpack_echo_exit "Problem unpacking the @CPACK_PACKAGE_FILE_NAME@"
|
||||
echo "Unpacking finished successfully"
|
||||
exit 0
|
||||
#-----------------------------------------------------------
|
||||
# Start of TAR.GZ file
|
||||
#-----------------------------------------------------------;
|
||||
@@ -0,0 +1,88 @@
|
||||
## Build is not supported on Windows platform
|
||||
if ( WIN32 )
|
||||
message ( FATAL_ERROR "Windows build is not supported." )
|
||||
endif ()
|
||||
|
||||
## Compiler Preprocessor definitions.
|
||||
add_definitions ( -DAMD_INTERNAL_BUILD )
|
||||
add_definitions ( -DHSA_LARGE_MODEL= )
|
||||
add_definitions ( -DHSA_DEPRECATED= )
|
||||
add_definitions ( -DLITTLEENDIAN_CPU=1 )
|
||||
|
||||
## Linux Compiler options
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
|
||||
|
||||
add_definitions(-DNEW_TRACE_API=1)
|
||||
|
||||
## CLANG options
|
||||
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
|
||||
endif()
|
||||
|
||||
## Enable debug trace
|
||||
if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
|
||||
add_definitions ( -DDEBUG_TRACE=1 )
|
||||
endif()
|
||||
|
||||
## Enable direct loading of AQL-profile HSA extension
|
||||
if ( DEFINED ENV{CMAKE_LD_AQLPROFILE} )
|
||||
add_definitions (-DROCP_LD_AQLPROFILE=1)
|
||||
endif()
|
||||
|
||||
## Build type
|
||||
if ( NOT DEFINED CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "" )
|
||||
if ( DEFINED ENV{CMAKE_BUILD_TYPE} )
|
||||
set ( CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE} )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
## Installation prefix path
|
||||
if ( NOT DEFINED CMAKE_PREFIX_PATH AND DEFINED ENV{CMAKE_PREFIX_PATH} )
|
||||
set ( CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH} )
|
||||
endif()
|
||||
set ( ENV{CMAKE_PREFIX_PATH} ${CMAKE_PREFIX_PATH} )
|
||||
|
||||
## Extend Compiler flags based on build type
|
||||
string ( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE )
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL debug )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
|
||||
set ( CMAKE_BUILD_TYPE "debug" )
|
||||
else ()
|
||||
set ( CMAKE_BUILD_TYPE "release" )
|
||||
endif ()
|
||||
|
||||
## Extend Compiler flags based on Processor architecture
|
||||
if ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" )
|
||||
set ( NBIT 64 )
|
||||
set ( NBITSTR "64" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" )
|
||||
elseif ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" )
|
||||
set ( NBIT 32 )
|
||||
set ( NBITSTR "" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
|
||||
endif ()
|
||||
|
||||
## Find hsa-runtime
|
||||
find_package(hsa-runtime64 REQUIRED HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
|
||||
|
||||
# find KFD thunk
|
||||
find_package(hsakmt REQUIRED HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
|
||||
|
||||
## Basic Tool Chain Information
|
||||
message ( "----------------NBIT: ${NBIT}" )
|
||||
message ( "-----------BuildType: ${CMAKE_BUILD_TYPE}" )
|
||||
message ( "------------Compiler: ${CMAKE_CXX_COMPILER}" )
|
||||
message ( "----Compiler-Version: ${CMAKE_CXX_COMPILER_VERSION}" )
|
||||
message ( "------------API-path: ${API_PATH}" )
|
||||
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
|
||||
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
|
||||
message ( "-CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}" )
|
||||
message ( "-CMAKE_CXX_COMPILER_VERSION: ${CMAKE_CXX_COMPILER_VERSION}" )
|
||||
message ( "---------GPU_TARGETS: ${GPU_TARGETS}" )
|
||||
@@ -0,0 +1,76 @@
|
||||
|
||||
|
||||
## Parses the VERSION_STRING variable and places
|
||||
## the first, second and third number values in
|
||||
## the major, minor and patch variables.
|
||||
function( parse_version VERSION_STRING )

  # Publishes to the caller's scope:
  #   VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH - first, second and third
  #       run of decimal digits found anywhere in VERSION_STRING (each one is
  #       only set when present),
  #   VERSION_BUILD  - everything after the first "-" (only when a "-" exists
  #       or the variable was already defined in the calling scope),
  #   VERSION_STRING - the dotted "<major>[.<minor>[.<patch>]]" form.
  #
  # All expansions of VERSION_STRING are quoted so that an empty input yields
  # an empty result instead of a hard "wrong number of arguments" error.

  # Start from a known value so nothing leaks in from the calling scope when
  # the input contains no digits at all.
  set ( TEMP_VERSION_STRING "" )

  string ( FIND "${VERSION_STRING}" "-" STRING_INDEX )

  if ( STRING_INDEX GREATER -1 )
    # Skip the dash itself; the remainder is the build suffix.
    math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
    string ( SUBSTRING "${VERSION_STRING}" ${STRING_INDEX} -1 VERSION_BUILD )
  endif ()

  string ( REGEX MATCHALL "[0123456789]+" VERSIONS "${VERSION_STRING}" )
  list ( LENGTH VERSIONS VERSION_COUNT )

  if ( VERSION_COUNT GREATER 0 )
    list ( GET VERSIONS 0 MAJOR )
    set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${MAJOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 1 )
    list ( GET VERSIONS 1 MINOR )
    set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 2 )
    list ( GET VERSIONS 2 PATCH )
    set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
  endif ()

  if ( DEFINED VERSION_BUILD )
    set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
  endif ()

  set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )

endfunction ()
|
||||
|
||||
## Gets the current version of the repository
|
||||
## using versioning tags and git describe.
|
||||
## Passes back a packaging version string
|
||||
## and a library version string.
|
||||
function ( get_version DEFAULT_VERSION_STRING )

  # Computes VERSION_STRING / VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH /
  # VERSION_BUILD for the caller. The values come from DEFAULT_VERSION_STRING
  # unless "git describe" finds a tag beginning with a digit, in which case
  # the tag description is parsed instead.

  # Seed the VERSION_* variables in this scope from the supplied default.
  parse_version ( ${DEFAULT_VERSION_STRING} )

  find_program ( GIT NAMES git )

  if ( GIT )

    # execute_process() does not go through a shell: every COMMAND argument
    # must be its own token, and stderr is silenced with ERROR_QUIET instead
    # of a "2>/dev/null" redirection. Passing the whole command line as one
    # quoted string made CMake look for a program literally named
    # "git describe ...", so the tag lookup never ran.
    execute_process ( COMMAND ${GIT} describe --dirty --long --match "[0-9]*"
                      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                      OUTPUT_VARIABLE GIT_TAG_STRING
                      OUTPUT_STRIP_TRAILING_WHITESPACE
                      ERROR_QUIET
                      RESULT_VARIABLE RESULT )

    # RESULT is the exit code on a normal run, or an error string when the
    # process could not be started; only a clean exit with output is used.
    if ( RESULT EQUAL 0 AND NOT "${GIT_TAG_STRING}" STREQUAL "" )
      parse_version ( ${GIT_TAG_STRING} )
    endif ()

  endif ()

  # parse_version() wrote into this function's scope; hand the results on.
  set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
  set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
  set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
  set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
  set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )

endfunction()
|
||||
@@ -0,0 +1,81 @@
|
||||
# Thin wrapper around ctest_submit(). Arguments are forwarded unchanged so
# that the PARTS / RETURN_VALUE options given at the call sites take effect;
# previously they were discarded and every call re-submitted all parts.
macro(dashboard_submit)
  ctest_submit(${ARGN})
endmacro()
|
||||
|
||||
set(CTEST_PROJECT_NAME "aqlprofile-emu")
|
||||
set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC")
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "cdash.cdash.svc.cluster.local:8080")
|
||||
set(CTEST_DROP_LOCATION "/submit.php?project=${CTEST_PROJECT_NAME}")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
|
||||
set(CTEST_UPDATE_TYPE git)
|
||||
set(CTEST_UPDATE_VERSION_ONLY TRUE)
|
||||
set(CTEST_GIT_COMMAND git)
|
||||
set(CTEST_GIT_INIT_SUBMODULES FALSE)
|
||||
|
||||
set(CTEST_OUTPUT_ON_FAILURE TRUE)
|
||||
set(CTEST_USE_LAUNCHERS TRUE)
|
||||
set(CTEST_VERBOSE ON)
|
||||
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_PASSED_TEST_OUTPUT_SIZE "51200")
|
||||
|
||||
if(NOT DEFINED CTEST_SOURCE_DIRECTORY)
|
||||
set(CTEST_SOURCE_DIRECTORY ".")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CTEST_BINARY_DIRECTORY)
|
||||
set(CTEST_BINARY_DIRECTORY "./build")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED AQLPROFILE_EXTRA_CONFIGURE_ARGS)
|
||||
set(AQLPROFILE_EXTRA_CONFIGURE_ARGS "")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED AQLPROFILE_BUILD_NUM_JOBS)
|
||||
set(AQLPROFILE_BUILD_NUM_JOBS "16")
|
||||
endif()
|
||||
|
||||
# Use ROCM_PATH (defaulted to /opt/rocm earlier in this script) for the prefix
# and install locations instead of hard-coding /opt/rocm.
set(CTEST_CONFIGURE_COMMAND "cmake -B ${CTEST_BINARY_DIRECTORY} -DCMAKE_BUILD_TYPE='RelWithDebInfo' -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_INSTALL_PREFIX=${ROCM_PATH} -DCPACK_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} -DCPACK_GENERATOR='DEB;RPM;STGZ' -DGPU_TARGETS='gfx906,gfx90a,gfx942,gfx1101,gfx1201' ${AQLPROFILE_EXTRA_CONFIGURE_ARGS} ${CTEST_SOURCE_DIRECTORY}")
|
||||
set(CTEST_BUILD_COMMAND "cmake --build \"${CTEST_BINARY_DIRECTORY}\" -- -j ${AQLPROFILE_BUILD_NUM_JOBS} all mytest")
|
||||
|
||||
if(NOT DEFINED CTEST_SITE)
|
||||
set(CTEST_SITE "${HOSTNAME}")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CTEST_BUILD_NAME)
|
||||
set(CTEST_BUILD_NAME "aqlprofile-amd-staging-ubuntu-${RUNNER_HOSTNAME}-core")
|
||||
endif()
|
||||
|
||||
macro(handle_error _message _ret)
|
||||
if(NOT ${${_ret}} EQUAL 0)
|
||||
dashboard_submit(PARTS Done RETURN_VALUE _submit_ret)
|
||||
message(FATAL_ERROR "${_message} failed: ${${_ret}}")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
ctest_start(Continuous)
|
||||
ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _update_ret)
|
||||
# _update_ret comes from ctest_update(), so report the failing step as "Update".
handle_error("Update" _update_ret)
|
||||
ctest_configure(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _configure_ret)
|
||||
dashboard_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret)
|
||||
|
||||
handle_error("Configure" _configure_ret)
|
||||
|
||||
ctest_build(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _build_ret)
|
||||
dashboard_submit(PARTS Build RETURN_VALUE _submit_ret)
|
||||
|
||||
handle_error("Build" _build_ret)
|
||||
|
||||
ctest_test(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _test_ret)
|
||||
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)
|
||||
|
||||
handle_error("Testing" _test_ret)
|
||||
|
||||
dashboard_submit(PARTS Done RETURN_VALUE _submit_ret)
|
||||
@@ -0,0 +1,211 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_BLOCKINFO_H_
|
||||
#define _GFX10_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
// To define GFX10 specific blocks info like GC caches blocks
|
||||
// All common with GFX9 blocks are inherited from GFX9 space
|
||||
// Enumeration of Gfx10 hardware counter blocks
|
||||
enum CounterBlockId {
|
||||
CbCounterBlockId,
|
||||
CpcCounterBlockId,
|
||||
CpfCounterBlockId,
|
||||
CpgCounterBlockId,
|
||||
DbCounterBlockId,
|
||||
GdsCounterBlockId,
|
||||
GrbmCounterBlockId,
|
||||
GrbmSeCounterBlockId,
|
||||
IaCounterBlockId,
|
||||
PaScCounterBlockId,
|
||||
PaSuCounterBlockId,
|
||||
SpiCounterBlockId,
|
||||
SqCounterBlockId,
|
||||
SqGsCounterBlockId,
|
||||
SqVsCounterBlockId,
|
||||
SqPsCounterBlockId,
|
||||
SqHsCounterBlockId,
|
||||
SqCsCounterBlockId,
|
||||
SxCounterBlockId,
|
||||
TaCounterBlockId,
|
||||
TcaCounterBlockId,
|
||||
TccCounterBlockId,
|
||||
TcsCounterBlockId,
|
||||
TdCounterBlockId,
|
||||
VgtCounterBlockId,
|
||||
WdCounterBlockId,
|
||||
|
||||
// MC blocks
|
||||
GceaCounterBlockId,
|
||||
AtcCounterBlockId,
|
||||
AtcL2CounterBlockId,
|
||||
McVmL2CounterBlockId,
|
||||
RpbCounterBlockId,
|
||||
RmiCounterBlockId,
|
||||
Gl1aCounterBlockId,
|
||||
Gl1cCounterBlockId,
|
||||
Gl2aCounterBlockId,
|
||||
Gl2cCounterBlockId,
|
||||
GcrCounterBlockId,
|
||||
GusCounterBlockId,
|
||||
|
||||
// SDMA block
|
||||
SdmaCounterBlockId,
|
||||
// UMC block
|
||||
UmcCounterBlockId,
|
||||
|
||||
// Counters retrieved by KFD
|
||||
IommuV2CounterBlockId,
|
||||
KernelDriverCounterBlockId,
|
||||
|
||||
CpPipeStatsCounterBlockId,
|
||||
HwInfoCounterBlockId,
|
||||
|
||||
FirstCounterBlockId = CbCounterBlockId,
|
||||
LastCounterBlockId = HwInfoCounterBlockId,
|
||||
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
enum SpmGlobalBlockId {
|
||||
SPM_GLOBAL_BLOCK_NAME_CPG = 0,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPC = 1,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPF = 2,
|
||||
SPM_GLOBAL_BLOCK_NAME_GDS = 3,
|
||||
SPM_GLOBAL_BLOCK_NAME_TCC = 4,
|
||||
SPM_GLOBAL_BLOCK_NAME_TCA = 5,
|
||||
SPM_GLOBAL_BLOCK_NAME_IA = 6,
|
||||
SPM_GLOBAL_BLOCK_NAME_TCS = 7,
|
||||
};
|
||||
|
||||
enum SpmSeBlockId {
|
||||
SPM_SE_BLOCK_NAME_CB = 0,
|
||||
SPM_SE_BLOCK_NAME_DB = 1,
|
||||
SPM_SE_BLOCK_NAME_PA = 2,
|
||||
SPM_SE_BLOCK_NAME_SX = 3,
|
||||
SPM_SE_BLOCK_NAME_SC = 4,
|
||||
SPM_SE_BLOCK_NAME_TA = 5,
|
||||
SPM_SE_BLOCK_NAME_TD = 6,
|
||||
SPM_SE_BLOCK_NAME_TCP = 7,
|
||||
SPM_SE_BLOCK_NAME_SPI = 8,
|
||||
SPM_SE_BLOCK_NAME_SQG = 9,
|
||||
SPM_SE_BLOCK_NAME_VGT = 10,
|
||||
};
|
||||
|
||||
// Number of block instances
|
||||
static const uint32_t CbCounterBlockNumInstances = 4;
|
||||
static const uint32_t DbCounterBlockNumInstances = 4;
|
||||
static const uint32_t TaCounterBlockNumInstances = 16;
|
||||
static const uint32_t TdCounterBlockNumInstances = 16;
|
||||
static const uint32_t TcpCounterBlockNumInstances = 16;
|
||||
static const uint32_t TcaCounterBlockNumInstances = 2;
|
||||
static const uint32_t TccCounterBlockNumInstances = 16;
|
||||
static const uint32_t SdmaCounterBlockNumInstances = 2;
|
||||
// MI100 has 8 SDMA instances
|
||||
static const uint32_t SdmaCounterBlockMaxInstances = 8;
|
||||
static const uint32_t UmcCounterBlockMaxInstances = 32;
|
||||
static const uint32_t RmiCounterBlockNumInstances = 8;
|
||||
static const uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
static const uint32_t CbCounterBlockNumCounters = 4;
|
||||
static const uint32_t CpcCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpfCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpgCounterBlockNumCounters = 2;
|
||||
static const uint32_t DbCounterBlockNumCounters = 4;
|
||||
static const uint32_t GdsCounterBlockNumCounters = 4;
|
||||
static const uint32_t GrbmCounterBlockNumCounters = 2;
|
||||
static const uint32_t GrbmSeCounterBlockNumCounters = 4;
|
||||
static const uint32_t IaCounterBlockNumCounters = 4;
|
||||
static const uint32_t PaSuCounterBlockNumCounters = 4;
|
||||
static const uint32_t PaScCounterBlockNumCounters = 8;
|
||||
static const uint32_t RlcCounterBlockNumCounters = 2;
|
||||
static const uint32_t SdmaCounterBlockNumCounters = 2;
|
||||
static const uint32_t UmcCounterBlockNumCounters = 5;
|
||||
static const uint32_t SpiCounterBlockNumCounters = 6;
|
||||
static const uint32_t SqCounterBlockNumCounters = 8;
|
||||
static const uint32_t SxCounterBlockNumCounters = 4;
|
||||
static const uint32_t TaCounterBlockNumCounters = 2;
|
||||
static const uint32_t TcaCounterBlockNumCounters = 4;
|
||||
static const uint32_t TccCounterBlockNumCounters = 4;
|
||||
static const uint32_t TcpCounterBlockNumCounters = 4;
|
||||
static const uint32_t TdCounterBlockNumCounters = 2;
|
||||
static const uint32_t VgtCounterBlockNumCounters = 4;
|
||||
static const uint32_t WdCounterBlockNumCounters = 4;
|
||||
static const uint32_t GceaCounterBlockNumCounters = 2;
|
||||
static const uint32_t AtcCounterBlockNumCounters = 4;
|
||||
static const uint32_t AtcL2CounterBlockNumCounters = 2;
|
||||
static const uint32_t McVmL2CounterBlockNumCounters = 8;
|
||||
static const uint32_t RpbCounterBlockNumCounters = 4;
|
||||
static const uint32_t RmiCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1cCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2cCounterBlockNumCounters = 4;
|
||||
static const uint32_t GcrCounterBlockNumCounters = 2;
|
||||
static const uint32_t GusCounterBlockNumCounters = 2;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent = CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_ME2_DC1_SPI_BUSY;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CPF_UTCL2IU_STALL;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_CPG_UTCL2IU_STALL;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_GWS_BYPASS;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
// static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
// static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SC_SC_SPI_EVENT;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent = SX_PERF_SEL_DB3_SIZE;
|
||||
// static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
// static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
// static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
// static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
// static const uint32_t TdCounterBlockMaxEvent =
//     TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt;
// static const uint32_t VgtCounterBlockMaxEvent = vgt_perf_sclk_te11_vld;
// static const uint32_t WdCounterBlockMaxEvent = wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent = RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 24;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 83;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 91;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 254;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 142;
|
||||
static const uint32_t GusCounterBlockMaxEvent = 89;
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_BLOCKINFO_H_
|
||||
@@ -0,0 +1,425 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_BLOCKTABLE_H_
|
||||
#define _GFX10_BLOCKTABLE_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
|
||||
/*
|
||||
* CPC
|
||||
*/
|
||||
static const CounterRegInfo CpcCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* CPF
|
||||
*/
|
||||
static const CounterRegInfo CpfCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GDS
|
||||
*/
|
||||
static const CounterRegInfo GdsCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GRBM
|
||||
*/
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GRBM_SE
|
||||
*/
|
||||
static const CounterRegInfo GrbmSeCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_HI)}};
|
||||
|
||||
/*
|
||||
* SPI
|
||||
*/
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_HI)}};
|
||||
|
||||
/*
|
||||
* SQ
|
||||
*/
|
||||
static const CounterRegInfo SqCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_HI)}};
|
||||
|
||||
/*
|
||||
* SX
|
||||
*/
|
||||
static const CounterRegInfo SxCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GCEA
|
||||
*/
|
||||
static const CounterRegInfo GceaCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER0_CFG),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER1_CFG),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_HI)}};
|
||||
|
||||
// Define GFX10-specific block table entries, such as the GC cache blocks
|
||||
/*
|
||||
* GCR
|
||||
*/
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, mmGCR_GENERAL_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_SELECT), REG_32B_ADDR(GC, 0, mmGCR_GENERAL_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GL1A
|
||||
*/
|
||||
static const CounterRegInfo Gl1aCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GL1C
|
||||
*/
|
||||
static const CounterRegInfo Gl1cCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GL2A
|
||||
*/
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GL2C
|
||||
*/
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GUS
|
||||
*/
|
||||
static const CounterRegInfo GusCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER1_CFG), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER2_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* TA
|
||||
*/
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_HI)}};
|
||||
|
||||
// Counter block CPC
|
||||
static const GpuBlockInfo CpcCounterBlockInfo = {
|
||||
"CPC",
|
||||
CpcCounterBlockId,
|
||||
1,
|
||||
CpcCounterBlockMaxEvent,
|
||||
CpcCounterBlockNumCounters,
|
||||
CpcCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_CPC_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*CpcBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPC};
|
||||
// Counter block CPF
|
||||
static const GpuBlockInfo CpfCounterBlockInfo = {
|
||||
"CPF",
|
||||
CpfCounterBlockId,
|
||||
1,
|
||||
CpfCounterBlockMaxEvent,
|
||||
CpfCounterBlockNumCounters,
|
||||
CpfCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_CPF_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*CpfBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPF};
|
||||
// Counter block GDS
|
||||
static const GpuBlockInfo GdsCounterBlockInfo = {
|
||||
"GDS",
|
||||
GdsCounterBlockId,
|
||||
1,
|
||||
GdsCounterBlockMaxEvent,
|
||||
GdsCounterBlockNumCounters,
|
||||
GdsCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GDS_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*GdsBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_GDS};
|
||||
// Counter block GRBM
|
||||
static const GpuBlockInfo GrbmCounterBlockInfo = {
|
||||
"GRBM",
|
||||
GrbmCounterBlockId,
|
||||
1,
|
||||
GrbmCounterBlockMaxEvent,
|
||||
GrbmCounterBlockNumCounters,
|
||||
GrbmCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GRBM_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockGRBMAttr};
|
||||
// Counter block GRBMSE
|
||||
static const GpuBlockInfo GrbmSeCounterBlockInfo = {
|
||||
"GRBM_SE",
|
||||
GrbmSeCounterBlockId,
|
||||
1,
|
||||
GrbmSeCounterBlockMaxEvent,
|
||||
GrbmSeCounterBlockNumCounters,
|
||||
GrbmSeCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GRBM_SE0_PERFCOUNTER_SELECT,
|
||||
CounterBlockDfltAttr};
|
||||
// Counter block SPI
|
||||
static const GpuBlockInfo SpiCounterBlockInfo = {
|
||||
"SPI",
|
||||
SpiCounterBlockId,
|
||||
1,
|
||||
SpiCounterBlockMaxEvent,
|
||||
SpiCounterBlockNumCounters,
|
||||
SpiCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_SPI_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSPIAttr,
|
||||
NULL /*SpiBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_SPI};
|
||||
// Counter block SQ
|
||||
static const GpuBlockInfo SqCounterBlockInfo = {"SQ",
|
||||
SqCounterBlockId,
|
||||
1,
|
||||
SqCounterBlockMaxEvent,
|
||||
SqCounterBlockNumCounters,
|
||||
SqCounterRegAddr,
|
||||
gfx10_cntx_prim::sq_select_value,
|
||||
CounterBlockSeAttr | CounterBlockSqAttr,
|
||||
NULL,
|
||||
SPM_SE_BLOCK_NAME_SQG};
|
||||
// Counter block SX
|
||||
static const GpuBlockInfo SxCounterBlockInfo = {
|
||||
"SX",
|
||||
SxCounterBlockId,
|
||||
1,
|
||||
SxCounterBlockMaxEvent,
|
||||
SxCounterBlockNumCounters,
|
||||
SxCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_SX_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockCleanAttr,
|
||||
NULL /*SxBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_SX};
|
||||
// Counter block GCEA
|
||||
static const GpuBlockInfo GceaCounterBlockInfo = {
|
||||
"GCEA",
|
||||
GceaCounterBlockId,
|
||||
GceaCounterBlockNumInstances,
|
||||
GceaCounterBlockMaxEvent,
|
||||
GceaCounterBlockNumCounters,
|
||||
GceaCounterRegAddr,
|
||||
gfx10_cntx_prim::mc_select_value_GCEA_PERFCOUNTER0_CFG,
|
||||
CounterBlockMcAttr};
|
||||
// Counter block GL1A
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {
|
||||
"GL1A",
|
||||
Gl1aCounterBlockId,
|
||||
8,
|
||||
Gl1aCounterBlockMaxEvent,
|
||||
Gl1aCounterBlockNumCounters,
|
||||
Gl1aCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL1C
|
||||
static const GpuBlockInfo Gl1cCounterBlockInfo = {
|
||||
"GL1C",
|
||||
Gl1cCounterBlockId,
|
||||
8,
|
||||
Gl1cCounterBlockMaxEvent,
|
||||
Gl1cCounterBlockNumCounters,
|
||||
Gl1cCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL2A
|
||||
static const GpuBlockInfo Gl2aCounterBlockInfo = {
|
||||
"GL2A",
|
||||
Gl2aCounterBlockId,
|
||||
32,
|
||||
Gl2aCounterBlockMaxEvent,
|
||||
Gl2aCounterBlockNumCounters,
|
||||
Gl2aCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GL2C
|
||||
static const GpuBlockInfo Gl2cCounterBlockInfo = {
|
||||
"GL2C",
|
||||
Gl2cCounterBlockId,
|
||||
32,
|
||||
Gl2cCounterBlockMaxEvent,
|
||||
Gl2cCounterBlockNumCounters,
|
||||
Gl2cCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GCR
|
||||
static const GpuBlockInfo GcrCounterBlockInfo = {
|
||||
"GCR",
|
||||
GcrCounterBlockId,
|
||||
1,
|
||||
GcrCounterBlockMaxEvent,
|
||||
GcrCounterBlockNumCounters,
|
||||
GcrCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GUS
|
||||
static const GpuBlockInfo GusCounterBlockInfo = {
|
||||
"GUS",
|
||||
GusCounterBlockId,
|
||||
1,
|
||||
GusCounterBlockMaxEvent,
|
||||
GusCounterBlockNumCounters,
|
||||
GusCounterRegAddr,
|
||||
gfx10_cntx_prim::mc_select_value_RPB_PERFCOUNTER0_CFG,
|
||||
CounterBlockGusAttr};
|
||||
// Counter block TA
|
||||
static const GpuBlockInfo TaCounterBlockInfo = {
|
||||
"TA",
|
||||
TaCounterBlockId,
|
||||
TaCounterBlockNumInstances,
|
||||
235 /*TaCounterBlockMaxEvent*/,
|
||||
TaCounterBlockNumCounters,
|
||||
TaCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TA_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockTcAttr,
|
||||
NULL /*TaBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_TA};
|
||||
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,685 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_PRIMITIVES_H_
|
||||
#define _GFX10_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define SQTT_PRIM_ENABLED 1
|
||||
|
||||
// Taken from gfx10_mask.h
// GCR_CNTL bit values / masks used by this header.
#define GCR_CNTL__SEQ_FORWARD 0x00010000L
#define GCR_CNTL__SEQ_MASK 0x00030000L
#define GCR_CNTL__GL2_WB_MASK 0x00008000L
|
||||
|
||||
// Taken from gfx10_pm4defs.h
#define COPY_DATA_SEL_REG 0                   ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4  ///< Privileged memory performance counter
#define COPY_DATA_SEL_COUNT_1DW 0             ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
//
// select_value(reg_name) expands to a captureless lambda that takes a
// counter_des_t and returns, as uint32_t, the result of
// SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id).
#define select_value(reg_name)                                                \
  [](const counter_des_t& counter_des) {                                      \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
    return select;                                                            \
  }
|
||||
// mc_select_value(reg_name) expands to a captureless lambda that takes a
// counter_des_t and returns, as uint32_t, the OR of three
// SET_REG_FIELD_BITS results for `reg_name`: PERF_SEL = counter_des.id,
// PERF_MODE = PERFMON_COUNTER_MODE_ACCUM, and ENABLE = 1.
#define mc_select_value(reg_name)                                                    \
  [](const counter_des_t& counter_des) {                                             \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |       \
        SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) |        \
        SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                                     \
    return select;                                                                   \
  }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
|
||||
class gfx10_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 10;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, mmGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, mmCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmCP_PERFMON_CNTL);
|
||||
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, mmCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
// MC_PERFCOUNTER_RSLT_CNTL bit masks: ENABLE_ANY is bit 24, CLEAR_ALL is bit 25.
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SQ / thread-trace register addresses and related constants.
// Members written as `Register X{}` are value-initialized and carry no
// REG_32B_ADDR here (NOTE(review): presumably registers that are not used on
// gfx10 -- confirm).
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR{};
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BUF0_BASE);
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BUF0_SIZE);
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CTRL);
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
static const uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_DROPPED_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_WPTR);
// Same register as SQ_THREAD_TRACE_STATUS_ADDR, with UCONFIG_SPACE_START
// subtracted from its offset.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = []() {
  Register reg = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
  reg.offset -= UCONFIG_SPACE_START;
  return reg;
}();
static const uint32_t TT_BUFF_ALIGN_SHIFT = 12;
static constexpr Register GUS_PERFCOUNTER_RSLT_CNTL_ADDR =
    REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL);
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM register addresses and layout constants.
// RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR is value-initialized with no
// REG_32B_ADDR (NOTE(review): presumably not present on gfx10 -- confirm).
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, mmRLC_SPM_MC_CNTL);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_SEGMENT_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Addresses of the four SQ_THREAD_TRACE_USERDATA_<n> registers.
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// Returns REG_32B_NULL for every index; the argument is not consulted.
static Register sqtt_perfcounter_addr(uint32_t index) {
  return REG_32B_NULL;
}
|
||||
|
||||
// 16-bit mux selector value, viewable either as a raw word (`data`) or as
// three bit-fields (`gfx`): counter (6 bits), block (5 bits), instance
// (5 bits), declared in that order.
union mux_info_t {
  uint16_t data;
  struct {
    uint16_t counter : 6;
    uint16_t block : 5;
    uint16_t instance : 5;
  } gfx;
};
|
||||
|
||||
// SQ block identifiers: the counter block id and the id used for SPM.
static const uint32_t SQ_BLOCK_ID = SqCounterBlockId;
static const uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the low 32 bits of a 64-bit value.
static uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v);
}
|
||||
// Returns the high 32 bits of a 64-bit value.
static uint32_t High32(const uint64_t& v) {
  return static_cast<uint32_t>(v >> 32);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
// Stub: ignores all three arguments and always returns 0.
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t&, const uint32_t&,
                                           const uint32_t&) {
  return 0;
}
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
// Stub: ignores all four arguments and always returns 0.
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t&, const uint32_t&,
                                                const uint32_t&, const uint32_t&) {
  return 0;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to reset counters
// Returns 0: no fields are set.
static uint32_t cp_perfmon_cntl_reset_value() {
  uint32_t cp_perfmon_cntl{0};
  return cp_perfmon_cntl;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE = 1.
static uint32_t cp_perfmon_cntl_start_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE = 2.
static uint32_t cp_perfmon_cntl_stop_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
}
|
||||
|
||||
// CP_PERFMON_CNTL value to sample (read) counters: PERFMON_STATE = 1 with PERFMON_SAMPLE_ENABLE set
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE = 1 and
// PERFMON_SAMPLE_ENABLE = 1.
static uint32_t cp_perfmon_cntl_read_value() {
  const uint32_t state_bits = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
  const uint32_t sample_bits = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
  return state_bits | sample_bits;
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
// Returns a COMPUTE_PERFCOUNT_ENABLE value with PERFCOUNT_ENABLE = 1.
static uint32_t cp_perfcount_enable_value() {
  return SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
// Returns a COMPUTE_PERFCOUNT_ENABLE value with PERFCOUNT_ENABLE = 0.
static uint32_t cp_perfcount_disable_value() {
  return SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
// Builds the SQ_PERFCOUNTER0_SELECT value for a counter: PERF_SEL is taken
// from counter_des.id and the SQC bank mask is set to 0xF. When the register
// headers do not define the SQC_BANK_MASK field, the literal 0xF000 is OR-ed
// in instead.
static uint32_t sq_select_value(const counter_des_t& counter_des) {
#if defined(SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK__SHIFT)
  const uint32_t bank_mask_bits = SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF);
#else
  const uint32_t bank_mask_bits = 0xF000;
#endif
  const uint32_t perf_sel_bits =
      SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
  return perf_sel_bits | bank_mask_bits;
}
|
||||
|
||||
// SPM variant of the SQ select value: PERF_SEL from counter_des.id,
// SPM_MODE = 3 (PERFMON_SPM_MODE_32BIT_CLAMP) and the SQC bank mask set to
// 0xF. When the register headers do not define the SQC_BANK_MASK field, the
// literal 0xF000 is OR-ed in instead.
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
#if defined(SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK__SHIFT)
  const uint32_t bank_mask_bits = SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF);
#else
  const uint32_t bank_mask_bits = 0xF000;
#endif
  const uint32_t perf_sel_bits =
      SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
  const uint32_t spm_mode_bits = SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3);
  return perf_sel_bits | spm_mode_bits | bank_mask_bits;
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx10
|
||||
// Stub: the counter descriptor is ignored and the mask value is always 0.
static uint32_t sq_mask_value(const counter_des_t&) {
  return 0;
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter_ctrl{0};
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} else if (block_id == SqVsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} else if (block_id == SqPsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// Aborts the process when counters_vec_attr has both CounterBlockSqAttr and
// CounterBlockTcAttr set. The check is compiled in only when
// SQ_CONFLICT_CHECK == 1; otherwise this function does nothing.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t sq_and_tc = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & sq_and_tc) == sq_and_tc) abort();
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
// Returns an SQ_PERFCOUNTER_CTRL value with the PS, VS, GS, ES, HS, LS and CS
// enable fields all set to 1.
static uint32_t sq_control_enable_value() {
  return SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
}
|
||||
// Stub: always returns 0.
static uint32_t sq_control2_enable_value() {
  return 0;
}
|
||||
// Stub: always returns 0.
static uint32_t sq_control2_disable_value() {
  return 0;
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// Returns the counter's index field unchanged as the MC config value.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  return counter_des.index;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
// Returns the MC_PERFCOUNTER_RSLT_CNTL CLEAR_ALL mask constant.
static uint32_t mc_reset_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
}
|
||||
// Returns the MC_PERFCOUNTER_RSLT_CNTL ENABLE_ANY mask constant.
static uint32_t mc_start_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
}
|
||||
|
||||
// Counter Select Register value templates
|
||||
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value(SX_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GDS_PERFCOUNTER0_SELECT =
|
||||
select_value(GDS_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
// Builds an SPM select value using the TCP_PERFCOUNTER0_SELECT field layout:
// PERF_SEL = counter_des.id, CNTR_MODE = 3 (PERFMON_SPM_MODE_32BIT_CLAMP).
static uint32_t spm_select_value(const counter_des_t& counter_des) {
  const uint32_t perf_sel_bits =
      SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
  const uint32_t cntr_mode_bits = SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);
  return perf_sel_bits | cntr_mode_bits;
}
|
||||
// SPM select value that places the event in the PERF_SEL field of the
// TCP_PERFCOUNTER0_SELECT layout, with CNTR_MODE = 3
// (PERFMON_SPM_MODE_32BIT_CLAMP).
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
  const uint32_t perf_sel_bits =
      SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
  const uint32_t cntr_mode_bits = SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);
  return perf_sel_bits | cntr_mode_bits;
}
|
||||
// SPM select value that places the event in the PERF_SEL1 field of the
// TCP_PERFCOUNTER0_SELECT layout, with CNTR_MODE = 3
// (PERFMON_SPM_MODE_32BIT_CLAMP).
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
  const uint32_t perf_sel1_bits =
      SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id);
  const uint32_t cntr_mode_bits = SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);
  return perf_sel1_bits | cntr_mode_bits;
}
|
||||
// Fills a zero-initialised mux_info_t from a counter descriptor:
//   gfx.block    <- counter_des.block_info->spm_block_id
//   gfx.instance <- counter_des.block_des.index
//   gfx.counter  <- counter_des.index
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t entry{0};
  entry.gfx.block = counter_des.block_info->spm_block_id;
  entry.gfx.instance = counter_des.block_des.index;
  entry.gfx.counter = counter_des.index;
  return entry;
}
|
||||
// Fills a zero-initialised mux_info_t from explicit counter, block and
// instance values.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t entry{0};
  entry.gfx.block = block;
  entry.gfx.instance = instance;
  entry.gfx.counter = counter;
  return entry;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// GUS primitives
|
||||
// Returns a GUS_PERFCOUNTER_RSLT_CNTL value with CLEAR_ALL = 1.
static uint32_t gus_disable_clear_value() {
  return SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, CLEAR_ALL, 0x1);
}
|
||||
|
||||
// Returns a GUS_PERFCOUNTER_RSLT_CNTL value with ENABLE_ANY = 1.
static uint32_t gus_start_value() {
  return SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, ENABLE_ANY, 0x1);
}
|
||||
|
||||
// Builds a GUS_PERFCOUNTER0_CFG value: PERF_SEL = counter_des.id, ENABLE = 1.
static uint32_t gus_select_value(const counter_des_t& counter_des) {
  const uint32_t perf_sel_bits =
      SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id);
  const uint32_t enable_bits = SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, ENABLE, 0x1);
  return perf_sel_bits | enable_bits;
}
|
||||
|
||||
// Returns the GUS_PERFCOUNTER_RSLT_CNTL stop value: every field left at 0.
static uint32_t gus_stop_value() {
  return 0;
}
|
||||
|
||||
// SDMA primitives
|
||||
// Stub: always returns 0.
static uint32_t sdma_enable_value() {
  return 0;
}
|
||||
|
||||
// Stub: always returns 0.
static uint32_t sdma_disable_clear_value() {
  return 0;
}
|
||||
|
||||
// Stub: the counter descriptor is ignored and the result is always 0.
// The parameter is left unnamed (as in sq_mask_value) so the unused argument
// does not trigger -Wunused-parameter.
static uint32_t sdma_select_value(const counter_des_t& /*counter_des*/) { return 0; }
|
||||
|
||||
// Stub: the counter descriptor is ignored and the result is always 0.
// The parameter is left unnamed (as in sq_mask_value) so the unused argument
// does not trigger -Wunused-parameter.
static uint32_t sdma_stop_value(const counter_des_t& /*counter_des*/) { return 0; }
|
||||
|
||||
// SPM trace routines
|
||||
// Returns an RLC_SPM_MC_CNTL value with RLC_SPM_VMID = 15.
static uint32_t rlc_spm_mc_cntl_value() {
  return SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
}
|
||||
// Returns a CP_PERFMON_CNTL value with SPM_PERFMON_STATE = 1.
static uint32_t cp_perfmon_cntl_spm_start_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
}
|
||||
// Returns a CP_PERFMON_CNTL value with SPM_PERFMON_STATE = 2.
static uint32_t cp_perfmon_cntl_spm_stop_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
}
|
||||
// Stub: all four arguments are ignored and the result is always 0.
// Parameters are left unnamed (as in the other stubs of this class) so the
// unused arguments do not trigger -Wunused-parameter.
static uint32_t rlc_spm_muxsel_data(const uint32_t& /*value*/,
                                    const counter_des_t& /*counter_des*/,
                                    const uint32_t& /*block*/, const uint32_t& /*hi*/) {
  return 0;
}
|
||||
// Returns an RLC_SPM_PERFMON_CNTL value with PERFMON_SAMPLE_INTERVAL set to
// sampling_rate.
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
}
|
||||
// Builds the RLC_SPM_PERFMON_SEGMENT_SIZE value:
//   GLOBAL_NUM_LINE      <- global_count
//   SE0/SE1/SE2_NUM_LINE <- se_count
//   PERFMON_SEGMENT_SIZE <- global_count + 4 * se_count
// NOTE(review): the total counts four SEs but only SE0..SE2 have a NUM_LINE
// field written here; presumably SE3 is programmed elsewhere - confirm.
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
                                                   const uint32_t& se_count) {
  const uint32_t total_lines = global_count + (4 * se_count);

  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_LINE, global_count) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_count) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_count) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_count) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, total_lines);
}
|
||||
|
||||
// Stub: the SE count is ignored and the result is always 0.
// The parameter is left unnamed so the unused argument does not trigger
// -Wunused-parameter.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& /*se_count*/) {
  return 0;
}
|
||||
|
||||
// Enable all of the WTYPEs
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
// Builds the SQ_THREAD_TRACE_MASK value: SIMD_SEL = simd, WGP_SEL = wgp,
// SA_SEL = 0 and WTYPE_INCLUDE = 1 << 6. vmid is not used. Returns 0 when
// SQTT_PRIM_ENABLED is not set.
// NOTE(review): WTYPE_INCLUDE is written with the single bit 1 << 6, while the
// comment preceding this function speaks of enabling all WTYPEs - confirm.
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE, 1 << 6);
#else
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx10
|
||||
// Stub: always returns 0.
static uint32_t sqtt_perf_mask_value() {
  return 0;
}
|
||||
|
||||
// Bit written to the REG_INCLUDE field of SQ_THREAD_TRACE_TOKEN_MASK by the
// token-mask builders in this class.
static const uint32_t SQTT_TOKEN_REG_USERDATA = 1u << 3;

// Token bits combined by the token-mask builders to form the TOKEN_EXCLUDE
// field. SQTT_TOKEN_WVRDY shares the value 1 << 3 with SQTT_TOKEN_REG_USERDATA,
// but the two are written to different fields.
static const uint32_t SQTT_TOKEN_VALU = 1u << 2;
static const uint32_t SQTT_TOKEN_WVRDY = 1u << 3;
static const uint32_t SQTT_TOKEN_WAVE = 1u << 4;
static const uint32_t SQTT_TOKEN_REG = 1u << 5;
static const uint32_t SQTT_TOKEN_IMMED = 1u << 6;
static const uint32_t SQTT_TOKEN_INST = 1u << 8;
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is on:
// REG_INCLUDE = SQTT_TOKEN_REG_USERDATA, and TOKEN_EXCLUDE = the VALU, WVRDY,
// WAVE, REG, IMMED and INST token bits XOR-ed with 0x7FF.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_token_mask_on_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                            (SQTT_TOKEN_VALU | SQTT_TOKEN_WVRDY | SQTT_TOKEN_WAVE | SQTT_TOKEN_REG |
                             SQTT_TOKEN_IMMED | SQTT_TOKEN_INST) ^
                                0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is off:
// INST_EXCLUDE = 0x3 and TOKEN_EXCLUDE = 0x7FF.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_token_mask_off_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value for occupancy tracing:
// REG_INCLUDE = SQTT_TOKEN_REG_USERDATA, INST_EXCLUDE = 0x3, and
// TOKEN_EXCLUDE = the WAVE and REG token bits XOR-ed with 0x7FF.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_token_mask_occupancy_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                            (SQTT_TOKEN_WAVE | SQTT_TOKEN_REG) ^ 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx10
|
||||
// Stub: always returns 0.
static uint32_t sqtt_token_mask2_value() {
  return 0;
}
|
||||
// Stub: both register values are ignored and the answer is always false.
// Returns the bool literal rather than the integer 0, and leaves the unused
// parameters unnamed so they do not trigger -Wunused-parameter.
static bool sqtt_stalling_enabled(const uint32_t& /*mask_val*/,
                                  const uint32_t& /*token_mask_val*/) {
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
// Stub: always returns 0.
static uint32_t sqtt_mode_off_value() {
  return 0;
}
|
||||
// Thread trace mode ON value
|
||||
// Stub: always returns 0.
static uint32_t sqtt_mode_on_value() {
  return 0;
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
// Builds the SQ_THREAD_TRACE_BUF0_BASE value: BASE_LO is the low 32 bits of
// base_addr shifted right by TT_BUFF_ALIGN_SHIFT.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE, BASE_LO,
                            Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
#else
  return 0;
#endif
}
|
||||
|
||||
// Stub: the base address is ignored and the result is always 0.
// The parameter is left unnamed so the unused argument does not trigger
// -Wunused-parameter.
static uint32_t sqtt_base_value_hi(const uint64_t& /*base_addr*/) { return 0; }
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
// Builds the SQ_THREAD_TRACE_BUF0_SIZE value: SIZE is size_val shifted right
// by TT_BUFF_ALIGN_SHIFT, and BASE_HI is base_hi.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, BASE_HI, base_hi);
#else
  return 0;
#endif
}
|
||||
|
||||
// Stub: the size is ignored and the result is always 0.
// The parameter is left unnamed so the unused argument does not trigger
// -Wunused-parameter.
static uint32_t sqtt_buffer0_size_value(uint32_t /*size_val*/) { return 0; }
|
||||
|
||||
// Stub: the flag is ignored and the result is always 0.
// The parameter is left unnamed so the unused argument does not trigger
// -Wunused-parameter.
static uint32_t spi_sqg_event_ctl(bool /*enableSqgEvents*/) { return 0; }
|
||||
|
||||
// Stub: always returns 0.
static uint32_t sqtt_zero_size_value() {
  return 0;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
// Builds the SQ_THREAD_TRACE_CTRL value. MODE is SQ_TT_MODE_ON or
// SQ_TT_MODE_OFF depending on `on`; all other fields are fixed:
// HIWATER = 5, UTIL_TIMER = 1, RT_FREQ = 2, DRAW_EVENT_EN = 1,
// REG_STALL_EN = 1, SPI_STALL_EN = 1, SQ_STALL_EN = 1, REG_DROP_ON_STALL = 1,
// LOWATER_OFFSET = 4, AUTO_FLUSH_MODE = 1.
// Returns 0 when SQTT_PRIM_ENABLED is not set.
static uint32_t sqtt_ctrl_value(bool on) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RT_FREQ, 2) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, REG_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, REG_DROP_ON_STALL, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
#else
  return 0;
#endif
}
|
||||
|
||||
// SPM primitives
|
||||
// Returns the fixed 16-bit muxsel value 0xF0F0 used for the SPM timestamp.
static uint16_t spm_timestamp_muxsel() {
  const uint16_t timestamp_muxsel = 0xF0F0;
  return timestamp_muxsel;
}
|
||||
|
||||
// Bit masks for thread-trace status values.
enum ESQTT_STATUS_MASK {
  // Bit 24: checked to see whether a memory error was received.
  TT_CONTROL_UTC_ERR_MASK = 1 << 24,
  // TODO: Navi has 2 full bits on status2, one for each buffer
  TT_CONTROL_FULL_MASK = 0,
  // Low 29 bits.
  TT_WRITE_PTR_MASK = (1 << 29) - 1
};
|
||||
|
||||
// Returns a mask with only bit 25 set.
static uint32_t sqtt_busy_mask() {
  constexpr uint32_t kBusyBitIndex = 25;
  return uint32_t{1} << kBusyBitIndex;
}
|
||||
|
||||
// Returns a mask covering 8 consecutive bits starting at bit 2, i.e.
// bits 2..9 (0x3FC).
static uint32_t sqtt_pending_mask() {
  constexpr uint32_t kFirstPipeBit = 2;
  constexpr uint32_t kPipeCount = 8;
  // kPipeCount low bits set, then moved up to start at kFirstPipeBit.
  constexpr uint32_t kPipeBits = (uint32_t{1} << kPipeCount) - 1u;
  return kPipeBits << kFirstPipeBit;
}
|
||||
};
|
||||
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_PRIMITIVES_H_
|
||||
@@ -0,0 +1,217 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_BLOCKINFO_H_
|
||||
#define _GFX11_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
// To define GFX11 specific blocks info like GC caches blocks
|
||||
// All common with GFX9 blocks are inherited from GFX9 space
|
||||
// Enumeration of Gfx11 hardware counter blocks
|
||||
// Counter block identifiers. Values are assigned implicitly in declaration
// order starting at 0; entries that are commented out are not enumerated and
// do not consume a value.
enum CounterBlockId {
  CbCounterBlockId,
  CpcCounterBlockId,
  CpfCounterBlockId,
  CpgCounterBlockId,
  DbCounterBlockId,
  GdsCounterBlockId,
  GrbmCounterBlockId,
  GrbmSeCounterBlockId,
  // IaCounterBlockId, PaScCounterBlockId, PaSuCounterBlockId,
  SpiCounterBlockId,
  SqCounterBlockId,
  SqGsCounterBlockId,
  // SqVsCounterBlockId,
  SqPsCounterBlockId,
  SqHsCounterBlockId,
  SqCsCounterBlockId,
  SxCounterBlockId,
  TaCounterBlockId,
  // TcaCounterBlockId, TccCounterBlockId, TcsCounterBlockId,
  TdCounterBlockId,
  // VgtCounterBlockId, WdCounterBlockId,

  // MC blocks
  GceaCounterBlockId,
  // AtcCounterBlockId, AtcL2CounterBlockId, McVmL2CounterBlockId,
  RpbCounterBlockId,
  RmiCounterBlockId,
  Gl1aCounterBlockId,
  Gl1cCounterBlockId,
  Gl2aCounterBlockId,
  Gl2cCounterBlockId,
  GcrCounterBlockId,
  GusCounterBlockId,

  // SDMA blocks
  Sdma0CounterBlockId,
  Sdma1CounterBlockId,

  // UMC block
  UmcCounterBlockId,

  // Counters retrieved by KFD
  IommuV2CounterBlockId,
  KernelDriverCounterBlockId,

  CpPipeStatsCounterBlockId,
  TcpCounterBlockId,
  HwInfoCounterBlockId,

  // Inclusive bounds of the enumeration.
  FirstCounterBlockId = CbCounterBlockId,
  LastCounterBlockId = HwInfoCounterBlockId,
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
// SPM block IDs for the global blocks; values are fixed explicitly.
enum SpmGlobalBlockId {
  SPM_GLOBAL_BLOCK_NAME_CPG = 0,
  SPM_GLOBAL_BLOCK_NAME_CPC = 1,
  SPM_GLOBAL_BLOCK_NAME_CPF = 2,
  SPM_GLOBAL_BLOCK_NAME_GDS = 3,
  SPM_GLOBAL_BLOCK_NAME_TCC = 4,
  SPM_GLOBAL_BLOCK_NAME_TCA = 5,
  SPM_GLOBAL_BLOCK_NAME_IA  = 6,
  SPM_GLOBAL_BLOCK_NAME_TCS = 7,
};
|
||||
|
||||
// SPM block IDs for the shader-engine blocks; values are fixed explicitly.
enum SpmSeBlockId {
  SPM_SE_BLOCK_NAME_CB  = 0,
  SPM_SE_BLOCK_NAME_DB  = 1,
  SPM_SE_BLOCK_NAME_PA  = 2,
  SPM_SE_BLOCK_NAME_SX  = 3,
  SPM_SE_BLOCK_NAME_SC  = 4,
  SPM_SE_BLOCK_NAME_TA  = 5,
  SPM_SE_BLOCK_NAME_TD  = 6,
  SPM_SE_BLOCK_NAME_TCP = 7,
  SPM_SE_BLOCK_NAME_SPI = 8,
  SPM_SE_BLOCK_NAME_SQG = 9,
  SPM_SE_BLOCK_NAME_VGT = 10,
};
|
||||
|
||||
// Number of block instances
|
||||
// Per-block instance counts.
static constexpr uint32_t CbCounterBlockNumInstances = 4;
static constexpr uint32_t DbCounterBlockNumInstances = 4;
static constexpr uint32_t TaCounterBlockNumInstances = 16;
static constexpr uint32_t TdCounterBlockNumInstances = 16;
static constexpr uint32_t TcpCounterBlockNumInstances = 16;
static constexpr uint32_t TcaCounterBlockNumInstances = 2;
static constexpr uint32_t TccCounterBlockNumInstances = 16;
static constexpr uint32_t SdmaCounterBlockNumInstances = 2;
// Upper bounds on instance counts. MI100 has 8 SDMA instances.
static constexpr uint32_t SdmaCounterBlockMaxInstances = 8;
static constexpr uint32_t UmcCounterBlockMaxInstances = 32;
static constexpr uint32_t RmiCounterBlockNumInstances = 8;
static constexpr uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
// Number of counter registers per block.
static constexpr uint32_t CbCounterBlockNumCounters = 4;
static constexpr uint32_t CpcCounterBlockNumCounters = 2;
static constexpr uint32_t CpfCounterBlockNumCounters = 2;
static constexpr uint32_t CpgCounterBlockNumCounters = 2;
static constexpr uint32_t DbCounterBlockNumCounters = 4;
static constexpr uint32_t GdsCounterBlockNumCounters = 4;
static constexpr uint32_t GrbmCounterBlockNumCounters = 2;
static constexpr uint32_t GrbmSeCounterBlockNumCounters = 4;
static constexpr uint32_t IaCounterBlockNumCounters = 4;
static constexpr uint32_t PaSuCounterBlockNumCounters = 4;
static constexpr uint32_t PaScCounterBlockNumCounters = 8;
static constexpr uint32_t RlcCounterBlockNumCounters = 2;
static constexpr uint32_t SdmaCounterBlockNumCounters = 2;
static constexpr uint32_t UmcCounterBlockNumCounters = 5;
static constexpr uint32_t SpiCounterBlockNumCounters = 6;
static constexpr uint32_t SqCounterBlockNumCounters = 8;
static constexpr uint32_t SxCounterBlockNumCounters = 4;
static constexpr uint32_t TaCounterBlockNumCounters = 2;
static constexpr uint32_t TcaCounterBlockNumCounters = 4;
static constexpr uint32_t TccCounterBlockNumCounters = 4;
static constexpr uint32_t TcpCounterBlockNumCounters = 4;
static constexpr uint32_t TdCounterBlockNumCounters = 2;
static constexpr uint32_t VgtCounterBlockNumCounters = 4;
static constexpr uint32_t WdCounterBlockNumCounters = 4;
static constexpr uint32_t GceaCounterBlockNumCounters = 2;
static constexpr uint32_t AtcCounterBlockNumCounters = 4;
static constexpr uint32_t AtcL2CounterBlockNumCounters = 2;
static constexpr uint32_t McVmL2CounterBlockNumCounters = 8;
static constexpr uint32_t RpbCounterBlockNumCounters = 4;
static constexpr uint32_t RmiCounterBlockNumCounters = 4;
static constexpr uint32_t Gl1aCounterBlockNumCounters = 4;
static constexpr uint32_t Gl1cCounterBlockNumCounters = 4;
static constexpr uint32_t Gl2aCounterBlockNumCounters = 4;
static constexpr uint32_t Gl2cCounterBlockNumCounters = 4;
static constexpr uint32_t GcrCounterBlockNumCounters = 2;
static constexpr uint32_t GusCounterBlockNumCounters = 2;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent =
|
||||
CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK; // CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_MEC_THREAD3;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_OREO_Events_stalls;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
// static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
// static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_SPI_WAVE_STALLED_BY_SPI; // SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = 15; // SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SPI_PERF_BUSY; // SC_SC_SPI_EVENT;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQ_PERF_SEL_NONE2; // SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent =
|
||||
SX_PERF_SEL_DB3_4X2_DISCARD; // SX_PERF_SEL_DB3_SIZE;
|
||||
// static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
// static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
// static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
// static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
// static const uint32_t TdCounterBlockMaxEvent =
|
||||
// TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt; static const uint32_t VgtCounterBlockMaxEvent =
|
||||
// vgt_perf_sclk_te11_vld; static const uint32_t WdCounterBlockMaxEvent =
|
||||
// wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent =
|
||||
RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3; // RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = 61;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 24;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 84;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 108;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 259;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 155;
|
||||
static const uint32_t GusCounterBlockMaxEvent = 176;
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_BLOCKINFO_H_
|
||||
@@ -0,0 +1,441 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_BLOCKTABLE_H_
|
||||
#define _GFX11_BLOCKTABLE_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
|
||||
/*
|
||||
* CPC CORRECT
|
||||
*/
|
||||
// CPC counter register table, one row per counter. Each row lists, in order:
// the SELECT register, REG_32B_NULL, the LO result register and the HI result
// register.
static const CounterRegInfo CpcCounterRegAddr[] = {
    // Counter 0
    {REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_HI)},
    // Counter 1
    {REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* CPF CORRECT
|
||||
*/
|
||||
// CPF counter register table, one row per counter. Each row lists, in order:
// the SELECT register, REG_32B_NULL, the LO result register and the HI result
// register.
static const CounterRegInfo CpfCounterRegAddr[] = {
    // Counter 0
    {REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_HI)},
    // Counter 1
    {REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GDS CORRECT
|
||||
*/
|
||||
// Register addresses for the GDS performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo GdsCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_HI)},
};
|
||||
/*
|
||||
* GRBM CORRECT
|
||||
*/
|
||||
// Register addresses for the GRBM performance counters (two counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo GrbmCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_HI)},
};
|
||||
|
||||
/*
|
||||
* GRBM_SE CORRECT
|
||||
*/
|
||||
// Register addresses for the GRBM_SE performance counters.
// There is one entry per GRBM_SE<n> register set (n = 0..6). Every entry
// lists, in order: the SELECT register, REG_32B_NULL in the second slot,
// then the LO and HI registers.
static const CounterRegInfo GrbmSeCounterRegAddr[] = {
    // SE0
    {REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_HI)},
    // SE1
    {REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_HI)},
    // SE2
    {REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_HI)},
    // SE3
    {REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_HI)},
    // SE4
    {REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_HI)},
    // SE5
    {REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_HI)},
    // SE6
    {REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_HI)},
};
|
||||
|
||||
/*
|
||||
* SPI CORRECT
|
||||
*/
|
||||
// Register addresses for the SPI performance counters (six counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo SpiCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_HI)},
    // counter 4
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_HI)},
    // counter 5
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_LO), REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_HI)},
};
|
||||
/*
|
||||
* SQ CORRECT
|
||||
*/
|
||||
// Register addresses for the SQ performance counters (eight entries).
// Entry k pairs the even-numbered SELECT register (2 * k) with LO register k.
// The second slot always holds SQ_PERFCOUNTER_CTRL and the HI slot is
// REG_32B_NULL for every entry.
static const CounterRegInfo SqCounterRegAddr[] = {
    // entry 0: SELECT 0 / LO 0
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO), REG_32B_NULL},
    // entry 1: SELECT 2 / LO 1
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO), REG_32B_NULL},
    // entry 2: SELECT 4 / LO 2
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_LO), REG_32B_NULL},
    // entry 3: SELECT 6 / LO 3
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER3_LO), REG_32B_NULL},
    // entry 4: SELECT 8 / LO 4
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER8_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_LO), REG_32B_NULL},
    // entry 5: SELECT 10 / LO 5
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER10_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER5_LO), REG_32B_NULL},
    // entry 6: SELECT 12 / LO 6
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO), REG_32B_NULL},
    // entry 7: SELECT 14 / LO 7
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO), REG_32B_NULL},
};
|
||||
/*
|
||||
* SX CORRECT
|
||||
*/
|
||||
// Register addresses for the SX performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo SxCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GCEA
|
||||
*/
|
||||
// Register addresses for the GCEA performance counters (two counters).
// Each counter has its own CFG register; both entries share the same
// PERFCOUNTER_RSLT_CNTL register in the second slot and the same
// PERFCOUNTER_LO / PERFCOUNTER_HI registers.
static const CounterRegInfo GceaCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER0_CFG),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER1_CFG),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_HI)},
};
|
||||
|
||||
// Define GFX10-specific block table entries, such as the GC cache blocks
|
||||
/*
|
||||
* GCR CORRECT
|
||||
*/
|
||||
// Register addresses for the GCR performance counters (two counters).
// Every entry lists, in order: the SELECT register, GCR_GENERAL_CNTL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo GcrCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_SELECT), REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_HI)},
};
|
||||
|
||||
/*
|
||||
* TCP
|
||||
*/
|
||||
// Register addresses for the TCP performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo TcpCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_HI)},
};
|
||||
/*
|
||||
* GL1A CORRECT
|
||||
*/
|
||||
// Register addresses for the GL1A performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo Gl1aCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL1C CORRECT
|
||||
*/
|
||||
// Register addresses for the GL1C performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo Gl1cCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL2A CORRECT
|
||||
*/
|
||||
// Register addresses for the GL2A performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo Gl2aCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL2C CORRECT
|
||||
*/
|
||||
// Register addresses for the GL2C performance counters (four counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo Gl2cCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_HI)},
    // counter 2
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_HI)},
    // counter 3
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GUS ????? need more investigations
|
||||
*/
|
||||
// Register addresses for the GUS performance counters (two counters).
// Each counter has its own CFG register and both share PERFCOUNTER_RSLT_CNTL
// in the second slot.
// NOTE(review): entry 0 reads PERFCOUNTER_LO/HI while entry 1 reads
// PERFCOUNTER2_LO/HI. The GCEA table, which has the same CFG/RSLT_CNTL shape,
// uses the shared PERFCOUNTER_LO/HI pair for both counters - confirm the
// entry 1 registers against the register specification.
static const CounterRegInfo GusCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER0_CFG),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER1_CFG),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER2_HI)},
};
|
||||
|
||||
/*
|
||||
* TA CORRECT
|
||||
*/
|
||||
// Register addresses for the TA performance counters (two counters).
// Every entry lists, in order: the SELECT register, REG_32B_NULL in the
// second slot, then the LO and HI registers of the counter.
static const CounterRegInfo TaCounterRegAddr[] = {
    // counter 0
    {REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_HI)},
    // counter 1
    {REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_HI)},
};
|
||||
|
||||
// Counter block CPC
|
||||
// Descriptor of the CPC counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo CpcCounterBlockInfo = {
    "CPC",
    CpcCounterBlockId,
    1,
    CpcCounterBlockMaxEvent,
    CpcCounterBlockNumCounters,
    CpcCounterRegAddr,
    gfx11_cntx_prim::select_value_CPC_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
    nullptr /*CpcBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_GLOBAL_BLOCK_NAME_CPC,
};
|
||||
// Counter block CPF
|
||||
// Descriptor of the CPF counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo CpfCounterBlockInfo = {
    "CPF",
    CpfCounterBlockId,
    1,
    CpfCounterBlockMaxEvent,
    CpfCounterBlockNumCounters,
    CpfCounterRegAddr,
    gfx11_cntx_prim::select_value_CPF_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
    nullptr /*CpfBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_GLOBAL_BLOCK_NAME_CPF,
};
|
||||
// Counter block GDS
|
||||
// Descriptor of the GDS counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo GdsCounterBlockInfo = {
    "GDS",
    GdsCounterBlockId,
    1,
    GdsCounterBlockMaxEvent,
    GdsCounterBlockNumCounters,
    GdsCounterRegAddr,
    gfx11_cntx_prim::select_value_GDS_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
    nullptr /*GdsBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_GLOBAL_BLOCK_NAME_GDS,
};
|
||||
// Counter block GRBM
|
||||
// Descriptor of the GRBM counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags. The remaining members are not initialized here.
static const GpuBlockInfo GrbmCounterBlockInfo = {
    "GRBM",
    GrbmCounterBlockId,
    1,
    GrbmCounterBlockMaxEvent,
    GrbmCounterBlockNumCounters,
    GrbmCounterRegAddr,
    gfx11_cntx_prim::select_value_GRBM_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockGRBMAttr,
};
|
||||
// Counter block GRBMSE
|
||||
// Descriptor of the GRBM_SE counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags. The remaining members are not initialized here.
static const GpuBlockInfo GrbmSeCounterBlockInfo = {
    "GRBM_SE",
    GrbmSeCounterBlockId,
    1,
    GrbmSeCounterBlockMaxEvent,
    GrbmSeCounterBlockNumCounters,
    GrbmSeCounterRegAddr,
    gfx11_cntx_prim::select_value_GRBM_SE0_PERFCOUNTER_SELECT,
    CounterBlockDfltAttr,
};
|
||||
// Counter block SPI
|
||||
// Descriptor of the SPI counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo SpiCounterBlockInfo = {
    "SPI",
    SpiCounterBlockId,
    1,
    SpiCounterBlockMaxEvent,
    SpiCounterBlockNumCounters,
    SpiCounterRegAddr,
    gfx11_cntx_prim::select_value_SPI_PERFCOUNTER0_SELECT,
    CounterBlockSeAttr | CounterBlockSPIAttr,
    nullptr /*SpiBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_SE_BLOCK_NAME_SPI,
};
|
||||
// Counter block SQ
|
||||
// Descriptor of the SQ counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
// Unlike most blocks, the select callback is the hand-written
// gfx11_cntx_prim::sq_select_value rather than a select_value_* member.
static const GpuBlockInfo SqCounterBlockInfo = {
    "SQ",
    SqCounterBlockId,
    1,
    SqCounterBlockMaxEvent,
    SqCounterBlockNumCounters,
    SqCounterRegAddr,
    gfx11_cntx_prim::sq_select_value,
    CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr,
    nullptr,  // same slot the sibling descriptors label as the delay-info table
    SPM_SE_BLOCK_NAME_SQG,
};
|
||||
// Counter block SX
|
||||
// Descriptor of the SX counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo SxCounterBlockInfo = {
    "SX",
    SxCounterBlockId,
    1,
    SxCounterBlockMaxEvent,
    SxCounterBlockNumCounters,
    SxCounterRegAddr,
    gfx11_cntx_prim::select_value_SX_PERFCOUNTER0_SELECT,
    CounterBlockSeAttr | CounterBlockCleanAttr,
    nullptr /*SxBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_SE_BLOCK_NAME_SX,
};
|
||||
// Counter block GCEA
|
||||
// Descriptor of the GCEA counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags. The remaining members are not initialized here.
static const GpuBlockInfo GceaCounterBlockInfo = {
    "GCEA",
    GceaCounterBlockId,
    GceaCounterBlockNumInstances,
    GceaCounterBlockMaxEvent,
    GceaCounterBlockNumCounters,
    GceaCounterRegAddr,
    gfx11_cntx_prim::mc_select_value_GCEA_PERFCOUNTER0_CFG,
    CounterBlockMcAttr,
};
|
||||
// Counter block TCP
|
||||
// Descriptor of the TCP counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count (literal 16), max event, number of counters, register table,
// select-value callback, attribute flags. The remaining members are not
// initialized here.
static const GpuBlockInfo TcpCounterBlockInfo = {
    "TCP",
    TcpCounterBlockId,
    16,
    TcpCounterBlockMaxEvent,
    TcpCounterBlockNumCounters,
    TcpCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr,
};
|
||||
// Counter block GL1A
|
||||
// Descriptor of the GL1A counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count (literal 4), max event, number of counters, register table,
// select-value callback, attribute flags. The remaining members are not
// initialized here.
// NOTE(review): the select callback is the TCP one, not a GL1A-specific one;
// presumably the select register layout is shared - confirm.
static const GpuBlockInfo Gl1aCounterBlockInfo = {
    "GL1A",
    Gl1aCounterBlockId,
    4,
    Gl1aCounterBlockMaxEvent,
    Gl1aCounterBlockNumCounters,
    Gl1aCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr,
};
|
||||
// Counter block GL1C
|
||||
// Descriptor of the GL1C counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count (literal 4), max event, number of counters, register table,
// select-value callback, attribute flags. The remaining members are not
// initialized here.
// NOTE(review): the select callback is the TCP one, not a GL1C-specific one;
// presumably the select register layout is shared - confirm.
static const GpuBlockInfo Gl1cCounterBlockInfo = {
    "GL1C",
    Gl1cCounterBlockId,
    4,
    Gl1cCounterBlockMaxEvent,
    Gl1cCounterBlockNumCounters,
    Gl1cCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr,
};
|
||||
// Counter block GL2A
|
||||
// Descriptor of the GL2A counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count (literal 32), max event, number of counters, register table,
// select-value callback, attribute flags. The remaining members are not
// initialized here.
// NOTE(review): the select callback is the TCP one, not a GL2A-specific one;
// presumably the select register layout is shared - confirm.
static const GpuBlockInfo Gl2aCounterBlockInfo = {
    "GL2A",
    Gl2aCounterBlockId,
    32,
    Gl2aCounterBlockMaxEvent,
    Gl2aCounterBlockNumCounters,
    Gl2aCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockTcAttr,
};
|
||||
// Counter block GL2C
|
||||
// Descriptor of the GL2C counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count (literal 32), max event, number of counters, register table,
// select-value callback, attribute flags. The remaining members are not
// initialized here.
// NOTE(review): the select callback is the TCP one, not a GL2C-specific one;
// presumably the select register layout is shared - confirm.
static const GpuBlockInfo Gl2cCounterBlockInfo = {
    "GL2C",
    Gl2cCounterBlockId,
    32,
    Gl2cCounterBlockMaxEvent,
    Gl2cCounterBlockNumCounters,
    Gl2cCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockTcAttr,
};
|
||||
// Counter block GCR
|
||||
// Descriptor of the GCR counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags. The remaining members are not initialized here.
// NOTE(review): the select callback is the TCP one, not a GCR-specific one;
// presumably the select register layout is shared - confirm.
static const GpuBlockInfo GcrCounterBlockInfo = {
    "GCR",
    GcrCounterBlockId,
    1,
    GcrCounterBlockMaxEvent,
    GcrCounterBlockNumCounters,
    GcrCounterRegAddr,
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockTcAttr,
};
|
||||
// Counter block GUS
|
||||
// Descriptor of the GUS counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags. The remaining members are not initialized here.
// NOTE(review): the select callback is built for RPB_PERFCOUNTER0_CFG, not a
// GUS register; presumably the CFG layout is shared - confirm.
static const GpuBlockInfo GusCounterBlockInfo = {
    "GUS",
    GusCounterBlockId,
    1,
    GusCounterBlockMaxEvent,
    GusCounterBlockNumCounters,
    GusCounterRegAddr,
    gfx11_cntx_prim::mc_select_value_RPB_PERFCOUNTER0_CFG,
    CounterBlockGusAttr,
};
|
||||
// Counter block TA
|
||||
// Descriptor of the TA counter block.
// Initializers, in order (meanings inferred from the initializer names -
// confirm against the GpuBlockInfo declaration): name, block id, instance
// count, max event, number of counters, register table, select-value
// callback, attribute flags, delay-info table, SPM block name.
static const GpuBlockInfo TaCounterBlockInfo = {
    "TA",
    TaCounterBlockId,
    TaCounterBlockNumInstances,
    235 /*TaCounterBlockMaxEvent*/,  // literal used in place of the named constant
    TaCounterBlockNumCounters,
    TaCounterRegAddr,
    gfx11_cntx_prim::select_value_TA_PERFCOUNTER0_SELECT,
    CounterBlockSeAttr | CounterBlockTcAttr,
    nullptr /*TaBlockDelayInfo*/,  // no delay table is supplied for this block
    SPM_SE_BLOCK_NAME_TA,
};
|
||||
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,699 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_PRIMITIVES_H_
|
||||
#define _GFX11_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
// Taken from gfx11_mask.h
|
||||
// GCR_CNTL
|
||||
#define GCR_CNTL__SEQ_FORWARD 0x00010000L
|
||||
#define GCR_CNTL__SEQ_MASK 0x00030000L
|
||||
#define GCR_CNTL__GL2_WB_MASK 0x00008000L
|
||||
|
||||
// Taken from gfx11_pm4defs.h
|
||||
#define COPY_DATA_SEL_REG 0 ///< Mem-mapped register
|
||||
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 ///< Privileged memory performance counter
|
||||
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
// Expands to a capture-less lambda that takes a counter descriptor and
// returns the value for `reg_name` with only the PERF_SEL field set to the
// counter id.
#define select_value(reg_name)                                       \
  [](const counter_des_t& counter_des) -> uint32_t {                 \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id);   \
  }
|
||||
// Expands to a capture-less lambda that takes a counter descriptor and
// returns the value for `reg_name` with three fields set: PERF_SEL to the
// counter id, PERF_MODE to PERFMON_COUNTER_MODE_ACCUM and ENABLE to 1.
#define mc_select_value(reg_name)                                              \
  [](const counter_des_t& counter_des) -> uint32_t {                           \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |            \
           SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \
           SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                            \
  }
|
||||
|
||||
#define SQTT_PRIM_ENABLED 1
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
|
||||
class gfx11_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 11;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, regGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL);
|
||||
|
||||
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
|
||||
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL);
|
||||
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL2);
|
||||
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR = Register(0xD9E1);
|
||||
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_MASK);
|
||||
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_TOKEN_MASK);
|
||||
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE);
|
||||
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_SIZE);
|
||||
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_CTRL);
|
||||
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
|
||||
static const uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
|
||||
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
|
||||
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_DROPPED_CNTR);
|
||||
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_WPTR);
|
||||
// SQ_THREAD_TRACE_STATUS register with UCONFIG_SPACE_START subtracted from
// its offset, computed at compile time by an immediately-invoked lambda.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = [] {
  Register status = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
  status.offset -= UCONFIG_SPACE_START;
  return status;
}();
|
||||
static const uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
static constexpr Register GUS_PERFCOUNTER_RSLT_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL);
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_CNTL);
|
||||
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, regRLC_SPM_MC_CNTL);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_LO);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_HI);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_SIZE);
|
||||
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE);
|
||||
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};
|
||||
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_ADDR);
|
||||
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_DATA);
|
||||
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
|
||||
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);
|
||||
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
|
||||
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_0);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_1);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_2);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// Returns REG_32B_NULL for every index; the argument is not used.
static Register sqtt_perfcounter_addr(uint32_t index) {
  return REG_32B_NULL;
}
|
||||
|
||||
// 16-bit mux selector word, viewable either as a raw value or as three
// bit-fields (6 + 5 + 5 bits).
union mux_info_t {
  uint16_t data;  // raw 16-bit view
  struct {
    uint16_t counter : 6;   // counter field
    uint16_t block : 5;     // block field
    uint16_t instance : 5;  // instance field
  } gfx;
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = SqCounterBlockId;
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the low 32 bits of a 64-bit value.
static uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v);
}
|
||||
// Returns the high 32 bits of a 64-bit value.
static uint32_t High32(const uint64_t& v) {
  return static_cast<uint32_t>(v >> 32);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM BlockInstance indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, wgp_index << 2);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_index << 2) | (instance_index << 1)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to reset counters
|
||||
static uint32_t cp_perfmon_cntl_reset_value() {
  // The reset value has no field set.
  return 0u;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to sample counters for reading (PERFMON_SAMPLE_ENABLE set)
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t cp_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return cp_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t cp_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return cp_perfcount_enable;
|
||||
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_cntr_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_cntr_sel;
|
||||
}
|
||||
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_cntr_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_cntr_sel;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx11
|
||||
static uint32_t sq_mask_value(const counter_des_t&) {
  // The descriptor is ignored; every mask bit is set.
  return ~uint32_t{0};
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_cntr_ctrl{0};
|
||||
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} /* else if (block_id == SqVsCounterBlockId) {
|
||||
sq_cntr_ctrl =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} */
|
||||
else if (block_id == SqPsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  // Abort when both the SQ and the TC attribute bits are present in the same counter set.
  const uint32_t sq_and_tc = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & sq_and_tc) == sq_and_tc) {
    abort();
  }
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
static uint32_t sq_control2_enable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, true) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
static uint32_t sq_control2_disable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, false) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  // The config value is the counter's index taken from the descriptor.
  const uint32_t channel = counter_des.index;
  return channel;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
static uint32_t mc_reset_value() {
  // Result-control value with the CLEAR_ALL mask.
  return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
}
|
||||
static uint32_t mc_start_value() {
  // Result-control value with the ENABLE_ANY mask.
  return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
}
|
||||
|
||||
// Counter Select Register value templates
|
||||
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value(SX_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GDS_PERFCOUNTER0_SELECT =
|
||||
select_value(GDS_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
// Builds a mux entry from a counter descriptor: counter index, SPM block id, block instance.
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t mux{0};
  mux.gfx.counter = counter_des.index;
  mux.gfx.block = counter_des.block_info->spm_block_id;
  mux.gfx.instance = counter_des.block_des.index;
  return mux;
}
|
||||
// Builds a mux entry from explicit counter, block and instance numbers.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t mux{0};
  mux.gfx.counter = counter;
  mux.gfx.block = block;
  mux.gfx.instance = instance;
  return mux;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// GUS primitives
|
||||
static uint32_t gus_disable_clear_value() {
|
||||
uint32_t gus_perfcounter_rslt_cntl =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, CLEAR_ALL, 0x1);
|
||||
return gus_perfcounter_rslt_cntl;
|
||||
}
|
||||
|
||||
static uint32_t gus_start_value() {
|
||||
uint32_t gus_perfcounter_rslt_cntl =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, ENABLE_ANY, 0x1);
|
||||
return gus_perfcounter_rslt_cntl;
|
||||
}
|
||||
|
||||
static uint32_t gus_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t gus0_perfcounter_cfg =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, ENABLE, 0x1);
|
||||
return gus0_perfcounter_cfg;
|
||||
}
|
||||
|
||||
static uint32_t gus_stop_value() {
  // The stop value is an all-zero result-control word.
  return uint32_t{0};
}
|
||||
|
||||
// SDMA primitives
|
||||
static uint32_t sdma_disable_clear_value() {
  // Stub in this implementation: always zero.
  return uint32_t{0};
}
|
||||
|
||||
static uint32_t sdma_enable_value() {
  // Stub in this implementation: always zero.
  return uint32_t{0};
}
|
||||
|
||||
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  // Stub: the descriptor is ignored and the result is always zero.
  (void)counter_des;
  return uint32_t{0};
}
|
||||
|
||||
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  // Stub: the descriptor is ignored and the result is always zero.
  (void)counter_des;
  return uint32_t{0};
}
|
||||
|
||||
// SPM trace routines
|
||||
static uint32_t rlc_spm_mc_cntl_value() {
|
||||
uint32_t rlc_spm_mc_cntl = SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
|
||||
return rlc_spm_mc_cntl;
|
||||
}
|
||||
|
||||
static uint32_t cp_perfmon_cntl_spm_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
static uint32_t cp_perfmon_cntl_spm_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Stub in this implementation: every argument is ignored and zero is returned.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  (void)value;
  (void)counter_des;
  (void)block;
  (void)hi;
  return uint32_t{0};
}
|
||||
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
|
||||
uint32_t value =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
|
||||
return value;
|
||||
}
|
||||
|
||||
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
|
||||
const uint32_t& se_count) {
|
||||
const uint32_t global_nlines = global_count;
|
||||
const uint32_t se_nlines = se_count;
|
||||
const uint32_t segment_size = (global_nlines + (4 * se_nlines));
|
||||
uint32_t value =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, TOTAL_NUM_SEGMENT, segment_size) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_SEGMENT, global_nlines);
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
|
||||
return value;
|
||||
}
|
||||
|
||||
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  // Stub: the SE count is ignored and the result is always zero.
  (void)se_count;
  return uint32_t{0};
}
|
||||
|
||||
// Enable all of the WTYPEs
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
#if SQTT_PRIM_ENABLED
  // vmid is not used by this implementation.
  uint32_t mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd);
  mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp);
  mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0);
  mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE, 1 << 6);
  mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, EXCLUDE_NONDETAIL_SHADERDATA, 1);
  return mask;
#else
  // Thread-trace primitives are compiled out: the mask is zero.
  return 0;
#endif
}
|
||||
|
||||
// Bit used with the REG_INCLUDE field.
static constexpr uint32_t SQTT_TOKEN_REG_USERDATA = 1u << 3;
// Token bits used to build the TOKEN_EXCLUDE field.
static constexpr uint32_t SQTT_TOKEN_VALU = 1u << 2;
static constexpr uint32_t SQTT_TOKEN_WVRDY = 1u << 3;
static constexpr uint32_t SQTT_TOKEN_WAVE = 1u << 4;
static constexpr uint32_t SQTT_TOKEN_REG = 1u << 5;
static constexpr uint32_t SQTT_TOKEN_IMMED = 1u << 6;
static constexpr uint32_t SQTT_TOKEN_INST = 1u << 8;
|
||||
|
||||
// not supported in gfx11
|
||||
static uint32_t sqtt_perf_mask_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
static uint32_t sqtt_token_mask_on_value() {
#if SQTT_PRIM_ENABLED
  uint32_t token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3);
  token_mask |=
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA);
  // XOR with 0x7FF flips the listed token bits, so TOKEN_EXCLUDE holds every other low bit.
  token_mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                                   (SQTT_TOKEN_VALU | SQTT_TOKEN_WVRDY | SQTT_TOKEN_WAVE |
                                    SQTT_TOKEN_REG | SQTT_TOKEN_IMMED | SQTT_TOKEN_INST) ^
                                       0x7FF);
  return token_mask;
#else
  // Thread-trace primitives are compiled out: the mask is zero.
  return 0;
#endif
}
|
||||
|
||||
static uint32_t sqtt_token_mask_off_value() {
#if SQTT_PRIM_ENABLED
  // REG_EXCLUDE and INST_EXCLUDE are set to 0x3 and TOKEN_EXCLUDE to 0x7FF.
  uint32_t token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3);
  token_mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3);
  token_mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
  return token_mask;
#else
  // Thread-trace primitives are compiled out: the mask is zero.
  return 0;
#endif
}
|
||||
|
||||
static uint32_t sqtt_token_mask_occupancy_value() {
#if SQTT_PRIM_ENABLED
  uint32_t token_mask =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA);
  token_mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3);
  // XOR with 0x7FF flips the WAVE and REG bits, so TOKEN_EXCLUDE holds every other low bit.
  token_mask |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                                   (SQTT_TOKEN_WAVE | SQTT_TOKEN_REG) ^ 0x7FF);
  return token_mask;
#else
  // Thread-trace primitives are compiled out: the mask is zero.
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx11
|
||||
static uint32_t sqtt_token_mask2_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Check if stalling is supported
|
||||
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
  // Both register values are ignored; this implementation always reports false.
  (void)mask_val;
  (void)token_mask_val;
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
static uint32_t sqtt_mode_off_value() {
  // Always zero in this implementation.
  return uint32_t{0};
}
|
||||
// Thread trace mode ON value
|
||||
static uint32_t sqtt_mode_on_value() {
  // Always zero in this implementation.
  return uint32_t{0};
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
#if SQTT_PRIM_ENABLED
  // BASE_LO receives the low 32 bits of the address shifted right by TT_BUFF_ALIGN_SHIFT.
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE, BASE_LO,
                            Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
#else
  // Thread-trace primitives are compiled out: the value is zero.
  return 0;
#endif
}
|
||||
|
||||
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
  // The address is ignored and the result is always zero.
  (void)base_addr;
  return uint32_t{0};
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
  // SIZE receives size_val shifted right by TT_BUFF_ALIGN_SHIFT; BASE_HI is passed through.
  uint32_t buf0_size =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT);
  buf0_size |= SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, BASE_HI, base_hi);
  return buf0_size;
#else
  // Thread-trace primitives are compiled out: the value is zero.
  return 0;
#endif
}
|
||||
|
||||
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  // The size is ignored and the result is always zero.
  (void)size_val;
  return uint32_t{0};
}
|
||||
|
||||
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  // The flag is ignored and the result is always zero.
  (void)enableSqgEvents;
  return uint32_t{0};
}
|
||||
|
||||
static uint32_t sqtt_zero_size_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
static uint32_t sqtt_ctrl_value(bool on) {
|
||||
uint32_t sq_thread_trace_ctrl =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RT_FREQ, 2) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
|
||||
return sq_thread_trace_ctrl;
|
||||
}
|
||||
|
||||
// SPM primitives
|
||||
static uint16_t spm_timestamp_muxsel() {
  // Fixed 16-bit mux select value used for the timestamp.
  return static_cast<uint16_t>(0xF0F0);
}
|
||||
|
||||
enum ESQTT_STATUS_MASK {
  // Mask to check whether a memory error was received (bit 24)
  TT_CONTROL_UTC_ERR_MASK = 0x01000000,
  // TODO: Navi has 2 full bits on status2, one for each buffer
  TT_CONTROL_FULL_MASK = 0,
  // Low 29 bits
  TT_WRITE_PTR_MASK = 0x1FFFFFFF
};
|
||||
|
||||
// Returns a mask with only bit 25 set.
static uint32_t sqtt_busy_mask() {
  constexpr uint32_t kBusyBit = 25;
  return uint32_t{1} << kBusyBit;
}
|
||||
|
||||
// Returns a mask of NUM_PIPES consecutive bits starting at bit PIPE_START (bits 2..9).
static uint32_t sqtt_pending_mask() {
  constexpr uint32_t kPipeStart = 2;
  constexpr uint32_t kNumPipes = 8;
  constexpr uint32_t kPipeBits = (1u << kNumPipes) - 1u;
  return kPipeBits << kPipeStart;
}
|
||||
};
|
||||
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_PRIMITIVES_H_
|
||||
@@ -0,0 +1,285 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_BLOCKINFO_H_
|
||||
#define _GFX12_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
#define __BLOCK_ID(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
|
||||
// Private PMC Counter BlockId is defined here
|
||||
// Public PMC Counter BlockId is defined in hsa_ven_amd_aqlprofile.h
|
||||
enum CounterBlockId {
|
||||
__BLOCK_ID(RLC) = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
|
||||
__BLOCK_ID(CPG),
|
||||
__BLOCK_ID(GRBMH),
|
||||
__BLOCK_ID(GRBMA),
|
||||
__BLOCK_ID(SQG),
|
||||
|
||||
// mem blocks
|
||||
__BLOCK_ID(CHA),
|
||||
__BLOCK_ID(CHC),
|
||||
__BLOCK_ID(GLARBA),
|
||||
__BLOCK_ID(GLARBC),
|
||||
__BLOCK_ID(GC_CANE),
|
||||
__BLOCK_ID(GC_FFBM),
|
||||
__BLOCK_ID(GC_NHTTLB),
|
||||
__BLOCK_ID(GC_L2TLB),
|
||||
__BLOCK_ID(GC_UTCL1),
|
||||
__BLOCK_ID(GC_UTCL2),
|
||||
__BLOCK_ID(GC_VML2),
|
||||
|
||||
__BLOCK_ID(GCEA_SE),
|
||||
|
||||
// New SDMA Perfmon interface, comparing to the original SDMA PerfCnt. gfx12
|
||||
// supports both and they should provide the same counter events. We might
|
||||
// remove SDMA PerfCnt support in aqlprofile in the future since it is easier
|
||||
// to program Perfmon
|
||||
__BLOCK_ID(SDMA_PM),
|
||||
|
||||
// Counters retrieved by KFD
|
||||
IommuV2CounterBlockId,
|
||||
KernelDriverCounterBlockId,
|
||||
|
||||
CpPipeStatsCounterBlockId,
|
||||
HwInfoCounterBlockId,
|
||||
|
||||
LastCounterBlockId = HwInfoCounterBlockId,
|
||||
};
|
||||
|
||||
// Define SPM Counter BlockId
|
||||
enum SpmGlobalBlockId {
  // Ids are consecutive from zero; FIRST and LAST alias the first and last entries.
  SPM_GLOBAL_BLOCK_NAME_FIRST = 0,
  SPM_GLOBAL_BLOCK_NAME_CPG = SPM_GLOBAL_BLOCK_NAME_FIRST,
  SPM_GLOBAL_BLOCK_NAME_CPC,
  SPM_GLOBAL_BLOCK_NAME_CPF,
  SPM_GLOBAL_BLOCK_NAME_GDS,
  SPM_GLOBAL_BLOCK_NAME_GCR,
  SPM_GLOBAL_BLOCK_NAME_PH,
  SPM_GLOBAL_BLOCK_NAME_GE1,
  SPM_GLOBAL_BLOCK_NAME_GL2A,
  SPM_GLOBAL_BLOCK_NAME_GL2C,
  SPM_GLOBAL_BLOCK_NAME_SDMA,
  SPM_GLOBAL_BLOCK_NAME_GUS,
  SPM_GLOBAL_BLOCK_NAME_EA,
  SPM_GLOBAL_BLOCK_NAME_CHA,
  SPM_GLOBAL_BLOCK_NAME_CHC,
  SPM_GLOBAL_BLOCK_NAME_CHCG,
  SPM_GLOBAL_BLOCK_NAME_ATCL2,
  SPM_GLOBAL_BLOCK_NAME_VML2,
  SPM_GLOBAL_BLOCK_NAME_GE2_SE,
  SPM_GLOBAL_BLOCK_NAME_GE2_DIST,
  SPM_GLOBAL_BLOCK_NAME_FFBM,
  SPM_GLOBAL_BLOCK_NAME_CANE,
  SPM_GLOBAL_BLOCK_NAME_LAST = SPM_GLOBAL_BLOCK_NAME_CANE,
};
|
||||
|
||||
enum SpmSeBlockId {
  // Ids are consecutive from zero; FIRST and LAST alias the first and last entries.
  SPM_SE_BLOCK_NAME_FIRST = 0,
  SPM_SE_BLOCK_NAME_CB = SPM_SE_BLOCK_NAME_FIRST,
  SPM_SE_BLOCK_NAME_DB,
  SPM_SE_BLOCK_NAME_PA,
  SPM_SE_BLOCK_NAME_SX,
  SPM_SE_BLOCK_NAME_SC,
  SPM_SE_BLOCK_NAME_TA,
  SPM_SE_BLOCK_NAME_TD,
  SPM_SE_BLOCK_NAME_TCP,
  SPM_SE_BLOCK_NAME_SPI,
  SPM_SE_BLOCK_NAME_SQG,
  SPM_SE_BLOCK_NAME_GL1A,
  SPM_SE_BLOCK_NAME_RMI,
  SPM_SE_BLOCK_NAME_GL1C,
  SPM_SE_BLOCK_NAME_GL1CG,
  SPM_SE_BLOCK_NAME_CBR,
  SPM_SE_BLOCK_NAME_DBR,
  SPM_SE_BLOCK_NAME_GL1H,
  SPM_SE_BLOCK_NAME_SQC,
  SPM_SE_BLOCK_NAME_PC,
  SPM_SE_BLOCK_NAME_EA,
  SPM_SE_BLOCK_NAME_GE,
  SPM_SE_BLOCK_NAME_GL2A,
  SPM_SE_BLOCK_NAME_GL2C,
  SPM_SE_BLOCK_NAME_WGS,
  SPM_SE_BLOCK_NAME_GL1XA,
  SPM_SE_BLOCK_NAME_GL1XC,
  SPM_SE_BLOCK_NAME_UTCL1,
  SPM_SE_BLOCK_NAME_LAST = SPM_SE_BLOCK_NAME_UTCL1,
};
|
||||
|
||||
namespace gfx1201 {
|
||||
// IP versions for Radeon RX 9070
|
||||
// ip_block : gc_12_0_1
|
||||
// ip_block : athub_4_1_0
|
||||
// ip_block : umc_8_14_0
|
||||
// ip_block : df_4_15_1
|
||||
// ip_block : pcie_6_1_0
|
||||
|
||||
// Number of block instances
|
||||
// Reference: global_features.h (from gfxip header file package)
|
||||
// rspm_config.pm (from design configuration files)
|
||||
// The following default values are generated from Radeon RX 9070, the first product of the
|
||||
// RDNA 4 lineup. It could change for other products, and the change will be made in
|
||||
// [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static constexpr uint32_t GrbmCounterBlockNumInstances = 1;
static constexpr uint32_t RlcCounterBlockNumInstances = 1;
static constexpr uint32_t CpgCounterBlockNumInstances = 1;
static constexpr uint32_t CpcCounterBlockNumInstances = 1;
static constexpr uint32_t CpfCounterBlockNumInstances = 1;
static constexpr uint32_t GcrCounterBlockNumInstances = 1;
static constexpr uint32_t Ge1CounterBlockNumInstances = 1;
static constexpr uint32_t Gl2aCounterBlockNumInstances = 4;   // GFX_CPWD__NUM_GL2A_PER_CPWD
static constexpr uint32_t Gl2cCounterBlockNumInstances = 32;  // GFX_CPWD__NUM_GL2C_PER_CPWD
static constexpr uint32_t GceaCounterBlockNumInstances = 36;  // GFX_CPWD__NUM_EA_PER_CPWD
static constexpr uint32_t ChaCounterBlockNumInstances = 1;
static constexpr uint32_t ChcCounterBlockNumInstances = 4;  // GFX_CPWD__NUM_CHC
static constexpr uint32_t Ge2DistCounterBlockNumInstances = 1;
static constexpr uint32_t SdmaCounterBlockNumInstances = 2;  // GFX_CPWD__NUM_SDMA_PER_CPWD
static constexpr uint32_t GcVml2CounterBlockNumInstances = 1;
static constexpr uint32_t GcMcVml2CounterBlockNumInstances = 1;
static constexpr uint32_t GcUtcl2CounterBlockNumInstances = 1;
static constexpr uint32_t GrbmhCounterBlockNumInstances = 1;
static constexpr uint32_t CbCounterBlockNumInstances = 2;  // GFX_SE__NUM_RB_PER_SA
static constexpr uint32_t DbCounterBlockNumInstances = 2;  // GFX_SE__NUM_RB_PER_SA
static constexpr uint32_t SuCounterBlockNumInstances = 1;  // GFX_SE__NUM_PA_PER_SE
static constexpr uint32_t SxCounterBlockNumInstances = 1;
static constexpr uint32_t ScCounterBlockNumInstances = 2;   // GFX_SE__NUM_PACKER_PER_SA
static constexpr uint32_t TaCounterBlockNumInstances = 2;   // GFX_SE__NUM_ROWS_PER_WGP
static constexpr uint32_t TdCounterBlockNumInstances = 2;   // GFX_SE__NUM_ROWS_PER_WGP
static constexpr uint32_t TcpCounterBlockNumInstances = 2;  // GFX_SE__NUM_ROWS_PER_WGP
static constexpr uint32_t SpiCounterBlockNumInstances = 1;
static constexpr uint32_t SqgCounterBlockNumInstances = 1;
static constexpr uint32_t Gl1aCounterBlockNumInstances = 1;
static constexpr uint32_t RmiCounterBlockNumInstances = 2;   // GFX_SE__NUM_RMI_PER_SA
static constexpr uint32_t Gl1cCounterBlockNumInstances = 4;  // GFX_SE__NUM_GL1C_PER_SA
static constexpr uint32_t SqcCounterBlockNumInstances = 1;
static constexpr uint32_t PcCounterBlockNumInstances = 1;
static constexpr uint32_t GceaSeCounterBlockNumInstances = 4;
static constexpr uint32_t GeCounterBlockNumInstances = 1;
static constexpr uint32_t WgsCounterBlockNumInstances = 1;
static constexpr uint32_t Gl1xaCounterBlockNumInstances = 1;
static constexpr uint32_t Gl1xcCounterBlockNumInstances = 4;  // GFX_SE__NUM_GL1C_PER_SA
static constexpr uint32_t GcUtcl1CounterBlockNumInstances = 2;

// Upper bounds on instance counts
static constexpr uint32_t SdmaCounterBlockMaxInstances = 8;
static constexpr uint32_t UmcCounterBlockMaxInstances = 32;
|
||||
|
||||
// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra
|
||||
// caution Reference: chip_offset_byte.h (from gfxip header file package) The following default
|
||||
// values are generated from Radeon RX 9070, the first product of the RDNA 4 lineup. It could change
|
||||
// for other products, and the change will be made in [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static constexpr uint32_t GrbmCounterBlockNumCounters = 2;
static constexpr uint32_t RlcCounterBlockNumCounters = 2;
static constexpr uint32_t CpgCounterBlockNumCounters = 2;
static constexpr uint32_t CpcCounterBlockNumCounters = 2;
static constexpr uint32_t CpfCounterBlockNumCounters = 2;
static constexpr uint32_t GcrCounterBlockNumCounters = 2;
static constexpr uint32_t PhCounterBlockNumCounters = 8;
static constexpr uint32_t Ge1CounterBlockNumCounters = 4;
static constexpr uint32_t Gl2aCounterBlockNumCounters = 4;
static constexpr uint32_t Gl2cCounterBlockNumCounters = 4;
static constexpr uint32_t GceaCounterBlockNumCounters = 2;
static constexpr uint32_t ChaCounterBlockNumCounters = 4;
static constexpr uint32_t ChcCounterBlockNumCounters = 4;
static constexpr uint32_t Ge2DistCounterBlockNumCounters = 4;
static constexpr uint32_t SdmaCounterBlockNumCounters = 2;
static constexpr uint32_t GcVml2CounterBlockNumCounters = 2;
static constexpr uint32_t GcMcVml2CounterBlockNumCounters = 1;
static constexpr uint32_t GcUtcl2CounterBlockNumCounters = 1;
static constexpr uint32_t GrbmhCounterBlockNumCounters = 2;
static constexpr uint32_t CbCounterBlockNumCounters = 4;
static constexpr uint32_t DbCounterBlockNumCounters = 4;
static constexpr uint32_t SuCounterBlockNumCounters = 4;
static constexpr uint32_t SxCounterBlockNumCounters = 4;
static constexpr uint32_t PaScCounterBlockNumCounters = 8;
static constexpr uint32_t TaCounterBlockNumCounters = 2;
static constexpr uint32_t TdCounterBlockNumCounters = 2;
static constexpr uint32_t TcpCounterBlockNumCounters = 4;
static constexpr uint32_t SpiCounterBlockNumCounters = 6;
static constexpr uint32_t SqgCounterBlockNumCounters = 8;
static constexpr uint32_t Gl1aCounterBlockNumCounters = 4;
static constexpr uint32_t RmiCounterBlockNumCounters = 4;
static constexpr uint32_t Gl1cCounterBlockNumCounters = 4;
static constexpr uint32_t SqcCounterBlockNumCounters = 16;
static constexpr uint32_t PcCounterBlockNumCounters = 4;
static constexpr uint32_t GceaSeCounterBlockNumCounters = 2;
static constexpr uint32_t GeCounterBlockNumCounters = 4;
static constexpr uint32_t WgsCounterBlockNumCounters = 2;
static constexpr uint32_t Gl1xaCounterBlockNumCounters = 4;
static constexpr uint32_t Gl1xcCounterBlockNumCounters = 4;
static constexpr uint32_t GcUtcl1CounterBlockNumCounters = 4;
|
||||
|
||||
// Block counters max event value - Auto-generated from chip_enum.h, edit with extra caution
|
||||
// Reference: chip_enum.h (from gfxip header file package)
|
||||
// The following default values are generated from Radeon RX 9070, the first product of the
|
||||
// RDNA 4 lineup. It could change for other products, and the change will be made in
|
||||
// [PRODUCT_NAME]_factory.h
|
||||
//
|
||||
static constexpr uint32_t GrbmCounterBlockMaxEvent = 51;
static constexpr uint32_t RlcCounterBlockMaxEvent = 6;
static constexpr uint32_t CpgCounterBlockMaxEvent = 30;
static constexpr uint32_t CpcCounterBlockMaxEvent = 55;
static constexpr uint32_t CpfCounterBlockMaxEvent = 4;
static constexpr uint32_t GcrCounterBlockMaxEvent = 151;
static constexpr uint32_t PhCounterBlockMaxEvent = 1023;
static constexpr uint32_t Ge1CounterBlockMaxEvent = 54;
static constexpr uint32_t Gl2aCounterBlockMaxEvent = 114;
static constexpr uint32_t Gl2cCounterBlockMaxEvent = 249;
static constexpr uint32_t GceaCounterBlockMaxEvent = 32;
static constexpr uint32_t ChaCounterBlockMaxEvent = 25;
static constexpr uint32_t ChcCounterBlockMaxEvent = 94;
static constexpr uint32_t Ge2DistCounterBlockMaxEvent = 188;
static constexpr uint32_t SdmaCounterBlockMaxEvent = 125;
static constexpr uint32_t GcVml2CounterBlockMaxEvent = 90;
// This is handled by GCMC_VM_L2_PERFCOUNTER0_CFG
static constexpr uint32_t GcMcVml2CounterBlockMaxEvent = 1;
static constexpr uint32_t GcUtcl2CounterBlockMaxEvent = 36;
static constexpr uint32_t GrbmhCounterBlockMaxEvent = 25;
static constexpr uint32_t CbCounterBlockMaxEvent = 315;
static constexpr uint32_t DbCounterBlockMaxEvent = 441;
static constexpr uint32_t PaSuCounterBlockMaxEvent = 828;
static constexpr uint32_t SxCounterBlockMaxEvent = 81;
static constexpr uint32_t ScCounterBlockMaxEvent = 821;
static constexpr uint32_t TaCounterBlockMaxEvent = 254;
static constexpr uint32_t TdCounterBlockMaxEvent = 271;
static constexpr uint32_t TcpCounterBlockMaxEvent = 99;
static constexpr uint32_t SpiCounterBlockMaxEvent = 318;
static constexpr uint32_t SqgCounterBlockMaxEvent = 45;
static constexpr uint32_t Gl1aCounterBlockMaxEvent = 21;
static constexpr uint32_t RmiCounterBlockMaxEvent = 138;
static constexpr uint32_t Gl1cCounterBlockMaxEvent = 121;
static constexpr uint32_t SqcCounterBlockMaxEvent = 511;
static constexpr uint32_t PcCounterBlockMaxEvent = 164;
static constexpr uint32_t GceaSeCounterBlockMaxEvent = 32;
static constexpr uint32_t GeCounterBlockMaxEvent = 103;
static constexpr uint32_t WgsCounterBlockMaxEvent = 4;
static constexpr uint32_t Gl1xaCounterBlockMaxEvent = 21;
static constexpr uint32_t Gl1xcCounterBlockMaxEvent = 109;
static constexpr uint32_t GcUtcl1CounterBlockMaxEvent = 71;
|
||||
} // namespace gfx1201
|
||||
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_BLOCKINFO_H_
|
||||
@@ -0,0 +1,158 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_BLOCKTABLE_H_
|
||||
#define _GFX12_BLOCKTABLE_H_
|
||||
|
||||
// One CounterRegInfo initializer for counter INDEX of BLOCK: the block's
// reg<BLOCK>_PERFCOUNTER<INDEX>_SELECT register, the caller-supplied CTRL register,
// then the matching _LO and _HI registers.
#define REG_INFO_WITH_CTRL(BLOCK, CTRL, INDEX)                     \
  {                                                                \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_SELECT), \
    CTRL,                                                          \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_LO),     \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_HI)      \
  }

// REG_INFO_WITH_CTRL_<N> emits N comma-separated initializers for counters 0..N-1,
// each built on top of the previous size.
#define REG_INFO_WITH_CTRL_1(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL(BLOCK, CTRL, 0)
#define REG_INFO_WITH_CTRL_2(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_1(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 1)
#define REG_INFO_WITH_CTRL_3(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_2(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 2)
#define REG_INFO_WITH_CTRL_4(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_3(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 3)
#define REG_INFO_WITH_CTRL_5(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_4(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 4)
#define REG_INFO_WITH_CTRL_6(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_5(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 5)
#define REG_INFO_WITH_CTRL_7(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_6(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 6)
#define REG_INFO_WITH_CTRL_8(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_7(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 7)

// REG_INFO_<N>: same as above, with REG_32B_NULL in the control slot.
#define REG_INFO_1(BLOCK) REG_INFO_WITH_CTRL_1(BLOCK, REG_32B_NULL)
#define REG_INFO_2(BLOCK) REG_INFO_WITH_CTRL_2(BLOCK, REG_32B_NULL)
#define REG_INFO_3(BLOCK) REG_INFO_WITH_CTRL_3(BLOCK, REG_32B_NULL)
#define REG_INFO_4(BLOCK) REG_INFO_WITH_CTRL_4(BLOCK, REG_32B_NULL)
#define REG_INFO_5(BLOCK) REG_INFO_WITH_CTRL_5(BLOCK, REG_32B_NULL)
#define REG_INFO_6(BLOCK) REG_INFO_WITH_CTRL_6(BLOCK, REG_32B_NULL)
#define REG_INFO_7(BLOCK) REG_INFO_WITH_CTRL_7(BLOCK, REG_32B_NULL)
#define REG_INFO_8(BLOCK) REG_INFO_WITH_CTRL_8(BLOCK, REG_32B_NULL)
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
namespace gfx1201 {
|
||||
// Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
|
||||
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
|
||||
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
|
||||
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
|
||||
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
|
||||
static const CounterRegInfo PaPhCounterRegAddr[] = {REG_INFO_8(PA_PH)};
|
||||
static const CounterRegInfo Ge1CounterRegAddr[] = {REG_INFO_4(GE1)};
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
|
||||
static const CounterRegInfo GceaCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
|
||||
static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)};
|
||||
static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)};
|
||||
static const CounterRegInfo Ge2CounterRegAddr[] = {REG_INFO_4(GE2_DIST)};
|
||||
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
|
||||
//static const CounterRegInfo GcVml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
|
||||
//static const CounterRegInfo GcMcVml2CounterRegAddr[] = {REG_INFO_1(GCMC_VM_L2)};
|
||||
//static const CounterRegInfo GcUtcl2CounterRegAddr[] = {REG_INFO_1(GCUTCL2)};
|
||||
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
|
||||
static const CounterRegInfo CbCounterRegAddr[] = {REG_INFO_4(CB)};
|
||||
static const CounterRegInfo DbCounterRegAddr[] = {REG_INFO_4(DB)};
|
||||
static const CounterRegInfo PaSuCounterRegAddr[] = {REG_INFO_4(PA_SU)};
|
||||
static const CounterRegInfo SxCounterRegAddr[] = {REG_INFO_4(SX)};
|
||||
static const CounterRegInfo PaScCounterRegAddr[] = {REG_INFO_8(PA_SC)};
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
|
||||
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
|
||||
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
|
||||
static const CounterRegInfo SqgCounterRegAddr[] = {REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
|
||||
static const CounterRegInfo Gl1aCounterRegAddr[] = {REG_INFO_4(GL1A)};
|
||||
static const CounterRegInfo RmiCounterRegAddr[] = {REG_INFO_4(RMI)};
|
||||
static const CounterRegInfo Gl1cCounterRegAddr[] = {REG_INFO_4(GL1C)};
|
||||
//static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, regSQ_PERFCOUNTER_CTRL)};
|
||||
static const CounterRegInfo PcCounterRegAddr[] = {REG_INFO_4(PC)};
|
||||
static const CounterRegInfo GeCounterRegAddr[] = {REG_INFO_4(GE2_SE)};
|
||||
static const CounterRegInfo GceaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
|
||||
// static const CounterRegInfo WgsCounterRegAddr[] = {REG_INFO_2(WGS)};
|
||||
static const CounterRegInfo Gl1xaCounterRegAddr[] = {REG_INFO_4(GL1XA)};
|
||||
static const CounterRegInfo Gl1xcCounterRegAddr[] = {REG_INFO_4(GL1XC)};
|
||||
static const CounterRegInfo GcUtcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
|
||||
|
||||
// Special handling of SQC:
|
||||
// SQC only supports 32bit PMC, only regSQ_PERFCOUNTER#even_number#_SELECT is
|
||||
// used by PMC. regSQ_PERFCOUNTER#odd_number#_SELECT is used only by SPM
|
||||
static const CounterRegInfo SqcCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER3_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER8_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER10_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER5_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO), REG_32B_NULL},
|
||||
{REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL), REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO), REG_32B_NULL}};
|
||||
|
||||
// Special handling of GCVML2:
|
||||
static const CounterRegInfo GcVml2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_SELECT), REG_32B_NULL, REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_LO), REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_HI)}};
|
||||
|
||||
// Special handling of GCMC_VM_L2:
|
||||
static const CounterRegInfo GcMcVml2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_HI)}};
|
||||
|
||||
// Special handling of GCUTCL2: Not sure if this is SPM-only
|
||||
static const CounterRegInfo GcUtcl2CounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_HI)}};
|
||||
|
||||
// Global blocks: ATCL2 CHA CHC CPC CPF CPG EA FFBM GCR GL2A GL2C GRBM RLC SDMA VML2 UTCL2
|
||||
// (Graphics only - not supported in ROCm): GE1 GE2_DIST PH
|
||||
// (Graphics only): CPG is for graphics, but it is not physically removed for compute products
|
||||
// (Not enabled for gfx12): CHCG GDS GUS
|
||||
// GpuBlockInfo descriptors for the global blocks. Initializer order as used below:
// name, block id, instance count, max event, counter count, register table,
// select-value callback, attribute flags, then (optionally) two trailing fields that
// are only supplied for blocks carrying an SPM block name.
// Entries marked "Placeholder now" provide only the name and the block id.
static const GpuBlockInfo GcAtcl2CounterBlockInfo = {
    "ATCL2", __BLOCK_ID(ATCL2)};  // Placeholder now

static const GpuBlockInfo ChaCounterBlockInfo = {
    "CHA", __BLOCK_ID(CHA),
    ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent, ChaCounterBlockNumCounters,
    ChaCounterRegAddr, gfx12_cntx_prim::select_value_Cha,
    CounterBlockTcAttr};

static const GpuBlockInfo ChcCounterBlockInfo = {
    "CHC", __BLOCK_ID(CHC),
    ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters,
    ChcCounterRegAddr, gfx12_cntx_prim::select_value_Chc,
    CounterBlockTcAttr};

static const GpuBlockInfo CpcCounterBlockInfo = {
    "CPC", __BLOCK_ID(CPC),
    CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent, CpcCounterBlockNumCounters,
    CpcCounterRegAddr, gfx12_cntx_prim::select_value_Cpc,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};

static const GpuBlockInfo CpfCounterBlockInfo = {
    "CPF", __BLOCK_ID(CPF),
    CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent, CpfCounterBlockNumCounters,
    CpfCounterRegAddr, gfx12_cntx_prim::select_value_Cpf,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};

static const GpuBlockInfo CpgCounterBlockInfo = {
    "CPG", __BLOCK_ID(CPG),
    CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent, CpgCounterBlockNumCounters,
    CpgCounterRegAddr, gfx12_cntx_prim::select_value_Cpg,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};

static const GpuBlockInfo GceaCounterBlockInfo = {
    "GCEA", __BLOCK_ID(GCEA),
    GceaCounterBlockNumInstances, GceaCounterBlockMaxEvent, GceaCounterBlockNumCounters,
    GceaCounterRegAddr, gfx12_cntx_prim::select_value_Gcea,
    0};

static const GpuBlockInfo GcFfbmCounterBlockInfo = {
    "GC_FFBM", __BLOCK_ID(GC_FFBM)};  // Placeholder now

static const GpuBlockInfo GcrCounterBlockInfo = {
    "GCR", __BLOCK_ID(GCR),
    GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent, GcrCounterBlockNumCounters,
    GcrCounterRegAddr, gfx12_cntx_prim::select_value_Gcr,
    CounterBlockTcAttr};

static const GpuBlockInfo Gl2aCounterBlockInfo = {
    "GL2A", __BLOCK_ID(GL2A),
    Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent, Gl2aCounterBlockNumCounters,
    Gl2aCounterRegAddr, gfx12_cntx_prim::select_value_Gl2a,
    CounterBlockTcAttr};

static const GpuBlockInfo Gl2cCounterBlockInfo = {
    "GL2C", __BLOCK_ID(GL2C),
    Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters,
    Gl2cCounterRegAddr, gfx12_cntx_prim::select_value_Gl2c,
    CounterBlockTcAttr};

static const GpuBlockInfo GrbmCounterBlockInfo = {
    "GRBM", __BLOCK_ID(GRBM),
    GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent, GrbmCounterBlockNumCounters,
    GrbmCounterRegAddr, gfx12_cntx_prim::select_value_Grbm,
    CounterBlockGRBMAttr};

static const GpuBlockInfo RlcCounterBlockInfo = {
    "RLC", __BLOCK_ID(RLC),
    RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent, RlcCounterBlockNumCounters,
    RlcCounterRegAddr, gfx12_cntx_prim::select_value_Rlc,
    0};

static const GpuBlockInfo SdmaPmCounterBlockInfo = {
    "SDMA_PM", __BLOCK_ID(SDMA_PM),
    SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent, SdmaCounterBlockNumCounters,
    SdmaCounterRegAddr, gfx12_cntx_prim::select_value_SdmaPm,
    CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};

static const GpuBlockInfo GcVml2CounterBlockInfo = {
    "GC_VML2", __BLOCK_ID(GC_VML2)};  // Placeholder now

static const GpuBlockInfo GcUtcl2CounterBlockInfo = {
    "GC_UTCL2", __BLOCK_ID(GC_UTCL2)};  // Placeholder now
|
||||
// SE blocks: EA_SE GL2A GL2C GRBMH SPI SQG UTCL1
|
||||
// (Graphics only - not supported in ROCm): GE GL1XA GL1XC PA PC WGS
|
||||
// GpuBlockInfo descriptors for the per-shader-engine blocks; every entry carries
// CounterBlockSeAttr. SQG reuses the shared sq_select_value callback.
static const GpuBlockInfo GceaSeCounterBlockInfo = {
    "GCEA_SE", __BLOCK_ID(GCEA_SE),
    GceaSeCounterBlockNumInstances, GceaSeCounterBlockMaxEvent, GceaSeCounterBlockNumCounters,
    GceaSeCounterRegAddr, gfx12_cntx_prim::select_value_GceaSe,
    CounterBlockSeAttr};

static const GpuBlockInfo GrbmhCounterBlockInfo = {
    "GRBMH", __BLOCK_ID(GRBMH),
    GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent, GrbmhCounterBlockNumCounters,
    GrbmhCounterRegAddr, gfx12_cntx_prim::select_value_Grbmh,
    CounterBlockSeAttr};

static const GpuBlockInfo SpiCounterBlockInfo = {
    "SPI", __BLOCK_ID(SPI),
    SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent, SpiCounterBlockNumCounters,
    SpiCounterRegAddr, gfx12_cntx_prim::select_value_Spi,
    CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};

static const GpuBlockInfo SqgCounterBlockInfo = {
    "SQG", __BLOCK_ID(SQG),
    SqgCounterBlockNumInstances, SqgCounterBlockMaxEvent, SqgCounterBlockNumCounters,
    SqgCounterRegAddr, gfx12_cntx_prim::sq_select_value,
    CounterBlockSeAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQG};

static const GpuBlockInfo GcUtcl1CounterBlockInfo = {
    "GC_UTCL1", __BLOCK_ID(GC_UTCL1),
    GcUtcl1CounterBlockNumInstances, GcUtcl1CounterBlockMaxEvent, GcUtcl1CounterBlockNumCounters,
    GcUtcl1CounterRegAddr, gfx12_cntx_prim::select_value_GcUtcl1,
    CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
|
||||
// SA blocks: GL1A GL1C
|
||||
// (Graphics only - not supported in ROCm): CB DB SC SX
|
||||
// (Not enabled for gfx12): GL1CG
|
||||
// GpuBlockInfo descriptors for the per-shader-array blocks; both carry the
// SE, SA and TC attribute flags.
static const GpuBlockInfo Gl1aCounterBlockInfo = {
    "GL1A", __BLOCK_ID(GL1A),
    Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent, Gl1aCounterBlockNumCounters,
    Gl1aCounterRegAddr, gfx12_cntx_prim::select_value_Gl1a,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};

static const GpuBlockInfo Gl1cCounterBlockInfo = {
    "GL1C", __BLOCK_ID(GL1C),
    Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent, Gl1cCounterBlockNumCounters,
    Gl1cCounterRegAddr, gfx12_cntx_prim::select_value_Gl1c,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
// WGP blocks: SQC TA TCP TD
|
||||
// GpuBlockInfo descriptors for the per-WGP blocks; every entry carries the SE, SA and
// WGP attribute flags. The SQC descriptor is registered under the name "SQ" / block id
// SQ and uses the shared sq_select_value callback.
static const GpuBlockInfo SqcCounterBlockInfo = {
    "SQ", __BLOCK_ID(SQ),
    SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent, SqcCounterBlockNumCounters,
    SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr,
    NULL, SPM_SE_BLOCK_NAME_SQC};

static const GpuBlockInfo TaCounterBlockInfo = {
    "TA", __BLOCK_ID(TA),
    TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters,
    TaCounterRegAddr, gfx12_cntx_prim::select_value_Ta,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL /*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};

static const GpuBlockInfo TdCounterBlockInfo = {
    "TD", __BLOCK_ID(TD),
    TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters,
    TdCounterRegAddr, gfx12_cntx_prim::select_value_Td,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL /*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};

// NOTE(review): the disabled delay-info name on this TCP entry reads "TdBlockDelayInfo",
// same as the TD entry above -- looks like a copy-paste; confirm the intended table
// name before enabling it.
static const GpuBlockInfo TcpCounterBlockInfo = {
    "TCP", __BLOCK_ID(TCP),
    TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters,
    TcpCounterRegAddr, gfx12_cntx_prim::select_value_Tcp,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL /*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
|
||||
} // namespace gfx1201
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,651 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_PRIMITIVES_H_
|
||||
#define _GFX12_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
// COPY_DATA selector / count encodings, taken from gfx12_pm4defs.h.
// gfx12_cntx_prim re-exports each of these values as a matching *_PRM class constant.
|
||||
#define COPY_DATA_SEL_REG 0 ///< Mem-mapped register
|
||||
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4 ///< Privileged memory performance counter
|
||||
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas.
// Each macro expands to a capture-less lambda taking a counter_des_t and returning the
// uint32_t value to program into the block's select register.

// Variant for registers whose event field is named PERF_SEL.
#define select_value(reg_name)                                          \
  [](const counter_des_t& counter_des) -> uint32_t {                    \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id);      \
  }

// Variant for registers whose event field is named PERFCOUNTER_SELECT.
#define select_value_t2(reg_name)                                             \
  [](const counter_des_t& counter_des) -> uint32_t {                          \
    return SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id);  \
  }

// Variant that ignores the counter descriptor and always yields 0.
#define select_value_blank() \
  [](const counter_des_t&) -> uint32_t { return 0; }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
|
||||
class gfx12_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 12;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, regGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL_1);
|
||||
|
||||
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
// MC_PERFCOUNTER_RSLT_CNTL bit masks: ENABLE_ANY is bit 24, CLEAR_ALL is bit 25.
// constexpr (implicitly inline in C++17) so they can be odr-used, e.g. bound to a
// const reference, without an out-of-class definition.
static constexpr uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
static constexpr uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SPI/SQ perf-counter and SQ thread-trace register addresses.
// Members initialised with `{}` are default-constructed Register values
// (NOTE(review): presumably registers that have no gfx12 equivalent -- confirm).
// The two integer members are constexpr (implicitly inline in C++17) so that odr-use
// does not need an out-of-class definition, matching the Register members.
static constexpr Register SPI_SQG_EVENT_CTL_ADDR = REG_32B_ADDR(GC, 0, regSPI_SQG_EVENT_CTL);
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL2);
// Hard-coded address rather than a REG_32B_ADDR lookup.
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR = Register(0xD9E1);

static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};

static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE_LO);
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE_HI);
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_SIZE);
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR{};

static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_CTRL);
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
static constexpr uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
// Note: the "CNTR" address resolves to the DROPPED_CNTR register.
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_DROPPED_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_WPTR);
// Same register as SQ_THREAD_TRACE_STATUS_ADDR, with its offset rebased by subtracting
// UCONFIG_SPACE_START. Computed at compile time by an immediately-invoked lambda.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = []() {
  Register status_reg = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
  status_reg.offset -= UCONFIG_SPACE_START;
  return status_reg;
}();
static constexpr uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM register addresses, followed by two SPM layout constants.
// The integer constants are constexpr (implicitly inline in C++17) so odr-use needs no
// out-of-class definition, matching the Register members.
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, regRLC_SPM_MC_CNTL);

// Ring buffer base / size and segment size.
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE);
// Default-constructed Register (no address assigned here).
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};

// Global and per-SE muxsel address/data register pairs.
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);

static constexpr uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
// NOTE(review): presumably the timestamp size in 16-bit units, per the name -- confirm.
static constexpr uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Addresses of the four SQ_THREAD_TRACE_USERDATA_<n> registers (n = 0..3).
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// WGP topology constants. NUM_ROWS_PER_WGP bounds the instance_index accepted by
// grbm_inst_se_sh_wgp_index_value().
// constexpr (implicitly inline in C++17) so odr-use needs no out-of-class definition.
static constexpr uint32_t NUM_WGP1_PER_SA = 0;
static constexpr uint32_t NUM_ROWS_PER_WGP = 2;
|
||||
|
||||
// Always returns REG_32B_NULL, regardless of `index`.
static Register sqtt_perfcounter_addr(uint32_t index) {
  static_cast<void>(index);  // intentionally unused
  return REG_32B_NULL;
}
|
||||
|
||||
// 16-bit mux selector word, viewable either as a raw value (`data`) or as three
// packed bit-fields (`gfx`): 6 + 5 + 5 = 16 bits.
// NOTE(review): bit-field placement within the word is implementation-defined; the
// layout presumably relies on the target ABI allocating fields from the low bits.
union mux_info_t {
|
||||
uint16_t data;  // raw 16-bit view
|
||||
struct {
|
||||
uint16_t counter : 6;  // counter selector (6 bits)
|
||||
uint16_t block : 5;  // block selector (5 bits)
|
||||
uint16_t instance : 5;  // instance selector (5 bits)
|
||||
} gfx;
|
||||
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID(SQ);
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = SPM_SE_BLOCK_NAME_SQG;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the low 32 bits of a 64-bit value.
static uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v);
}
|
||||
// Returns the high 32 bits of a 64-bit value.
static uint32_t High32(const uint64_t& v) {
  return static_cast<uint32_t>(v >> 32);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
// Hardcode wgp_side to 0 now because we don't have a product with wgp1 configuration
|
||||
uint32_t wgp_side = 0;
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
// Hardcode wgp_side to 0 now because we don't have a product with wgp1 configuration
|
||||
uint32_t wgp_side = 0;
|
||||
assert(instance_index < NUM_ROWS_PER_WGP);
|
||||
uint32_t grbm_gfx_index =
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | (instance_index << 1)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to reset counters: all fields zero.
static uint32_t cp_perfmon_cntl_reset_value() {
  return 0u;
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 2) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_perfcounter0_sel;
|
||||
}
|
||||
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_perfcounter0_sel;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx12
|
||||
// Returns an all-ones mask regardless of the counter descriptor.
static uint32_t sq_mask_value(const counter_des_t&) {
  return 0xFFFFFFFFu;
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// Checks the combined counter attribute bits. When SQ_CONFLICT_CHECK == 1 the
// process is aborted if both CounterBlockSqAttr and CounterBlockTcAttr are set
// in counters_vec_attr; otherwise the function does nothing.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t sq_and_tc = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & sq_and_tc) == sq_and_tc) {
    abort();
  }
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
static uint32_t sq_control2_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl2 = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, true) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_perfcounter_ctrl2;
|
||||
}
|
||||
static uint32_t sq_control2_disable_value() {
|
||||
uint32_t sq_perfcounter_ctrl2 = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, false) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_perfcounter_ctrl2;
|
||||
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// MC channel value: simply the counter descriptor's index.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  return counter_des.index;
}
|
||||
|
||||
// MC registers values
|
||||
// MC reset value: the CLEAR_ALL mask constant.
static uint32_t mc_reset_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
}
|
||||
// MC start value: the ENABLE_ANY mask constant.
static uint32_t mc_start_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
}
|
||||
|
||||
static auto constexpr select_value_Cha= select_value(CHA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Chc= select_value(CHC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpc= select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpf= select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpg= select_value(CPG_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gcea= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gcr= select_value(GCR_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2a= select_value(GL2A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2c= select_value(GL2C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Grbm= select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Rlc= select_value_t2(RLC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SdmaPm= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcVml2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcUtcl2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GceaSe= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Grbmh= select_value(GRBMH_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Spi= select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GcUtcl1= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gl1a= select_value(GL1A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl1c= select_value(GL1C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Ta= select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Td= select_value(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Tcp= select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value_blank();
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
// Builds a zero-initialised mux_info_t from a counter descriptor:
//   gfx.block    <- counter_des.block_info->spm_block_id
//   gfx.instance <- counter_des.block_des.index
//   gfx.counter  <- counter_des.index
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t mux{0};
  mux.gfx.counter = counter_des.index;
  mux.gfx.block = counter_des.block_info->spm_block_id;
  mux.gfx.instance = counter_des.block_des.index;
  return mux;
}
|
||||
// Builds a zero-initialised mux_info_t from explicit counter / block / instance
// values, stored into the gfx.counter, gfx.block and gfx.instance members.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t mux{0};
  mux.gfx.counter = counter;
  mux.gfx.block = block;
  mux.gfx.instance = instance;
  return mux;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// SDMA primitives
|
||||
// SDMA enable value; always 0 in this implementation.
static uint32_t sdma_enable_value() {
  return 0u;
}
|
||||
|
||||
// SDMA disable/clear value; always 0 in this implementation.
static uint32_t sdma_disable_clear_value() {
  return 0u;
}
|
||||
|
||||
// SDMA select value; the descriptor is ignored and 0 is always returned.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// SDMA stop value; the descriptor is ignored and 0 is always returned.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// SPM trace routines
|
||||
// RLC_SPM_MC_CNTL value with RLC_SPM_VMID = 15.
static uint32_t rlc_spm_mc_cntl_value() {
  return SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
}
|
||||
// CP_PERFMON_CNTL_1 value for starting SPM: SPM_PERFMON_STATE = 1.
static uint32_t cp_perfmon_cntl_spm_start_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 1);
}
|
||||
// CP_PERFMON_CNTL_1 value for stopping SPM: SPM_PERFMON_STATE = 2.
static uint32_t cp_perfmon_cntl_spm_stop_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 2);
}
|
||||
// Stub: all arguments are ignored and 0 is returned.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  return 0u;
}
|
||||
// RLC_SPM_PERFMON_CNTL value with PERFMON_SAMPLE_INTERVAL = sampling_rate.
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
}
|
||||
// RLC_SPM_PERFMON_SEGMENT_SIZE value.
//   TOTAL_NUM_SEGMENT  = global_count + 4 * se_count
//   GLOBAL_NUM_SEGMENT = global_count
// The per-SE fields (SE0_NUM_LINE..SE2_NUM_LINE, PERFMON_SEGMENT_SIZE) that
// appeared in the previously commented-out variant are not programmed here.
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
                                                   const uint32_t& se_count) {
  const uint32_t total_lines = global_count + (4 * se_count);

  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, TOTAL_NUM_SEGMENT, total_lines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_SEGMENT, global_count);
}
|
||||
|
||||
// Stub: se_count is ignored and 0 is returned.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  return 0u;
}
|
||||
|
||||
// Include the CS wave type (WTYPE_INCLUDE bit 6, SQ_TT_WTYPE_INCLUDE_CS_BIT)
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
// SQ_THREAD_TRACE_MASK value.
//   SIMD_SEL                     = simd
//   WGP_SEL                      = wgp
//   SA_SEL                       = 0
//   WTYPE_INCLUDE                = 1 << 6 (SQ_TT_WTYPE_INCLUDE_CS_BIT)
//   EXCLUDE_NONDETAIL_SHADERDATA = 1
// vmid is not used. EXCLUDE_NONDETAIL_WAVESTART_EXT and EXCLUDE_NONDETAIL_ALLOC
// are not programmed (they were already commented out).
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE,
                            1 << 6) |  // SQ_TT_WTYPE_INCLUDE_CS_BIT
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, EXCLUDE_NONDETAIL_SHADERDATA, 1);
}
|
||||
// not supported in gfx12
|
||||
// Thread-trace perf mask; always 0 here (noted as unsupported in gfx12).
static uint32_t sqtt_perf_mask_value() {
  return 0u;
}
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
// SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is on.
//   REG_DETAIL_ALL       = 1
//   REG_EXCLUDE          = 0x3
//   REG_INCLUDE          = SQDEC | SHDEC | GFXUDEC | CONTEXT | COMP bits
//   TOKEN_EXCLUDE        = VMEMEXEC and ALUEXEC exclude bits
//   EXCLUDE_BARRIER_WAIT = 1 (see DEGFX12-10117)
static uint32_t sqtt_token_mask_on_value() {
  const auto reg_include_bits =
      (SQ_TT_TOKEN_MASK_SQDEC_BIT | SQ_TT_TOKEN_MASK_SHDEC_BIT | SQ_TT_TOKEN_MASK_GFXUDEC_BIT |
       SQ_TT_TOKEN_MASK_CONTEXT_BIT | SQ_TT_TOKEN_MASK_COMP_BIT);
  const auto token_exclude_bits = ((1 << SQ_TT_TOKEN_EXCLUDE_VMEMEXEC_SHIFT) |
                                   (1 << SQ_TT_TOKEN_EXCLUDE_ALUEXEC_SHIFT));

  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_DETAIL_ALL, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, reg_include_bits) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, token_exclude_bits) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, EXCLUDE_BARRIER_WAIT,
                            1);  // See DEGFX12-10117
}
|
||||
|
||||
// SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is off:
// REG_EXCLUDE = 0x7, INST_EXCLUDE = 0x3, TOKEN_EXCLUDE = 0x7FF.
static uint32_t sqtt_token_mask_off_value() {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x7) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
}
|
||||
|
||||
// SQ_THREAD_TRACE_TOKEN_MASK value for occupancy tracing:
// REG_INCLUDE = 0x8, INST_EXCLUDE = 0x3, TOKEN_EXCLUDE = 0x7FF.
static uint32_t sqtt_token_mask_occupancy_value() {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, 0x8) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
}
|
||||
|
||||
// not supported in gfx12
|
||||
// Second token mask; always 0 here (noted as unsupported in gfx12).
static uint32_t sqtt_token_mask2_value() {
  return 0u;
}
|
||||
|
||||
// Check if stalling is supported
|
||||
// Reports whether stalling is enabled for the given mask values. Both
// arguments are ignored and the result is always false.
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
// Thread-trace mode OFF value; always 0 in this implementation.
static uint32_t sqtt_mode_off_value() {
  return 0u;
}
|
||||
// Thread trace mode ON value
|
||||
// Thread-trace mode ON value; always 0 in this implementation.
static uint32_t sqtt_mode_on_value() {
  return 0u;
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
// SQ_THREAD_TRACE_BUF0_BASE_LO value: BASE_LO receives the low 32 bits of
// base_addr shifted right by TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE_LO, BASE_LO,
                            Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
}
|
||||
|
||||
// SQ_THREAD_TRACE_BUF0_BASE_HI value: BASE_HI receives the high 32 bits of
// base_addr shifted right by TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE_HI, BASE_HI,
                            High32(base_addr >> TT_BUFF_ALIGN_SHIFT));
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
// SQ_THREAD_TRACE_BUF0_SIZE value: SIZE receives size_val shifted right by
// TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT);
}
|
||||
|
||||
// SPI_SQG_EVENT_CTL value: both ENABLE_SQG_TOP_EVENTS and
// ENABLE_SQG_BOP_EVENTS are set to enableSqgEvents.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  const uint32_t top_bits =
      SET_REG_FIELD_BITS(SPI_SQG_EVENT_CTL, ENABLE_SQG_TOP_EVENTS, enableSqgEvents);
  const uint32_t bop_bits =
      SET_REG_FIELD_BITS(SPI_SQG_EVENT_CTL, ENABLE_SQG_BOP_EVENTS, enableSqgEvents);
  return top_bits | bop_bits;
}
|
||||
|
||||
// Stub: both arguments are ignored and 0 is returned.
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
  return 0u;
}
|
||||
|
||||
// Zero buffer-size value; always 0.
static uint32_t sqtt_zero_size_value() {
  return 0u;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
// SQ_THREAD_TRACE_CTRL value.
//   MODE            = SQ_TT_MODE_ON when `on` is true, otherwise SQ_TT_MODE_OFF
//   HIWATER         = 5
//   UTIL_TIMER      = 1
//   DRAW_EVENT_EN   = 1
//   SPI_STALL_EN    = 1
//   SQ_STALL_EN     = 1
//   LOWATER_OFFSET  = 4
//   AUTO_FLUSH_MODE = 1
static uint32_t sqtt_ctrl_value(bool on) {
  const auto trace_mode = on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF;

  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, trace_mode) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
}
|
||||
|
||||
// SPM primitives
|
||||
// Mux-select value used for the SPM timestamp; fixed at 0xF0F0.
static uint16_t spm_timestamp_muxsel() {
  return 0xF0F0;
}
|
||||
|
||||
// Masks applied to thread-trace status / write-pointer values.
enum ESQTT_STATUS_MASK {
  TT_CONTROL_UTC_ERR_MASK = 0x1000000,  // set when a memory error was received
  // TODO: Navi has 2 full bits on status2, one for each buffer
  TT_CONTROL_FULL_MASK = 0x0,
  TT_WRITE_PTR_MASK = 0x1FFFFFFF
};
|
||||
|
||||
// Mask with only bit 25 (the busy bit) set.
static uint32_t sqtt_busy_mask() {
  constexpr uint32_t kBusyBit = 25;
  return uint32_t{1} << kBusyBit;
}
|
||||
|
||||
// Mask covering 8 consecutive bits starting at bit 2 (bits 2..9), one per pipe.
static uint32_t sqtt_pending_mask() {
  constexpr uint32_t kFirstPipeBit = 2;
  constexpr uint32_t kPipeCount = 8;
  constexpr uint32_t kPipeBits = (1u << kPipeCount) - 1u;  // kPipeCount low bits set
  return kPipeBits << kFirstPipeBit;
}
|
||||
};
|
||||
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_PRIMITIVES_H_
|
||||
@@ -0,0 +1,201 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_BLOCKINFO_H_
|
||||
#define _GFX9_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx9 {
|
||||
|
||||
// Enumeration of Gfx9 hardware counter blocks
|
||||
// Identifiers of the Gfx9 hardware counter blocks. Values are sequential,
// starting at 0; FirstCounterBlockId / LastCounterBlockId alias the first and
// last real entries.
enum CounterBlockId {
  CbCounterBlockId = 0,
  CpcCounterBlockId,
  CpfCounterBlockId,
  CpgCounterBlockId,
  DbCounterBlockId,
  GdsCounterBlockId,
  GrbmCounterBlockId,
  GrbmSeCounterBlockId,
  IaCounterBlockId,
  PaScCounterBlockId,
  PaSuCounterBlockId,
  SpiCounterBlockId,
  SqCounterBlockId,
  SqGsCounterBlockId,
  SqVsCounterBlockId,
  SqPsCounterBlockId,
  SqHsCounterBlockId,
  SqCsCounterBlockId,
  SxCounterBlockId,
  TaCounterBlockId,
  TcaCounterBlockId,
  TccCounterBlockId,
  TcpCounterBlockId,
  TcsCounterBlockId,
  TdCounterBlockId,
  VgtCounterBlockId,
  WdCounterBlockId,

  // MC blocks
  GceaCounterBlockId,
  AtcCounterBlockId,
  AtcL2CounterBlockId,
  McVmL2CounterBlockId,
  RpbCounterBlockId,
  RmiCounterBlockId,

  // SDMA block
  SdmaCounterBlockId,

  // UMC block
  UmcCounterBlockId,

  // Counters retrieved by KFD
  IommuV2CounterBlockId,
  KernelDriverCounterBlockId,

  CpPipeStatsCounterBlockId,
  HwInfoCounterBlockId,

  // Range aliases
  FirstCounterBlockId = CbCounterBlockId,
  LastCounterBlockId = HwInfoCounterBlockId,
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
// SPM block IDs for the global blocks.
enum SpmGlobalBlockId {
  SPM_GLOBAL_BLOCK_NAME_CPG = 0,
  SPM_GLOBAL_BLOCK_NAME_CPC = 1,
  SPM_GLOBAL_BLOCK_NAME_CPF = 2,
  SPM_GLOBAL_BLOCK_NAME_GDS = 3,
  SPM_GLOBAL_BLOCK_NAME_TCC = 4,
  SPM_GLOBAL_BLOCK_NAME_TCA = 5,
  SPM_GLOBAL_BLOCK_NAME_IA  = 6,
  SPM_GLOBAL_BLOCK_NAME_TCS = 7,
};
|
||||
|
||||
// SPM block IDs for the shader-engine blocks.
enum SpmSeBlockId {
  SPM_SE_BLOCK_NAME_CB  = 0,
  SPM_SE_BLOCK_NAME_DB  = 1,
  SPM_SE_BLOCK_NAME_PA  = 2,
  SPM_SE_BLOCK_NAME_SX  = 3,
  SPM_SE_BLOCK_NAME_SC  = 4,
  SPM_SE_BLOCK_NAME_TA  = 5,
  SPM_SE_BLOCK_NAME_TD  = 6,
  SPM_SE_BLOCK_NAME_TCP = 7,
  SPM_SE_BLOCK_NAME_SPI = 8,
  SPM_SE_BLOCK_NAME_SQG = 9,
  SPM_SE_BLOCK_NAME_VGT = 10,
};
|
||||
|
||||
// Number of block instances
|
||||
// Instance counts per counter block.
static constexpr uint32_t CbCounterBlockNumInstances = 4;
static constexpr uint32_t DbCounterBlockNumInstances = 4;
static constexpr uint32_t TaCounterBlockNumInstances = 16;
static constexpr uint32_t TdCounterBlockNumInstances = 16;
static constexpr uint32_t TcpCounterBlockNumInstances = 16;
static constexpr uint32_t TcaCounterBlockNumInstances = 2;
static constexpr uint32_t TccCounterBlockNumInstances = 16;
static constexpr uint32_t SdmaCounterBlockNumInstances = 2;
static constexpr uint32_t UmcCounterBlockNumInstances = 32;

// Upper bounds on instance counts (MI100 has 8 SDMA instances).
static constexpr uint32_t SdmaCounterBlockMaxInstances = 8;
static constexpr uint32_t UmcCounterBlockMaxInstances = 32;
static constexpr uint32_t RmiCounterBlockNumInstances = 8;
static constexpr uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
// Number of counter registers available in each block.
static constexpr uint32_t CbCounterBlockNumCounters = 4;
static constexpr uint32_t CpcCounterBlockNumCounters = 2;
static constexpr uint32_t CpfCounterBlockNumCounters = 2;
static constexpr uint32_t CpgCounterBlockNumCounters = 2;
static constexpr uint32_t DbCounterBlockNumCounters = 4;
static constexpr uint32_t GdsCounterBlockNumCounters = 4;
static constexpr uint32_t GrbmCounterBlockNumCounters = 2;
static constexpr uint32_t GrbmSeCounterBlockNumCounters = 4;
static constexpr uint32_t IaCounterBlockNumCounters = 4;
static constexpr uint32_t PaSuCounterBlockNumCounters = 4;
static constexpr uint32_t PaScCounterBlockNumCounters = 8;
static constexpr uint32_t RlcCounterBlockNumCounters = 2;
static constexpr uint32_t SdmaCounterBlockNumCounters = 2;
static constexpr uint32_t UmcCounterBlockNumCounters = 0;
static constexpr uint32_t SpiCounterBlockNumCounters = 6;
static constexpr uint32_t SqCounterBlockNumCounters = 8;
static constexpr uint32_t SxCounterBlockNumCounters = 4;
static constexpr uint32_t TaCounterBlockNumCounters = 2;
static constexpr uint32_t TcaCounterBlockNumCounters = 4;
static constexpr uint32_t TccCounterBlockNumCounters = 4;
static constexpr uint32_t TcpCounterBlockNumCounters = 4;
static constexpr uint32_t TdCounterBlockNumCounters = 2;
static constexpr uint32_t VgtCounterBlockNumCounters = 4;
static constexpr uint32_t WdCounterBlockNumCounters = 4;
static constexpr uint32_t GceaCounterBlockNumCounters = 2;
static constexpr uint32_t AtcCounterBlockNumCounters = 4;
static constexpr uint32_t AtcL2CounterBlockNumCounters = 2;
#ifndef _mi300_OFFSET_HEADER
static constexpr uint32_t McVmL2CounterBlockNumCounters = 8;
#else
// MI300 bumped this to 16
static constexpr uint32_t McVmL2CounterBlockNumCounters = 16;
#endif
static constexpr uint32_t RpbCounterBlockNumCounters = 4;
static constexpr uint32_t RmiCounterBlockNumCounters = 4;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent = CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_ME2_DC1_SPI_BUSY;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CPF_UTCL2IU_STALL;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_CPG_UTCL2IU_STALL;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_GWS_BYPASS;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t UmcCounterBlockMaxEvent = 255;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SPI_PERF_VWC_CSC_WR;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent = SX_PERF_SEL_DB3_SIZE;
|
||||
static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
static const uint32_t TdCounterBlockMaxEvent = TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt;
|
||||
static const uint32_t VgtCounterBlockMaxEvent = vgt_perf_sclk_te11_vld;
|
||||
static const uint32_t WdCounterBlockMaxEvent = wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent = RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
|
||||
} // namespace gfx9
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX9_BLOCKINFO_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,727 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_PRIMITIVES_H_
|
||||
#define _GFX9_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
#define COPY_DATA_SEL_REG 0 ///< Mem-mapped register
|
||||
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4
|
||||
#define COPY_DATA_SEL_COUNT_1DW 0 ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
// Expands to a lambda that takes a counter descriptor and returns the
// reg_name select value with PERF_SEL set to counter_des.id.
#define select_value(reg_name)                                     \
  [](const counter_des_t& counter_des) -> uint32_t {               \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
  }
|
||||
// Expands to a lambda that takes a counter descriptor and returns the
// reg_name select value with PERFCOUNTER_SELECT set to counter_des.id.
#define select_value_t2(reg_name)                                            \
  [](const counter_des_t& counter_des) -> uint32_t {                         \
    return SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \
  }
|
||||
// Expands to a lambda that takes a counter descriptor and returns the
// reg_name select value with CNTR_SEL0 set to counter_des.id.
#define select_value_t3(reg_name)                                   \
  [](const counter_des_t& counter_des) -> uint32_t {                \
    return SET_REG_FIELD_BITS(reg_name, CNTR_SEL0, counter_des.id); \
  }
|
||||
// Expands to a lambda that takes a counter descriptor and returns the
// reg_name select value with PERF_SEL = counter_des.id,
// PERF_MODE = PERFMON_COUNTER_MODE_ACCUM and ENABLE = 1.
#define mc_select_value(reg_name)                                                 \
  [](const counter_des_t& counter_des) -> uint32_t {                              \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |               \
           SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) |  \
           SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                               \
  }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx9 {
|
||||
|
||||
class gfx9_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 9;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, mmGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, mmCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmCP_PERFMON_CNTL);
|
||||
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SQ perf-counter and thread-trace register addresses.
// Entries written as `{}` are value-initialized rather than mapped through
// REG_32B_ADDR.
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR{};
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR{};
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_MASK);

// Thread-trace mask / token / mode registers.
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_PERF_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK2);
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MODE);

// Thread-trace buffer registers.
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BASE);
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BASE2);
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_SIZE);
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CTRL);
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_HIWATER);
|
||||
// Value paired with SQ_THREAD_TRACE_HIWATER_ADDR. constexpr (implicitly inline
// in C++17) so ODR-use does not require an out-of-class definition.
static constexpr uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
|
||||
// Thread-trace status, counter and write-pointer register addresses.
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_WPTR);
|
||||
// SQ_THREAD_TRACE_STATUS register with its offset rebased by subtracting
// UCONFIG_SPACE_START; computed at compile time by an immediately-invoked
// lambda.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = [] {
  Register status_reg = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
  status_reg.offset -= UCONFIG_SPACE_START;
  return status_reg;
}();
|
||||
// Right-shift applied to thread-trace buffer addresses and sizes before they
// are written to registers (2^12 = 4096-byte granularity). constexpr
// (implicitly inline in C++17) so ODR-use needs no out-of-class definition.
static constexpr uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM register addresses (GC block, instance 0).
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_MC_CNTL);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_SEGMENT_SIZE);

// The CORE1 segment-size register comes from the register headers when they
// define it; otherwise the hard-coded offset 0xDCAF is used.
#if defined(regRLC_SPM_PERFMON_SEGMENT_SIZE_CORE1)
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE_CORE1);
#else
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR = Register(0xDCAF);
#endif

// Mux-select address/data registers for the global and per-SE segments.
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);
|
||||
// SPM sample-line layout constants. RLC_SPM_COUNTERS_PER_LINE is the modulus
// used by spm_mux_ram_idx_incr(). constexpr (implicitly inline in C++17) so
// ODR-use needs no out-of-class definition.
static constexpr uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
static constexpr uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Thread-trace user-data registers 0..3.
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// Returns the SQ_PERFCOUNTERn_SELECT register for counter slot `index`.
// Only the low four bits of `index` are used, so values >= 16 wrap around
// into the 16-entry table instead of reading out of bounds.
static Register sqtt_perfcounter_addr(uint32_t index) {
  static const Register kSelectRegs[16] = {
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_SELECT),
  };
  const uint32_t slot = index & 0xF;
  return kSelectRegs[slot];
}
|
||||
|
||||
// 16-bit SPM mux-select entry, accessible either as the raw word (`data`)
// or through the `gfx` bit-field view.
union mux_info_t {
  uint16_t data;  // raw 16-bit value
  struct {
    uint16_t counter : 6;   // 6-bit counter field
    uint16_t block : 5;     // 5-bit block field
    uint16_t instance : 5;  // 5-bit instance field
  } gfx;
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = SqCounterBlockId;
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the least-significant 32 bits of `v`.
static uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v);
}
|
||||
// Returns the most-significant 32 bits of `v`.
static uint32_t High32(const uint64_t& v) {
  const uint64_t upper = v >> 32;
  return static_cast<uint32_t>(upper);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sh_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_INDEX, sh_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sh_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_INDEX, sh_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
// Stub: ignores all three arguments and always returns 0.
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t&, const uint32_t&, const uint32_t&) {
  return 0u;
}
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
// Stub: ignores all four arguments and always returns 0.
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t&, const uint32_t&,
                                                const uint32_t&, const uint32_t&) {
  return 0u;
}
|
||||
|
||||
// CP_PERFMON_CNTL value used to reset the counters: every field is zero.
static uint32_t cp_perfmon_cntl_reset_value() {
  return uint32_t{0};
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Disable register value to enable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SIMD_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_CLIENT_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_perfcounter0_select;
|
||||
}
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SIMD_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_CLIENT_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_perfcounter0_select;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value
|
||||
static uint32_t sq_mask_value(const counter_des_t&) {
|
||||
uint32_t sq_perfcounter_mask = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_MASK, SH0_MASK, 0xFFFF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_MASK, SH1_MASK, 0xFFFF);
|
||||
return sq_perfcounter_mask;
|
||||
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_perfcounter_ctrl{0};
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} else if (block_id == SqVsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} else if (block_id == SqPsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
#if defined(SQ_PERFCOUNTER_CTRL__VMID_MASK__SHIFT)
|
||||
sq_perfcounter_ctrl |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VMID_MASK, 0xFFFF);
|
||||
#else
|
||||
sq_perfcounter_ctrl |= 0xFFFF0000;
|
||||
#endif
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// Validates the combined attribute bits of a counter set.
// When SQ_CONFLICT_CHECK is 1, the process is aborted if both the SQ and the
// TC attribute bits are present; otherwise this is a no-op.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t sq_and_tc = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & sq_and_tc) == sq_and_tc) {
    abort();
  }
#endif
}
|
||||
|
||||
// SQ Counter Control enable perfomance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
#if defined(SQ_PERFCOUNTER_CTRL__VMID_MASK__SHIFT)
|
||||
sq_perfcounter_ctrl |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VMID_MASK, 0xFFFF);
|
||||
#else
|
||||
sq_perfcounter_ctrl |= 0xFFFF0000;
|
||||
#endif
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
// Stub: always returns 0.
static uint32_t sq_control2_enable_value() {
  return 0u;
}
|
||||
// Stub: always returns 0.
static uint32_t sq_control2_disable_value() {
  return 0u;
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// Returns the counter descriptor's `index` unchanged.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  const uint32_t channel = counter_des.index;
  return channel;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_MC_VM_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(MC_VM_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
// MC reset value: the CLEAR_ALL mask.
static uint32_t mc_reset_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
}
|
||||
// MC start value: the ENABLE_ANY mask.
static uint32_t mc_start_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
}
|
||||
|
||||
static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = select_value(CB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = select_value(DB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_PA_SU_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SU_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_PA_SC_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCA_PERFCOUNTER0_SELECT =
|
||||
select_value(TCA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCC_PERFCOUNTER0_SELECT =
|
||||
select_value(TCC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = select_value(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_VGT_PERFCOUNTER0_SELECT =
|
||||
select_value(VGT_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = select_value(IA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = select_value(WD_PERFCOUNTER0_SELECT);
|
||||
|
||||
// Disabled select_value_t2 variants, kept for reference:
// static auto constexpr select_value_SX_PERFCOUNTER0_SELECT =
//     select_value_t2(SX_PERFCOUNTER0_SELECT);
// static auto constexpr select_value_GDS_PERFCOUNTER0_SELECT =
//     select_value_t2(GDS_PERFCOUNTER0_SELECT);
|
||||
|
||||
// SX and GDS select-value helpers: both ignore the counter descriptor and
// return 0.
static constexpr auto select_value_SX_PERFCOUNTER0_SELECT =
    [](const counter_des_t& counter_des) -> uint32_t { return 0; };
static constexpr auto select_value_GDS_PERFCOUNTER0_SELECT =
    [](const counter_des_t& counter_des) -> uint32_t { return 0; };
|
||||
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 1); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 1); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
// Builds a mux-select entry from a counter descriptor: counter <- index,
// block <- block_info->spm_block_id, instance <- block_des.index. Values are
// truncated to the bit-field widths of mux_info_t.
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t entry{0};
  entry.gfx.instance = counter_des.block_des.index;
  entry.gfx.block = counter_des.block_info->spm_block_id;
  entry.gfx.counter = counter_des.index;
  return entry;
}
|
||||
// Builds a mux-select entry from explicit field values. Each value is
// truncated to the bit-field width of the matching mux_info_t field.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t entry{0};
  entry.gfx.instance = instance;
  entry.gfx.block = block;
  entry.gfx.counter = counter;
  return entry;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// SDMA primitives
|
||||
// Stub: always returns 0.
static uint32_t sdma_disable_clear_value() {
  return 0u;
}
|
||||
|
||||
// Stub: always returns 0.
static uint32_t sdma_enable_value() {
  return 0u;
}
|
||||
|
||||
// Stub: ignores the counter descriptor and always returns 0.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// Stub: ignores the counter descriptor and always returns 0.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// SPM trace routines
|
||||
static uint32_t rlc_spm_mc_cntl_value() {
|
||||
uint32_t rlc_spm_mc_cntl = SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
|
||||
return rlc_spm_mc_cntl;
|
||||
}
|
||||
static uint32_t cp_perfmon_cntl_spm_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
static uint32_t cp_perfmon_cntl_spm_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Stub: ignores all arguments and always returns 0.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  return 0u;
}
|
||||
|
||||
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
|
||||
uint32_t rlc_spm_perfmon_cntl =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
|
||||
return rlc_spm_perfmon_cntl;
|
||||
}
|
||||
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
|
||||
const uint32_t& se_count) {
|
||||
const uint32_t global_nlines = global_count;
|
||||
const uint32_t se_nlines = se_count;
|
||||
const uint32_t segment_size = (global_nlines + (4 * se_nlines));
|
||||
uint32_t rlc_spm_perfmon_segment_size =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_LINE, global_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
|
||||
return rlc_spm_perfmon_segment_size;
|
||||
}
|
||||
|
||||
// RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1 value.
// When the register headers define the CORE1 fields, SE4..SE7_NUM_LINE each
// take `se_count` and PERFMON_SEGMENT_SIZE_CORE1 is 4 * se_count. Without
// those definitions the function returns 0.
// The locals live inside the #if so that builds lacking the CORE1 fields do
// not carry unused variables.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
#if defined(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__PERFMON_SEGMENT_SIZE_CORE1__SHIFT)
  const uint32_t se_nlines = se_count;
  const uint32_t segment_size = 4 * se_nlines;
  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, PERFMON_SEGMENT_SIZE_CORE1,
                            segment_size) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE4_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE5_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE6_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE7_NUM_LINE, se_nlines);
#else
  (void)se_count;  // no CORE1 register fields available in this build
  return 0;
#endif
}
|
||||
|
||||
// Enable Thread Trace for all VM Id's
|
||||
// Enable all of the SIMD's of the compute unit
|
||||
// Enable Compute Unit (CU) at index Zero to be used for fine-grained data
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
//
|
||||
// @note: Not enabling REG_STALL_EN, SPI_STALL_EN and SQ_STALL_EN bits. They
|
||||
// are useful if we wish to program buffer throttling.
|
||||
//
|
||||
static uint32_t sqtt_mask_value(uint32_t targetCu, uint32_t simd, uint32_t vmIdMask) {
|
||||
uint32_t sq_thread_trace_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SH_SEL, 0x0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_EN, simd) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, CU_SEL, targetCu) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SQ_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SPI_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, REG_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, VM_ID_MASK, vmIdMask);
|
||||
return sq_thread_trace_mask;
|
||||
}
|
||||
|
||||
// Mask of compute units to get thread trace data from
|
||||
static uint32_t sqtt_perf_mask_value() {
|
||||
uint32_t sq_thread_trace_perf_mask =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_PERF_MASK, SH0_MASK, 0xFFFF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_PERF_MASK, SH1_MASK, 0xFFFF);
|
||||
return sq_thread_trace_perf_mask;
|
||||
}
|
||||
|
||||
// Thread-trace token bits. They are OR-ed together to form the TOKEN_MASK
// field in the sqtt_token_mask_*_value() helpers.
// constexpr (implicitly inline in C++17) so ODR-use needs no out-of-class
// definition.
static constexpr uint32_t SQTT_TOKEN_MISC = 1 << 0;
static constexpr uint32_t SQTT_TOKEN_TIME = 1 << 1;
static constexpr uint32_t SQTT_TOKEN_REG = 1 << 2;
static constexpr uint32_t SQTT_TOKEN_WAVE_START = 1 << 3;
static constexpr uint32_t SQTT_TOKEN_REG_CS = 1 << 5;
static constexpr uint32_t SQTT_TOKEN_WAVE_END = 1 << 6;
static constexpr uint32_t SQTT_TOKEN_INST = 1 << 10;
static constexpr uint32_t SQTT_TOKEN_INST_PC = 1 << 11;
static constexpr uint32_t SQTT_TOKEN_USERDATA = 1 << 12;
static constexpr uint32_t SQTT_TOKEN_ISSUE = 1 << 13;
static constexpr uint32_t SQTT_TOKEN_REG_CS_PRIV = 1 << 15;
|
||||
|
||||
static uint32_t sqtt_token_mask_on_value() {
|
||||
uint32_t sq_thread_trace_token_mask;
|
||||
uint32_t sq_thread_trace_token_mask_token_mask =
|
||||
SQTT_TOKEN_MISC | SQTT_TOKEN_TIME | SQTT_TOKEN_REG | SQTT_TOKEN_WAVE_START |
|
||||
SQTT_TOKEN_WAVE_END | SQTT_TOKEN_INST | SQTT_TOKEN_INST_PC | SQTT_TOKEN_USERDATA |
|
||||
SQTT_TOKEN_ISSUE | SQTT_TOKEN_REG_CS | SQTT_TOKEN_REG_CS_PRIV;
|
||||
|
||||
sq_thread_trace_token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK,
|
||||
sq_thread_trace_token_mask_token_mask);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
static uint32_t sqtt_token_mask_off_value() {
|
||||
uint32_t sq_thread_trace_token_mask =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0x0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK, 0xF);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
static uint32_t sqtt_token_mask_occupancy_value() {
|
||||
uint32_t sq_thread_trace_token_mask;
|
||||
uint32_t sq_thread_trace_token_mask_token_mask =
|
||||
SQTT_TOKEN_MISC | SQTT_TOKEN_TIME | SQTT_TOKEN_REG | SQTT_TOKEN_WAVE_START |
|
||||
SQTT_TOKEN_WAVE_END | SQTT_TOKEN_REG_CS_PRIV | SQTT_TOKEN_REG_CS | SQTT_TOKEN_USERDATA;
|
||||
|
||||
sq_thread_trace_token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK,
|
||||
sq_thread_trace_token_mask_token_mask);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
// Indicate the different TT tokens that specify instruction operations to be logged
|
||||
// Disabling specifically instruction operations updating Program Counter (PC).
|
||||
// @note: The field is defined in the spec incorrectly as a 16-bit value
|
||||
static uint32_t sqtt_token_mask2_value() {
|
||||
uint32_t sq_thread_trace_token_mask2 =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK2, INST_MASK, 0xFFFFFFFF);
|
||||
return sq_thread_trace_token_mask2;
|
||||
}
|
||||
|
||||
// Check if stalling is supported
|
||||
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
|
||||
return GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SQ_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SPI_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, REG_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_DROP_ON_STALL, token_mask_val);
|
||||
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
static uint32_t sqtt_mode_off_value() {
|
||||
uint32_t sq_thread_trace_mode =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, WRAP, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, CAPTURE_MODE, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MASK_CS, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, AUTOFLUSH_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MODE, SQ_THREAD_TRACE_MODE_OFF);
|
||||
return sq_thread_trace_mode;
|
||||
}
|
||||
// Thread trace mode ON value
|
||||
static uint32_t sqtt_mode_on_value() {
|
||||
uint32_t sq_thread_trace_mode =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, WRAP, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, CAPTURE_MODE, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MASK_CS, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, AUTOFLUSH_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MODE, SQ_THREAD_TRACE_MODE_ON);
|
||||
return sq_thread_trace_mode;
|
||||
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
|
||||
uint32_t sq_thread_trace_base =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BASE, ADDR, Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_base;
|
||||
}
|
||||
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
|
||||
uint32_t sq_thread_trace_base = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BASE2, ADDR_HI,
|
||||
High32(base_addr >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_base;
|
||||
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
|
||||
uint32_t sq_thread_trace_size =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_SIZE, SIZE, (size_val >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_size;
|
||||
}
|
||||
|
||||
// Stub: ignores `size_val` and always returns 0.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  return 0u;
}
|
||||
|
||||
// Stub: ignores `enableSqgEvents` and always returns 0.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  return 0u;
}
|
||||
|
||||
// Always returns 0.
static uint32_t sqtt_zero_size_value() {
  return 0u;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
static uint32_t sqtt_ctrl_value(bool on) {
|
||||
uint32_t sq_thread_trace_ctrl = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RESET_BUFFER, 1);
|
||||
return sq_thread_trace_ctrl;
|
||||
}
|
||||
|
||||
// SPM primitives
|
||||
// Returns the constant 0xF0F0.
static uint16_t spm_timestamp_muxsel() {
  return uint16_t{0xF0F0};
}
|
||||
|
||||
// Bit masks applied to thread-trace status / write-pointer values.
enum ESQTT_STATUS_MASK {
  TT_CONTROL_UTC_ERR_MASK = 0x10000000,  // a memory error was received
  TT_CONTROL_FULL_MASK = 0x80000000,     // the SQTT buffer wrapped
  TT_WRITE_PTR_MASK = 0x3FFFFFFF         // low 30 bits
};
|
||||
|
||||
// Mask with only bit 30 (the busy bit) set.
static uint32_t sqtt_busy_mask() {
  const uint32_t kBusyBit = 30;
  const uint32_t mask = 1u << kBusyBit;
  return mask;
}
|
||||
|
||||
// Mask with the low NUM_PIPES (8) bits set, one bit per pipe.
static uint32_t sqtt_pending_mask() {
  const uint32_t kNumPipes = 8;
  const uint32_t one_past_top = 1u << kNumPipes;
  return one_past_top - 1;
}
|
||||
};
|
||||
|
||||
} // namespace gfx9
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX9_PRIMITIVES_H_
|
||||
@@ -0,0 +1,109 @@
|
||||
#ifndef _GPU_BLOCKINFO_H_
|
||||
#define _GPU_BLOCKINFO_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Counter block attribute flags. Values are OR-ed together to describe a
// block.
enum CounterBlockAttr {
  CounterBlockDfltAttr = 0x000001,       // Default block attribute
  CounterBlockSeAttr = 0x000002,         // Per ShaderEngine blocks
  CounterBlockSqAttr = 0x000004,         // SQ blocks
  CounterBlockCleanAttr = 0x000008,      // Need to clean counter registers
  CounterBlockMcAttr = 0x000010,         // MC Block
  CounterBlockCpmonAttr = 0x00001f,      // CP PERFMON controllable blocks
  CounterBlockSdmaAttr = 0x000100,       // SDMA block
  CounterBlockTcAttr = 0x000400,         // Texture cache
  CounterBlockExplInstAttr = 0x000800,   // Explicitly indexed blocks
  CounterBlockSpmGlobalAttr = 0x001000,  // SPM blocks (global)
  CounterBlockSpmSeAttr = 0x002000,      // SPM blocks (per SE)
  CounterBlockGusAttr = 0x004000,        // GUS block
  CounterBlockGRBMAttr = 0x008000,       // GRBM block
  CounterBlockUmcAttr = 0x010000,        // UMC blocks
  CounterBlockSaAttr = 0x020000,         // SE and SA-dependent blocks
  CounterBlockAidAttr = 0x040000,        // MI300 AID blocks
  CounterBlockSPIAttr = 0x080000,        // SPI counter
  CounterBlockWgpAttr = 0x100000,        // Blocks within WGP
};
|
||||
|
||||
// Register addresses corresponding to each counter.
struct CounterRegInfo {
  uint32_t select_addr;       // counter select register address
  uint32_t control_addr;      // counter control register address
  uint32_t register_addr_lo;  // counter register address low
  uint32_t register_addr_hi;  // counter register address high
};
|
||||
|
||||
// Block delay info: a (register, value) pair.
// NOTE(review): presumably `val` is the value programmed into register `reg`
// to configure a block's delay -- confirm against the users of delay_info.
struct BlockDelayInfo {
  uint32_t reg;  // register
  uint32_t val;  // value paired with the register
};
|
||||
|
||||
struct counter_des_t;
|
||||
|
||||
// GPU Block info definition
|
||||
struct GpuBlockInfo {
|
||||
// Unique string identifier of the block.
|
||||
const char* name;
|
||||
// Block ID
|
||||
uint32_t id;
|
||||
// Maximum number of block instances in the group per shader array
|
||||
uint32_t instance_count;
|
||||
// Maximum counter event ID
|
||||
uint32_t event_id_max;
|
||||
// Maximum number of counters that can be enabled at once
|
||||
uint32_t counter_count;
|
||||
// Counter registers addresses
|
||||
const CounterRegInfo* counter_reg_info;
|
||||
// Counter select value function
|
||||
uint32_t (*select_value)(const counter_des_t&);
|
||||
// Block attributes mask
|
||||
uint32_t attr;
|
||||
// Block delay info
|
||||
const BlockDelayInfo* delay_info;
|
||||
// SPM block id
|
||||
uint32_t spm_block_id;
|
||||
};
|
||||
|
||||
// Block descriptor: an (id, index) pair.
// NOTE(review): `index` presumably selects an instance of the block with the
// given id -- confirm against callers.
struct block_des_t {
  uint32_t id;     // block id
  uint32_t index;  // block index
};
|
||||
|
||||
// block_des_t less then functor
|
||||
struct lt_block_des {
|
||||
bool operator()(const block_des_t& a1, const block_des_t& a2) const {
|
||||
return (a1.id < a2.id) || ((a1.id == a2.id) && (a1.index < a2.index));
|
||||
}
|
||||
};
|
||||
|
||||
// Counter descriptor
|
||||
struct counter_des_t {
|
||||
uint32_t id;
|
||||
uint32_t index;
|
||||
block_des_t block_des;
|
||||
const GpuBlockInfo* block_info;
|
||||
};
|
||||
|
||||
#endif // _GPU_BLOCKINFO_H_
|
||||
@@ -0,0 +1,58 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef INC_AQL_PROFILE_H_
|
||||
#define INC_AQL_PROFILE_H_
|
||||
|
||||
#if 0
|
||||
// Profiling parameters.
// All parameters are generic; if one is not applicable to a specific profile
// configuration, an error status will be returned.
typedef enum {
  // --- SQTT applicable parameters ---
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5
} hsa_ven_amd_aqlprofile_parameter_name_t;
|
||||
|
||||
// Profile attributes.
// Each enumerator's comment describes what get_info returns for it.
typedef enum {
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0,
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1,
  // get_info returns PMC uint64_t value in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2,
  // get_info returns SQTT buffer ptr/size in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA = 3,
  // get_info returns number of block counter
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4,
  // get_info returns block id, instances by name string using _id_query_t
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5,
  // get_info returns size/pointer for counters enable command buffer
  HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6,
} hsa_ven_amd_aqlprofile_info_type_t;
|
||||
#endif
|
||||
|
||||
#endif // INC_AQL_PROFILE_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,678 @@
|
||||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NVD_H
|
||||
#define NVD_H
|
||||
|
||||
/**
|
||||
* Navi's PM4 definitions
|
||||
*/
|
||||
/* PM4 packet type codes; the type occupies header bits [31:30]
 * (see CP_PACKET_GET_TYPE).
 */
#define PACKET_TYPE0 0
#define PACKET_TYPE1 1
#define PACKET_TYPE2 2
#define PACKET_TYPE3 3

/* Header field extractors:
 *   type   - bits [31:30]
 *   count  - bits [29:16]
 *   reg    - bits [15:0]  (type-0 packets)
 *   opcode - bits [15:8]  (type-3 packets)
 */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Header builders.
 * The packet type is cast to unsigned before the shift: PACKET_TYPE3 << 30
 * does not fit in a signed int, which made the header a negative value that
 * sign-extends when widened to 64 bits (and is undefined behaviour in C).
 */
#define PACKET0(reg, n) ((((unsigned)PACKET_TYPE0) << 30) | \
                         ((reg) & 0xFFFF) | \
                         ((n) & 0x3FFF) << 16)
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)

#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n) ((((unsigned)PACKET_TYPE3) << 30) | \
                        (((op) & 0xFF) << 8) | \
                        ((n) & 0x3FFF) << 16)

/* Type-3 header with bit 1 additionally set. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
|
||||
|
||||
/* Packet 3 types */
|
||||
#define PACKET3_NOP 0x10
|
||||
#define PACKET3_SET_BASE 0x11
|
||||
#define PACKET3_BASE_INDEX(x) ((x) << 0)
|
||||
#define CE_PARTITION_BASE 3
|
||||
#define PACKET3_CLEAR_STATE 0x12
|
||||
#define PACKET3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PACKET3_DISPATCH_DIRECT 0x15
|
||||
#define PACKET3_DISPATCH_INDIRECT 0x16
|
||||
#define PACKET3_INDIRECT_BUFFER_END 0x17
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
|
||||
#define PACKET3_ATOMIC_GDS 0x1D
|
||||
#define PACKET3_ATOMIC_MEM 0x1E
|
||||
#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_OCCLUSION_QUERY 0x1F
|
||||
#define PACKET3_SET_PREDICATION 0x20
|
||||
#define PACKET3_REG_RMW 0x21
|
||||
#define PACKET3_COND_EXEC 0x22
|
||||
#define PACKET3_PRED_EXEC 0x23
|
||||
#define PACKET3_DRAW_INDIRECT 0x24
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PACKET3_INDEX_BASE 0x26
|
||||
#define PACKET3_DRAW_INDEX_2 0x27
|
||||
#define PACKET3_CONTEXT_CONTROL 0x28
|
||||
#define PACKET3_INDEX_TYPE 0x2A
|
||||
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PACKET3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PACKET3_NUM_INSTANCES 0x2F
|
||||
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
/* Engine selector placed at bit 30 and up. The argument is cast to unsigned
 * so that selector 2 (which sets bit 31) does not overflow a signed int,
 * matching the PACKET3_*__FIELD(x) macros in this file.
 */
#define WRITE_DATA_ENGINE_SEL(x) (((unsigned)(x)) << 30)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
|
||||
#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
|
||||
#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
/* MEM_SEMAPHORE control bits.
 * The select values use unsigned literals: 0x6 << 29 and 0x7 << 29 set
 * bit 31 and therefore do not fit in a signed int.
 */
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
# define PACKET3_SEM_SEL_SIGNAL (0x6u << 29)
# define PACKET3_SEM_SEL_WAIT (0x7u << 29)
|
||||
#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
|
||||
#define PACKET3_COPY_DW 0x3B
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
|
||||
#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
|
||||
#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
|
||||
#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
|
||||
#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
|
||||
#define PACKET3_CP_DMA 0x41
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_SURFACE_SYNC 0x43
|
||||
#define PACKET3_ME_INITIALIZE 0x44
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
#define PACKET3_EVENT_WRITE 0x46
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
*/
|
||||
#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
|
||||
#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
|
||||
#define PACKET3_EVENT_WRITE_EOP 0x47
|
||||
#define PACKET3_EVENT_WRITE_EOS 0x48
|
||||
#define PACKET3_RELEASE_MEM 0x49
|
||||
#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
|
||||
#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
|
||||
#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
|
||||
#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - cache_policy__me_release_mem__lru
|
||||
* 1 - cache_policy__me_release_mem__stream
|
||||
* 2 - cache_policy__me_release_mem__noa
|
||||
* 3 - cache_policy__me_release_mem__bypass
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
|
||||
|
||||
/* Data selector placed at bit 29 and up. The argument is cast to unsigned so
 * that selectors >= 4 (which set bit 31) do not overflow a signed int.
 */
#define PACKET3_RELEASE_MEM_DATA_SEL(x) (((unsigned)(x)) << 29)
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
* 2 - send 64bit data
|
||||
* 3 - send 64bit GPU counter value
|
||||
* 4 - send 64bit sys counter value
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
|
||||
/* 0 - none
|
||||
* 1 - interrupt only (DATA_SEL = 0)
|
||||
* 2 - interrupt when data write is confirmed
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
|
||||
/* 0 - MC
|
||||
* 1 - TC/L2
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PACKET3_PREAMBLE_CNTL 0x4A
|
||||
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
|
||||
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
|
||||
#define PACKET3_DMA_DATA 0x50
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. SRC_ADDR_LO or DATA [31:0]
|
||||
* 4. SRC_ADDR_HI [31:0]
|
||||
* 5. DST_ADDR_LO [31:0]
|
||||
* 6. DST_ADDR_HI [7:0]
|
||||
* 7. COMMAND [31:26] | BYTE_COUNT [25:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
|
||||
/* 0 - ME
|
||||
* 1 - PFP
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
|
||||
/* 0 - DST_ADDR using DAS
|
||||
* 1 - GDS
|
||||
* 3 - DST_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
|
||||
/* 0 - SRC_ADDR using SAS
|
||||
* 1 - GDS
|
||||
* 2 - DATA
|
||||
* 3 - SRC_ADDR using L2
|
||||
*/
|
||||
/* CP_SYNC is bit 31 of the CONTROL dword; an unsigned literal is used
 * because 1 << 31 does not fit in a signed int.
 */
# define PACKET3_DMA_DATA_CP_SYNC (1u << 31)
|
||||
/* COMMAND */
|
||||
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||
#define PACKET3_CONTEXT_REG_RMW 0x51
|
||||
#define PACKET3_GFX_CNTX_UPDATE 0x52
|
||||
#define PACKET3_BLK_CNTX_UPDATE 0x53
|
||||
#define PACKET3_INCR_UPDT_STATE 0x55
|
||||
#define PACKET3_ACQUIRE_MEM 0x58
|
||||
/* 1. HEADER
 * 2. COHER_CNTL [30:0]
 * 2.1 ENGINE_SEL [31:31]
 * 3. COHER_SIZE [31:0]
 * 4. COHER_SIZE_HI [7:0]
 * 5. COHER_BASE_LO [31:0]
 * 6. COHER_BASE_HI [23:0]
 * 7. POLL_INTERVAL [15:0]
 * 8. GCR_CNTL [18:0]
 */
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
|
||||
/*
|
||||
* 0:NOP
|
||||
* 1:ALL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:reserved
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:VOL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
|
||||
/*
|
||||
* 0: PARALLEL
|
||||
* 1: FORWARD
|
||||
* 2: REVERSE
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
|
||||
#define PACKET3_REWIND 0x59
|
||||
#define PACKET3_INTERRUPT 0x5A
|
||||
#define PACKET3_GEN_PDEPTE 0x5B
|
||||
#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
|
||||
#define PACKET3_PRIME_UTCL2 0x5D
|
||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||
#define PACKET3_LOAD_SH_REG 0x5F
|
||||
#define PACKET3_LOAD_CONFIG_REG 0x60
|
||||
#define PACKET3_LOAD_CONTEXT_REG 0x61
|
||||
#define PACKET3_LOAD_COMPUTE_STATE 0x62
|
||||
#define PACKET3_LOAD_SH_REG_INDEX 0x63
|
||||
#define PACKET3_SET_CONFIG_REG 0x68
|
||||
#define PACKET3_SET_CONFIG_REG_START 0x00002000
|
||||
#define PACKET3_SET_CONFIG_REG_END 0x00002c00
|
||||
#define PACKET3_SET_CONTEXT_REG 0x69
|
||||
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
|
||||
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
|
||||
#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
|
||||
#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
|
||||
#define PACKET3_SET_SH_REG_DI 0x72
|
||||
#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
|
||||
#define PACKET3_SET_SH_REG_DI_MULTI 0x74
|
||||
#define PACKET3_GFX_PIPE_LOCK 0x75
|
||||
#define PACKET3_SET_SH_REG 0x76
|
||||
#define PACKET3_SET_SH_REG_START 0x00002c00
|
||||
#define PACKET3_SET_SH_REG_END 0x00003000
|
||||
#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
|
||||
#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
|
||||
#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
|
||||
#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
|
||||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
|
||||
#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
|
||||
#define PACKET3_FORWARD_HEADER 0x7C
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
#define PACKET3_WRITE_CONST_RAM 0x81
|
||||
#define PACKET3_DUMP_CONST_RAM 0x83
|
||||
#define PACKET3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW 0x8D
|
||||
#define PACKET3_DISPATCH_DRAW_ACE 0x8D
|
||||
#define PACKET3_GET_LOD_STATS 0x8E
|
||||
#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
|
||||
#define PACKET3_FRAME_CONTROL 0x90
|
||||
# define FRAME_TMZ (1 << 0)
|
||||
# define FRAME_CMD(x) ((x) << 28)
|
||||
/*
|
||||
* x=0: tmz_begin
|
||||
* x=1: tmz_end
|
||||
*/
|
||||
#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
|
||||
#define PACKET3_WAIT_REG_MEM64 0x93
|
||||
#define PACKET3_COND_PREEMPT 0x94
|
||||
#define PACKET3_HDP_FLUSH 0x95
|
||||
#define PACKET3_COPY_DATA_RB 0x96
|
||||
#define PACKET3_INVALIDATE_TLBS 0x98
|
||||
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
|
||||
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
|
||||
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
|
||||
# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_AQL_PACKET 0x99
|
||||
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
|
||||
#define PACKET3_SET_SH_REG_INDEX 0x9B
|
||||
#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
|
||||
#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
|
||||
#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
|
||||
#define PACKET3_SET_RESOURCES 0xA0
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. QUEUE_MASK_LO [31:0]
|
||||
* 4. QUEUE_MASK_HI [31:0]
|
||||
* 5. GWS_MASK_LO [31:0]
|
||||
* 6. GWS_MASK_HI [31:0]
|
||||
* 7. OAC_MASK [15:0]
|
||||
* 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
*/
|
||||
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
|
||||
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
|
||||
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_MAP_PROCESS 0xA1
|
||||
#define PACKET3_MAP_QUEUES 0xA2
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. MQD_ADDR_LO [31:0]
|
||||
* 5. MQD_ADDR_HI [31:0]
|
||||
* 6. WPTR_ADDR_LO [31:0]
|
||||
* 7. WPTR_ADDR_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
|
||||
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
|
||||
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
|
||||
# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
|
||||
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2 */
|
||||
# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
|
||||
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
#define PACKET3_UNMAP_QUEUES 0xA3
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. CONTROL3
|
||||
* 5. CONTROL4
|
||||
* 6. CONTROL5
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
|
||||
/* 0 - PREEMPT_QUEUES
|
||||
* 1 - RESET_QUEUES
|
||||
* 2 - DISABLE_PROCESS_QUEUES
|
||||
* 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
*/
|
||||
# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
|
||||
/* CONTROL3a */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
|
||||
/* CONTROL3b */
|
||||
# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
|
||||
/* CONTROL4 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
|
||||
/* CONTROL5 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
|
||||
#define PACKET3_QUERY_STATUS 0xA4
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. ADDR_LO [31:0]
|
||||
* 5. ADDR_HI [31:0]
|
||||
* 6. DATA_LO [31:0]
|
||||
* 7. DATA_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
|
||||
# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
|
||||
# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
|
||||
#define PACKET3_RUN_LIST 0xA5
|
||||
#define PACKET3_MAP_PROCESS_VM 0xA6
|
||||
|
||||
#define PACKET3_RUN_CLEANER_SHADER 0xD2
|
||||
/* 1. header
|
||||
* 2. RESERVED [31:0]
|
||||
*/
|
||||
|
||||
/* GFX11 */
|
||||
#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,583 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef SOC15_H
|
||||
#define SOC15_H
|
||||
|
||||
#define GFX9_NUM_GFX_RINGS 1
|
||||
#define GFX9_NUM_COMPUTE_RINGS 8
|
||||
|
||||
/*
|
||||
* PM4
|
||||
*/
|
||||
#define PACKET_TYPE0 0
|
||||
#define PACKET_TYPE1 1
|
||||
#define PACKET_TYPE2 2
|
||||
#define PACKET_TYPE3 3
|
||||
|
||||
/* Extract the 2-bit packet type stored in bits [31:30] of header h. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & ((1 << 2) - 1))
|
||||
/* Extract the 14-bit count field stored in bits [29:16] of header h. */
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & ((1 << 14) - 1))
|
||||
/* Extract the 16-bit register field stored in bits [15:0] of type-0 header h. */
#define CP_PACKET0_GET_REG(h) ((h) & ((1 << 16) - 1))
|
||||
/* Extract the 8-bit opcode stored in bits [15:8] of type-3 header h. */
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & ((1 << 8) - 1))
|
||||
/*
 * Build a type-0 packet header:
 *   bits [31:30]  PACKET_TYPE0
 *   bits [29:16]  n   (masked to 14 bits)
 *   bits [15:0]   reg (masked to 16 bits)
 * The continuation lines must follow each backslash directly, and the
 * shifted count term is parenthesized explicitly rather than relying on
 * '<<' binding tighter than '|'.
 */
#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
			 ((reg) & 0xFFFF) | \
			 (((n) & 0x3FFF) << 16))
|
||||
#define CP_PACKET2 0x80000000
|
||||
#define PACKET2_PAD_SHIFT 0
|
||||
#define PACKET2_PAD_MASK (0x3fffffff << 0)
|
||||
|
||||
/*
 * Build a type-2 packet header by OR-ing CP_PACKET2 with REG_SET(PACKET2_PAD, v).
 * NOTE(review): REG_SET is not defined in this part of the file; presumably it
 * combines PACKET2_PAD_SHIFT / PACKET2_PAD_MASK - confirm it is provided by
 * whoever includes this header before using PACKET2().
 */
#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
|
||||
|
||||
/*
 * Build a type-3 packet header:
 *   bits [31:30]  PACKET_TYPE3
 *   bits [29:16]  n  (masked to 14 bits)
 *   bits [15:8]   op (masked to 8 bits)
 * The type field is shifted as unsigned: shifting the signed constant 3 left
 * by 30 does not fit in a signed int and would sign-extend when the header is
 * widened to a 64-bit value.
 */
#define PACKET3(op, n) (((unsigned)PACKET_TYPE3 << 30) | \
			(((op) & 0xFF) << 8) | \
			(((n) & 0x3FFF) << 16))
|
||||
|
||||
/* Same header as PACKET3(op, n) with bit 1 additionally set. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
|
||||
|
||||
#define PACKETJ_CONDITION_CHECK0 0
|
||||
#define PACKETJ_CONDITION_CHECK1 1
|
||||
#define PACKETJ_CONDITION_CHECK2 2
|
||||
#define PACKETJ_CONDITION_CHECK3 3
|
||||
#define PACKETJ_CONDITION_CHECK4 4
|
||||
#define PACKETJ_CONDITION_CHECK5 5
|
||||
#define PACKETJ_CONDITION_CHECK6 6
|
||||
#define PACKETJ_CONDITION_CHECK7 7
|
||||
|
||||
#define PACKETJ_TYPE0 0
|
||||
#define PACKETJ_TYPE1 1
|
||||
#define PACKETJ_TYPE2 2
|
||||
#define PACKETJ_TYPE3 3
|
||||
#define PACKETJ_TYPE4 4
|
||||
#define PACKETJ_TYPE5 5
|
||||
#define PACKETJ_TYPE6 6
|
||||
#define PACKETJ_TYPE7 7
|
||||
|
||||
/*
 * Build a PACKETJ header:
 *   bits [31:28]  type (masked to 4 bits)
 *   bits [27:24]  cond (masked to 4 bits)
 *   bits [23:18]  r    (masked to 6 bits)
 *   bits [17:0]   reg  (masked to 18 bits)
 * Every argument is parenthesized before masking so that an expression
 * argument such as (a | b) is masked as a whole instead of having '&' bind
 * to its last operand only.
 */
#define PACKETJ(reg, r, cond, type) (((reg) & 0x3FFFF) | \
				     (((r) & 0x3F) << 18) | \
				     (((cond) & 0xF) << 24) | \
				     (((type) & 0xF) << 28))
|
||||
|
||||
#define CP_PACKETJ_NOP 0x60000000
|
||||
/* Extract the 18-bit register field stored in bits [17:0] of PACKETJ header x. */
#define CP_PACKETJ_GET_REG(x) ((x) & ((1 << 18) - 1))
|
||||
/* Extract the 6-bit field stored in bits [23:18] of PACKETJ header x. */
#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & ((1 << 6) - 1))
|
||||
/* Extract the 4-bit condition field stored in bits [27:24] of PACKETJ header x. */
#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & ((1 << 4) - 1))
|
||||
/* Extract the 4-bit packet type stored in bits [31:28] of PACKETJ header x. */
#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & ((1 << 4) - 1))
|
||||
|
||||
/* Packet 3 types */
|
||||
#define PACKET3_NOP 0x10
|
||||
#define PACKET3_SET_BASE 0x11
|
||||
#define PACKET3_BASE_INDEX(x) ((x) << 0)
|
||||
#define CE_PARTITION_BASE 3
|
||||
#define PACKET3_CLEAR_STATE 0x12
|
||||
#define PACKET3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PACKET3_DISPATCH_DIRECT 0x15
|
||||
#define PACKET3_DISPATCH_INDIRECT 0x16
|
||||
#define PACKET3_ATOMIC_GDS 0x1D
|
||||
#define PACKET3_ATOMIC_MEM 0x1E
|
||||
#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
|
||||
#define PACKET3_OCCLUSION_QUERY 0x1F
|
||||
#define PACKET3_SET_PREDICATION 0x20
|
||||
#define PACKET3_REG_RMW 0x21
|
||||
#define PACKET3_COND_EXEC 0x22
|
||||
#define PACKET3_PRED_EXEC 0x23
|
||||
#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
|
||||
#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
|
||||
#define PACKET3_DRAW_INDIRECT 0x24
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PACKET3_INDEX_BASE 0x26
|
||||
#define PACKET3_DRAW_INDEX_2 0x27
|
||||
#define PACKET3_CONTEXT_CONTROL 0x28
|
||||
#define PACKET3_INDEX_TYPE 0x2A
|
||||
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PACKET3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PACKET3_NUM_INSTANCES 0x2F
|
||||
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PACKET3_INDIRECT_BUFFER_CONST 0x33
|
||||
#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
|
||||
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
|
||||
# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
|
||||
/* Value 0x6 placed in bits [31:29]; unsigned so the shift into bit 31 does not overflow a signed int. */
# define PACKET3_SEM_SEL_SIGNAL (0x6u << 29)
|
||||
/* Value 0x7 placed in bits [31:29]; unsigned so the shift into bit 31 does not overflow a signed int. */
# define PACKET3_SEM_SEL_WAIT (0x7u << 29)
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
|
||||
#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
|
||||
#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
#define PACKET3_EVENT_WRITE 0x46
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
*/
|
||||
#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
|
||||
#define PACKET3_RELEASE_MEM 0x49
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
|
||||
#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
|
||||
#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
|
||||
#define EOP_TCL1_ACTION_EN (1 << 16)
|
||||
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
|
||||
#define EOP_TC_NC_ACTION_EN (1 << 19)
|
||||
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
|
||||
#define EOP_EXEC (1 << 28) /* For Trailing Fence */
|
||||
|
||||
#define DATA_SEL(x) ((x) << 29)
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
* 2 - send 64bit data
|
||||
* 3 - send 64bit GPU counter value
|
||||
* 4 - send 64bit sys counter value
|
||||
*/
|
||||
#define INT_SEL(x) ((x) << 24)
|
||||
/* 0 - none
|
||||
* 1 - interrupt only (DATA_SEL = 0)
|
||||
* 2 - interrupt when data write is confirmed
|
||||
*/
|
||||
#define DST_SEL(x) ((x) << 16)
|
||||
/* 0 - MC
|
||||
* 1 - TC/L2
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PACKET3_PREAMBLE_CNTL 0x4A
|
||||
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
|
||||
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
|
||||
#define PACKET3_DMA_DATA 0x50
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. SRC_ADDR_LO or DATA [31:0]
|
||||
* 4. SRC_ADDR_HI [31:0]
|
||||
* 5. DST_ADDR_LO [31:0]
|
||||
* 6. DST_ADDR_HI [7:0]
|
||||
* 7. COMMAND [30:21] | BYTE_COUNT [20:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
|
||||
/* 0 - ME
|
||||
* 1 - PFP
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
|
||||
/* 0 - DST_ADDR using DAS
|
||||
* 1 - GDS
|
||||
* 3 - DST_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
|
||||
/* 0 - SRC_ADDR using SAS
|
||||
* 1 - GDS
|
||||
* 2 - DATA
|
||||
* 3 - SRC_ADDR using L2
|
||||
*/
|
||||
/* Bit 31 of the DMA_DATA CONTROL dword; unsigned so that setting the top bit does not overflow a signed int. */
# define PACKET3_DMA_DATA_CP_SYNC (1u << 31)
|
||||
/* COMMAND */
|
||||
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||
#define PACKET3_ACQUIRE_MEM 0x58
|
||||
/* 1. HEADER
|
||||
* 2. COHER_CNTL [30:0]
|
||||
* 2.1 ENGINE_SEL [31:31]
|
||||
* 3. COHER_SIZE [31:0]
|
||||
* 4. COHER_SIZE_HI [7:0]
|
||||
* 5. COHER_BASE_LO [31:0]
|
||||
* 6. COHER_BASE_HI [23:0]
|
||||
* 7. POLL_INTERVAL [15:0]
|
||||
*/
|
||||
/* COHER_CNTL fields for CP_COHER_CNTL */
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
|
||||
#define PACKET3_REWIND 0x59
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
|
||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||
#define PACKET3_LOAD_SH_REG 0x5F
|
||||
#define PACKET3_LOAD_CONFIG_REG 0x60
|
||||
#define PACKET3_LOAD_CONTEXT_REG 0x61
|
||||
#define PACKET3_SET_CONFIG_REG 0x68
|
||||
#define PACKET3_SET_CONFIG_REG_START 0x00002000
|
||||
#define PACKET3_SET_CONFIG_REG_END 0x00002c00
|
||||
#define PACKET3_SET_CONTEXT_REG 0x69
|
||||
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
|
||||
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
|
||||
#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
|
||||
#define PACKET3_SET_SH_REG 0x76
|
||||
#define PACKET3_SET_SH_REG_START 0x00002c00
|
||||
#define PACKET3_SET_SH_REG_END 0x00003000
|
||||
#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
|
||||
#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
|
||||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
|
||||
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
|
||||
#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
#define PACKET3_WRITE_CONST_RAM 0x81
|
||||
#define PACKET3_DUMP_CONST_RAM 0x83
|
||||
#define PACKET3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
#define PACKET3_FRAME_CONTROL 0x90
|
||||
# define FRAME_TMZ (1 << 0)
|
||||
# define FRAME_CMD(x) ((x) << 28)
|
||||
/*
|
||||
* x=0: tmz_begin
|
||||
* x=1: tmz_end
|
||||
*/
|
||||
|
||||
#define PACKET3_INVALIDATE_TLBS 0x98
|
||||
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
|
||||
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
|
||||
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
|
||||
# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_SET_RESOURCES 0xA0
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. QUEUE_MASK_LO [31:0]
|
||||
* 4. QUEUE_MASK_HI [31:0]
|
||||
* 5. GWS_MASK_LO [31:0]
|
||||
* 6. GWS_MASK_HI [31:0]
|
||||
* 7. OAC_MASK [15:0]
|
||||
* 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
*/
|
||||
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
|
||||
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
|
||||
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_MAP_QUEUES 0xA2
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. MQD_ADDR_LO [31:0]
|
||||
* 5. MQD_ADDR_HI [31:0]
|
||||
* 6. WPTR_ADDR_LO [31:0]
|
||||
* 7. WPTR_ADDR_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
|
||||
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
|
||||
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
|
||||
# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
|
||||
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2 */
|
||||
# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
|
||||
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
#define PACKET3_UNMAP_QUEUES 0xA3
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. CONTROL3
|
||||
* 5. CONTROL4
|
||||
* 6. CONTROL5
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
|
||||
/* 0 - PREEMPT_QUEUES
|
||||
* 1 - RESET_QUEUES
|
||||
* 2 - DISABLE_PROCESS_QUEUES
|
||||
* 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
*/
|
||||
# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
|
||||
/* CONTROL3a */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
|
||||
/* CONTROL3b */
|
||||
# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
|
||||
/* CONTROL4 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
|
||||
/* CONTROL5 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
|
||||
#define PACKET3_QUERY_STATUS 0xA4
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. ADDR_LO [31:0]
|
||||
* 5. ADDR_HI [31:0]
|
||||
* 6. DATA_LO [31:0]
|
||||
* 7. DATA_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
|
||||
# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
|
||||
# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
|
||||
|
||||
#define PACKET3_RUN_CLEANER_SHADER 0xD2
|
||||
/* 1. header
|
||||
* 2. RESERVED [31:0]
|
||||
*/
|
||||
|
||||
#define VCE_CMD_NO_OP 0x00000000
|
||||
#define VCE_CMD_END 0x00000001
|
||||
#define VCE_CMD_IB 0x00000002
|
||||
#define VCE_CMD_FENCE 0x00000003
|
||||
#define VCE_CMD_TRAP 0x00000004
|
||||
#define VCE_CMD_IB_AUTO 0x00000005
|
||||
#define VCE_CMD_SEMAPHORE 0x00000006
|
||||
|
||||
#define VCE_CMD_IB_VM 0x00000102
|
||||
#define VCE_CMD_WAIT_GE 0x00000106
|
||||
#define VCE_CMD_UPDATE_PTB 0x00000107
|
||||
#define VCE_CMD_FLUSH_TLB 0x00000108
|
||||
#define VCE_CMD_REG_WRITE 0x00000109
|
||||
#define VCE_CMD_REG_WAIT 0x0000010a
|
||||
|
||||
#define HEVC_ENC_CMD_NO_OP 0x00000000
|
||||
#define HEVC_ENC_CMD_END 0x00000001
|
||||
#define HEVC_ENC_CMD_FENCE 0x00000003
|
||||
#define HEVC_ENC_CMD_TRAP 0x00000004
|
||||
#define HEVC_ENC_CMD_IB_VM 0x00000102
|
||||
#define HEVC_ENC_CMD_REG_WRITE 0x00000109
|
||||
#define HEVC_ENC_CMD_REG_WAIT 0x0000010a
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,453 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef _athub_1_0_OFFSET_HEADER
|
||||
#define _athub_1_0_OFFSET_HEADER
|
||||
|
||||
|
||||
|
||||
// addressBlock: athub_atsdec
|
||||
// base address: 0x3080
|
||||
#define mmATC_ATS_CNTL 0x0000
|
||||
#define mmATC_ATS_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_STATUS 0x0003
|
||||
#define mmATC_ATS_STATUS_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_CNTL 0x0004
|
||||
#define mmATC_ATS_FAULT_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO 0x0005
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_ADDR 0x0006
|
||||
#define mmATC_ATS_FAULT_STATUS_ADDR_BASE_IDX 0
|
||||
#define mmATC_ATS_DEFAULT_PAGE_LOW 0x0007
|
||||
#define mmATC_ATS_DEFAULT_PAGE_LOW_BASE_IDX 0
|
||||
#define mmATC_TRANS_FAULT_RSPCNTRL 0x0008
|
||||
#define mmATC_TRANS_FAULT_RSPCNTRL_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO2 0x0009
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO2_BASE_IDX 0
|
||||
#define mmATHUB_MISC_CNTL 0x000a
|
||||
#define mmATHUB_MISC_CNTL_BASE_IDX 0
|
||||
#define mmATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x000b
|
||||
#define mmATC_VMID_PASID_MAPPING_UPDATE_STATUS_BASE_IDX 0
|
||||
#define mmATC_VMID0_PASID_MAPPING 0x000c
|
||||
#define mmATC_VMID0_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID1_PASID_MAPPING 0x000d
|
||||
#define mmATC_VMID1_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID2_PASID_MAPPING 0x000e
|
||||
#define mmATC_VMID2_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID3_PASID_MAPPING 0x000f
|
||||
#define mmATC_VMID3_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID4_PASID_MAPPING 0x0010
|
||||
#define mmATC_VMID4_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID5_PASID_MAPPING 0x0011
|
||||
#define mmATC_VMID5_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID6_PASID_MAPPING 0x0012
|
||||
#define mmATC_VMID6_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID7_PASID_MAPPING 0x0013
|
||||
#define mmATC_VMID7_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID8_PASID_MAPPING 0x0014
|
||||
#define mmATC_VMID8_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID9_PASID_MAPPING 0x0015
|
||||
#define mmATC_VMID9_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID10_PASID_MAPPING 0x0016
|
||||
#define mmATC_VMID10_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID11_PASID_MAPPING 0x0017
|
||||
#define mmATC_VMID11_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID12_PASID_MAPPING 0x0018
|
||||
#define mmATC_VMID12_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID13_PASID_MAPPING 0x0019
|
||||
#define mmATC_VMID13_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID14_PASID_MAPPING 0x001a
|
||||
#define mmATC_VMID14_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID15_PASID_MAPPING 0x001b
|
||||
#define mmATC_VMID15_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_STATUS 0x001c
|
||||
#define mmATC_ATS_VMID_STATUS_BASE_IDX 0
|
||||
#define mmATC_ATS_GFX_ATCL2_STATUS 0x001d
|
||||
#define mmATC_ATS_GFX_ATCL2_STATUS_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER0_CFG 0x001e
|
||||
#define mmATC_PERFCOUNTER0_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER1_CFG 0x001f
|
||||
#define mmATC_PERFCOUNTER1_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER2_CFG 0x0020
|
||||
#define mmATC_PERFCOUNTER2_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER3_CFG 0x0021
|
||||
#define mmATC_PERFCOUNTER3_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_RSLT_CNTL 0x0022
|
||||
#define mmATC_PERFCOUNTER_RSLT_CNTL_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_LO 0x0023
|
||||
#define mmATC_PERFCOUNTER_LO_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_HI 0x0024
|
||||
#define mmATC_PERFCOUNTER_HI_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL 0x0025
|
||||
#define mmATHUB_PCIE_ATS_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_PASID_CNTL 0x0026
|
||||
#define mmATHUB_PCIE_PASID_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_PAGE_REQ_CNTL 0x0027
|
||||
#define mmATHUB_PCIE_PAGE_REQ_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_OUTSTAND_PAGE_REQ_ALLOC 0x0028
|
||||
#define mmATHUB_PCIE_OUTSTAND_PAGE_REQ_ALLOC_BASE_IDX 0
|
||||
#define mmATHUB_COMMAND 0x0029
|
||||
#define mmATHUB_COMMAND_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_0 0x002a
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_0_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_1 0x002b
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_1_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_2 0x002c
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_2_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_3 0x002d
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_3_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_4 0x002e
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_4_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_5 0x002f
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_5_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_6 0x0030
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_6_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_7 0x0031
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_7_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_8 0x0032
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_8_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_9 0x0033
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_9_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_10 0x0034
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_10_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_11 0x0035
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_11_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_12 0x0036
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_12_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_13 0x0037
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_13_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_14 0x0038
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_14_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_15 0x0039
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_15_BASE_IDX 0
|
||||
#define mmATHUB_MEM_POWER_LS 0x003a
|
||||
#define mmATHUB_MEM_POWER_LS_BASE_IDX 0
|
||||
#define mmATS_IH_CREDIT 0x003b
|
||||
#define mmATS_IH_CREDIT_BASE_IDX 0
|
||||
#define mmATHUB_IH_CREDIT 0x003c
|
||||
#define mmATHUB_IH_CREDIT_BASE_IDX 0
|
||||
#define mmATC_VMID16_PASID_MAPPING 0x003d
|
||||
#define mmATC_VMID16_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID17_PASID_MAPPING 0x003e
|
||||
#define mmATC_VMID17_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID18_PASID_MAPPING 0x003f
|
||||
#define mmATC_VMID18_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID19_PASID_MAPPING 0x0040
|
||||
#define mmATC_VMID19_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID20_PASID_MAPPING 0x0041
|
||||
#define mmATC_VMID20_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID21_PASID_MAPPING 0x0042
|
||||
#define mmATC_VMID21_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID22_PASID_MAPPING 0x0043
|
||||
#define mmATC_VMID22_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID23_PASID_MAPPING 0x0044
|
||||
#define mmATC_VMID23_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID24_PASID_MAPPING 0x0045
|
||||
#define mmATC_VMID24_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID25_PASID_MAPPING 0x0046
|
||||
#define mmATC_VMID25_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID26_PASID_MAPPING 0x0047
|
||||
#define mmATC_VMID26_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID27_PASID_MAPPING 0x0048
|
||||
#define mmATC_VMID27_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID28_PASID_MAPPING 0x0049
|
||||
#define mmATC_VMID28_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID29_PASID_MAPPING 0x004a
|
||||
#define mmATC_VMID29_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID30_PASID_MAPPING 0x004b
|
||||
#define mmATC_VMID30_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID31_PASID_MAPPING 0x004c
|
||||
#define mmATC_VMID31_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_ATS_MMHUB_ATCL2_STATUS 0x004d
|
||||
#define mmATC_ATS_MMHUB_ATCL2_STATUS_BASE_IDX 0
|
||||
#define mmATHUB_SHARED_VIRT_RESET_REQ 0x004e
|
||||
#define mmATHUB_SHARED_VIRT_RESET_REQ_BASE_IDX 0
|
||||
#define mmATHUB_SHARED_ACTIVE_FCN_ID 0x004f
|
||||
#define mmATHUB_SHARED_ACTIVE_FCN_ID_BASE_IDX 0
|
||||
#define mmATC_ATS_SDPPORT_CNTL 0x0050
|
||||
#define mmATC_ATS_SDPPORT_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_GFX_STAT 0x0052
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_GFX_STAT_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_MMHUB_STAT 0x0053
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_MMHUB_STAT_BASE_IDX 0
|
||||
|
||||
|
||||
// addressBlock: athub_xpbdec
|
||||
// base address: 0x31f0
|
||||
#define mmXPB_RTR_SRC_APRTR0 0x005c
|
||||
#define mmXPB_RTR_SRC_APRTR0_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR1 0x005d
|
||||
#define mmXPB_RTR_SRC_APRTR1_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR2 0x005e
|
||||
#define mmXPB_RTR_SRC_APRTR2_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR3 0x005f
|
||||
#define mmXPB_RTR_SRC_APRTR3_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR4 0x0060
|
||||
#define mmXPB_RTR_SRC_APRTR4_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR5 0x0061
|
||||
#define mmXPB_RTR_SRC_APRTR5_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR6 0x0062
|
||||
#define mmXPB_RTR_SRC_APRTR6_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR7 0x0063
|
||||
#define mmXPB_RTR_SRC_APRTR7_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR8 0x0064
|
||||
#define mmXPB_RTR_SRC_APRTR8_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR9 0x0065
|
||||
#define mmXPB_RTR_SRC_APRTR9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR0 0x0066
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR1 0x0067
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR2 0x0068
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR3 0x0069
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR3_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP0 0x006a
|
||||
#define mmXPB_RTR_DEST_MAP0_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP1 0x006b
|
||||
#define mmXPB_RTR_DEST_MAP1_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP2 0x006c
|
||||
#define mmXPB_RTR_DEST_MAP2_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP3 0x006d
|
||||
#define mmXPB_RTR_DEST_MAP3_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP4 0x006e
|
||||
#define mmXPB_RTR_DEST_MAP4_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP5 0x006f
|
||||
#define mmXPB_RTR_DEST_MAP5_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP6 0x0070
|
||||
#define mmXPB_RTR_DEST_MAP6_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP7 0x0071
|
||||
#define mmXPB_RTR_DEST_MAP7_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP8 0x0072
|
||||
#define mmXPB_RTR_DEST_MAP8_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP9 0x0073
|
||||
#define mmXPB_RTR_DEST_MAP9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP0 0x0074
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP1 0x0075
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP2 0x0076
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP3 0x0077
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP3_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG0 0x0078
|
||||
#define mmXPB_CLG_CFG0_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG1 0x0079
|
||||
#define mmXPB_CLG_CFG1_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG2 0x007a
|
||||
#define mmXPB_CLG_CFG2_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG3 0x007b
|
||||
#define mmXPB_CLG_CFG3_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG4 0x007c
|
||||
#define mmXPB_CLG_CFG4_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG5 0x007d
|
||||
#define mmXPB_CLG_CFG5_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG6 0x007e
|
||||
#define mmXPB_CLG_CFG6_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG7 0x007f
|
||||
#define mmXPB_CLG_CFG7_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA 0x0080
|
||||
#define mmXPB_CLG_EXTRA_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_MSK 0x0081
|
||||
#define mmXPB_CLG_EXTRA_MSK_BASE_IDX 0
|
||||
#define mmXPB_LB_ADDR 0x0082
|
||||
#define mmXPB_LB_ADDR_BASE_IDX 0
|
||||
#define mmXPB_WCB_STS 0x0083
|
||||
#define mmXPB_WCB_STS_BASE_IDX 0
|
||||
#define mmXPB_HST_CFG 0x0084
|
||||
#define mmXPB_HST_CFG_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_CFG 0x0085
|
||||
#define mmXPB_P2P_BAR_CFG_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR0 0x0086
|
||||
#define mmXPB_P2P_BAR0_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR1 0x0087
|
||||
#define mmXPB_P2P_BAR1_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR2 0x0088
|
||||
#define mmXPB_P2P_BAR2_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR3 0x0089
|
||||
#define mmXPB_P2P_BAR3_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR4 0x008a
|
||||
#define mmXPB_P2P_BAR4_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR5 0x008b
|
||||
#define mmXPB_P2P_BAR5_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR6 0x008c
|
||||
#define mmXPB_P2P_BAR6_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR7 0x008d
|
||||
#define mmXPB_P2P_BAR7_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_SETUP 0x008e
|
||||
#define mmXPB_P2P_BAR_SETUP_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_DELTA_ABOVE 0x0090
|
||||
#define mmXPB_P2P_BAR_DELTA_ABOVE_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_DELTA_BELOW 0x0091
|
||||
#define mmXPB_P2P_BAR_DELTA_BELOW_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR0 0x0092
|
||||
#define mmXPB_PEER_SYS_BAR0_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR1 0x0093
|
||||
#define mmXPB_PEER_SYS_BAR1_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR2 0x0094
|
||||
#define mmXPB_PEER_SYS_BAR2_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR3 0x0095
|
||||
#define mmXPB_PEER_SYS_BAR3_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR4 0x0096
|
||||
#define mmXPB_PEER_SYS_BAR4_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR5 0x0097
|
||||
#define mmXPB_PEER_SYS_BAR5_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR6 0x0098
|
||||
#define mmXPB_PEER_SYS_BAR6_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR7 0x0099
|
||||
#define mmXPB_PEER_SYS_BAR7_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR8 0x009a
|
||||
#define mmXPB_PEER_SYS_BAR8_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR9 0x009b
|
||||
#define mmXPB_PEER_SYS_BAR9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR0 0x009c
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR1 0x009d
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR2 0x009e
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR3 0x009f
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR3_BASE_IDX 0
|
||||
#define mmXPB_CLK_GAT 0x00a0
|
||||
#define mmXPB_CLK_GAT_BASE_IDX 0
|
||||
#define mmXPB_INTF_CFG 0x00a1
|
||||
#define mmXPB_INTF_CFG_BASE_IDX 0
|
||||
#define mmXPB_INTF_STS 0x00a2
|
||||
#define mmXPB_INTF_STS_BASE_IDX 0
|
||||
#define mmXPB_PIPE_STS 0x00a3
|
||||
#define mmXPB_PIPE_STS_BASE_IDX 0
|
||||
#define mmXPB_SUB_CTRL 0x00a4
|
||||
#define mmXPB_SUB_CTRL_BASE_IDX 0
|
||||
#define mmXPB_MAP_INVERT_FLUSH_NUM_LSB 0x00a5
|
||||
#define mmXPB_MAP_INVERT_FLUSH_NUM_LSB_BASE_IDX 0
|
||||
#define mmXPB_PERF_KNOBS 0x00a6
|
||||
#define mmXPB_PERF_KNOBS_BASE_IDX 0
|
||||
#define mmXPB_STICKY 0x00a7
|
||||
#define mmXPB_STICKY_BASE_IDX 0
|
||||
#define mmXPB_STICKY_W1C 0x00a8
|
||||
#define mmXPB_STICKY_W1C_BASE_IDX 0
|
||||
#define mmXPB_MISC_CFG 0x00a9
|
||||
#define mmXPB_MISC_CFG_BASE_IDX 0
|
||||
#define mmXPB_INTF_CFG2 0x00aa
|
||||
#define mmXPB_INTF_CFG2_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_RD 0x00ab
|
||||
#define mmXPB_CLG_EXTRA_RD_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_MSK_RD 0x00ac
|
||||
#define mmXPB_CLG_EXTRA_MSK_RD_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_MATCH 0x00ad
|
||||
#define mmXPB_CLG_GFX_MATCH_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_MATCH_MSK 0x00ae
|
||||
#define mmXPB_CLG_GFX_MATCH_MSK_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_MATCH 0x00af
|
||||
#define mmXPB_CLG_MM_MATCH_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_MATCH_MSK 0x00b0
|
||||
#define mmXPB_CLG_MM_MATCH_MSK_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING0 0x00b1
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING0_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING1 0x00b2
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING1_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING2 0x00b3
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING2_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING3 0x00b4
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING3_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING4 0x00b5
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING4_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING5 0x00b6
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING5_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING6 0x00b7
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING6_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING7 0x00b8
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING7_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING0 0x00b9
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING0_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING1 0x00ba
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING1_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING2 0x00bb
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING2_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING3 0x00bc
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING3_BASE_IDX 0
|
||||
|
||||
|
||||
// addressBlock: athub_rpbdec
|
||||
// base address: 0x33b0
|
||||
#define mmRPB_PASSPW_CONF 0x00cc
|
||||
#define mmRPB_PASSPW_CONF_BASE_IDX 0
|
||||
#define mmRPB_BLOCKLEVEL_CONF 0x00cd
|
||||
#define mmRPB_BLOCKLEVEL_CONF_BASE_IDX 0
|
||||
#define mmRPB_TAG_CONF 0x00cf
|
||||
#define mmRPB_TAG_CONF_BASE_IDX 0
|
||||
#define mmRPB_EFF_CNTL 0x00d1
|
||||
#define mmRPB_EFF_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ARB_CNTL 0x00d2
|
||||
#define mmRPB_ARB_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ARB_CNTL2 0x00d3
|
||||
#define mmRPB_ARB_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_BIF_CNTL 0x00d4
|
||||
#define mmRPB_BIF_CNTL_BASE_IDX 0
|
||||
#define mmRPB_WR_SWITCH_CNTL 0x00d5
|
||||
#define mmRPB_WR_SWITCH_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_SWITCH_CNTL 0x00d7
|
||||
#define mmRPB_RD_SWITCH_CNTL_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_WR 0x00d8
|
||||
#define mmRPB_CID_QUEUE_WR_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_RD 0x00d9
|
||||
#define mmRPB_CID_QUEUE_RD_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_EX 0x00dc
|
||||
#define mmRPB_CID_QUEUE_EX_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_EX_DATA 0x00dd
|
||||
#define mmRPB_CID_QUEUE_EX_DATA_BASE_IDX 0
|
||||
#define mmRPB_SWITCH_CNTL2 0x00de
|
||||
#define mmRPB_SWITCH_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_DEINTRLV_COMBINE_CNTL 0x00df
|
||||
#define mmRPB_DEINTRLV_COMBINE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_VC_SWITCH_RDWR 0x00e0
|
||||
#define mmRPB_VC_SWITCH_RDWR_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_LO 0x00e1
|
||||
#define mmRPB_PERFCOUNTER_LO_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_HI 0x00e2
|
||||
#define mmRPB_PERFCOUNTER_HI_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER0_CFG 0x00e3
|
||||
#define mmRPB_PERFCOUNTER0_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER1_CFG 0x00e4
|
||||
#define mmRPB_PERFCOUNTER1_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER2_CFG 0x00e5
|
||||
#define mmRPB_PERFCOUNTER2_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER3_CFG 0x00e6
|
||||
#define mmRPB_PERFCOUNTER3_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_RSLT_CNTL 0x00e7
|
||||
#define mmRPB_PERFCOUNTER_RSLT_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_QUEUE_CNTL 0x00e9
|
||||
#define mmRPB_RD_QUEUE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_QUEUE_CNTL2 0x00ea
|
||||
#define mmRPB_RD_QUEUE_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_WR_QUEUE_CNTL 0x00eb
|
||||
#define mmRPB_WR_QUEUE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_WR_QUEUE_CNTL2 0x00ec
|
||||
#define mmRPB_WR_QUEUE_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_EA_QUEUE_WR 0x00ed
|
||||
#define mmRPB_EA_QUEUE_WR_BASE_IDX 0
|
||||
#define mmRPB_ATS_CNTL 0x00ee
|
||||
#define mmRPB_ATS_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ATS_CNTL2 0x00ef
|
||||
#define mmRPB_ATS_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_SDPPORT_CNTL 0x00f0
|
||||
#define mmRPB_SDPPORT_CNTL_BASE_IDX 0
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,76 @@
|
||||
#
|
||||
# Build dynamic Library object
|
||||
#
|
||||
set ( TARGET_LIB "${TARGET_NAME}" )
|
||||
set ( LIB_SRC
|
||||
${LIB_DIR}/core/aql_profile.cpp
|
||||
${LIB_DIR}/core/counters.cpp
|
||||
${LIB_DIR}/core/threadtrace.cpp
|
||||
${LIB_DIR}/core/spm_data.cpp
|
||||
${LIB_DIR}/core/populate_aql.cpp
|
||||
${LIB_DIR}/core/memorymanager.cpp
|
||||
${LIB_DIR}/core/pm4_factory.cpp
|
||||
${LIB_DIR}/core/gfx9_factory.cpp
|
||||
${LIB_DIR}/core/gfx908_factory.cpp
|
||||
${LIB_DIR}/core/gfx90a_factory.cpp
|
||||
${LIB_DIR}/core/gfx940_factory.cpp
|
||||
${LIB_DIR}/core/gfx10_factory.cpp
|
||||
${LIB_DIR}/core/gfx11_factory.cpp
|
||||
${LIB_DIR}/core/gfx12_factory.cpp
|
||||
${LIB_DIR}/core/vega20_reg_init.cpp
|
||||
${LIB_DIR}/core/parse_ip_discovery.cpp
|
||||
${LIB_DIR}/core/navi_reg_init.cpp
|
||||
${LIB_DIR}/core/ip_offset_table_init.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
|
||||
# Build ${TARGET_LIB} as a shared library from the files listed in LIB_SRC.
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
# LIB_DIR and API_PATH are include directories for compiling this target only (PRIVATE).
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${API_PATH})
|
||||
# Link against pthread and the hsa-runtime64::hsa-runtime64 target; PRIVATE keeps
# both out of this target's link interface.
target_link_libraries( ${TARGET_LIB} PRIVATE pthread hsa-runtime64::hsa-runtime64 )
|
||||
|
||||
## Generating definitions
# Path variables for the header generation commands. Three directories are
# derived from ROOT_DIR, and each generated header (block info, gfx9, gfx10,
# gfx11, gfx12) gets three paths that share the same file name:
#   *_TEMPL  - file under ${ROOT_DIR}/script  (SCRIPT_DIR)
#   *_DEF    - file under ${ROOT_DIR}/gfxip   (GFXIP_DIR)
#   *_HEADER - file under ${ROOT_DIR}/src/def (DEF_DIR)
|
||||
set ( SCRIPT_DIR "${ROOT_DIR}/script" )
|
||||
set ( GFXIP_DIR "${ROOT_DIR}/gfxip" )
|
||||
set ( DEF_DIR "${ROOT_DIR}/src/def" )
|
||||
set ( BINFO_TEMPL "${SCRIPT_DIR}/gpu_block_info.h" )
|
||||
set ( BINFO_DEF "${GFXIP_DIR}/gpu_block_info.h" )
|
||||
set ( BINFO_HEADER "${DEF_DIR}/gpu_block_info.h" )
|
||||
set ( GFX9_TEMPL "${SCRIPT_DIR}/gfx9_def.h" )
|
||||
set ( GFX9_DEF "${GFXIP_DIR}/gfx9_def.h" )
|
||||
set ( GFX9_HEADER "${DEF_DIR}/gfx9_def.h" )
|
||||
set ( GFX10_TEMPL "${SCRIPT_DIR}/gfx10_def.h" )
|
||||
set ( GFX10_DEF "${GFXIP_DIR}/gfx10_def.h" )
|
||||
set ( GFX10_HEADER "${DEF_DIR}/gfx10_def.h" )
|
||||
set ( GFX11_TEMPL "${SCRIPT_DIR}/gfx11_def.h" )
|
||||
set ( GFX11_DEF "${GFXIP_DIR}/gfx11_def.h" )
|
||||
set ( GFX11_HEADER "${DEF_DIR}/gfx11_def.h" )
|
||||
set ( GFX12_TEMPL "${SCRIPT_DIR}/gfx12_def.h" )
|
||||
set ( GFX12_DEF "${GFXIP_DIR}/gfx12_def.h" )
|
||||
set ( GFX12_HEADER "${DEF_DIR}/gfx12_def.h" )
|
||||
include_directories ( ${ROOT_DIR} )
|
||||
# Custom target "mygenreset": copies each *_TEMPL file over the matching
# *_HEADER file (block info, gfx9, gfx10, gfx11, gfx12).
# Every copy is run through `sh -xc`, so the shell traces the command it runs.
# The target is declared without ALL, so it is only built when requested by name.
add_custom_target( mygenreset
|
||||
COMMAND sh -xc "cp ${BINFO_TEMPL} ${BINFO_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX9_TEMPL} ${GFX9_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX10_TEMPL} ${GFX10_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX11_TEMPL} ${GFX11_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX12_TEMPL} ${GFX12_HEADER}"
|
||||
)
|
||||
# Custom target "mygen": regenerates the *_HEADER files. The commands run in
# the order listed:
#   1. Copy every *_TEMPL file over its *_HEADER file (block info, gfx9..gfx12).
#   2. For gfx9, gfx10, gfx11 and gfx12 in turn:
#        - rewrite the header from its template, with sed emptying every line
#          that contains "gfxN_def";
#        - run script/errextr.sh with the header and the matching *_DEF file
#          as its two arguments.
#      NOTE(review): errextr.sh is not visible here; presumably it fills the
#      header from the definition file - confirm.
#   3. Rebuild the block-info header: the first line of BINFO_TEMPL, followed
#      by BINFO_DEF with the first _GPU_BLOCKINFO_H_ on each line replaced by
#      SRC_DEF_GPU_BLOCK_INFO_H_.
# The target is declared without ALL, so it is only built when requested by name.
add_custom_target( mygen
|
||||
COMMAND sh -xc "cp ${BINFO_TEMPL} ${BINFO_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX9_TEMPL} ${GFX9_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX10_TEMPL} ${GFX10_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX11_TEMPL} ${GFX11_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX12_TEMPL} ${GFX12_HEADER}"
|
||||
COMMAND sh -xc "sed '/gfx9_def/ s/.*//' ${GFX9_TEMPL} >${GFX9_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX9_HEADER} ${GFX9_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx10_def/ s/.*//' ${GFX10_TEMPL} >${GFX10_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX10_HEADER} ${GFX10_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx11_def/ s/.*//' ${GFX11_TEMPL} >${GFX11_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX11_HEADER} ${GFX11_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx12_def/ s/.*//' ${GFX12_TEMPL} >${GFX12_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX12_HEADER} ${GFX12_DEF}"
|
||||
COMMAND sh -xc "head -n1 ${BINFO_TEMPL} >${BINFO_HEADER}"
|
||||
COMMAND sh -xc "sed 's/_GPU_BLOCKINFO_H_/SRC_DEF_GPU_BLOCK_INFO_H_/' ${BINFO_DEF} >>${BINFO_HEADER}"
|
||||
)
|
||||
|
||||
add_subdirectory(src/core)
|
||||
@@ -0,0 +1 @@
|
||||
add_subdirectory(include)
|
||||
@@ -0,0 +1,46 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
#define SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
// Value of 'pm4_ib_format' field of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_FORMAT = 1;
|
||||
// Value of 'dw_count_remain' field of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_DW_COUNT_REMAIN = 10;
|
||||
// Size of 'reserved' array of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_RESERVED_COUNT = 8;
|
||||
|
||||
// AQL vendor-specific packet which carries a PM4 IB command.
// The fields ahead of completion_signal occupy 2 + 2 + 16 + 4 + 32 = 56 bytes.
// NOTE(review): presumably 64 bytes in total, assuming hsa_signal_t is 8 bytes - confirm.
|
||||
typedef struct {
|
||||
uint16_t header;  // NOTE(review): presumably the standard AQL packet header - confirm
|
||||
uint16_t pm4_ib_format;  // expected value: AMD_AQL_PM4_IB_FORMAT
|
||||
uint32_t pm4_ib_command[4];  // four 32-bit words holding the PM4 IB command
|
||||
uint32_t dw_count_remain;  // expected value: AMD_AQL_PM4_IB_DW_COUNT_REMAIN
|
||||
uint32_t reserved[AMD_AQL_PM4_IB_RESERVED_COUNT];  // AMD_AQL_PM4_IB_RESERVED_COUNT reserved words
|
||||
hsa_signal_t completion_signal;  // NOTE(review): presumably signalled when the packet completes - confirm
|
||||
} amd_aql_pm4_ib_packet_t;
|
||||
|
||||
#endif // SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
@@ -0,0 +1,799 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/counter_dimensions.hpp"
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
|
||||
// GCC/Clang function attribute: the marked function is called automatically
// before main() is entered (for a shared object, when it is loaded).
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
// GCC/Clang function attribute: the marked function is called automatically
// after main() returns or exit() is called (for a shared object, on unload).
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
#define ERR_CHECK(cond, err, msg) \
|
||||
{ \
|
||||
if (cond) { \
|
||||
ERR_LOGGING << msg; \
|
||||
return err; \
|
||||
} \
|
||||
}
|
||||
|
||||
// Getting SPM data using driver API
|
||||
namespace spm_kfd_namespace {
|
||||
hsa_status_t spm_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data);
|
||||
}
|
||||
|
||||
// Payload exchanged with the data callback in PC sampling mode
struct pcsmp_callback_data_t {
  // Name of the sampled kernel
  const char* kernel_name;
  // Host buffer for tracing data
  void* data_buffer;
  // Sample id
  uint64_t id;
  // Sample cycle
  uint64_t cycle;
  // Sample PC
  uint64_t pc;
};
|
||||
|
||||
// Last target compute unit taken from a trace profile's COMPUTE_UNIT_TARGET or
// MASK parameter; stored by hsa_ven_amd_aqlprofile_start.
std::atomic<int> ATT_TARGET_CU(0);
|
||||
|
||||
namespace aql_profile {
|
||||
// Command buffer partitioning manager
|
||||
// Supports Pre/Post commands partitioning
|
||||
// and prefix control partition
|
||||
|
||||
// Trace configurations keyed by the profile's command buffer pointer: stored by
// hsa_ven_amd_aqlprofile_start and looked up by hsa_ven_amd_aqlprofile_iterate_data.
static std::unordered_map<void*, pm4_builder::TraceConfig> configs;
// Presumably meant to serialize access to 'configs' - verify against the users of the map.
static std::mutex config_mut;
|
||||
|
||||
// Builds the counter descriptors for every event of 'profile'.
//
// Each event is bound to the next free counter register of its block instance
// (block id + block index). Events for which pm4_builder::SPISkip() returns true
// are recorded with the current register index but neither checked against the
// register limit nor charged a register.
// On GFX10, when the resulting non-empty vector carries no GRBM-attributed
// counter, one GRBM counter (counter id 0, block index 0) is appended if the
// factory knows the GRBM block.
//
// Throws event_exception when the block of an event cannot be resolved or when
// a block instance is asked for more counters than it has registers.
static inline pm4_builder::counters_vector CountersVec(const profile_t* profile,
                                                       const Pm4Factory* pm4_factory) {
  pm4_builder::counters_vector vec;
  // Next free counter register index per block instance
  std::map<block_des_t, uint32_t, lt_block_des> index_map;

  for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
       p < profile->events + profile->event_count; ++p) {
    const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(p);
    // Same guard as the GRBM lookup below: never dereference an unresolved block
    if (block_info == nullptr) throw event_exception("Event block is not supported, ", *p);

    const block_des_t block_des = {block_info->id, p->block_index};
    // insert() leaves an existing entry untouched, so this yields the running index
    uint32_t& reg_index = index_map.insert({block_des, 0}).first->second;

    const bool uses_register = !pm4_builder::SPISkip(block_info->attr, p->counter_id);
    if (uses_register && reg_index >= block_info->counter_count) {
      throw event_exception("Event is out of block counter registers number limit, ", *p);
    }

    vec.push_back({p->counter_id, reg_index, block_des, block_info});
    if (uses_register) ++reg_index;
  }

  if (pm4_factory->IsGFX10() && (vec.get_attr() & CounterBlockGRBMAttr) == 0 && !vec.empty()) {
    event_t grbm_event{
        .block_name = HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, .block_index = 0, .counter_id = 0};
    const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(&grbm_event);
    if (block_info == nullptr) return vec;

    const block_des_t block_des = {block_info->id, 0};
    uint32_t& reg_index = index_map.insert({block_des, 0}).first->second;
    vec.push_back({0, reg_index, block_des, block_info});
    ++reg_index;
  }
  return vec;
}
|
||||
|
||||
static inline bool IsEventMatch(const event_t& event1, const event_t& event2) {
|
||||
return (event1.block_name == event2.block_name) && (event1.block_index == event2.block_index) &&
|
||||
(event1.counter_id == event2.counter_id);
|
||||
}
|
||||
|
||||
// Data callback used by get_info(PMC_DATA).
//
// 'callback_data' points to the caller's hsa_ven_amd_aqlprofile_info_data_t that
// names the requested event and sample id. For samples of the matching event:
//  - requested sample_id == UINT32_MAX: the sample is added to the result;
//  - requested sample_id == the sample's id: the result is set to the sample
//    and HSA_STATUS_INFO_BREAK is returned.
// In every other case HSA_STATUS_SUCCESS is returned and nothing is modified.
hsa_status_t DefaultPmcdataCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                    hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                    void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) return HSA_STATUS_SUCCESS;

  auto* request = static_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (!IsEventMatch(info_data->pmc_data.event, request->pmc_data.event)) {
    return HSA_STATUS_SUCCESS;
  }

  if (request->sample_id == UINT32_MAX) {
    request->pmc_data.result += info_data->pmc_data.result;
    return HSA_STATUS_SUCCESS;
  }
  if (request->sample_id == info_data->sample_id) {
    request->pmc_data.result = info_data->pmc_data.result;
    return HSA_STATUS_INFO_BREAK;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Data callback used by get_info(TRACE_DATA).
//
// 'callback_data' points to the caller's hsa_ven_amd_aqlprofile_info_data_t
// carrying the requested sample id. When a trace sample with that id arrives,
// its trace descriptor is copied out and HSA_STATUS_INFO_BREAK is returned;
// otherwise HSA_STATUS_SUCCESS is returned and nothing is modified.
hsa_status_t DefaultTracedataCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                      hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                      void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA) return HSA_STATUS_SUCCESS;

  auto* request = static_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (info_data->sample_id != request->sample_id) return HSA_STATUS_SUCCESS;

  request->trace_data = info_data->trace_data;
  return HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
Logger::mutex_t Logger::mutex_;
|
||||
Logger* Logger::instance_ = NULL;
|
||||
bool Pm4Factory::concurrent_create_mode_ = false;
|
||||
bool Pm4Factory::spm_kfd_mode_ = false;
|
||||
Pm4Factory::mutex_t Pm4Factory::mutex_;
|
||||
Pm4Factory::instances_t* Pm4Factory::instances_ = NULL;
|
||||
bool read_api_enabled = true;
|
||||
|
||||
// Library load hook: disables the read API when the AQLPROFILE_READ_API
// environment variable is present and atoi() of its value yields 0.
CONSTRUCTOR_API void constructor() {
  const char* env_value = getenv("AQLPROFILE_READ_API");
  if (env_value == NULL) return;
  if (atoi(env_value) == 0) read_api_enabled = false;
}
|
||||
|
||||
// Library unload hook: tears down the Logger first, then the Pm4Factory
DESTRUCTOR_API void destructor() {
  Logger::Destroy();
  Pm4Factory::Destroy();
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Return library major/minor version
|
||||
PUBLIC_API uint32_t hsa_ven_amd_aqlprofile_version_major() { return HSA_AQLPROFILE_VERSION_MAJOR; }
|
||||
// Return library minor version (HSA_AQLPROFILE_VERSION_MINOR)
PUBLIC_API uint32_t hsa_ven_amd_aqlprofile_version_minor() {
  const uint32_t version = HSA_AQLPROFILE_VERSION_MINOR;
  return version;
}
|
||||
|
||||
// Returns the last error message
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) {
|
||||
*str = aql_profile::Logger::LastMessage().c_str();
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
*result = false;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
if (pm4_factory->GetBlockInfo(event) != NULL) *result = true;
|
||||
} catch (aql_profile::event_exception& e) {
|
||||
INFO_LOGGING << e.what();
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_start_packet) {
|
||||
try {
|
||||
pm4_builder::CmdBuffer commands;
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile->command_buffer.ptr, UINT_MAX);
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
const pm4_builder::counters_vector countersVec = CountersVec(profile, pm4_factory);
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();
|
||||
|
||||
// Generate read commands
|
||||
auto data_size = pmc_builder->Read(&commands, countersVec, profile->output_buffer.ptr);
|
||||
if (!aql_profile::read_api_enabled) commands.Clear();
|
||||
cmd_buffer_mgr.SetRdSize(commands.Size());
|
||||
|
||||
// Copy generated read commands
|
||||
if (profile->command_buffer.ptr != NULL) {
|
||||
const aql_profile::descriptor_t rd_descr = cmd_buffer_mgr.GetRdDescr();
|
||||
memcpy(rd_descr.ptr, commands.Data(), commands.Size());
|
||||
commands.Clear();
|
||||
}
|
||||
|
||||
// Generate start commands
|
||||
pmc_builder->Start(&commands, countersVec);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
|
||||
// Generate stop commands
|
||||
if (!aql_profile::read_api_enabled)
|
||||
pmc_builder->Read(&commands, countersVec, profile->output_buffer.ptr);
|
||||
pmc_builder->Stop(&commands, countersVec);
|
||||
|
||||
if (profile->output_buffer.size < data_size) {
|
||||
profile->output_buffer.size = data_size;
|
||||
if (profile->output_buffer.ptr != NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE) {
|
||||
pm4_builder::TraceConfig trace_config{};
|
||||
const uint64_t se_number_total = pm4_factory->GetShaderEnginesNumber();
|
||||
|
||||
if (profile->parameters) {
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
switch (p->parameter_name) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK:
|
||||
trace_config.se_mask = p->value & ((1ull << se_number_total) - 1);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
|
||||
if (p->value > 15)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
|
||||
trace_config.targetCu = p->value;
|
||||
ATT_TARGET_CU.store(p->value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
|
||||
trace_config.vmIdMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
|
||||
if ((p->value & 0x50) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
|
||||
trace_config.deprecated_mask = p->value;
|
||||
trace_config.targetCu = p->value & 0xF;
|
||||
ATT_TARGET_CU.store(trace_config.targetCu);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
|
||||
if ((p->value & 0xFF000000) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
|
||||
p->value);
|
||||
trace_config.deprecated_tokenMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
|
||||
trace_config.deprecated_tokenMask2 = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
|
||||
trace_config.sampleRate = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
|
||||
trace_config.concurrent = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
|
||||
trace_config.simd_sel = p->value & 0xF;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE:
|
||||
trace_config.occupancy_mode = p->value ? 1 : 0;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
|
||||
trace_config.perfMASK = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
|
||||
trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
|
||||
if (trace_config.perfcounters.size() < 8)
|
||||
trace_config.perfcounters.push_back({p->value, 0xF});
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING << "Bad trace parameter name (" << p->parameter_name << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto control_size = sizeof(pm4_builder::TraceControl) * se_number_total;
|
||||
char* prefix_ptr = cmd_buffer_mgr.AddPrefix(control_size);
|
||||
auto* control_ptr = reinterpret_cast<pm4_builder::TraceControl*>(prefix_ptr);
|
||||
|
||||
trace_config.control_buffer_ptr = control_ptr;
|
||||
trace_config.control_buffer_size = control_size;
|
||||
trace_config.data_buffer_ptr = profile->output_buffer.ptr;
|
||||
trace_config.data_buffer_size = profile->output_buffer.size;
|
||||
|
||||
if (countersVec.size() == 0) {
|
||||
pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();
|
||||
// Generate start commands
|
||||
sqtt_builder->Begin(&commands, &trace_config);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
sqtt_builder->End(&commands, &trace_config);
|
||||
} else {
|
||||
const char* sz_sampling_rate = getenv("AQLPROFILE_SPM_SAMPLE_RATE");
|
||||
if (sz_sampling_rate != NULL) trace_config.sampleRate = atoi(sz_sampling_rate);
|
||||
|
||||
pm4_builder::SpmBuilder* spm_builder = pm4_factory->GetSpmBuilder();
|
||||
// Generate start commands
|
||||
spm_builder->Begin(&commands, &trace_config, countersVec);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
spm_builder->End(&commands, &trace_config);
|
||||
}
|
||||
aql_profile::configs[profile->command_buffer.ptr] = trace_config;
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
cmd_buffer_mgr.Finalize(commands.Size());
|
||||
const uint32_t cmd_size = (cmd_buffer_mgr.GetSize() + 0x1800) & ~0xFFF;
|
||||
if (profile->command_buffer.size < cmd_size) {
|
||||
profile->command_buffer.size = cmd_size;
|
||||
if (profile->command_buffer.ptr != NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (profile->command_buffer.ptr != NULL) {
|
||||
// Copy generated commands
|
||||
const aql_profile::descriptor_t pre_descr = cmd_buffer_mgr.GetPreDescr();
|
||||
const aql_profile::descriptor_t post_descr = cmd_buffer_mgr.GetPostDescr();
|
||||
memcpy(pre_descr.ptr, commands.Data(), pre_descr.size);
|
||||
memcpy(post_descr.ptr, reinterpret_cast<const char*>(commands.Data()) + pre_descr.size,
|
||||
post_descr.size);
|
||||
// Populate start aql packet
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::PopulateAql(pre_descr.ptr, pre_descr.size, cmd_writer, aql_start_packet);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_stop_packet) {
|
||||
try {
|
||||
// Populate stop aql packet
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
const aql_profile::descriptor_t post_descr = cmd_buffer_mgr.GetPostDescr();
|
||||
aql_profile::PopulateAql(post_descr.ptr, post_descr.size, cmd_writer, aql_stop_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling read commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_read(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_read_packet) {
|
||||
if (!aql_profile::read_api_enabled) return HSA_STATUS_ERROR;
|
||||
try {
|
||||
// Populate read aql packet
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
|
||||
const aql_profile::descriptor_t rd_descr =
|
||||
(is_concurrent == false) ? cmd_buffer_mgr.GetRdDescr() : cmd_buffer_mgr.FetchRdDescr();
|
||||
aql_profile::PopulateAql(rd_descr.ptr, rd_descr.size, cmd_writer, aql_read_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Legacy devices, converting of the profiling AQL packet to PM4 packet blob
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// Method for getting the profile info
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
const uint32_t attr_op = (uint32_t)attribute;
|
||||
const uint32_t begin_op = (uint32_t)HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD;
|
||||
if (attr_op >= begin_op) attribute = (hsa_ven_amd_aqlprofile_info_type_t)begin_op;
|
||||
|
||||
if (profile == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'profile'";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
if (attribute != HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD) {
|
||||
if (value == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'value'";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
switch (attribute) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE:
|
||||
*(uint32_t*)value = 0x2000; // a current approximation as 4K is big enough
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE:
|
||||
*(uint32_t*)value = 0x1800; // a current approximation as 4K is big enough
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA:
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(value)->pmc_data.result = 0;
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::DefaultPmcdataCallback,
|
||||
value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA:
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::DefaultTracedataCallback,
|
||||
value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS:
|
||||
*reinterpret_cast<uint32_t*>(value) =
|
||||
pm4_factory->GetBlockInfo(&(profile->events[0]))->counter_count;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID: {
|
||||
hsa_ven_amd_aqlprofile_id_query_t* query =
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_id_query_t*>(value);
|
||||
const uint32_t block = pm4_factory->FindBlock(query->name);
|
||||
const GpuBlockInfo* info = pm4_factory->GetBlockInfo(block);
|
||||
status = (info == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
query->id = block;
|
||||
query->instance_count = info->instance_count;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD: {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
pm4_builder::CmdBuffer commands;
|
||||
|
||||
const uint32_t op = attr_op - begin_op;
|
||||
switch (op) {
|
||||
case 0:
|
||||
pmc_builder->Enable(&commands);
|
||||
break;
|
||||
case 1:
|
||||
pmc_builder->Disable(&commands);
|
||||
break;
|
||||
case 2:
|
||||
pmc_builder->WaitIdle(&commands);
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING << "get_info, not supported op (" << op << ")";
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (profile->command_buffer.ptr == NULL) {
|
||||
const_cast<hsa_ven_amd_aqlprofile_profile_t*>(profile)->command_buffer.size =
|
||||
commands.Size();
|
||||
break;
|
||||
}
|
||||
|
||||
if (profile->command_buffer.size != commands.Size()) {
|
||||
ERR_LOGGING << "get_info, wrong profile cmd size";
|
||||
status = HSA_STATUS_ERROR;
|
||||
break;
|
||||
}
|
||||
if (value == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'value'";
|
||||
status = HSA_STATUS_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(profile->command_buffer.ptr, commands.Data(), profile->command_buffer.size);
|
||||
aql_profile::PopulateAql(profile->command_buffer.ptr, profile->command_buffer.size,
|
||||
cmd_writer, reinterpret_cast<aql_profile::packet_t*>(value));
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
ERR_LOGGING << "Invalid attribute (" << attribute << ")";
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Invokes 'callback' with (id, name) for every entry of
// EventDimension::dimension_table, after EventDimension::init() has run.
// The callback's return value is ignored. Any exception is swallowed and
// reported as HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t
hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t callback) {
  try {
    EventDimension::init();
    for (auto& [dim_name, dim_id] : EventDimension::dimension_table) {
      callback(dim_id, dim_name.c_str());
    }
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Reports the coordinates of one event sample along each dimension of its block.
//
// The dimensions come from EventAttribDimension::get(agent, event.block_name).
// The coordinates are computed for the flat index
// sample_id * get_num_instances() + event.block_index, then 'callback' is
// invoked once per dimension with (position, id, extent, coordinate, name,
// userdata). Returns HSA_STATUS_ERROR when the block has no dimensions or when
// any exception is raised.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
    hsa_agent_t agent, hsa_ven_amd_aqlprofile_event_t event, uint32_t sample_id,
    hsa_ven_amd_aqlprofile_coordinate_callback_t callback, void* userdata) {
  try {
    const EventAttribDimension& attrib = EventAttribDimension::get(agent, event.block_name);
    if (!attrib.get_num()) return HSA_STATUS_ERROR;

    // One coordinate slot per dimension
    std::vector<uint8_t> coord(attrib.get_num());
    attrib.get_coordinates(coord.data(),
                           sample_id * attrib.get_num_instances() + event.block_index);

    for (size_t pos = 0; pos < attrib.get_num(); ++pos) {
      EventDimension dim = attrib.get_dim(pos);
      callback(pos, dim.id, dim.extent, coord[pos], dim.name.data(), userdata);
    }
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
const uint32_t xcc_num = pm4_factory->GetXccNumber();
|
||||
const uint32_t se_number = pm4_factory->GetShaderEnginesNumber() / xcc_num;
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
uint64_t* samples = reinterpret_cast<uint64_t*>(profile->output_buffer.ptr);
|
||||
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
if ((char*)samples >= (char*)profile->output_buffer.ptr + profile->output_buffer.size)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
if (!(pm4_factory->GetBlockInfo(p)->attr & CounterBlockAidAttr)) continue;
|
||||
|
||||
// Process an MI300 UMC event for XCC 0 ONLY
|
||||
auto sample_id = p->block_index; // sample id is the event block_index or the UMCCH id
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = sample_id;
|
||||
sample_info.pmc_data.event = *p;
|
||||
sample_info.pmc_data.result = *samples;
|
||||
#if DEBUG_TRACE == 2
|
||||
printf(
|
||||
"DATA: sample index(%u) id(%u) bloc id(%u) index(%u) counter id(%u) "
|
||||
"res(%lu)\n",
|
||||
sample_index, sample_id, p->block_name, p->block_index, p->counter_id,
|
||||
samples[sample_index]);
|
||||
#endif
|
||||
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
samples++;
|
||||
}
|
||||
for (uint32_t xcc_index = 0; xcc_index < xcc_num; xcc_index++) {
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
// this check needs to be the first check as it takes care of a corner case
|
||||
// in which a UMC event is the last event in profile->events
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockAidAttr) continue;
|
||||
|
||||
if ((char*)samples > (char*)profile->output_buffer.ptr + profile->output_buffer.size)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
// non-MI300A-AID counter event.
|
||||
uint32_t block_samples_count = 1;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSqAttr && pm4_factory->IsGFX11())
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
|
||||
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = blk;
|
||||
sample_info.pmc_data.event = *p;
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: xcc(%u) id(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n",
|
||||
xcc_index, blk, p->block_name, p->block_index, p->counter_id, *samples);
|
||||
#endif
|
||||
|
||||
sample_info.pmc_data.result = *samples;
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
samples++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE) {
|
||||
uint32_t mode = 2;
|
||||
switch (profile->event_count) {
|
||||
case 0:
|
||||
mode = 0;
|
||||
break;
|
||||
case UINT32_MAX:
|
||||
const_cast<hsa_ven_amd_aqlprofile_profile_t*>(profile)->event_count = 0;
|
||||
mode = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (mode != 2) { // SQTT trace data, or SQTT pc sampling
|
||||
auto& trace_config = aql_profile::configs.at(profile->command_buffer.ptr);
|
||||
pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
|
||||
const uint64_t se_number_total = pm4_factory->GetShaderEnginesNumber();
|
||||
// Control buffer was allocated as the CmdBuffer prefix partition
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
|
||||
auto* control_ptr =
|
||||
reinterpret_cast<pm4_builder::TraceControl*>(cmd_buffer_mgr.GetPrefix1());
|
||||
// Check if SQTT buffer was wrapped
|
||||
for (size_t se_index = 0; se_index < se_number_total; se_index++) {
|
||||
if (control_ptr[se_index].status & sqttbuilder->GetUTCErrorMask()) {
|
||||
ERR_LOGGING << "SQTT memory error received, SE(" << se_index << ")";
|
||||
status = HSA_STATUS_ERROR_EXCEPTION;
|
||||
} else if (control_ptr[se_index].status & sqttbuilder->GetBufferFullMask()) {
|
||||
ERR2_LOGGING << "SQTT data buffer full, SE(" << se_index << ")";
|
||||
if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
// The samples sizes are returned in the control buffer
|
||||
for (size_t se_index = 0; se_index < se_number_total; se_index++) {
|
||||
bool bMaskedIn = trace_config.GetTargetCU(se_index) >= 0;
|
||||
uint64_t sample_capacity = trace_config.GetCapacity(se_index);
|
||||
void* sample_ptr = reinterpret_cast<void*>(trace_config.GetSEBaseAddr(se_index));
|
||||
|
||||
// WPTR specifies the index in thread trace buffer where next token will be
|
||||
// written by hardware. The index is incremented by size of 32 bytes.
|
||||
size_t sample_size = (control_ptr[se_index].wptr & sqttbuilder->GetWritePtrMask()) *
|
||||
sqttbuilder->GetWritePtrBlk();
|
||||
|
||||
if (pm4_factory->GetGpuId() == aql_profile::GFX11_GPU_ID) {
|
||||
sample_size = sample_size - reinterpret_cast<uint64_t>(sample_ptr);
|
||||
sample_size &= (1ull << 29) - 1;
|
||||
}
|
||||
|
||||
if (sample_size >= sample_capacity) {
|
||||
ERR_LOGGING << "SQTT data out of bounds, sample_id(" << se_index << ") size("
|
||||
<< sample_size << "/" << sample_capacity << ")";
|
||||
sample_size = sample_capacity;
|
||||
if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
hsa_status_t call_status;
|
||||
if (mode == 0) { // SQTT trace
|
||||
if (bMaskedIn) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t info;
|
||||
info.sample_id = se_index;
|
||||
info.trace_data.ptr = sample_ptr;
|
||||
info.trace_data.size = sample_size;
|
||||
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &info, data);
|
||||
}
|
||||
} else { // PC sampling
|
||||
pcsmp_callback_data_t* pcsmp_data = reinterpret_cast<pcsmp_callback_data_t*>(data);
|
||||
pcsmp_data->id = se_index;
|
||||
pcsmp_data->cycle = 333;
|
||||
pcsmp_data->pc = 0x333;
|
||||
call_status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, NULL, data);
|
||||
}
|
||||
}
|
||||
} else { // SPM trace data
|
||||
if (pm4_factory->SpmKfdMode() == false) {
|
||||
const uint32_t tnumber = 1;
|
||||
void* sample_ptr = profile->output_buffer.ptr;
|
||||
const uint32_t sample_size = profile->output_buffer.size;
|
||||
const uint32_t sample_capacity = (profile->output_buffer.size / tnumber);
|
||||
|
||||
for (unsigned i = 0; i < tnumber; ++i) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.trace_data.ptr = sample_ptr;
|
||||
sample_info.trace_data.size = sample_size;
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
sample_ptr = reinterpret_cast<char*>(sample_ptr) + sample_capacity;
|
||||
}
|
||||
} else {
|
||||
status = spm_kfd_namespace::spm_iterate_data(profile, callback, data);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_marker_packet,
|
||||
uint32_t data, hsa_ven_amd_aqlprofile_att_marker_channel_t channel) {
|
||||
assert(profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE);
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
pm4_builder::CmdBuffer commands;
|
||||
|
||||
// Generate start commands
|
||||
auto status = sqtt_builder->InsertMarker(&commands, data, channel);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
aql_profile::descriptor_t& cmdbuffer = profile->command_buffer;
|
||||
|
||||
size_t cmd_size = cmdbuffer.size;
|
||||
cmdbuffer.size = commands.Size();
|
||||
|
||||
if (cmdbuffer.ptr == NULL) return HSA_STATUS_SUCCESS;
|
||||
if (cmd_size < commands.Size()) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
|
||||
// Populate stop aql packet
|
||||
memcpy(cmdbuffer.ptr, commands.Data(), commands.Size());
|
||||
aql_profile::PopulateAql(cmdbuffer.ptr, commands.Size(), cmd_writer, aql_marker_packet);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,67 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_AQL_PROFILE_H_
|
||||
#define SRC_CORE_AQL_PROFILE_H_
|
||||
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "include/aql_profile_v2.h"
|
||||
|
||||
#include "core/aql_profile_exception.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
|
||||
// Forward declaration only: this header refers to CmdBuilder through a
// pointer and therefore does not need the full class definition.
namespace pm4_builder {
class CmdBuilder;
}  // namespace pm4_builder
|
||||
|
||||
namespace aql_profile {
|
||||
typedef hsa_ven_amd_aqlprofile_descriptor_t descriptor_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ven_amd_aqlprofile_info_type_t info_type_t;
|
||||
typedef hsa_ven_amd_aqlprofile_data_callback_t data_callback_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
|
||||
void PopulateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_builder::CmdBuilder* cmd_writer,
|
||||
packet_t* aql_packet);
|
||||
void* LegacyAqlAcquire(const packet_t* aql_packet, void* data);
|
||||
void* LegacyAqlRelease(const packet_t* aql_packet, void* data);
|
||||
void* LegacyPm4(const packet_t* aql_packet, void* data);
|
||||
|
||||
class event_exception : public aql_profile_exc_val<event_t> {
|
||||
public:
|
||||
event_exception(const std::string& m, const event_t& ev) : aql_profile_exc_val(m, ev) {}
|
||||
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Streams a readable description of a profiling event in the form
// "event( block(<block name>.<block index>), Id(<counter id>))".
static std::ostream& operator<<(std::ostream& os, const aql_profile::event_t& ev) {
  os << "event( block(" << ev.block_name << "." << ev.block_index;
  os << "), Id(" << ev.counter_id << "))";
  return os;
}
|
||||
|
||||
#endif // SRC_CORE_AQL_PROFILE_H_
|
||||
@@ -0,0 +1,57 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
#define SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Exception type that carries a plain, caller-supplied message.
class aql_profile_exc_msg : public std::exception {
 public:
  // Stores a copy of msg; what() returns it unchanged.
  explicit aql_profile_exc_msg(const std::string& msg) : str_(msg) {}

  // Returns the stored message. The pointer stays valid for the lifetime of
  // this exception object.
  // `noexcept override` replaces the deprecated `throw()` specification
  // (removed in C++20) and lets the compiler verify the signature against
  // std::exception::what().
  const char* what() const noexcept override { return str_.c_str(); }

 protected:
  std::string str_;
};
|
||||
|
||||
// Exception type whose message is "<msg>(<val>)", where <val> is rendered
// with operator<< for T. T must therefore be streamable to std::ostream.
template <typename T>
class aql_profile_exc_val : public std::exception {
 public:
  // Formats the message once at construction time.
  aql_profile_exc_val(const std::string& msg, const T& val) {
    std::ostringstream oss;
    oss << msg << "(" << val << ")";
    str_ = oss.str();
  }

  // Returns the formatted message. The pointer stays valid for the lifetime
  // of this exception object.
  // `noexcept override` replaces the deprecated `throw()` specification
  // (removed in C++20) and lets the compiler verify the signature against
  // std::exception::what().
  const char* what() const noexcept override { return str_.c_str(); }

 protected:
  std::string str_;
};
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
@@ -0,0 +1,187 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
|
||||
namespace aql_profile {
|
||||
class CommandBufferMgr {
|
||||
public:
|
||||
struct info_t {
|
||||
uint32_t prefix_size;
|
||||
uint32_t rdcmds_size;
|
||||
uint32_t rd2cmds_size;
|
||||
uint32_t is_rd_fetch2;
|
||||
uint32_t precmds_size;
|
||||
uint32_t postcmds_size;
|
||||
};
|
||||
|
||||
CommandBufferMgr(void* ptr, const uint32_t& size) { Init(descriptor_t{ptr, size}, false); }
|
||||
explicit CommandBufferMgr(const profile_t* profile) { Init(profile->command_buffer, true); }
|
||||
|
||||
char* GetPrefix() { return reinterpret_cast<char*>(buffer_.ptr); }
|
||||
char* GetPrefix1() { return reinterpret_cast<char*>(buffer_.ptr) + sizeof(info_t); }
|
||||
char* AddPrefix(const uint32_t& delta) {
|
||||
const uint32_t size = Align(delta);
|
||||
char* ptr = (buffer_.ptr != NULL) ? GetPrefix() + info_.prefix_size : NULL;
|
||||
info_.prefix_size += delta;
|
||||
buffer_.size -= (size < buffer_.size) ? size : buffer_.size;
|
||||
if (buffer_.size == 0)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::AddPrefix(): buffer size set to zero");
|
||||
return (buffer_.size != 0) ? ptr : NULL;
|
||||
}
|
||||
|
||||
bool SetRdSize(const uint32_t& rd_data_size) {
|
||||
const uint32_t size = Align(rd_data_size);
|
||||
const bool suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.rdcmds_size = rd_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetRdSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool SetRd2Size(const uint32_t& rd_data_size) {
|
||||
const uint32_t size = Align(rd_data_size);
|
||||
const bool suc = SetRdSize(Align(size));
|
||||
if (suc) {
|
||||
info_.rd2cmds_size = rd_data_size;
|
||||
info_.rdcmds_size = 2 * size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetRd2Size(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool SetPreSize(const uint32_t& pre_data_size) {
|
||||
const uint32_t size = Align(pre_data_size);
|
||||
const bool suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.precmds_size = pre_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetPreSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool Finalize(const uint32_t& data_size) {
|
||||
bool suc = (data_size > info_.precmds_size);
|
||||
if (suc) {
|
||||
const uint32_t post_data_size = data_size - info_.precmds_size;
|
||||
const uint32_t size = Align(post_data_size);
|
||||
suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.postcmds_size = post_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::Finalize(): postcmd size is out of cmdbuffer");
|
||||
}
|
||||
if (!suc) throw aql_profile_exc_msg("CommandBufferMgr::Finalize(): postcmd size is zero");
|
||||
|
||||
if (info_slot_) *info_slot_ = info_;
|
||||
|
||||
return suc;
|
||||
}
|
||||
|
||||
uint32_t GetSize() const { return GetEndOffset(); }
|
||||
|
||||
descriptor_t GetRdDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset();
|
||||
descr.size = info_.rdcmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t FetchRdDescr() {
|
||||
descriptor_t descr;
|
||||
if (info_.is_rd_fetch2 == 0) {
|
||||
info_.is_rd_fetch2 = 1;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset();
|
||||
} else {
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset() + (info_.rdcmds_size / 2);
|
||||
}
|
||||
descr.size = info_.rd2cmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t GetPreDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetPreOffset();
|
||||
descr.size = info_.precmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t GetPostDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetPostOffset();
|
||||
descr.size = info_.postcmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
static uint32_t Align(const uint32_t& size) { return (size + align_mask_) & ~align_mask_; }
|
||||
|
||||
private:
|
||||
void Init(const descriptor_t& buffer, const bool& import) {
|
||||
buffer_ = buffer;
|
||||
info_ = {};
|
||||
info_slot_ = NULL;
|
||||
|
||||
uint32_t prefix_size = sizeof(info_t);
|
||||
if (buffer_.ptr != NULL) {
|
||||
info_slot_ = reinterpret_cast<info_t*>(GetPrefix());
|
||||
if (import) {
|
||||
prefix_size = info_slot_->prefix_size;
|
||||
info_ = *info_slot_;
|
||||
info_.prefix_size = 0;
|
||||
}
|
||||
} else {
|
||||
buffer_.size = UINT_MAX;
|
||||
}
|
||||
AddPrefix(prefix_size);
|
||||
}
|
||||
|
||||
uint32_t GetRdOffset() const { return Align(info_.prefix_size); }
|
||||
uint32_t GetPreOffset() const { return GetRdOffset() + Align(info_.rdcmds_size); }
|
||||
uint32_t GetPostOffset() const { return GetPreOffset() + Align(info_.precmds_size); }
|
||||
uint32_t GetEndOffset() const { return GetPostOffset() + Align(info_.postcmds_size); }
|
||||
|
||||
static const uint32_t align_size_ = 0x100;
|
||||
static const uint32_t align_mask_ = align_size_ - 1;
|
||||
|
||||
descriptor_t buffer_;
|
||||
info_t info_;
|
||||
info_t* info_slot_;
|
||||
};
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,204 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
#include "def/gpu_block_info.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <array>
|
||||
|
||||
// One axis of a counter's sample space (for example "SE" with extent 4).
struct EventDimension {
  EventDimension(const EventDimension& other) = default;
  // Looks up the numeric id of _name in dimension_table; throws
  // std::out_of_range when the name is unknown or init() has not run yet.
  // Initializers are listed in member declaration order.
  EventDimension(std::string_view _name, size_t _extent)
      : id(dimension_table.at(std::string(_name))), extent(_extent), name(_name) {}

  uint64_t id;            // index of the dimension name in dimension_list
  uint64_t extent;        // number of coordinates along this dimension
  std::string_view name;  // non-owning; the characters must outlive this object

  static std::vector<std::string> dimension_list;
  static std::unordered_map<std::string, size_t> dimension_table;

  // Fills dimension_list and dimension_table with the known dimension names.
  // Safe to call repeatedly and from several threads: the work is tied to the
  // initialization of a function-local static, which the language guarantees
  // to run exactly once while concurrent callers wait. The previous
  // unsynchronized "check size, then push_back" could race when two threads
  // created their first EventAttribDimension at the same time.
  static void init() {
    static const bool populated = [] {
      if (dimension_list.empty()) {
        for (const char* label : {"XCD", "AID", "SE", "SA", "WGP", "INSTANCE"})
          dimension_list.emplace_back(label);

        for (size_t i = 0; i < dimension_list.size(); i++) dimension_table[dimension_list[i]] = i;
      }
      return true;
    }();
    (void)populated;
  }
};
|
||||
|
||||
// Pair of (agent handle, block name) used as the key of the per-thread
// EventAttribDimension cache.
class EventKey {
 public:
  uint64_t agent;
  uint64_t block;

  // Two keys are equal when both the agent and the block match.
  bool operator==(const EventKey& other) const {
    if (agent != other.agent) return false;
    return block == other.block;
  }
  bool operator!=(const EventKey& other) const { return !operator==(other); }
};
|
||||
|
||||
template <>
|
||||
struct std::hash<EventKey> {
|
||||
uint64_t operator()(const EventKey& ev) const {
|
||||
return ev.agent | (ev.block << 56) | (ev.block >> 8);
|
||||
}
|
||||
};
|
||||
|
||||
class EventAttribDimension {
|
||||
public:
|
||||
static constexpr size_t event_id_bit = 24;
|
||||
|
||||
template <typename AgentType>
|
||||
EventAttribDimension(AgentType agent, hsa_ven_amd_aqlprofile_block_name_t block_name)
|
||||
: key({agent.handle, (uint64_t)block_name}) {
|
||||
EventDimension::init();
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
this->block_info = pm4_factory->GetBlockInfo(block_name);
|
||||
|
||||
bIsGFX12 = pm4_factory->IsGFX12();
|
||||
bIsGFX11 = pm4_factory->IsGFX11();
|
||||
bIsGFX9 = pm4_factory->IsGFX9();
|
||||
|
||||
num_xccs = pm4_factory->GetXccNumber();
|
||||
if (num_xccs > 1 && HasAttr(CounterBlockUmcAttr)) { // For MI300 AID only
|
||||
num_xccs = 1;
|
||||
num_aid = 4;
|
||||
}
|
||||
shader_engine = HasAttr(CounterBlockSeAttr);
|
||||
shader_array = HasAttr(CounterBlockSaAttr);
|
||||
|
||||
if (bIsGFX9)
|
||||
compute_unit = HasAttr(CounterBlockTcAttr) && shader_engine;
|
||||
else if (bIsGFX11 || bIsGFX12)
|
||||
workgroup_processor = HasAttr(CounterBlockSqAttr);
|
||||
|
||||
se_num = pm4_factory->GetShaderEnginesNumber();
|
||||
sarrays = pm4_factory->GetShaderArraysNumber() * se_num;
|
||||
|
||||
cu_num = (pm4_factory->GetComputeUnitNumber() + sarrays - 1) / sarrays;
|
||||
wgp_num = (pm4_factory->GetComputeUnitNumber() / 2 + sarrays - 1) / sarrays;
|
||||
|
||||
if (HasAttr(CounterBlockUmcAttr))
|
||||
block_instance_count = block_info->instance_count / num_aid;
|
||||
else if (compute_unit)
|
||||
block_instance_count = std::min<size_t>(block_info->instance_count, cu_num + 1);
|
||||
else
|
||||
block_instance_count = block_info->instance_count;
|
||||
|
||||
if (num_xccs > 1) dimensions.push_back({"XCD", num_xccs});
|
||||
if (num_aid > 1) dimensions.push_back({"AID", num_aid});
|
||||
|
||||
if (workgroup_processor)
|
||||
dimensions.push_back({"WGP", wgp_num});
|
||||
else
|
||||
dimensions.push_back({"INSTANCE", block_instance_count});
|
||||
|
||||
if (shader_engine)
|
||||
dimensions.push_back(
|
||||
{"SE", pm4_factory->GetShaderEnginesNumber() / (num_xccs > 0 ? num_xccs : 1)});
|
||||
if (shader_array) dimensions.push_back({"SA", pm4_factory->GetShaderArraysNumber()});
|
||||
}
|
||||
|
||||
size_t get_num_xccs() const { return num_xccs; };
|
||||
size_t get_total_elements() const {
|
||||
size_t acc = 1;
|
||||
for (auto& d : dimensions) acc *= d.extent;
|
||||
return acc;
|
||||
}
|
||||
uint64_t get_num() const { return dimensions.size(); };
|
||||
EventDimension get_dim(uint64_t index) const { return dimensions.at(index); };
|
||||
|
||||
hsa_status_t get_coordinates(uint8_t* coordinates, int64_t cumulative_id) const {
|
||||
const int end = static_cast<int>(get_num()) - 1;
|
||||
for (int i = end; i >= 0; i--) {
|
||||
coordinates[i] = static_cast<uint8_t>(cumulative_id % dimensions.at(i).extent);
|
||||
cumulative_id /= dimensions.at(i).extent;
|
||||
}
|
||||
if (cumulative_id != 0) return HSA_STATUS_ERROR_INVALID_INDEX;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
size_t get_num_instances() const { return block_instance_count; }
|
||||
|
||||
private:
|
||||
bool HasAttr(CounterBlockAttr attr) const { return (block_info->attr & attr) != 0; }
|
||||
|
||||
EventKey key;
|
||||
const GpuBlockInfo* block_info = nullptr;
|
||||
hsa_ven_amd_aqlprofile_event_t event{};
|
||||
|
||||
bool bIsGFX12;
|
||||
bool bIsGFX11;
|
||||
bool bIsGFX9;
|
||||
|
||||
bool shader_engine = false;
|
||||
bool shader_array = false;
|
||||
bool compute_unit = false;
|
||||
bool workgroup_processor = false;
|
||||
|
||||
size_t num_xccs = 1;
|
||||
size_t num_aid = 1;
|
||||
size_t se_num = 1;
|
||||
size_t sarrays = 1;
|
||||
size_t cu_num = 1;
|
||||
size_t wgp_num = 1;
|
||||
size_t block_instance_count = 1;
|
||||
|
||||
std::vector<EventDimension> dimensions;
|
||||
|
||||
public:
|
||||
template <typename AgentType>
|
||||
static const EventAttribDimension& get(AgentType agent,
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name) {
|
||||
thread_local std::unordered_map<EventKey, std::shared_ptr<EventAttribDimension>> event_map{};
|
||||
thread_local std::shared_ptr<EventAttribDimension> event_cache{nullptr};
|
||||
|
||||
EventKey key{agent.handle, (uint64_t)block_name};
|
||||
|
||||
if (!event_cache || event_cache->key != key) {
|
||||
auto it = event_map.find(key);
|
||||
if (auto it = event_map.find(key); it != event_map.end())
|
||||
event_cache = it->second;
|
||||
else
|
||||
event_cache =
|
||||
event_map.emplace(key, std::make_shared<EventAttribDimension>(agent, block_name))
|
||||
.first->second;
|
||||
}
|
||||
|
||||
return *event_cache;
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,447 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/counter_dimensions.hpp"
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
#include "memorymanager.hpp"
|
||||
|
||||
// Symbol visibility and load/unload hooks (GCC/Clang attributes).
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))

// If cond holds, logs msg through ERR_LOGGING and returns err from the
// enclosing function.
#define ERR_CHECK(cond, err, msg) \
  {                               \
    if (cond) {                   \
      ERR_LOGGING << msg;         \
      return err;                 \
    }                             \
  }

// Bracket a function body with HSA_TRY_WRAP ... HSA_CATCH_WRAP to turn any
// std::exception into an HSA_STATUS_ERROR return value (nothing is logged).
#define HSA_TRY_WRAP try {
#define HSA_CATCH_WRAP         \
  }                            \
  catch (std::exception&) {    \
    return HSA_STATUS_ERROR;   \
  }
|
||||
|
||||
// Storage for the static lookup tables declared in EventDimension; both start
// out empty and are filled by EventDimension::init().
std::vector<std::string> EventDimension::dimension_list{};
std::unordered_map<std::string, size_t> EventDimension::dimension_table{};
|
||||
|
||||
namespace aql_profile_v2 {
|
||||
// Command buffer partitioning manager
|
||||
// Supports Pre/Post commands partitioning
|
||||
// and prefix control partition
|
||||
|
||||
using aql_profile::event_exception;
|
||||
using aql_profile::event_t;
|
||||
using ::aql_profile::Pm4Factory;
|
||||
|
||||
// Returns the counter id to program for an SQ event: the event's own id,
// unless its accumulation flag selects low- or high-resolution accumulation,
// in which case the corresponding id reported by the factory is used.
uint32_t HandleSQFlagsBlock(Pm4Factory* pm4_factory, const aqlprofile_pmc_event_t& event) {
  auto counter_id = event.event_id;

  const bool want_lo_res = (event.flags.sq_flags.accum == AQLPROFILE_ACCUMULATION_LO_RES);
  const bool want_hi_res = (event.flags.sq_flags.accum == AQLPROFILE_ACCUMULATION_HI_RES);

  if (want_lo_res) counter_id = pm4_factory->GetAccumLowID();
  if (want_hi_res) counter_id = pm4_factory->GetAccumHiID();

  return counter_id;
}
|
||||
|
||||
// Builds the counter descriptor for one requested event.
//
// index_map holds, per (block id, block index), how many counter registers
// have been handed out so far; the event receives the next free register and
// the count is incremented. Events for which pm4_builder::SPISkip() returns
// true are flagged as internal and do not consume a register.
//
// Throws std::string when the block has no free counter register left, and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when flags are set on an event whose block
// is not SQ.
// NOTE(review): std::string is not derived from std::exception, so handlers
// for std::exception will not see that first error - confirm this is intended.
counter_des_t GetCounter(Pm4Factory* pm4_factory, EventRequest& event,
                         std::map<block_des_t, uint32_t, lt_block_des>& index_map) {
  const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(event.block_name);
  const block_des_t block_des = {block_info->id, event.block_index};

  // Creates the per-block entry on first use; an existing count is kept.
  const auto slot = index_map.insert({block_des, 0}).first;
  const auto reg_index = slot->second;
  auto visible_id = event.event_id;

  const bool skipped = pm4_builder::SPISkip(block_info->attr, visible_id);
  if (skipped) {
    event.bInternal = true;
    return {visible_id, reg_index, block_des, block_info};
  }

  if (reg_index >= block_info->counter_count)
    throw std::string("Event is out of block counter registers number limit");

  if (event.flags.raw) {
    // Flags are only understood for the SQ block.
    if (event.block_name != HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ)
      throw HSA_STATUS_ERROR_INVALID_ARGUMENT;
    visible_id = HandleSQFlagsBlock(pm4_factory, event);
  }

  // The register is now taken.
  slot->second++;
  return {visible_id, reg_index, block_des, block_info};
}
|
||||
|
||||
// Converts the requested events into a counters vector, assigning counter
// registers per block in request order. On GFX10, when none of the collected
// counters carries CounterBlockGRBMAttr, one zero-initialized GRBM event is
// appended.
pm4_builder::counters_vector CountersVec(std::vector<EventRequest>& events,
                                         Pm4Factory* pm4_factory) {
  pm4_builder::counters_vector vec;
  std::map<block_des_t, uint32_t, lt_block_des> index_map;

  for (auto it = events.begin(); it != events.end(); ++it)
    vec.push_back(GetCounter(pm4_factory, *it, index_map));

  const bool needs_grbm =
      pm4_factory->IsGFX10() && (vec.get_attr() & CounterBlockGRBMAttr) == 0;
  if (needs_grbm) {
    EventRequest grbm_event{0};
    grbm_event.block_name = HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM;
    vec.push_back(GetCounter(pm4_factory, grbm_event, index_map));
  }
  return vec;
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
hsa_status_t _internal_aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_pmc_data_callback_t callback,
|
||||
void* userdata) {
|
||||
auto counter_memorymgr = MemoryManager::GetManager(handle.handle);
|
||||
CounterMemoryManager* memorymgr = dynamic_cast<CounterMemoryManager*>(counter_memorymgr.get());
|
||||
if (!memorymgr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(memorymgr->AgentHandle());
|
||||
const uint32_t xcc_num = pm4_factory->GetXccNumber();
|
||||
|
||||
uint64_t* samples = reinterpret_cast<uint64_t*>(memorymgr->GetOutputBuf());
|
||||
uint64_t* buffer_end_location = samples + memorymgr->GetOutputBufSize() / sizeof(uint64_t);
|
||||
auto& events = memorymgr->GetEvents();
|
||||
|
||||
size_t umc_sample_id = 0;
|
||||
if (xcc_num > 1)
|
||||
for (auto& event : events) {
|
||||
if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
|
||||
|
||||
if (!(pm4_factory->GetBlockInfo(event.block_name)->attr & CounterBlockUmcAttr)) continue;
|
||||
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: sample index(%u) id(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n",
|
||||
sample_index, sample_id, p->block_name, p->block_index, p->counter_id, *samples);
|
||||
#endif
|
||||
|
||||
hsa_status_t status = callback(event, event.block_index, *samples, userdata);
|
||||
samples++;
|
||||
umc_sample_id++;
|
||||
|
||||
if (status == HSA_STATUS_INFO_BREAK) return HSA_STATUS_SUCCESS;
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
}
|
||||
|
||||
size_t xcc_sample_count = 0;
|
||||
for (uint32_t xcc_index = 0; xcc_index < xcc_num; xcc_index++)
|
||||
for (auto& event : events) {
|
||||
if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
|
||||
|
||||
if (pm4_factory->GetBlockInfo(event.block_name)->attr & CounterBlockUmcAttr) continue;
|
||||
|
||||
// non-MI300A-AID counter event.
|
||||
uint32_t block_samples_count = pm4_factory->GetNumEvents(event.block_name);
|
||||
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: xcc(%u) blk(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n", xcc_index,
|
||||
blk, event.block_name, event.block_index, event.event_id, *samples);
|
||||
#endif
|
||||
xcc_sample_count += xcc_index == 0;
|
||||
size_t xcc_sample_id = xcc_sample_count * xcc_index +
|
||||
static_cast<size_t>(event.block_index) * block_samples_count + blk;
|
||||
|
||||
if (!event.bInternal) {
|
||||
hsa_status_t status = callback(event, xcc_sample_id, *samples, userdata);
|
||||
if (status == HSA_STATUS_INFO_BREAK)
|
||||
return HSA_STATUS_SUCCESS;
|
||||
else if (status != HSA_STATUS_SUCCESS)
|
||||
return status;
|
||||
}
|
||||
|
||||
samples++;
|
||||
}
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Builds the read, start and stop AQL packets for a PMC profile.
//
// handle      receives the handle of the memory manager created for this
//             profile; it is set only on success.
// packets     receives the three populated packets.
// profile     agent plus the list of events to collect.
// alloc_cb / dealloc_cb / userdata
//             forwarded to the memory manager for buffer management.
// memcpy_cb   used to copy the generated commands into the command buffer.
//
// The command buffer holds the three command streams back to back in the
// order read, start, stop, each padded to CommandBufferMgr's alignment.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when handle or packets is null,
// HSA_STATUS_ERROR when the read commands produce no data, and
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the output buffer is smaller than
// the data the read commands write. Exceptions raised while building the
// counter list propagate to the caller.
hsa_status_t _internal_aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  // Both outputs are dereferenced below; reject null before anything is
  // allocated or registered.
  if (handle == nullptr || packets == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  auto memorymgr =
      std::make_shared<CounterMemoryManager>(profile.agent, alloc_cb, dealloc_cb, userdata);
  // NOTE(review): the manager is registered before the packets are known to
  // be buildable; on the error returns below the caller never receives the
  // handle - confirm whether the registration should be undone there.
  MemoryManager::RegisterManager(memorymgr);
  memorymgr->CopyEvents(profile.events, profile.event_count);

  pm4_builder::CmdBuffer read_cmd;
  pm4_builder::CmdBuffer start_cmd;
  pm4_builder::CmdBuffer stop_cmd;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile.agent);
  const pm4_builder::counters_vector countersVec = CountersVec(memorymgr->GetEvents(), pm4_factory);

  pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();

  // Size the output buffer
  size_t output_bytes = 8;  // Extra space for GRBM block on gfx10
  for (auto& event : memorymgr->GetEvents())
    output_bytes += pm4_factory->GetBytesNeeded(event.block_name);
  memorymgr->CreateOutputBuf(output_bytes);
  // Generate read commands
  size_t data_size = pmc_builder->Read(&read_cmd, countersVec, memorymgr->GetOutputBuf());
  // Generate start commands
  pmc_builder->Start(&start_cmd, countersVec);
  // Generate stop commands
  pmc_builder->Stop(&stop_cmd, countersVec);

  // data_size is what Read() reported, so the message names Read().
  ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC Builder Read(): data size set to zero");
  if (memorymgr->GetOutputBufSize() < data_size) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Copy generated commands
  size_t start_size = aql_profile::CommandBufferMgr::Align(start_cmd.Size());
  size_t stop_size = aql_profile::CommandBufferMgr::Align(stop_cmd.Size());
  size_t read_size = aql_profile::CommandBufferMgr::Align(read_cmd.Size());
  memorymgr->CreateCmdBuf(start_size + stop_size + read_size);

  handle->handle = memorymgr->GetHandler();
  pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
  uint8_t* cmdbuf = reinterpret_cast<uint8_t*>(memorymgr->GetCmdBuf());

  memcpy_cb(cmdbuf, read_cmd.Data(), read_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, read_cmd.Size(), cmd_writer, &packets->read_packet);
  cmdbuf += read_size;
  memcpy_cb(cmdbuf, start_cmd.Data(), start_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, start_cmd.Size(), cmd_writer, &packets->start_packet);
  cmdbuf += start_size;
  memcpy_cb(cmdbuf, stop_cmd.Data(), stop_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, stop_cmd.Size(), cmd_writer, &packets->stop_packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace aql_profile_v2
|
||||
|
||||
extern "C" {
|
||||
|
||||
// C entry point for PMC packet creation. Forwards every argument to
// aql_profile_v2::_internal_aqlprofile_pmc_create_packets and converts any exception
// into a status code so that nothing propagates across the C boundary:
//   - a thrown hsa_status_t is logged and returned as-is,
//   - a std::exception is logged and mapped to HSA_STATUS_ERROR,
//   - anything else is mapped to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_pmc_create_packets(
        handle, packets, profile, alloc_cb, dealloc_cb, memcpy_cb, userdata);
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Caught by const reference: the handler only reads the message.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
// Asks MemoryManager to delete the manager identified by `handle.handle`.
// Deliberately best-effort: any exception raised during deletion is swallowed, since
// this void C entry point has no way to report it.
PUBLIC_API void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle) {
  try {
    MemoryManager::DeleteManager(handle.handle);
  } catch (...) {
    // Nothing to report to the caller; fall through and return.
  }
}
|
||||
|
||||
// C entry point for iterating collected PMC data. Forwards to
// aql_profile_v2::_internal_aqlprofile_pmc_iterate_data and converts exceptions into
// status codes: a thrown hsa_status_t is logged and returned, a std::exception is logged
// and mapped to HSA_STATUS_ERROR, anything else is mapped to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
                                                    aqlprofile_pmc_data_callback_t callback,
                                                    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_pmc_iterate_data(handle, callback, userdata);
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Caught by const reference: the handler only reads the message.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
// Invokes `callback(id, name, user_data)` once for every entry of
// EventDimension::dimension_table, after calling EventDimension::init().
//
// Iteration stops at the first callback result other than HSA_STATUS_SUCCESS and that
// result is returned. A thrown hsa_status_t is logged and returned; a std::exception is
// logged and mapped to HSA_STATUS_ERROR (matching the other entry points in this file);
// any other exception is mapped to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_ids(aqlprofile_eventname_callback_t callback,
                                                     void* user_data) {
  try {
    EventDimension::init();
    for (const auto& [name, id] : EventDimension::dimension_table) {
      if (auto ret = callback(id, name.c_str(), user_data); ret != HSA_STATUS_SUCCESS) {
        return ret;
      }
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Reports the coordinates of `counter_id` within the block named by `event.block_name`.
//
// Looks up the block's EventAttribDimension for `agent`, fills a local coordinate buffer
// through get_coordinates(), then calls
//   callback(index, dim.id, dim.extent, coordinate, dim.name.data(), userdata)
// once per dimension. The callback's result is not inspected.
//
// Returns HSA_STATUS_ERROR when the block reports zero dimensions or when the dimension
// count does not fit the local 32-entry buffer. A thrown hsa_status_t is logged and
// returned; other exceptions are mapped to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_coord(aqlprofile_agent_handle_t agent,
                                                       aqlprofile_pmc_event_t event,
                                                       uint64_t counter_id,
                                                       aqlprofile_coordinate_callback_t callback,
                                                       void* userdata) {
  try {
    const EventAttribDimension& attrib = EventAttribDimension::get(agent, event.block_name);

    // Read the dimension count once; it bounds both the buffer check and the loop.
    const size_t num_dims = attrib.get_num();
    if (num_dims == 0) return HSA_STATUS_ERROR;

    std::array<uint8_t, 32> coord{};
    // Enforced at run time (same bound the former assert used) so that release builds,
    // where assert() is compiled out, cannot write past the buffer.
    if (num_dims >= coord.size()) return HSA_STATUS_ERROR;
    attrib.get_coordinates(coord.data(), counter_id);

    for (size_t i = 0; i < num_dims; i++) {
      EventDimension dim = attrib.get_dim(i);
      callback(i, dim.id, dim.extent, coord.at(i), dim.name.data(), userdata);
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Registers an agent described by the original aqlprofile_agent_info_t layout.
// Thin wrapper: delegates to aqlprofile_register_agent_info with
// AQLPROFILE_AGENT_VERSION_V0 and returns its status unchanged.
PUBLIC_API hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
                                                  const aqlprofile_agent_info_t* agent_info) {
  const void* const untyped_info = agent_info;
  return aqlprofile_register_agent_info(agent_id, untyped_info, AQLPROFILE_AGENT_VERSION_V0);
}
|
||||
|
||||
// Registers an agent from a versioned, untyped info structure and stores the resulting
// handle in `*agent_id`.
//
//   - AQLPROFILE_AGENT_VERSION_V0: `agent_info` is read as aqlprofile_agent_info_t and
//     converted to the v1 layout with `domain` and `location_id` set to 0.
//   - AQLPROFILE_AGENT_VERSION_V1: `agent_info` is read as aqlprofile_agent_info_v1_t.
//   - any other value: HSA_STATUS_ERROR_INVALID_ARGUMENT.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when `agent_id` or `agent_info` is null.
// A thrown hsa_status_t is logged and returned; other exceptions map to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_register_agent_info(aqlprofile_agent_handle_t* agent_id,
                                                       const void* agent_info,
                                                       aqlprofile_agent_version_t version) {
  try {
    if (agent_id == nullptr || agent_info == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    switch (version) {
      case AQLPROFILE_AGENT_VERSION_V0: {
        const auto* info = static_cast<const aqlprofile_agent_info_t*>(agent_info);
        aqlprofile_agent_info_v1_t info_v1 = {
            .agent_gfxip = info->agent_gfxip,
            .xcc_num = info->xcc_num,
            .se_num = info->se_num,
            .cu_num = info->cu_num,
            .shader_arrays_per_se = info->shader_arrays_per_se,
            .domain = 0,
            .location_id = 0,
        };
        *agent_id = aql_profile::RegisterAgent(&info_v1);
      } break;
      case AQLPROFILE_AGENT_VERSION_V1: {
        *agent_id =
            aql_profile::RegisterAgent(static_cast<const aqlprofile_agent_info_v1_t*>(agent_info));
      } break;
      case AQLPROFILE_AGENT_VERSION_NONE:
      case AQLPROFILE_AGENT_VERSION_LAST:
      default:
        // `default` covers values outside the enumerators (the argument arrives across a
        // C ABI); without it such a value would report success with *agent_id unset.
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
PUBLIC_API hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
|
||||
const aqlprofile_pmc_event_t* event,
|
||||
bool* result) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
*result = false;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
if (pm4_factory->GetBlockInfo(event) != NULL) *result = true;
|
||||
} catch (hsa_status_t err) {
|
||||
ERR_LOGGING << err;
|
||||
return err;
|
||||
} catch (...) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Answers a PMC information query for the agent in `profile`.
//
//   - AQLPROFILE_INFO_BLOCK_ID: `value` is read as hsa_ven_amd_aqlprofile_id_query_t*.
//     The block is looked up by `query->name`; on success `query->id` and
//     `query->instance_count` are filled in. Fails with HSA_STATUS_ERROR when the block
//     has no info or reports zero dimensions.
//   - AQLPROFILE_INFO_BLOCK_COUNTERS: `value` is read as uint32_t* and receives the
//     counter_count of the block of `profile->events[0]`.
//   - any other attribute: HSA_STATUS_ERROR_INVALID_ARGUMENT.
//
// Returns HSA_STATUS_ERROR when `profile` is null and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when `value` is null. A thrown hsa_status_t is
// logged and returned; other exceptions map to HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
                                                aqlprofile_pmc_info_type_t attribute, void* value) {
  if (!profile) return HSA_STATUS_ERROR;
  // Every supported attribute writes through `value`.
  if (value == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile->agent);

    switch (attribute) {
      case AQLPROFILE_INFO_BLOCK_ID: {
        hsa_ven_amd_aqlprofile_id_query_t* query =
            reinterpret_cast<hsa_ven_amd_aqlprofile_id_query_t*>(value);
        const uint32_t block = pm4_factory->FindBlock(query->name);
        const GpuBlockInfo* info = pm4_factory->GetBlockInfo(block);
        if (!info) return HSA_STATUS_ERROR;

        const auto& attrib = EventAttribDimension::get(
            profile->agent, static_cast<hsa_ven_amd_aqlprofile_block_name_t>(block));
        if (!attrib.get_num()) return HSA_STATUS_ERROR;

        query->id = block;
        query->instance_count = attrib.get_num_instances();
      } break;
      case AQLPROFILE_INFO_BLOCK_COUNTERS: {
        // The first event selects the block; it must exist and be known to the factory
        // (GetBlockInfo can return null, see aqlprofile_validate_pmc_event).
        if (profile->events == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
        const auto* info = pm4_factory->GetBlockInfo(&profile->events[0]);
        if (info == nullptr) return HSA_STATUS_ERROR;
        *reinterpret_cast<uint32_t*>(value) = info->counter_count;
      } break;
      default:
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,107 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx10_def.h"
|
||||
#include "pm4/gfx10_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx10 factory class
|
||||
class Gfx10Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx10Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx10Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX10() const override { return true; }
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 1; };
|
||||
|
||||
protected:
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
// void Gfx10Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
void Gfx10Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx10CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
// GFX10 block table
|
||||
const GpuBlockInfo* Gfx10Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
|
||||
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
|
||||
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
|
||||
NULL /*&TcaCounterBlockInfo*/, NULL /*&TccCounterBlockInfo*/, NULL /*&TcpCounterBlockInfo*/,
|
||||
NULL /*&TdCounterBlockInfo*/,
|
||||
// MC blocks
|
||||
NULL /*MC_ARB*/, NULL /*MC_HUB*/, NULL /*MC_MCBVM*/, NULL /*MC_SEQ*/,
|
||||
NULL /*&McVmL2CounterBlockInfo*/, NULL /*MC_XBAR*/, NULL /*&AtcCounterBlockInfo*/,
|
||||
NULL /*&AtcL2CounterBlockInfo*/, &GceaCounterBlockInfo, NULL /*&RpbCounterBlockInfo*/,
|
||||
// System blocks
|
||||
NULL /*&SdmaCounterBlockInfo*/,
|
||||
// new navi blocks
|
||||
&Gl1aCounterBlockInfo, &Gl1cCounterBlockInfo, &Gl2aCounterBlockInfo, &Gl2cCounterBlockInfo,
|
||||
&GcrCounterBlockInfo, &GusCounterBlockInfo};
|
||||
|
||||
// Pm4Factory create mathods
|
||||
Pm4Factory* Pm4Factory::Gfx10Create(const AgentInfo* agent_info) {
|
||||
auto p = new Gfx10Factory(agent_info);
|
||||
if (p == NULL) throw aql_profile_exc_msg("Gfx10Factory allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,107 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx11_def.h"
|
||||
#include "pm4/gfx11_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx11 factory class
|
||||
class Gfx11Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx11Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx11Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX11() const override { return true; }
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 1; };
|
||||
|
||||
protected:
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
// void Gfx11Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
void Gfx11Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx11CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
// GFX11 block table
|
||||
const GpuBlockInfo* Gfx11Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
&CpcCounterBlockInfo, &CpfCounterBlockInfo, &GdsCounterBlockInfo, &GrbmCounterBlockInfo,
|
||||
NULL /*&GrbmSeCounterBlockInfo*/, &SpiCounterBlockInfo, &SqCounterBlockInfo,
|
||||
NULL /*&SqCsCounterBlockInfo*/, NULL /*GFX8 SRBM*/, &SxCounterBlockInfo, &TaCounterBlockInfo,
|
||||
NULL /*&TcaCounterBlockInfo*/, NULL /*&TccCounterBlockInfo*/, &TcpCounterBlockInfo,
|
||||
NULL /*&TdCounterBlockInfo*/,
|
||||
// MC blocks
|
||||
NULL /*MC_ARB*/, NULL /*MC_HUB*/, NULL /*MC_MCBVM*/, NULL /*MC_SEQ*/,
|
||||
NULL /*&McVmL2CounterBlockInfo*/, NULL /*MC_XBAR*/, NULL /*&AtcCounterBlockInfo*/,
|
||||
NULL /*&AtcL2CounterBlockInfo*/, &GceaCounterBlockInfo, NULL /*&RpbCounterBlockInfo*/,
|
||||
// System blocks
|
||||
NULL /*&SdmaCounterBlockInfo*/,
|
||||
// new navi blocks
|
||||
&Gl1aCounterBlockInfo, &Gl1cCounterBlockInfo, &Gl2aCounterBlockInfo, &Gl2cCounterBlockInfo,
|
||||
&GcrCounterBlockInfo, &GusCounterBlockInfo};
|
||||
|
||||
// Pm4Factory create mathods
|
||||
Pm4Factory* Pm4Factory::Gfx11Create(const AgentInfo* agent_info) {
|
||||
auto p = new Gfx11Factory(agent_info);
|
||||
if (p == NULL) throw aql_profile_exc_msg("Gfx11Factory allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,116 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx12_def.h"
|
||||
#include "pm4/gfx12_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx12 factory class
|
||||
class Gfx12Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx12Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx12Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX12() const override { return true; }
|
||||
|
||||
protected:
|
||||
void ConstructBuilders(const AgentInfo* agent_info);
|
||||
void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info) {
|
||||
agent_info_ = agent_info;
|
||||
ConstructBuilders(agent_info);
|
||||
ConstructTable(agent_info);
|
||||
}
|
||||
const GpuBlockInfo* block_table_[LastCounterBlockId + 1]{};
|
||||
};
|
||||
|
||||
void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx12CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
}
|
||||
|
||||
void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
|
||||
// Global blocks
|
||||
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPC)] = &CpcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPF)] = &CpfCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCEA)] = &GceaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCR)] = &GcrCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2A)] = &Gl2aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2C)] = &Gl2cCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBM)] = &GrbmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SDMA_PM)] = &SdmaPmCounterBlockInfo;
|
||||
// SE blocks
|
||||
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SPI)] = &SpiCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SQ)] = &SqcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
|
||||
// SA blocks
|
||||
block_table_[__BLOCK_ID(GL1A)] = &Gl1aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL1C)] = &Gl1cCounterBlockInfo;
|
||||
// WGP blocks
|
||||
block_table_[__BLOCK_ID(TA)] = &TaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TCP)] = &TcpCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TD)] = &TdCounterBlockInfo;
|
||||
}
|
||||
|
||||
// Pm4Factory create mathods
|
||||
Pm4Factory* Pm4Factory::Gfx12Create(const AgentInfo* agent_info) {
|
||||
auto p = new Gfx12Factory(agent_info);
|
||||
if (p == NULL) throw aql_profile_exc_msg("Gfx12Factory allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,81 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx908_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
const GpuBlockInfo* Mi100Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi100Factory::Mi100Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (i == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[i] = block_info;
|
||||
|
||||
// overwrite block info for any update from gfx9 to mi100
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 303;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 87;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 295;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 58;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 83;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = gfx9_cntx_prim::SDMA_COUNTER_BLOCK_NUM_INSTANCES;
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 6;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates an Mi100Factory for `agent_info` and returns it as a Pm4Factory pointer.
// Throws aql_profile_exc_msg if the allocated pointer is null.
Pm4Factory* Pm4Factory::Mi100Create(const AgentInfo* agent_info) {
  Pm4Factory* const factory = new Mi100Factory(agent_info);
  if (!factory) throw aql_profile_exc_msg("Mi100Factory allocation failed");
  return factory;
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,93 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx90a_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Mi200 factory class
|
||||
class Mi200Factory : public Gfx9Factory {
|
||||
public:
|
||||
explicit Mi200Factory(const AgentInfo* agent_info);
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 185; };
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
const GpuBlockInfo* Mi200Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi200Factory::Mi200Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (i == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[i] = block_info;
|
||||
// overwrite block info for any update from gfx9 to mi100
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 303;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 87;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 295;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 58;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 83;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = 5;
|
||||
// Print(block_info);
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 9;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates an Mi200Factory for `agent_info` and returns it as a Pm4Factory pointer.
// Throws aql_profile_exc_msg if the allocated pointer is null.
Pm4Factory* Pm4Factory::Mi200Create(const AgentInfo* agent_info) {
  Pm4Factory* const factory = new Mi200Factory(agent_info);
  if (!factory) throw aql_profile_exc_msg("Mi200Factory allocation failed");
  return factory;
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,108 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx940_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
class Mi300Factory : public Mi100Factory {
|
||||
public:
|
||||
explicit Mi300Factory(const AgentInfo* agent_info) : Mi100Factory(agent_info) {
|
||||
for (unsigned blockname_id = 0; blockname_id < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER;
|
||||
++blockname_id) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[blockname_id];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (blockname_id == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else if (blockname_id == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC)
|
||||
block_info = new GpuBlockInfo(AtcCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[blockname_id] = block_info;
|
||||
// overwrite block info for any update from gfx9 to mi300
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 373;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 84;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 16;
|
||||
block_info->event_id_max = 199;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 34;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 82;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = 4 * pm4_builder::MAX_AID;
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 11;
|
||||
block_info->instance_count = 32 * pm4_builder::MAX_AID;
|
||||
break;
|
||||
case RpbCounterBlockId:
|
||||
block_info->instance_count = 4;
|
||||
break;
|
||||
case AtcCounterBlockId:
|
||||
block_info->instance_count = 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 184; };
|
||||
};
|
||||
|
||||
// Creates the MI300 factory for `agent_info` and hands back the raw pointer
// produced by `new`.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi300Create(const AgentInfo* agent_info) {
  Mi300Factory* const factory = new Mi300Factory(agent_info);
  if (factory == nullptr) {
    throw aql_profile_exc_msg("Mi300Factory allocation failed");
  }
  return factory;
}
|
||||
|
||||
class Mi350Factory : public Mi300Factory {
|
||||
public:
|
||||
// MI350 is a copy of Mi300
|
||||
explicit Mi350Factory(const AgentInfo* agent_info) : Mi300Factory(agent_info) {}
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 200; };
|
||||
};
|
||||
|
||||
// Creates the MI350 factory for `agent_info` and hands back the raw pointer
// produced by `new`.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi350Create(const AgentInfo* agent_info) {
  Mi350Factory* const factory = new Mi350Factory(agent_info);
  if (factory == nullptr) {
    throw aql_profile_exc_msg("Mi350Factory allocation failed");
  }
  return factory;
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,100 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx9_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx factory init
|
||||
void Gfx9Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx9CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
void Gfx9Factory::Print(const GpuBlockInfo* block_info) {
|
||||
std::cout << "Block name: " << block_info->name << std::endl;
|
||||
std::cout << "\tInstances: " << block_info->instance_count << std::endl;
|
||||
std::cout << "\tMax Events: " << block_info->event_id_max << std::endl;
|
||||
std::cout << "\tCounters: " << block_info->counter_count << std::endl;
|
||||
auto counters = block_info->instance_count * block_info->counter_count;
|
||||
for (int i = 0; i < counters; ++i) {
|
||||
auto reg_info = block_info->counter_reg_info[i];
|
||||
std::cout << "\t " << i << ": select_addr = 0x" << std::hex << reg_info.select_addr.offset
|
||||
<< "(" << reg_info.select_addr.offset * 4 << ")"
|
||||
<< ", control_addr = 0x" << reg_info.control_addr.offset << "("
|
||||
<< reg_info.control_addr.offset * 4 << ")"
|
||||
<< ", counter_addr_lo = 0x" << reg_info.register_addr_lo.offset << "("
|
||||
<< reg_info.register_addr_lo.offset * 4 << ")"
|
||||
<< ", counter_addr_hi = 0x" << reg_info.register_addr_hi.offset << "("
|
||||
<< reg_info.register_addr_hi.offset * 4 << ")" << std::dec << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// GFX9 block table
|
||||
// One entry per profiled block slot (HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER in
// total); a null entry means no block info is provided for that slot on GFX9.
const GpuBlockInfo* Gfx9Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
    &CpcCounterBlockInfo,
    &CpfCounterBlockInfo,
    &GdsCounterBlockInfo,
    &GrbmCounterBlockInfo,
    &GrbmSeCounterBlockInfo,
    &SpiCounterBlockInfo,
    &SqCounterBlockInfo,
    &SqCsCounterBlockInfo,
    nullptr /*GFX? SRBM*/,
    &SxCounterBlockInfo,
    &TaCounterBlockInfo,
    &TcaCounterBlockInfo,
    &TccCounterBlockInfo,
    &TcpCounterBlockInfo,
    &TdCounterBlockInfo,
    // MC blocks
    nullptr /*MC_ARB*/,
    nullptr /*MC_HUB*/,
    nullptr /*MC_MCBVM*/,
    nullptr /*MC_SEQ*/,
    &McVmL2CounterBlockInfo,
    nullptr /*MC_XBAR*/,
    &AtcCounterBlockInfo,
    &AtcL2CounterBlockInfo,
    &GceaCounterBlockInfo,
    &RpbCounterBlockInfo,
    // System blocks
    nullptr /*&SdmaCounterBlockInfo*/,
    nullptr /*GL1A*/,
    nullptr /*GL1C*/,
    nullptr /*GL2A*/,
    nullptr /*GL2C*/,
    nullptr /*GCR*/,
    nullptr /*GUS*/,
    nullptr /*&UmcCounterBlockInfo*/
};
|
||||
|
||||
// Pm4Factory create methods
|
||||
// Creates the GFX9 factory for `agent_info` and hands back the raw pointer
// produced by `new`.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Gfx9Create(const AgentInfo* agent_info) {
  Gfx9Factory* const factory = new Gfx9Factory(agent_info);
  if (factory == nullptr) {
    throw aql_profile_exc_msg("Gfx9Factory allocation failed");
  }
  return factory;
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,61 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_FACTORY_H_
|
||||
#define _GFX9_FACTORY_H_
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx9 factory class
|
||||
class Gfx9Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx9Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx9Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
|
||||
bool IsGFX9() const override { return true; }
|
||||
|
||||
protected:
|
||||
void Init(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
|
||||
static void Print(const GpuBlockInfo* block_info);
|
||||
};
|
||||
|
||||
// Mi100 factory class
|
||||
class Mi100Factory : public Gfx9Factory {
|
||||
public:
|
||||
explicit Mi100Factory(const AgentInfo* agent_info);
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _GFX9_FACTORY_H_
|
||||
@@ -0,0 +1,7 @@
|
||||
# Headers handled by the install rule below.
set(AQLPROFILE_HEADER_FILES aql_profile_v2.h)

# Copy the headers into <includedir>/aqlprofile-sdk at install time.
install(FILES ${AQLPROFILE_HEADER_FILES}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/aqlprofile-sdk)
|
||||
@@ -0,0 +1,434 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} aqlprofile_handle_t;
|
||||
|
||||
typedef enum {
|
||||
AQLPROFILE_MEMORY_HINT_NONE = 0,
|
||||
AQLPROFILE_MEMORY_HINT_HOST = 1,
|
||||
AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED = 2,
|
||||
AQLPROFILE_MEMORY_HINT_DEVICE_COHERENT = 3,
|
||||
AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT = 4,
|
||||
AQLPROFILE_MEMORY_HINT_LAST
|
||||
} aqlprofile_memory_hint_t;
|
||||
|
||||
typedef enum {
|
||||
AQLPROFILE_AGENT_VERSION_NONE = 0,
|
||||
AQLPROFILE_AGENT_VERSION_V0 = 1,
|
||||
AQLPROFILE_AGENT_VERSION_V1 = 2,
|
||||
AQLPROFILE_AGENT_VERSION_LAST
|
||||
} aqlprofile_agent_version_t;
|
||||
|
||||
/**
|
||||
* @brief Flags to describe which agents can access given buffer.
|
||||
*/
|
||||
typedef union {
|
||||
uint32_t raw;
|
||||
struct {
|
||||
uint32_t device_access : 1;
|
||||
uint32_t host_access : 1;
|
||||
uint32_t memory_hint : 6; // One of aqlprofile_memory_hint_t
|
||||
uint32_t _reserved : 24;
|
||||
};
|
||||
} aqlprofile_buffer_desc_flags_t;
|
||||
|
||||
/**
|
||||
* @brief Callback to request a memory buffer, which will be tied to a profile.
|
||||
* The user is responsible for clearing up memory after the profile is no longer needed.
|
||||
* @param[out] ptr The pointer containing memory.
|
||||
* @param[in] size Minimum requested buffer size.
|
||||
* @param[in] flags Access flags, requesting which agents need to read/write to the buffer.
|
||||
* @param[in] userdata Data to be passed back to user.
|
||||
* @retval HSA_STATUS_SUCCESS if successful
|
||||
* @retval HSA_STATUS_ERROR if memory could not be allocated
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_memory_alloc_callback_t)(void** ptr, uint64_t size,
|
||||
aqlprofile_buffer_desc_flags_t flags,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Callback to dealloc memory requested via aqlprofile_memory_alloc_callback_t
|
||||
* @param[in] ptr The pointer containing memory.
|
||||
* @param[in] userdata Data to be passed back to user.
|
||||
* @note This callback returns void; no status is reported back to the caller.
|
||||
*/
|
||||
typedef void (*aqlprofile_memory_dealloc_callback_t)(void* ptr, void* userdata);
|
||||
|
||||
typedef enum {
|
||||
AQLPROFILE_ACCUMULATION_NONE = 0, /**< Do not accumulate event */
|
||||
AQLPROFILE_ACCUMULATION_LO_RES, /**< The event should be integrated over quad-cycles */
|
||||
AQLPROFILE_ACCUMULATION_HI_RES, /**< The event should be integrated every cycle */
|
||||
AQLPROFILE_ACCUMULATION_LAST,
|
||||
} aqlprofile_accumulation_type_t;
|
||||
|
||||
/**
|
||||
* @brief Special flags indicating additional properties to a counter. E.g. Accumulation metrics
|
||||
*/
|
||||
typedef union {
|
||||
uint32_t raw;
|
||||
struct {
|
||||
uint32_t accum : 3; /**< One of aqlprofile_accumulation_type_t */
|
||||
uint32_t _reserved : 29;
|
||||
} sq_flags;
|
||||
} aqlprofile_pmc_event_flags_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing all necessary information of an event (counter).
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t block_index; /**< Block channel. */
|
||||
uint32_t event_id; /**< Event ID as defined by XML */
|
||||
aqlprofile_pmc_event_flags_t flags; /**< Special event flags e.g. accumulation */
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name; /**< Block name as defined by block indexes */
|
||||
} aqlprofile_pmc_event_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing information about the agent. User code sets these values
|
||||
* to describe the agent to profile. Information can be obtained either from HSA
|
||||
* (if loaded) or the KFD topology.
|
||||
*/
|
||||
typedef struct {
|
||||
const char* agent_gfxip; /**< Agent GFXIP (HSA_AGENT_INFO_NAME or KFD.product_name) */
|
||||
uint32_t xcc_num; /**< XCC's on the agent (HSA_AMD_AGENT_INFO_NUM_XCC or KFD.num_xcc) */
|
||||
uint32_t se_num; /**< SE's on the agent (HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES or
|
||||
KFD.num_shader_banks) */
|
||||
uint32_t cu_num; /**< CU's on the agent (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT or KFD.cu_count) */
|
||||
uint32_t shader_arrays_per_se; /**< Shader arrays per SE of agent
|
||||
(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE or
|
||||
KFD.simd_arrays_per_engine)*/
|
||||
} aqlprofile_agent_info_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing information about the agent. User code sets these values
|
||||
* to describe the agent to profile. Information can be obtained either from HSA
|
||||
* (if loaded) or the KFD topology.
|
||||
*/
|
||||
typedef struct {
|
||||
const char* agent_gfxip; /**< Agent GFXIP (HSA_AGENT_INFO_NAME or KFD.product_name) */
|
||||
uint32_t xcc_num; /**< XCC's on the agent (HSA_AMD_AGENT_INFO_NUM_XCC or KFD.num_xcc) */
|
||||
uint32_t se_num; /**< SE's on the agent (HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES or
|
||||
KFD.num_shader_banks) */
|
||||
uint32_t cu_num; /**< CU's on the agent (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT or KFD.cu_count) */
|
||||
uint32_t shader_arrays_per_se; /**< Shader arrays per SE of agent
|
||||
(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE or
|
||||
KFD.simd_arrays_per_engine)*/
|
||||
uint32_t domain; /**< PCI domain of the GPU agent (HSA_AMD_AGENT_INFO_DOMAIN or KFD.domain) */
|
||||
uint32_t location_id; /**< BDF (Bus/Device/function number) of the GPU agent
|
||||
(HSA_AMD_AGENT_INFO_BDFID or KFD.location_id)*/
|
||||
} aqlprofile_agent_info_v1_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing a handle to a registered agent
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} aqlprofile_agent_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Registers an agent to be used with AQL profile.
|
||||
* @param[out] agent_id Handle to newly registered agent
|
||||
* @param[in] agent_info Info to register a new agent with AQL Profiler
|
||||
* @retval HSA_STATUS_SUCCESS registration ok
|
||||
* @retval HSA_STATUS_ERROR registration failed
|
||||
*/
|
||||
hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
|
||||
const aqlprofile_agent_info_t* agent_info);
|
||||
|
||||
/**
|
||||
* @brief Registers an agent to be used with AQL profile.
|
||||
* @param[out] agent_id Handle to newly registered agent
|
||||
* @param[in] agent_info Info to register a new agent with AQL Profiler
|
||||
* @param[in] version Version of the agent info structure
|
||||
* @retval HSA_STATUS_SUCCESS registration ok
|
||||
* @retval HSA_STATUS_ERROR registration failed
|
||||
*/
|
||||
hsa_status_t aqlprofile_register_agent_info(aqlprofile_agent_handle_t* agent_id,
|
||||
const void* agent_info,
|
||||
aqlprofile_agent_version_t version);
|
||||
/**
|
||||
* @brief AQLprofile struct containing information for perfmon events
|
||||
*/
|
||||
typedef struct {
|
||||
aqlprofile_agent_handle_t agent;
|
||||
const aqlprofile_pmc_event_t* events;
|
||||
uint32_t event_count;
|
||||
} aqlprofile_pmc_profile_t;
|
||||
|
||||
// Profile attributes
|
||||
typedef enum {
|
||||
AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
|
||||
AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
|
||||
AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
|
||||
// in info_data object
|
||||
AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
|
||||
AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
|
||||
// by name string using _id_query_t
|
||||
AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
|
||||
// counters enable command buffer
|
||||
AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
|
||||
// counters disable command buffer
|
||||
} aqlprofile_pmc_info_type_t;
|
||||
|
||||
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
|
||||
aqlprofile_pmc_info_type_t attribute, void* value);
|
||||
|
||||
// Profile parameter object
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
||||
union {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t counter_id : 28;
|
||||
uint32_t simd_mask : 4;
|
||||
};
|
||||
};
|
||||
} aqlprofile_att_parameter_t;
|
||||
|
||||
/**
|
||||
* @brief AQLprofile struct containing information for Advanced Thread Trace
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_agent_t agent;
|
||||
const aqlprofile_att_parameter_t* parameters;
|
||||
uint32_t parameter_count;
|
||||
} aqlprofile_att_profile_t;
|
||||
|
||||
/**
|
||||
* @brief Data callback for perfmon events. Each event will call this once per coordinate
|
||||
* @param[in] event The event information passed in from aqlprofile_pmc_profile_t
|
||||
* @param[in] counter_id Internal ID of the counter
|
||||
* @param[in] counter_value The event value, as incremented from start() to stop()
|
||||
* @param[in] userdata Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS to continue iteration
|
||||
* @retval HSA_STATUS_ERROR to stop callback iteration
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_pmc_data_callback_t)(aqlprofile_pmc_event_t event,
|
||||
uint64_t counter_id, uint64_t counter_value,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Data callback for thread trace. This will be called at least once per shader engine
|
||||
* @param[in] shader Shader Engine ID
|
||||
* @param[in] buffer Pointer containing the data
|
||||
* @param[in] size Amount of bytes used by thread trace
|
||||
* @param[in] callback_data Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS to continue iteration
|
||||
* @retval HSA_STATUS_ERROR to stop callback iteration
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_att_data_callback_t)(uint32_t shader, void* buffer, uint64_t size,
|
||||
void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Memory copy fn for aqlprofile to copy data.
|
||||
* @param[in] dst Destination pointer to copy data to.
|
||||
* @param[in] src Source pointer where data is to be copied from.
|
||||
* @param[in] size Amount of bytes to be copied.
|
||||
* @param[in] userdata Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS on success
|
||||
* @retval HSA_STATUS_ERROR on failure
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_memory_copy_t)(void* dst, const void* src, size_t size,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Validates the event for the agent.
|
||||
* @param[in] agent The agent to validate the event for.
|
||||
* @param[in] event The event to validate.
|
||||
* @param[out] result True if the event is valid for the agent, false otherwise.
|
||||
* @retval HSA_STATUS_SUCCESS if the event was validated.
|
||||
* @retval HSA_STATUS_ERROR if the event was not validated.
|
||||
*/
|
||||
hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
|
||||
const aqlprofile_pmc_event_t* event, bool* result);
|
||||
|
||||
/**
|
||||
* @brief Iterate_data() will parse the event data and call @callback with the resulting event data
|
||||
* @param[in] handle The handle returned from aqlprofile_pmc_create_packets()
|
||||
* @param[in] callback CB where the resulting event values are going to be returned
|
||||
* @param[in] userdata Data sent back to user
|
||||
* @retval HSA_STATUS_SUCCESS all operations exited successfully
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_pmc_data_callback_t callback, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Struct to be returned by aqlprofile_pmc_create_packets
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ext_amd_aql_pm4_packet_t start_packet; /**< Reset counters and start incrementing */
|
||||
hsa_ext_amd_aql_pm4_packet_t stop_packet; /**< Pause counters from incrementing */
|
||||
hsa_ext_amd_aql_pm4_packet_t read_packet; /**< Retrieve results from device */
|
||||
} aqlprofile_pmc_aql_packets_t;
|
||||
|
||||
/**
|
||||
* @brief Function to create AQL packets to be inserted into the queue.
|
||||
* @param[out] handle To be passed to iterate_data()
|
||||
* @param[out] packets Pointer to where the start, stop and read packets will be written to
|
||||
* @param[in] profile Agent and events information
|
||||
* @param[in] alloc_cb Memory allocation, which may request cpu or gpu memory for internal use
|
||||
* @param[in] dealloc_cb Function to free memory allocated by alloc_cb
* @param[in] memcpy_cb Memory copy function used by aqlprofile to copy data
|
||||
* @param[in] userdata Data passed back to user via memory alloc callback
|
||||
*/
|
||||
hsa_status_t aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_pmc_aql_packets_t* packets,
|
||||
aqlprofile_pmc_profile_t profile,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
aqlprofile_memory_copy_t memcpy_cb, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Function to delete AQL packets after creation by aqlprofile_pmc_create_packets
|
||||
* @param[in] handle Returned by aqlprofile_pmc_create_packets()
|
||||
*/
|
||||
void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Iterates over thread trace data and returns the data to the user
|
||||
* @param[in] handle The handle returned from aqlprofile_att_create_packets()
|
||||
* @param[in] callback CB where the resulting data is going to be returned
|
||||
* @param[in] userdata Data sent back to user
|
||||
* @retval HSA_STATUS_SUCCESS all operations exited successfully
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_att_data_callback_t callback, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Struct containing AQLpackets to start and stop thread trace
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ext_amd_aql_pm4_packet_t start_packet; /**< Packet to start thread trace */
|
||||
hsa_ext_amd_aql_pm4_packet_t stop_packet; /**< Packet to stop thread trace and flush data */
|
||||
} aqlprofile_att_control_aql_packets_t;
|
||||
|
||||
/**
|
||||
* @brief Fn to create start and stop thread trace packets
|
||||
* @param[out] handle To be passed to iterate_data()
|
||||
* @param[out] packets Packets returned by this function to start and stop thread trace
|
||||
* @param[in] profile Agent information and extra parameters for thread trace
|
||||
* @param[in] alloc_cb Memory allocation fn which may request cpu or gpu memory
|
||||
* @retval HSA_STATUS_SUCCESS if all packets created successfully
|
||||
* @retval HSA_STATUS_ERROR otherwise
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_control_aql_packets_t* packets,
|
||||
aqlprofile_att_profile_t profile,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
aqlprofile_memory_copy_t memcpy_cb, void* userdata);
|
||||
|
||||
void aqlprofile_att_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
||||
* @param [in] id Integer identifying the dimension.
|
||||
* @param [in] name Name of the dimension
|
||||
* @param [in] data User data supplied to @ref aqlprofile_iterate_event_ids
|
||||
* @retval HSA_STATUS_SUCCESS Continues iteration
|
||||
* @retval OTHERS Any other HSA return values stops iteration, passing back this value through
|
||||
* @ref aqlprofile_iterate_event_ids
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_eventname_callback_t)(int id, const char* name, void* data);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all possible event coordinate IDs and their names.
|
||||
* @param [in] callback Callback to use for iteration of dimensions
|
||||
* @param [in] user_data Data to supply to callback @ref aqlprofile_eventname_callback_t
|
||||
* @retval HSA_STATUS_SUCCESS if successful
|
||||
* @retval HSA_STATUS_ERROR if error on iteration
|
||||
* @retval OTHERS If @ref aqlprofile_eventname_callback_t returns non-HSA_STATUS_SUCCESS,
|
||||
* that value is returned.
|
||||
*/
|
||||
hsa_status_t aqlprofile_iterate_event_ids(aqlprofile_eventname_callback_t callback,
|
||||
void* user_data);
|
||||
|
||||
/**
|
||||
* @brief Callback used by aqlprofile_iterate_event_coord to return event coordinates.
|
||||
* @param position A counting sequence indicating callback number.
|
||||
* @param id Coordinate ID as in _iterate_event_ids.
|
||||
* @param extent Coordinate extent indicating maximum allowed instances.
|
||||
* @param coordinate The coordinate, in the range [0,extent-1].
|
||||
* @param name Coordinate name as in _iterate_event_ids.
|
||||
* @param userdata Userdata returned from _iterate_event_coord function.
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_coordinate_callback_t)(int position, int id, int extent,
|
||||
int coordinate, const char* name,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param[in] agent HSA agent.
|
||||
* @param[in] event The event ID and block ID to iterate for.
|
||||
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
|
||||
* @param[in] callback Callback function to return the coordinates.
|
||||
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
|
||||
*/
|
||||
hsa_status_t aqlprofile_iterate_event_coord(aqlprofile_agent_handle_t agent,
|
||||
aqlprofile_pmc_event_t event, uint64_t sample_id,
|
||||
aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata);
|
||||
|
||||
typedef struct {
|
||||
uint64_t id;
|
||||
uint64_t addr;
|
||||
uint64_t size;
|
||||
hsa_agent_t agent;
|
||||
uint32_t isUnload : 1;
|
||||
uint32_t fromStart : 1;
|
||||
} aqlprofile_att_codeobj_data_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an AQL packet for marking code objects
|
||||
* @param[out] packet Returned packet
|
||||
* @param[out] handle The handle created for these packets
|
||||
* @param[in] data Code object information
|
||||
* @param[in] alloc_cb Callback to return both CPU and GPU accessible memory on demand
|
||||
* @param[in] dealloc_cb Callback to free data allocated by alloc_cb()
|
||||
* @param[in] userdata Userdata to be passed back to memory callbacks
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_codeobj_marker(hsa_ext_amd_aql_pm4_packet_t* packet,
|
||||
aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_codeobj_data_t data,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
void* userdata);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,51 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_IP_DISCOVERY_H_
|
||||
#define SRC_CORE_IP_DISCOVERY_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
|
||||
#include "util/reg_offsets.h"
|
||||
|
||||
using base_addr_segments_t = std::array<uint32_t, HWIP_MAX_SEGMENT>;
|
||||
|
||||
// Represents a single entry in the discovery table, containing information about a specific IP
|
||||
// block.
|
||||
struct discovery_table_entry_t {
|
||||
int die{0}; // Die index
|
||||
base_addr_segments_t segments{}; // Base address segments
|
||||
int major{0}; // Major version of the IP
|
||||
int minor{0}; // Minor version of the IP
|
||||
int revision{0}; // Revision number of the IP
|
||||
int instance{0}; // Instance ID of the IP
|
||||
std::string ipname{}; // Name of the IP block
|
||||
};
|
||||
|
||||
using discovery_table_t = std::vector<discovery_table_entry_t>;
|
||||
discovery_table_t parse_ip_discovery(uint32_t domain, uint32_t bdf);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,98 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <shared_mutex>
|
||||
#include <array>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "ip_offset_table_init.h"
|
||||
|
||||
// Device key: (PCIe domain, BDF).
using domain_bdf_t = std::pair<uint32_t, uint32_t>;

// std::hash specialization so domain_bdf_t can be used as an unordered container key.
// The two halves are hashed separately and mixed as h(first) ^ (h(second) << 1).
namespace std {
template <>
struct hash<domain_bdf_t> {
  size_t operator()(const domain_bdf_t& key) const {
    const hash<uint32_t> word_hash{};
    const size_t domain_bits = word_hash(key.first);
    const size_t bdf_bits = word_hash(key.second);
    return domain_bits ^ (bdf_bits << 1);
  }
};
}  // namespace std
|
||||
|
||||
// Map from (Domain, BDF) to reg_base_offset_table*
|
||||
using reg_base_offset_table_cache = std::unordered_map<domain_bdf_t, const reg_base_offset_table*>;
|
||||
|
||||
class locked_ip_offset_table_cache {
|
||||
public:
|
||||
const reg_base_offset_table* get(const AgentInfo* agent_info) {
|
||||
{
|
||||
std::shared_lock lock{mutex};
|
||||
auto it = cache.find(std::make_pair(agent_info->domain, agent_info->bdf_id));
|
||||
if (it != cache.end()) return it->second;
|
||||
}
|
||||
{
|
||||
std::string_view gfxip(agent_info->gfxip);
|
||||
std::unique_lock lock{mutex};
|
||||
const reg_base_offset_table* table = nullptr;
|
||||
|
||||
if (auto gfxip_prefix = gfxip.substr(0, 4); gfxip_prefix == "gfx9")
|
||||
table = vega20_reg_base_init();
|
||||
else {
|
||||
if (auto gfxip_prefix = gfxip.substr(0, 5);
|
||||
gfxip_prefix == "gfx10" || gfxip_prefix == "gfx11" || gfxip_prefix == "gfx12") {
|
||||
table = navi_ip_offset_table_discovery_sysfs(agent_info->domain, agent_info->bdf_id);
|
||||
if (!table) table = sienna_cichlid_reg_base_init();
|
||||
}
|
||||
}
|
||||
|
||||
if (table) cache.emplace(std::make_pair(agent_info->domain, agent_info->bdf_id), table);
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
||||
static locked_ip_offset_table_cache& get_instance() {
|
||||
// Note: never cleanup, keep in memory to prevent issue with global destructor
|
||||
static auto* cache = new locked_ip_offset_table_cache{};
|
||||
return *cache;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_mutex mutex;
|
||||
reg_base_offset_table_cache cache;
|
||||
};
|
||||
|
||||
// acquire the IP offset table for the device using the domain and bdf_id
|
||||
const reg_base_offset_table* acquire_ip_offset_table(const AgentInfo* agent_info) {
|
||||
auto ip_offset_table = locked_ip_offset_table_cache::get_instance().get(agent_info);
|
||||
if (ip_offset_table == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"Failed to acquire the IP offset table for the device. Possible reasons include:\n"
|
||||
" 1. Incorrect or incomplete ROCm setup. Please verify your installation.\n"
|
||||
" 2. The device is not supported.\n"
|
||||
" 3. An internal error or bug.\n");
|
||||
}
|
||||
return ip_offset_table;
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
#define SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
|
||||
// static IP offset table init functions
|
||||
const reg_base_offset_table* vega20_reg_base_init();
|
||||
const reg_base_offset_table* sienna_cichlid_reg_base_init();
|
||||
|
||||
// dynamic IP offset table functions
|
||||
const reg_base_offset_table* navi_ip_offset_table_discovery_sysfs(uint32_t domain, uint32_t bdf);
|
||||
|
||||
#endif // SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
@@ -0,0 +1,177 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_LOGGER_H_
|
||||
#define SRC_CORE_LOGGER_H_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Singleton logger with stream-style syntax.
//
// Values streamed with operator<< are appended to the log file (when enabled) and, while
// "messaging" is active (after the begm manipulator), also to a per-thread message string
// retrievable via LastMessage(). The endl manipulator ends the current record.
//
// All mutable state is guarded by the static recursive mutex_; it is recursive because
// the public entry points call each other (and Instance()) while already holding it.
class Logger {
 public:
  typedef std::recursive_mutex mutex_t;

  // Streams one value. The first value after endl opens a new record with a
  // timestamp/pid/tid header (Log); subsequent values are appended verbatim (Put).
  template <typename T>
  Logger& operator<<(const T& m) {
    std::ostringstream oss;
    oss << m;
    // streaming_ is shared by every thread using the singleton, so it must be read and
    // written under the same mutex that ResetStreaming() uses.
    std::lock_guard<mutex_t> lck(mutex_);
    if (streaming_) {
      Put(oss.str());
    } else {
      Log(oss.str());
    }
    streaming_ = true;
    return *this;
  }

  // Manipulator support: `logger << Logger::endl` / `logger << Logger::begm`.
  typedef void (*manip_t)();
  Logger& operator<<(manip_t f) {
    f();
    return *this;
  }

  // Starts capturing subsequently streamed text into the calling thread's message.
  static void begm() {
    std::lock_guard<mutex_t> lck(mutex_);
    Instance().messaging_ = true;
  }

  // Ends the current record (see ResetStreaming).
  static void endl() { Instance().ResetStreaming(); }

  // Returns the message string stored for the calling thread (created empty if absent).
  static const std::string& LastMessage() {
    Logger& logger = Instance();
    std::lock_guard<mutex_t> lck(mutex_);
    return logger.message_[GetTid()];
  }

  // Returns the singleton, creating it on first use.
  static Logger& Instance() {
    std::lock_guard<mutex_t> lck(mutex_);
    if (instance_ == nullptr) instance_ = new Logger();
    return *instance_;
  }

  // Destroys the singleton (closing the log file); a later Instance() call recreates it.
  static void Destroy() {
    std::lock_guard<mutex_t> lck(mutex_);
    delete instance_;
    instance_ = nullptr;
  }

 private:
  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

  // File logging is enabled when HSA_VEN_AMD_AQLPROFILE_LOG is set. Only the presence of
  // the variable is checked; its value is not used and the log path is fixed.
  Logger() : file_(nullptr), dirty_(false), streaming_(false), messaging_(false) {
    if (getenv("HSA_VEN_AMD_AQLPROFILE_LOG") != nullptr) {
      file_ = fopen("/tmp/aql_profile_log.txt", "a");
    }
    ResetStreaming();
  }

  ~Logger() {
    if (file_ != nullptr) {
      // Terminate the last record with a newline if anything was written.
      if (dirty_) Put("\n");
      fclose(file_);
    }
  }

  // Ends the current record. If the record was being captured as a message, the calling
  // thread's stored message is cleared.
  void ResetStreaming() {
    std::lock_guard<mutex_t> lck(mutex_);
    if (messaging_) {
      message_[GetTid()] = "";
    }
    messaging_ = false;
    streaming_ = false;
  }

  // Appends raw text to the calling thread's message (when messaging) and to the log file
  // (when open). The file write is wrapped in flock() and flushed immediately.
  void Put(const std::string& m) {
    std::lock_guard<mutex_t> lck(mutex_);
    if (messaging_) {
      message_[GetTid()] += m;
    }
    if (file_ != nullptr) {
      dirty_ = true;
      flock(fileno(file_), LOCK_EX);
      fprintf(file_, "%s", m.c_str());
      fflush(file_);
      flock(fileno(file_), LOCK_UN);
    }
  }

  // Emits `m` prefixed with a "\n<YYYY-MM-DD HH:MM:SS pidN tidN> " record header.
  void Log(const std::string& m) {
    const time_t rawtime = time(nullptr);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, sizeof(tm_str), "%Y-%m-%d %H:%M:%S", &tm_info);

    std::ostringstream oss;
    oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    Put(oss.str());
  }

  FILE* file_;      // log file, or nullptr when file logging is disabled
  bool dirty_;      // true once anything has been written to file_
  bool streaming_;  // true while a record is open (header already emitted)
  bool messaging_;  // true while streamed text is also captured into message_

  static mutex_t mutex_;
  static Logger* instance_;
  std::map<uint32_t, std::string> message_;  // captured message per thread id
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Logging entry points. Each macro expands to a Logger stream expression that ends the
// previous record, writes a severity/function prefix and can then be extended with `<<`.
// Every macro except ERR2_LOGGING finishes with begm, so the text streamed after the macro
// is also captured as the calling thread's last message.
#define ERR_LOGGING                                                \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl    \
                                   << "Error: " << __FUNCTION__    \
                                   << "(): " << aql_profile::Logger::begm)

#define ERR2_LOGGING                                               \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl    \
                                   << "Error: " << __FUNCTION__    \
                                   << "(): ")

#define INFO_LOGGING                                               \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl    \
                                   << "Info: " << __FUNCTION__     \
                                   << "(): " << aql_profile::Logger::begm)

#define WARN_LOGGING                                               \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl    \
                                   << "Warning: " << __FUNCTION__  \
                                   << "(): " << aql_profile::Logger::begm)

// Only available in DEBUG builds; the prefix carries function, file and line.
#ifdef DEBUG
#define DBG_LOGGING                                                \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl    \
                                   << "Debug: in " << __FUNCTION__ \
                                   << " at " << __FILE__           \
                                   << " line " << __LINE__         \
                                   << aql_profile::Logger::begm)
#endif
|
||||
|
||||
#endif // SRC_CORE_LOGGER_H_
|
||||
@@ -0,0 +1,61 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "memorymanager.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
std::atomic<size_t> MemoryManager::HANDLE_COUNTER{1};
|
||||
std::unordered_map<size_t, std::shared_ptr<MemoryManager>> MemoryManager::managers;
|
||||
std::mutex MemoryManager::managers_map_mutex;
|
||||
|
||||
void CounterMemoryManager::CopyEvents(const aqlprofile_pmc_event_t* _events, size_t count) {
|
||||
events.reserve(count + 4);
|
||||
int num_flag_metrics = 0;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
events.push_back(EventRequest{_events[i], false});
|
||||
num_flag_metrics += _events[i].flags.raw != 0;
|
||||
}
|
||||
|
||||
if (!num_flag_metrics) return;
|
||||
|
||||
std::sort(events.begin(), events.end());
|
||||
|
||||
std::vector<EventRequest> acc_requests;
|
||||
for (auto it = events.begin(); it != events.end(); it++) {
|
||||
if (!it->flags.raw) continue;
|
||||
|
||||
if (it != events.begin()) {
|
||||
auto prev = std::prev(it);
|
||||
if (it->IsSameNoFlags(*prev) && (!prev->flags.raw || prev->bInternal)) continue;
|
||||
}
|
||||
|
||||
EventRequest req = *it;
|
||||
req.bInternal = true;
|
||||
req.flags.raw = 0;
|
||||
acc_requests.push_back(req);
|
||||
}
|
||||
|
||||
if (!acc_requests.size()) return;
|
||||
|
||||
events.insert(events.end(), acc_requests.begin(), acc_requests.end());
|
||||
std::sort(events.begin(), events.end());
|
||||
}
|
||||
@@ -0,0 +1,258 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include "include/aql_profile_v2.h"
|
||||
#include <stdexcept>
|
||||
#include "pm4/trace_config.h"
|
||||
|
||||
struct EventRequest : public aqlprofile_pmc_event_t {
|
||||
bool bInternal;
|
||||
|
||||
auto GetOrder() const -> auto{
|
||||
uint64_t idx = bInternal ? 0 : 1;
|
||||
idx |= uint64_t(flags.raw) << 1;
|
||||
idx |= uint64_t(event_id) << 33;
|
||||
|
||||
uint64_t blk = block_index;
|
||||
blk |= uint64_t(block_name) << 32;
|
||||
|
||||
return std::pair<uint64_t, uint64_t>{blk, idx};
|
||||
}
|
||||
|
||||
bool operator<(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
if (idx1.first == idx2.first)
|
||||
return idx1.second < idx2.second;
|
||||
else
|
||||
return idx1.first < idx2.first;
|
||||
}
|
||||
|
||||
bool operator==(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
return idx1.second == idx2.second && idx1.first == idx2.first;
|
||||
}
|
||||
|
||||
bool IsSameNoFlags(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
return idx1.first == idx2.first && event_id == other.event_id;
|
||||
}
|
||||
};
|
||||
|
||||
class MemoryManager {
|
||||
public:
|
||||
MemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: agent(agent),
|
||||
alloc_cb(alloc),
|
||||
dealloc_cb(dealloc),
|
||||
userdata(data),
|
||||
handle(HANDLE_COUNTER.fetch_add(1)) {}
|
||||
|
||||
MemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: agent_handle(agent),
|
||||
alloc_cb(alloc),
|
||||
dealloc_cb(dealloc),
|
||||
userdata(data),
|
||||
handle(HANDLE_COUNTER.fetch_add(1)) {}
|
||||
|
||||
virtual ~MemoryManager() {}
|
||||
|
||||
void CheckStatus(hsa_status_t status) const {
|
||||
if (status != HSA_STATUS_SUCCESS) throw status;
|
||||
}
|
||||
|
||||
void* GetCmdBuf() const { return cmdbuf.get(); }
|
||||
void* GetOutputBuf() const { return outputbuf.get(); }
|
||||
|
||||
size_t GetOutputBufSize() const { return outputbuf_size; }
|
||||
|
||||
size_t GetHandler() const { return handle; }
|
||||
hsa_agent_t GetAgent() const { return agent; }
|
||||
aqlprofile_agent_handle_t AgentHandle() const { return agent_handle; }
|
||||
|
||||
void CreateCmdBuf(size_t size) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = true;
|
||||
flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT;
|
||||
cmdbuf = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
virtual void CreateOutputBuf(size_t size) = 0;
|
||||
|
||||
static void RegisterManager(const std::shared_ptr<MemoryManager>& shared) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
managers[shared->handle] = shared;
|
||||
}
|
||||
|
||||
static void DeleteManager(size_t handle) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
managers.erase(handle);
|
||||
}
|
||||
|
||||
static std::shared_ptr<MemoryManager> GetManager(size_t handle) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
try {
|
||||
return managers.at(handle);
|
||||
} catch (std::exception& e) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
struct MemoryDeleter {
|
||||
aqlprofile_memory_dealloc_callback_t free_fn;
|
||||
void* userdata;
|
||||
void operator()(void* ptr) const {
|
||||
if (ptr && free_fn) free_fn(ptr, userdata);
|
||||
};
|
||||
};
|
||||
|
||||
std::unique_ptr<void, MemoryDeleter> AllocMemory(size_t size,
|
||||
aqlprofile_buffer_desc_flags_t flags) const {
|
||||
void* ptr;
|
||||
CheckStatus(alloc_cb(&ptr, size, flags, userdata));
|
||||
return std::unique_ptr<void, MemoryDeleter>{ptr, MemoryDeleter{dealloc_cb, userdata}};
|
||||
}
|
||||
|
||||
aqlprofile_agent_handle_t agent_handle = {.handle = 0};
|
||||
hsa_agent_t agent = {.handle = 0};
|
||||
std::unique_ptr<void, MemoryDeleter> cmdbuf = nullptr;
|
||||
std::unique_ptr<void, MemoryDeleter> outputbuf = nullptr;
|
||||
size_t outputbuf_size = 0;
|
||||
|
||||
void* const userdata;
|
||||
aqlprofile_memory_alloc_callback_t const alloc_cb;
|
||||
aqlprofile_memory_dealloc_callback_t const dealloc_cb;
|
||||
size_t handle;
|
||||
|
||||
static std::atomic<size_t> HANDLE_COUNTER;
|
||||
static std::unordered_map<size_t, std::shared_ptr<MemoryManager>> managers;
|
||||
static std::mutex managers_map_mutex;
|
||||
};
|
||||
|
||||
class CounterMemoryManager : public MemoryManager {
|
||||
public:
|
||||
CounterMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
CounterMemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
void CreateOutputBuf(size_t size) override {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED;
|
||||
outputbuf = AllocMemory(size, flags);
|
||||
outputbuf_size = size;
|
||||
}
|
||||
|
||||
std::vector<EventRequest>& GetEvents() { return events; }
|
||||
void CopyEvents(const aqlprofile_pmc_event_t* events, size_t count);
|
||||
|
||||
protected:
|
||||
std::vector<EventRequest> events;
|
||||
};
|
||||
|
||||
class TraceMemoryManager : public MemoryManager {
|
||||
public:
|
||||
TraceMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc,
|
||||
aqlprofile_memory_copy_t _copy_fn, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data), copy_fn(_copy_fn) {}
|
||||
|
||||
TraceMemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
void CreateOutputBuf(size_t size) override {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT;
|
||||
outputbuf = AllocMemory(size, flags);
|
||||
outputbuf_size = size;
|
||||
}
|
||||
|
||||
void CreateTraceControlBuf(size_t size) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_HOST;
|
||||
trace_control_buf = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
const std::vector<hsa_ven_amd_aqlprofile_parameter_t>& GetATTParams() const { return att_params; }
|
||||
void CopyATTParams(hsa_ven_amd_aqlprofile_parameter_t* params, size_t count) {
|
||||
for (size_t i = 0; i < count; i++) this->att_params.push_back(params[i]);
|
||||
for (auto& param : att_params) {
|
||||
if (param.parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET)
|
||||
target_cu = param.value;
|
||||
else if (param.parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION)
|
||||
simd_mask = param.value;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
Type* GetTraceControlBuf() const {
|
||||
return reinterpret_cast<Type*>(trace_control_buf.get());
|
||||
}
|
||||
|
||||
void CopyMemory(void* dst, const void* src, size_t size) {
|
||||
this->copy_fn(dst, src, size, this->userdata);
|
||||
}
|
||||
|
||||
int GetSimdMask() const { return simd_mask; }
|
||||
|
||||
pm4_builder::TraceConfig config{};
|
||||
|
||||
protected:
|
||||
int target_cu = -1;
|
||||
int simd_mask = 0xF;
|
||||
aqlprofile_memory_copy_t copy_fn;
|
||||
std::vector<hsa_ven_amd_aqlprofile_parameter_t> att_params;
|
||||
std::unique_ptr<void, MemoryDeleter> trace_control_buf = nullptr;
|
||||
};
|
||||
|
||||
class CodeobjMemoryManager : public MemoryManager {
|
||||
public:
|
||||
CodeobjMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, size_t size, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
this->cmd_buffer = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
void CreateOutputBuf(size_t size) override{};
|
||||
std::unique_ptr<void, MemoryDeleter> cmd_buffer;
|
||||
};
|
||||
@@ -0,0 +1,103 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <shared_mutex>
|
||||
#include "ip_discovery.h"
|
||||
|
||||
#define __maybe_unused __attribute__((__unused__))
|
||||
|
||||
#include "linux/registers/sienna_cichlid_ip_offset.h"
|
||||
#include "util/reg_offsets.h"
|
||||
|
||||
#define LOG_VERBOSE 0
|
||||
|
||||
namespace {

// Prints `msg` to stderr when LOG_VERBOSE is non-zero; otherwise does nothing.
void LogErrors(std::string msg) {
#if LOG_VERBOSE
  std::cerr << msg << std::endl;
#else
  (void)msg;  // unused in non-verbose builds
#endif /* LOG_VERBOSE */
}

}  // namespace
|
||||
|
||||
// Returns the static sienna_cichlid register base table. The table is built on the first
// call and the same pointer is returned on every later call; it is never deleted here.
const reg_base_offset_table* sienna_cichlid_reg_base_init() {
  // The destination arrays must be at least as large as the static source tables.
  static_assert(HWIP_MAX_INSTANCE >= MAX_INSTANCE,
                "HWIP_MAX_INSTANCE must be greater than MAX_INSTANCE");
  static_assert(HWIP_MAX_SEGMENT >= MAX_SEGMENT,
                "HWIP_MAX_SEGMENT must be greater than MAX_SEGMENT");

  static const reg_base_offset_table* const table = [] {
    auto* fresh = new reg_base_offset_table();

    // Copies every instance's segment bases of one IP block into the table.
    const auto fill = [fresh](amd_hw_ip_block_type hwip, const auto& base) {
      for (uint32_t inst = 0; inst < MAX_INSTANCE; ++inst) {
        const auto& segs = base.instance[inst].segment;
        std::copy(std::begin(segs), std::end(segs), std::begin(fresh->reg_offset[hwip][inst]));
      }
    };

    // HW has more IP blocks, only initialize the blocks needed
    fill(GC_HWIP, GC_BASE);
    fill(ATHUB_HWIP, ATHUB_BASE);
    return fresh;
  }();

  return table;
}
|
||||
|
||||
// Builds a register base table for the device at (domain, bdf) from its IP discovery data.
// Only the "gc" and "athub" IP blocks are copied into the table.
//
// Returns nullptr if parse_ip_discovery() throws a std::exception. Otherwise returns a
// newly allocated table on every call; ownership passes to the caller and the table is
// meant to live for the rest of the process.
const reg_base_offset_table* navi_ip_offset_table_discovery_sysfs(uint32_t domain, uint32_t bdf) {
  // Read the drm device properties, which includes all the IP base offsets for a GPU card on the
  // system.
  discovery_table_t table;
  try {
    table = parse_ip_discovery(domain, bdf);
  } catch (const std::exception& e) {
    LogErrors("Error in IP discovery for domain=" + std::to_string(domain) +
              " bdf=" + std::to_string(bdf) + ": \n" + e.what());
    return nullptr;
  }

  // Note: never cleanup, keep in memory to prevent issue with global destructor
  auto* reg_table = new reg_base_offset_table();

  // helper lambda to initialize blocks
  auto init_hwip = [&](amd_hw_ip_block_type hwip, const discovery_table_entry_t& entry) {
    // entry.instance comes from the discovery data; validate it before using it as an
    // index so a bad entry cannot write outside reg_offset[hwip].
    const auto instance_count = std::size(reg_table->reg_offset[hwip]);
    if (entry.instance < 0 || static_cast<size_t>(entry.instance) >= instance_count) {
      LogErrors("Ignoring IP discovery entry '" + entry.ipname + "' with out-of-range instance " +
                std::to_string(entry.instance));
      return;
    }
    std::copy(std::begin(entry.segments), std::end(entry.segments),
              std::begin(reg_table->reg_offset[hwip][entry.instance]));
  };

  for (const auto& entry : table) {
    if (entry.ipname == "gc") {
      init_hwip(GC_HWIP, entry);
    } else if (entry.ipname == "athub") {
      init_hwip(ATHUB_HWIP, entry);
    }
  }

  return reg_table;
}
|
||||
@@ -0,0 +1,269 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <unordered_map>
|
||||
#include <regex>
|
||||
#include <iomanip>
|
||||
#include <cassert>
|
||||
|
||||
#include "ip_discovery.h"
|
||||
|
||||
#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
|
||||
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
|
||||
#define PCI_FUNC(devfn) ((devfn)&0x07)
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * @brief Reads the leading integer stored in a sysfs attribute file.
 *
 * The value is extracted with the stream's default formatting, i.e. it is
 * parsed as a decimal number.
 *
 * @param path The path to the sysfs file containing the numeric value.
 *
 * @return An `std::optional<int>` containing the parsed integer if successful, or `std::nullopt`
 *         if the file cannot be opened or does not start with a valid integer.
 */
std::optional<int> read_sysfs_single_int(const fs::path& path) {
  std::ifstream input(path);
  if (!input.is_open()) {
    return std::nullopt;  // Failed to open file
  }

  int parsed;
  if (!(input >> parsed)) {
    return std::nullopt;  // Failed to parse data
  }

  // The stream is closed by its destructor.
  return parsed;
}
|
||||
|
||||
/**
|
||||
* @brief Reads base address segments from a sysfs file.
|
||||
*
|
||||
* This helper function reads a file containing hexadecimal values representing
|
||||
* base address segments and parses them into a `base_addr_segments_t` structure.
|
||||
*
|
||||
* @param fname The path to the sysfs file containing base address segments.
|
||||
*
|
||||
* @return An `std::optional<base_addr_segments_t>` containing the parsed base address segments
|
||||
* if successful, or `std::nullopt` if the file does not exist, cannot be opened,
|
||||
* or contains invalid data.
|
||||
*/
|
||||
std::optional<base_addr_segments_t> read_sysfs_base_addr_segments(const fs::path& path) {
|
||||
std::ifstream file(path);
|
||||
if (!file.is_open()) return std::nullopt; // Failed to open file
|
||||
|
||||
base_addr_segments_t segments{0};
|
||||
std::string databuf;
|
||||
size_t x = 0;
|
||||
while (std::getline(file, databuf) && x < segments.size()) {
|
||||
std::stringstream ss(databuf);
|
||||
ss >> std::hex >> segments[x++];
|
||||
if (ss.fail()) return std::nullopt; // Failed to parse data
|
||||
}
|
||||
|
||||
return segments;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parses IP instances for a given die and IP name from the sysfs directory structure.
|
||||
*
|
||||
* This function reads attributes such as base address segments, version information,
|
||||
* and instance number for each IP instance and stores them in the discovery table.
|
||||
*
|
||||
* @param die_num The die number associated with the IP instances.
|
||||
* @param diepath The sysfs path to the die directory.
|
||||
* @param ipname The name of the IP to be parsed.
|
||||
*
|
||||
* @return The discovery table where parsed IP instance data will be stored.
|
||||
*/
|
||||
discovery_table_t parse_ip_instances(int die_num, const fs::path& diepath,
|
||||
const std::string& ipname) {
|
||||
// /sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}
|
||||
const fs::path dir_path = fs::path(diepath) / ipname;
|
||||
if (!fs::exists(dir_path) || !fs::is_directory(dir_path)) {
|
||||
throw std::runtime_error("sysfs path does not exist or is not a directory: " +
|
||||
dir_path.string());
|
||||
}
|
||||
|
||||
discovery_table_t instances{};
|
||||
|
||||
// sub-folders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}"
|
||||
for (const auto& dir_entry : fs::directory_iterator(dir_path)) {
|
||||
if (!std::isdigit(dir_entry.path().filename().string()[0])) continue;
|
||||
|
||||
discovery_table_entry_t table_entry{};
|
||||
table_entry.die = die_num;
|
||||
|
||||
// "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}/{instance_num}"
|
||||
fs::path instance_path = dir_path / dir_entry.path().filename();
|
||||
|
||||
// base_addr list
|
||||
if (auto segments = read_sysfs_base_addr_segments(instance_path / "base_addr"))
|
||||
table_entry.segments = *segments;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP base_addr segments for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// major
|
||||
if (auto major = read_sysfs_single_int(instance_path / "major"))
|
||||
table_entry.major = *major;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP major version for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// minor
|
||||
if (auto minor = read_sysfs_single_int(instance_path / "minor"))
|
||||
table_entry.minor = *minor;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP minor version for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// revision
|
||||
if (auto revision = read_sysfs_single_int(instance_path / "revision"))
|
||||
table_entry.revision = *revision;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP revision for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// instance
|
||||
if (auto instance = read_sysfs_single_int(instance_path / "num_instance"))
|
||||
table_entry.instance = *instance;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP instance for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// convert name to lowercase
|
||||
table_entry.ipname = ipname;
|
||||
std::transform(table_entry.ipname.begin(), table_entry.ipname.end(), table_entry.ipname.begin(),
|
||||
[](unsigned char c) { return std::tolower(c); });
|
||||
|
||||
instances.emplace_back(table_entry);
|
||||
}
|
||||
|
||||
return instances;
|
||||
}
|
||||
|
||||
/**
 * @brief Generates a PCI domain BDF (Bus:Device.Function) string.
 *
 * Formats the given PCI domain and packed BDF value as
 * "Domain:Bus:Device.Function", e.g. "0000:47:00.0".
 *
 * @param domain The PCI domain number (32-bit unsigned integer).
 * @param bdf The PCI Bus/Device/Function (BDF) value (32-bit unsigned integer):
 *            bits 15..8 hold the bus, bits 7..3 the device.
 *
 * @return A string representing the PCI domain and BDF in the format "Domain:Bus:Device.Function".
 *
 * @details
 * - The domain is written as hexadecimal, zero-padded to at least 4 digits.
 * - The bus and the device are each written as 2-digit hexadecimal values.
 * - The function is currently always written as 0; the function bits of `bdf` are ignored.
 */
std::string get_domain_bdf_str(uint32_t domain, uint32_t bdf) {
  const unsigned bus_number = (bdf >> 8) & 0xff;
  const unsigned devfn = bdf & 0xFF;
  const unsigned device_number = (devfn >> 3) & 0x1f;
  // Future ToDo: derive the function from the low three bits of devfn to support
  // multiple functions. For now, it's always zero.
  const unsigned function_number = 0;

  std::ostringstream out;
  out << std::hex << std::setfill('0');
  out << std::setw(4) << domain << ":";
  out << std::setw(2) << bus_number << ":";
  out << std::setw(2) << device_number << ".";
  out << function_number;
  return out.str();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* @brief Parses IP discovery information for a given PCI domain and BDF (Bus:Device.Function).
|
||||
*
|
||||
* This function discovers IP instances for all dies associated with a given PCI device.
|
||||
* It reads the sysfs directory structure to extract information about IP instances
|
||||
* and populates the provided discovery table.
|
||||
*
|
||||
* @param domain The PCI domain number (32-bit unsigned integer).
|
||||
* @param bdf The PCI Bus/Device/Function (BDF) value (32-bit unsigned integer).
|
||||
* @return table The discovery table where parsed IP instance data will be stored.
|
||||
*
|
||||
* @throws std::runtime_error If the sysfs directory does not exist, is not a directory,
|
||||
* or if no IP instances are found.
|
||||
*
|
||||
* @details
|
||||
* - Constructs the sysfs path for the PCI device using the domain and BDF values.
|
||||
* - Iterates over the dies in the `/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die`
|
||||
* directory.
|
||||
* - For each die, iterates over the IP directories and calls `parse_ip_instances` to parse
|
||||
* individual IP instance data.
|
||||
* - If no IP instances are found, throws an exception.
|
||||
*/
|
||||
discovery_table_t parse_ip_discovery(uint32_t domain, uint32_t bdf) {
|
||||
// /sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die
|
||||
const fs::path die_path =
|
||||
fs::path("/sys/bus/pci/devices") / get_domain_bdf_str(domain, bdf) / "ip_discovery/die";
|
||||
|
||||
if (!fs::exists(die_path) || !fs::is_directory(die_path)) {
|
||||
throw std::runtime_error("sysfs path does not exist or is not a directory: " +
|
||||
die_path.string());
|
||||
}
|
||||
|
||||
discovery_table_t table{};
|
||||
|
||||
// iterate over every die
|
||||
// subfolders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die"
|
||||
for (const auto& die_entry : fs::directory_iterator(die_path)) {
|
||||
if (!die_entry.is_directory()) continue;
|
||||
|
||||
// "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die/{die_num}"
|
||||
const fs::path die_entry_path = die_entry.path();
|
||||
int die_num = std::stoi(die_entry_path.filename());
|
||||
|
||||
// subfolders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die/{die_num}"
|
||||
for (const auto& ip_entry : fs::directory_iterator(die_entry_path)) {
|
||||
if (!ip_entry.is_directory()) continue;
|
||||
const std::string filename = ip_entry.path().filename();
|
||||
if (std::isalpha(filename[0])) {
|
||||
const auto instances = parse_ip_instances(die_num, die_entry.path(), filename);
|
||||
table.insert(table.end(), instances.begin(), instances.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (table.empty()) {
|
||||
throw std::runtime_error("No IP instances found");
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "pm4_factory.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace aql_profile {
|
||||
namespace {
|
||||
struct locked_agent_cache {
|
||||
std::shared_mutex mutex;
|
||||
std::unordered_map<uint64_t, AgentInfo> cache;
|
||||
|
||||
void add(uint64_t& agent_id, const AgentInfo& agent_info) {
|
||||
auto lock = std::unique_lock{mutex};
|
||||
agent_id = cache.size();
|
||||
cache[agent_id] = agent_info;
|
||||
}
|
||||
|
||||
const AgentInfo* get(uint64_t agent_id) {
|
||||
auto lock = std::shared_lock{mutex};
|
||||
auto it = cache.find(agent_id);
|
||||
if (it == cache.end()) return nullptr;
|
||||
return &it->second;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the single shared agent cache. The object is heap-allocated the first
// time this function runs and is never deleted.
locked_agent_cache& get_cache() {
  static locked_agent_cache* const instance = new locked_agent_cache{};
  return *instance;
}
|
||||
} // namespace
|
||||
|
||||
// Copies the caller-supplied agent description into an internal AgentInfo, stores it
// in the agent cache and returns the handle under which it can be retrieved with
// GetAgentInfo().
//
// agent_info and agent_info->agent_gfxip are dereferenced without a null check.
aqlprofile_agent_handle_t RegisterAgent(const aqlprofile_agent_info_v1_t* agent_info) {
  // Value-initialize both objects so that fields this function does not assign do
  // not hold indeterminate values.
  // NOTE(review): Pm4Factory reads AgentInfo::dev_id and AgentInfo::name, neither of
  // which is set here - confirm whether they need real values for registered agents.
  aqlprofile_agent_handle_t agent_id{};
  AgentInfo int_agent_info{};

  int_agent_info.cu_num = agent_info->cu_num;
  int_agent_info.se_num = agent_info->se_num;
  int_agent_info.xcc_num = agent_info->xcc_num;
  int_agent_info.shader_arrays_per_se = agent_info->shader_arrays_per_se;
  int_agent_info.domain = agent_info->domain;
  int_agent_info.bdf_id = agent_info->location_id;

  // Copy the gfxip name, truncating so the destination always stays NUL-terminated.
  const size_t capacity = sizeof(int_agent_info.gfxip);
  const size_t len = strlen(agent_info->agent_gfxip);
  memset(int_agent_info.gfxip, 0, capacity);
  memcpy(int_agent_info.gfxip, agent_info->agent_gfxip, (len >= capacity ? capacity - 1 : len));

  get_cache().add(agent_id.handle, int_agent_info);
  return agent_id;
}
|
||||
|
||||
// Looks up the AgentInfo stored in the agent cache for the given handle.
// Returns nullptr when the cache has no entry for it.
const AgentInfo* GetAgentInfo(aqlprofile_agent_handle_t agent_id) {
  locked_agent_cache& registry = get_cache();
  return registry.get(agent_id.handle);
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,417 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_PM4_FACTORY_H_
|
||||
#define SRC_CORE_PM4_FACTORY_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "def/gpu_block_info.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Plain description of an agent. The integer members default to zero so a
// default-constructed object never carries indeterminate values.
// NOTE(review): field meanings below are inferred from the member names - confirm.
struct pm4_agent_info {
  std::string agent_gfxip;            // gfxip name of the agent
  uint32_t cu_num = 0;                // compute unit count
  uint32_t se_num = 0;                // shader engine count
  uint32_t shader_arrays_per_se = 0;  // shader arrays per shader engine
  uint32_t xcc_num = 0;               // XCC count
};
|
||||
|
||||
const AgentInfo* GetAgentInfo(aqlprofile_agent_handle_t agent_id);
|
||||
|
||||
aqlprofile_agent_handle_t RegisterAgent(const aqlprofile_agent_info_v1_t* agent_info);
|
||||
|
||||
// GPU enumeration.
// The numeric values are spelled out; they match the implicit numbering the
// enumerators would get from their declaration order.
enum gpu_id_t {
  INVAL_GPU_ID = 0,  // invalid GPU id
  GFX9_GPU_ID = 1,   // generic Gfx9 id
  MI100_GPU_ID = 2,  // Mi100 GPU id
  MI200_GPU_ID = 3,  // Mi200 GPU id
  MI300_GPU_ID = 4,  // Mi300 GPU id
  MI350_GPU_ID = 5,  // Mi350 GPU id
  GFX10_GPU_ID = 6,  // generic Gfx10 id
  GFX11_GPU_ID = 7,  // generic Gfx11 id
  GFX12_GPU_ID = 8,  // generic Gfx12 id
};
|
||||
|
||||
// Block info map class
|
||||
class BlockInfoMap {
|
||||
public:
|
||||
BlockInfoMap(const GpuBlockInfo** table, const uint32_t& size)
|
||||
: block_table_(table), block_count_(size / sizeof(uintptr_t)) {}
|
||||
BlockInfoMap(const BlockInfoMap& map)
|
||||
: block_table_(map.block_table_), block_count_(map.block_count_) {}
|
||||
|
||||
// Get block info for a given block id
|
||||
const GpuBlockInfo* Get(const uint32_t& block_id) const {
|
||||
return (block_id < block_count_) ? block_table_[block_id] : NULL;
|
||||
}
|
||||
|
||||
// Find block by name
|
||||
// Return block id or UINT32_MAX if not found
|
||||
uint32_t Find(const char* name) const {
|
||||
uint32_t index = 0;
|
||||
while (index < block_count_) {
|
||||
const GpuBlockInfo* entry = block_table_[index];
|
||||
if (entry) {
|
||||
if (strcmp(name, entry->name) == 0) break;
|
||||
}
|
||||
++index;
|
||||
}
|
||||
return (index == block_count_) ? UINT32_MAX : index;
|
||||
}
|
||||
|
||||
private:
|
||||
// Block info table
|
||||
const GpuBlockInfo** const block_table_;
|
||||
// Number of elements in the block info table
|
||||
const uint32_t block_count_;
|
||||
};
|
||||
|
||||
// Factory of PM4 builders
|
||||
class Pm4Factory {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static Pm4Factory* Create(aqlprofile_agent_handle_t agent_info, bool concurrent = false);
|
||||
static Pm4Factory* Create(const AgentInfo* agent_info, gpu_id_t gpu_id, bool concurrent);
|
||||
// Create factory for a given agent
|
||||
static Pm4Factory* Create(const hsa_agent_t agent, const bool concurrent = false);
|
||||
// Create factory for a given profile
|
||||
static Pm4Factory* Create(const profile_t* profile) {
|
||||
// First check and save the mode
|
||||
return Create(profile->agent, CheckConcurrent(profile));
|
||||
}
|
||||
// Destroy factory
|
||||
static void Destroy();
|
||||
|
||||
// Return gpu id
|
||||
gpu_id_t GetGpuId() const { return gpu_id_; }
|
||||
// Is pmc to be profiled concurrently?
|
||||
bool IsConcurrent() const { return concurrent_mode_; }
|
||||
// Is getting SPM data using driver public API?
|
||||
bool SpmKfdMode() const { return spm_kfd_mode_; }
|
||||
|
||||
// Return PM4 command builder
|
||||
pm4_builder::CmdBuilder* GetCmdBuilder() const { return cmd_builder_; }
|
||||
// Return PMC PM4 packets builder
|
||||
pm4_builder::PmcBuilder* GetPmcBuilder() const { return pmc_builder_; }
|
||||
// Return SPM PM4 packets builder
|
||||
pm4_builder::SpmBuilder* GetSpmBuilder() const { return spm_builder_; }
|
||||
// Return SQTT PM4 packets builder
|
||||
pm4_builder::SqttBuilder* GetSqttBuilder() const { return sqtt_builder_; }
|
||||
|
||||
// Return Shader Engines number
|
||||
uint32_t GetShaderEnginesNumber() const { return agent_info_->se_num; }
|
||||
uint32_t GetShaderArraysNumber() const { return agent_info_->shader_arrays_per_se; }
|
||||
uint32_t GetComputeUnitNumber() const { return agent_info_->cu_num; }
|
||||
// Return SQTT buffer alignment
|
||||
uint32_t GetSQTTBufferAlignment() const { return 0x1000; }
|
||||
const char* GetGFX() const { return agent_info_->name; }
|
||||
virtual bool IsGFX9() const { return false; }
|
||||
virtual bool IsGFX10() const { return false; }
|
||||
virtual bool IsGFX11() const { return false; }
|
||||
virtual bool IsGFX12() const { return false; }
|
||||
// Return number of XCC on the GPU
|
||||
uint32_t GetXccNumber() const { return agent_info_->xcc_num; }
|
||||
|
||||
const GpuBlockInfo* GetBlockInfo(const aqlprofile_pmc_event_t* event) const {
|
||||
const GpuBlockInfo* info = block_map_.Get(event->block_name);
|
||||
if (info == NULL) throw std::runtime_error("Bad Block");
|
||||
// Checking that the block index is in proper range
|
||||
if (event->block_index >= info->instance_count) throw std::runtime_error("Bad Index");
|
||||
// Checking that the counter event index is in proper range
|
||||
#if 0
|
||||
if (event->counter_id > info->event_id_max)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
#endif
|
||||
return info;
|
||||
}
|
||||
|
||||
// Return block info foor a given event
|
||||
const GpuBlockInfo* GetBlockInfo(const event_t* event) const {
|
||||
const GpuBlockInfo* info = block_map_.Get(event->block_name);
|
||||
if (info == NULL) throw event_exception(std::string("Bad block, "), *event);
|
||||
// Checking that the block index is in proper range
|
||||
if (event->block_index >= info->instance_count)
|
||||
throw event_exception(std::string("Bad block index, "), *event);
|
||||
// Checking that the counter event index is in proper range
|
||||
#if 0
|
||||
if (event->counter_id > info->event_id_max)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
#endif
|
||||
return info;
|
||||
}
|
||||
|
||||
// Return block info for a given block id
|
||||
const GpuBlockInfo* GetBlockInfo(const uint32_t& block_id) const {
|
||||
return block_map_.Get(block_id);
|
||||
}
|
||||
|
||||
virtual size_t GetNumEvents(uint32_t block_name) const {
|
||||
size_t se_number = GetShaderEnginesNumber() / GetXccNumber();
|
||||
size_t block_samples_count = 1;
|
||||
auto* block_info = GetBlockInfo(block_name);
|
||||
|
||||
if (block_info->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (block_info->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
if (block_info->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= GetNumWGPs();
|
||||
if ((block_info->attr & CounterBlockSqAttr) && IsGFX11()) // TODO: Move to CounterBlockWgpAttr
|
||||
block_samples_count *= GetNumWGPs();
|
||||
return block_samples_count;
|
||||
}
|
||||
|
||||
virtual size_t GetBytesNeeded(uint32_t block_name) const {
|
||||
return GetNumEvents(block_name) * GetXccNumber() * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
// Return block id for a given block name string
|
||||
uint32_t FindBlock(const char* name) const { return block_map_.Find(name); }
|
||||
|
||||
/// Workaround for GFX11. PMC Builder overrides this.
|
||||
virtual int GetNumWGPs() const {
|
||||
if (pmc_builder_) return pmc_builder_->GetNumWGPs();
|
||||
return 1;
|
||||
};
|
||||
|
||||
virtual int GetAccumLowID() const { throw HSA_STATUS_ERROR_INVALID_ARGUMENT; };
|
||||
virtual int GetAccumHiID() const { throw HSA_STATUS_ERROR_INVALID_ARGUMENT; };
|
||||
|
||||
protected:
|
||||
explicit Pm4Factory(const BlockInfoMap& map)
|
||||
: cmd_builder_(NULL),
|
||||
pmc_builder_(NULL),
|
||||
spm_builder_(NULL),
|
||||
sqtt_builder_(NULL),
|
||||
agent_info_(NULL),
|
||||
concurrent_mode_(concurrent_create_mode_),
|
||||
block_map_(map) {}
|
||||
|
||||
virtual ~Pm4Factory() {
|
||||
delete cmd_builder_;
|
||||
delete pmc_builder_;
|
||||
delete spm_builder_;
|
||||
delete sqtt_builder_;
|
||||
}
|
||||
|
||||
// PM4 command builder
|
||||
pm4_builder::CmdBuilder* cmd_builder_;
|
||||
// PMC PM4 packets builder
|
||||
pm4_builder::PmcBuilder* pmc_builder_;
|
||||
// SPM PM4 packets builder
|
||||
pm4_builder::SpmBuilder* spm_builder_;
|
||||
// SQTT PM4 packets builder
|
||||
pm4_builder::SqttBuilder* sqtt_builder_;
|
||||
// agent info
|
||||
const AgentInfo* agent_info_;
|
||||
gpu_id_t gpu_id_;
|
||||
// Concurrent mode
|
||||
static bool concurrent_create_mode_;
|
||||
static bool spm_kfd_mode_;
|
||||
bool concurrent_mode_;
|
||||
|
||||
private:
|
||||
// PM4 factory instance map type
|
||||
struct instances_fncomp_t {
|
||||
bool operator()(const hsa_agent_t& a, const hsa_agent_t& b) const {
|
||||
return a.handle < b.handle;
|
||||
}
|
||||
};
|
||||
typedef std::map<hsa_agent_t, Pm4Factory*, instances_fncomp_t> instances_t;
|
||||
|
||||
// Create GFX9 generic factory
|
||||
static Pm4Factory* Gfx9Create(const AgentInfo* agent_info);
|
||||
// Create GFX10 generic factory
|
||||
static Pm4Factory* Gfx10Create(const AgentInfo* agent_info);
|
||||
// Create GFX11 generic factory
|
||||
static Pm4Factory* Gfx11Create(const AgentInfo* agent_info);
|
||||
// Create GFX12 generic factory
|
||||
static Pm4Factory* Gfx12Create(const AgentInfo* agent_info);
|
||||
// Create MI100 factory
|
||||
static Pm4Factory* Mi100Create(const AgentInfo* agent_info);
|
||||
// Create MI200 factory
|
||||
static Pm4Factory* Mi200Create(const AgentInfo* agent_info);
|
||||
// Create MI300 factory
|
||||
static Pm4Factory* Mi300Create(const AgentInfo* agent_info);
|
||||
// Create MI350 factory
|
||||
static Pm4Factory* Mi350Create(const AgentInfo* agent_info);
|
||||
// Return GPU id for a given agent
|
||||
static gpu_id_t GetGpuId(std::string_view);
|
||||
|
||||
static bool CheckConcurrent(const profile_t* profile);
|
||||
|
||||
// Mutex for inter thread synchronization for the instances create/destroy
|
||||
static mutex_t mutex_;
|
||||
// Factory instances container
|
||||
static instances_t* instances_;
|
||||
// Block info container
|
||||
const BlockInfoMap block_map_;
|
||||
};
|
||||
|
||||
// Returns the factory registered for agent_info->dev_id, creating it for the given
// GPU id on first use. Also records the requested concurrent mode and the
// ROCP_SPM_KFD_MODE environment setting in the class-wide flags.
//
// This overload does not take mutex_ itself.
//
// Throws aql_profile_exc_msg if agent_info is null or the factory could not be
// created, and aql_profile_exc_val<gpu_id_t> if gpu_id is not a known GPU id.
inline Pm4Factory* Pm4Factory::Create(const AgentInfo* agent_info, gpu_id_t gpu_id,
                                      bool concurrent) {
  if (agent_info == NULL) throw aql_profile_exc_msg("Pm4Factory::Create() bad agent info");

  if (instances_ == NULL) instances_ = new instances_t;

  // Must be set before a factory is constructed: the constructor reads it.
  concurrent_create_mode_ = concurrent;
  static bool spm_kfd = getenv("ROCP_SPM_KFD_MODE") != NULL;
  spm_kfd_mode_ = spm_kfd;

  // Check if we have the instance already created
  instances_t::iterator it = instances_->find(agent_info->dev_id);
  if (it == instances_->end()) {
    // Build the factory first and register it only once it exists, so a failed
    // creation does not leave a null entry behind that would make every later
    // call for this agent fail.
    Pm4Factory* factory = NULL;
    switch (gpu_id) {
      // Create Gfx9 generic factory
      case GFX9_GPU_ID:
        factory = Gfx9Create(agent_info);
        break;
      // Create Gfx10 generic factory
      case GFX10_GPU_ID:
        factory = Gfx10Create(agent_info);
        break;
      // Create Gfx11 generic factory
      case GFX11_GPU_ID:
        factory = Gfx11Create(agent_info);
        break;
      // Create Gfx12 generic factory
      case GFX12_GPU_ID:
        factory = Gfx12Create(agent_info);
        break;
      // Create MI100 factory
      case MI100_GPU_ID:
        factory = Mi100Create(agent_info);
        break;
      // Create MI200 factory
      case MI200_GPU_ID:
        factory = Mi200Create(agent_info);
        break;
      // Create MI300 factory
      case MI300_GPU_ID:
        factory = Mi300Create(agent_info);
        break;
      // Create MI350 factory
      case MI350_GPU_ID:
        factory = Mi350Create(agent_info);
        break;
      default:
        throw aql_profile_exc_val<gpu_id_t>("GPU id error", gpu_id);
    }
    if (factory == NULL) throw aql_profile_exc_msg("Pm4Factory::Create() failed");
    it = instances_->insert({agent_info->dev_id, factory}).first;
  }

  it->second->gpu_id_ = gpu_id;
  return it->second;
}
|
||||
|
||||
// Create PM4 factory
|
||||
inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent, bool concurrent) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
// Get GPU id for a given agent
|
||||
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
std::vector<char> agent_name{};
|
||||
agent_name.resize(64);
|
||||
uint32_t device_id = 0;
|
||||
|
||||
// Getting GfxIP name
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name.data());
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
// Getting DeviceId
|
||||
hsa_agent_info_t attribute = static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID);
|
||||
status = hsa_agent_get_info(agent, attribute, &device_id);
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
throw aql_profile_exc_msg("Pm4Factory::Create() bad agent");
|
||||
}
|
||||
|
||||
const gpu_id_t gpu_id = GetGpuId(agent_name.data());
|
||||
return Pm4Factory::Create(agent_info, gpu_id, concurrent);
|
||||
}
|
||||
|
||||
// Create PM4 factory for an agent previously registered with RegisterAgent().
// Throws aql_profile_exc_val<uint64_t> if the handle is unknown.
inline Pm4Factory* Pm4Factory::Create(aqlprofile_agent_handle_t agent_info, bool concurrent) {
  const auto* info = GetAgentInfo(agent_info);
  if (info == NULL) throw aql_profile_exc_val<uint64_t>("Bad agent handle", agent_info.handle);
  const gpu_id_t gpu_id = GetGpuId(info->gfxip);

  // The instance map is shared by all threads; take the same mutex that the
  // hsa_agent_t overload and Destroy() hold while they touch it.
  std::lock_guard<mutex_t> lck(mutex_);
  return Pm4Factory::Create(info, gpu_id, concurrent);
}
|
||||
|
||||
// Destroy PM4 factory
|
||||
inline void Pm4Factory::Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (instances_ != NULL) {
|
||||
for (auto& item : *instances_) delete item.second;
|
||||
delete instances_;
|
||||
instances_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the setting of pmc profiling mode
|
||||
inline bool Pm4Factory::CheckConcurrent(const profile_t* profile) {
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
if (p->parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return GPU id for a given agent
|
||||
inline gpu_id_t Pm4Factory::GetGpuId(std::string_view gfx_ip) {
|
||||
std::vector<std::pair<std::string, gpu_id_t>> gfxip_map = {
|
||||
{"gfx908", MI100_GPU_ID}, {"gfx90a", MI200_GPU_ID}, {"gfx900", GFX9_GPU_ID},
|
||||
{"gfx902", GFX9_GPU_ID}, {"gfx906", GFX9_GPU_ID}, {"gfx94", MI300_GPU_ID},
|
||||
{"gfx95", MI350_GPU_ID}, {"gfx10", GFX10_GPU_ID}, {"gfx11", GFX11_GPU_ID},
|
||||
{"gfx12", GFX12_GPU_ID},
|
||||
};
|
||||
|
||||
for (const auto& [name, id] : gfxip_map) {
|
||||
if (gfx_ip.rfind(name, 0) == 0) {
|
||||
return id;
|
||||
}
|
||||
}
|
||||
|
||||
return INVAL_GPU_ID;
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // SRC_CORE_PM4_FACTORY_H_
|
||||
@@ -0,0 +1,71 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "core/amd_aql_pm4_ib_packet.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "pm4/cmd_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Fill the vendor-specific PM4-IB fields of an AQL packet.
//
// ib_packet  - the four DWords of the PM4 indirect-buffer command.
// aql_packet - destination packet, reinterpreted as amd_aql_pm4_ib_packet_t.
//
// The packet header and completion signal are not written here.
void PopulateAql(const uint32_t* ib_packet, packet_t* aql_packet) {
  amd_aql_pm4_ib_packet_t* aql_pm4_ib = reinterpret_cast<amd_aql_pm4_ib_packet_t*>(aql_packet);
  aql_pm4_ib->pm4_ib_format = AMD_AQL_PM4_IB_FORMAT;
  // Size of the IB packet is four DWords.
  aql_pm4_ib->pm4_ib_command[0] = ib_packet[0];
  aql_pm4_ib->pm4_ib_command[1] = ib_packet[1];
  aql_pm4_ib->pm4_ib_command[2] = ib_packet[2];
  aql_pm4_ib->pm4_ib_command[3] = ib_packet[3];
  aql_pm4_ib->dw_count_remain = AMD_AQL_PM4_IB_DW_COUNT_REMAIN;
  for (unsigned i = 0; i < AMD_AQL_PM4_IB_RESERVED_COUNT; ++i) {
    aql_pm4_ib->reserved[i] = 0;
  }

#if defined(DEBUG_TRACE)
  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(aql_packet);
  const uint32_t dword_count = sizeof(*aql_packet) / sizeof(uint32_t);

  // Label carries the real DWord count (it used to be built and then dropped
  // in favour of a hard-coded "size(16)").
  std::ostringstream oss;
  oss << "AQL 'IB' size(" << dword_count << ")";

  // The manipulators below are sticky; remember the stream state so later
  // std::clog output is not left in hex / left-aligned mode.
  const std::ios_base::fmtflags saved_flags = std::clog.flags();
  const char saved_fill = std::clog.fill();

  std::clog << std::setw(40) << std::left << oss.str() << ":";
  // Switch to right alignment: with std::left still active the '0' fill would
  // be appended after the digits (0x1 would print as "10000000").
  std::clog << std::right << std::hex << std::setfill('0');
  for (unsigned idx = 0; idx < dword_count; idx++) {
    std::clog << " " << std::setw(8) << dwords[idx];
  }
  std::clog.flags(saved_flags);
  std::clog.fill(saved_fill);
  std::clog << std::endl;
#endif
}
|
||||
|
||||
void PopulateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_builder::CmdBuilder* cmd_writer,
|
||||
packet_t* aql_packet) {
|
||||
pm4_builder::CmdBuffer ib_buffer;
|
||||
cmd_writer->BuildIndirectBufferCmd(&ib_buffer, cmd_buffer, (size_t)cmd_size);
|
||||
PopulateAql((const uint32_t*)ib_buffer.Data(), aql_packet);
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,239 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <dirent.h>
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include <pthread.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
// Evaluate a pthread call exactly once and abort the process if it fails.
// pthread functions return the error number rather than setting errno, so the
// result is stored into errno before perror() prints "<call>: <reason>".
// The temporary has a deliberately unusual name: with a plain `err` an
// argument that itself mentions a variable called `err` would be shadowed.
#define PTHREAD_CALL(call)                         \
  do {                                             \
    const int pthread_call_status_ = (call);       \
    if (pthread_call_status_ != 0) {               \
      errno = pthread_call_status_;                \
      perror(#call);                               \
      abort();                                     \
    }                                              \
  } while (0)
|
||||
|
||||
namespace spm_kfd_namespace {
|
||||
|
||||
// Return the KFD topology node number of the gpu_ind-th GPU node.
//
// Scans /sys/class/kfd/kfd/topology/nodes/<N>/gpu_id; a node counts as a GPU
// when its gpu_id reads as a non-zero integer. Nodes are counted in the order
// readdir() returns them. Aborts the process when fewer than gpu_ind + 1 GPU
// nodes are found.
//
// NOTE(review): readdir() order is unspecified, so the index -> node mapping
// depends on directory enumeration order; confirm whether entries should be
// sorted numerically first.
int get_gpu_node_id(uint32_t gpu_ind) {
  int gpu_node = -1;
  uint32_t gpu_count = 0;  // GPU nodes seen so far

  // find a valid gpu node from /sys/class/kfd/kfd/topology/nodes
  const std::string path = "/sys/class/kfd/kfd/topology/nodes";
  DIR* nodes_dir = opendir(path.c_str());

  if (nodes_dir != NULL) {
    struct dirent* ent;
    while ((ent = readdir(nodes_dir)) != NULL) {
      const std::string node_name = ent->d_name;

      // Node directories are purely numeric; this also skips "." and "..".
      if (node_name.find_first_not_of("0123456789") != std::string::npos) continue;

      const std::string file = path + "/" + node_name + "/gpu_id";
      std::ifstream infile(file);
      // Stays 0 (treated as "not a GPU") when the file cannot be opened or parsed.
      int id = 0;
      infile >> id;
      if (id == 0) continue;

      if (gpu_count == gpu_ind) {
        gpu_node = atoi(ent->d_name);
        break;
      }
      // Count every GPU node that is skipped. The counter used to be advanced
      // only on a match, so any gpu_ind > 0 could never be found.
      ++gpu_count;
    }
    closedir(nodes_dir);
  }

  if (gpu_node == -1) {
    printf("get_gpu_node_id`error: GPU[%u] not found\n", gpu_ind);
    fflush(stdout);
    abort();
  }

  return gpu_node;
}
|
||||
|
||||
// Agent overload: look up the agent's dev_index through HsaRsrcFactory and
// forward it to the index-based get_gpu_node_id().
int get_gpu_node_id(hsa_agent_t agent) {
  auto&& rsrc_factory = HsaRsrcFactory::Instance();
  const uint32_t device_index = rsrc_factory.GetAgentInfo(agent)->dev_index;
  return get_gpu_node_id(device_index);
}
|
||||
|
||||
// State shared between the SPM producer and consumer threads of one
// mananger_fun() session. mananger_fun() value-initializes it, fills in the
// buffer layout and passes its address to both threads.
struct state_t {
  // Set by consumer_fun() when the user callback returns HSA_STATUS_INFO_BREAK;
  // tested by producer_fun() to end its loop.
  bool thread_stop;
  // Result of get_gpu_node_id() for the profiled agent (set in mananger_fun()).
  int node_id;
  // Size of each of the three buffers below (a third of the output buffer,
  // rounded down to a multiple of 256 in mananger_fun()).
  uint32_t buf_size;
  // Copied into a local on every producer iteration; mananger_fun() sets it to
  // 1000000 with the comment "1sec" (presumably microseconds - TODO confirm).
  uint32_t timeout;
  // Reported to the callback as trace_data.size. Only the currently
  // commented-out hsa_amd_spm_set_dest_buffer() calls would write it.
  uint32_t data_size;
  // Three equally sized slices of the profile output buffer; producer_fun()
  // rotates the three pointers under work_mutex, and consumer_fun() hands
  // cons_buf to the callback.
  void* kfd_buf;
  void* prod_buf;
  void* cons_buf;
  // Referenced only by the commented-out hsa_amd_spm_set_dest_buffer() calls.
  bool data_loss;
  // Set by the producer after rotating the buffers, cleared by the consumer
  // when it wakes up; accessed while holding work_mutex.
  bool ready;
  // Mutex/condition pair used to hand `ready` from producer to consumer.
  pthread_mutex_t work_mutex;
  pthread_cond_t work_cond;
  // Agent being profiled (copied from the profile in mananger_fun()).
  hsa_agent_t agent;
};
|
||||
|
||||
// SPM producer thread body.
//
// Each iteration rotates the three buffers (prod <- cons <- kfd <- prod) under
// work_mutex, marks data as ready and signals the consumer. The loop ends once
// the consumer has set state->thread_stop.
//
// The hsa_amd_spm_set_dest_buffer() calls are currently commented out, so
// `status` is always HSA_STATUS_SUCCESS and the error branches cannot fire.
void producer_fun(state_t* state) {
  uint32_t timeout = 0;
  hsa_status_t status = HSA_STATUS_SUCCESS;
  // hsa_amd_spm_set_dest_buffer(state->agent, state->buf_size, &timeout, &(state->data_size),
  //                             state->kfd_buf, &(state->data_loss));
  if (status != HSA_STATUS_SUCCESS) {
    printf("hsa SPM Set DestBuffer init error\n");
    fflush(stdout);
    abort();
  }

  bool stop_requested = false;
  do {
    timeout = state->timeout;
    status = HSA_STATUS_SUCCESS;
    // hsa_amd_spm_set_dest_buffer(state->agent, state->buf_size, &timeout, &(state->data_size),
    //                             state->prod_buf, &(state->data_loss));
    if (status != HSA_STATUS_SUCCESS) {
      printf("hsa SPM Set DestBuffer error\n");
      fflush(stdout);
      abort();
    }

    PTHREAD_CALL(pthread_mutex_lock(&(state->work_mutex)));
    void* tmp = state->prod_buf;
    state->prod_buf = state->cons_buf;
    state->cons_buf = state->kfd_buf;
    state->kfd_buf = tmp;
    state->ready = true;
    // consumer_fun() writes thread_stop while holding work_mutex, so sample it
    // under the same lock; reading it after the unlock was a data race.
    stop_requested = state->thread_stop;
    PTHREAD_CALL(pthread_cond_signal(&(state->work_cond)));
    PTHREAD_CALL(pthread_mutex_unlock(&(state->work_mutex)));
  } while (!stop_requested);

  status = HSA_STATUS_SUCCESS;
  // hsa_amd_spm_set_dest_buffer(state->agent, 0, &timeout, &(state->data_size), NULL,
  //                             &(state->data_loss));
  if (status != HSA_STATUS_SUCCESS) {
    printf("hsa SPM Set DestBuffer stop error\n");
    fflush(stdout);
    abort();
  }
}
|
||||
|
||||
// SPM consumer thread body.
//
// Holds work_mutex for its whole lifetime except while blocked in
// pthread_cond_wait(). Every time the producer raises `ready`, the current
// cons_buf / data_size are passed to `callback` as a trace-data sample with
// sample id 0. A callback result of HSA_STATUS_INFO_BREAK sets thread_stop and
// ends the loop; any other non-success result aborts the process.
void consumer_fun(state_t* state, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
  const uint32_t sample_id = 0;

  PTHREAD_CALL(pthread_mutex_lock(&(state->work_mutex)));
  for (;;) {
    // Loop guards against spurious wake-ups.
    while (!state->ready) {
      PTHREAD_CALL(pthread_cond_wait(&(state->work_cond), &(state->work_mutex)));
    }
    state->ready = false;

    hsa_ven_amd_aqlprofile_info_data_t sample_info;
    sample_info.sample_id = sample_id;
    sample_info.trace_data.ptr = state->cons_buf;
    sample_info.trace_data.size = state->data_size;

    const hsa_status_t cb_status =
        callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &sample_info, data);

    if (cb_status == HSA_STATUS_INFO_BREAK) {
      // The callback asked to stop streaming; tell the producer and leave.
      state->thread_stop = true;
      break;
    }
    if (cb_status != HSA_STATUS_SUCCESS) {
      printf("SPM consumer callback failed\n");
      abort();
    }
  }
  PTHREAD_CALL(pthread_mutex_unlock(&(state->work_mutex)));
}
|
||||
|
||||
void mananger_fun(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
state_t obj{};
|
||||
const int gpu_node_id = get_gpu_node_id(profile->agent);
|
||||
char* buf_ptr = (char*)(profile->output_buffer.ptr);
|
||||
// SPM data buffer size 256 byte aligned
|
||||
const uint32_t buf_size = (profile->output_buffer.size / 3) & ~(uint32_t(256) - 1);
|
||||
|
||||
obj.timeout = 1000000; // 1sec
|
||||
obj.node_id = gpu_node_id;
|
||||
obj.buf_size = buf_size;
|
||||
obj.kfd_buf = buf_ptr;
|
||||
obj.prod_buf = buf_ptr + buf_size;
|
||||
obj.cons_buf = buf_ptr + 2 * buf_size;
|
||||
obj.agent = profile->agent;
|
||||
|
||||
PTHREAD_CALL(pthread_mutex_init(&(obj.work_mutex), NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&(obj.work_cond), NULL));
|
||||
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS; // hsa_amd_spm_acquire(profile->agent);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
printf("hsa SPM Acquire error\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
|
||||
// spm threads
|
||||
std::thread producer(producer_fun, &obj);
|
||||
std::thread consumer(consumer_fun, &obj, callback, data);
|
||||
|
||||
producer.join();
|
||||
consumer.join();
|
||||
|
||||
status = HSA_STATUS_SUCCESS; // hsa_amd_spm_release(profile->agent);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
printf("hsa SPM Release error\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
typedef std::mutex spm_mutex_t;
|
||||
spm_mutex_t spm_mutex;
|
||||
|
||||
// Getting SPM data using driver API
|
||||
hsa_status_t spm_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
std::lock_guard<spm_mutex_t> lck(spm_mutex);
|
||||
static std::thread* t = NULL;
|
||||
|
||||
if (t == NULL) {
|
||||
// spm manager thread
|
||||
t = new std::thread(mananger_fun, profile, callback, data);
|
||||
} else {
|
||||
t->join();
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace spm_kfd_namespace
|
||||
@@ -0,0 +1,420 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
#include "memorymanager.hpp"
|
||||
|
||||
// Extra bytes added on top of the per-SE TraceControl array when
// _internal_aqlprofile_att_create_packets() allocates the trace control buffer.
#define THREAD_TRACE_PREFIX_SIZE 0x100
// Default thread-trace output buffer size in bytes (3 << 26 = 192 MiB); used
// unless an ATT_BUFFER_SIZE parameter overrides it.
#define DEFAULT_TRACE_BUFFER_SIZE (3 << 26)
|
||||
|
||||
// 64-bit header that getHeaderPacket() fills in and that
// _internal_aqlprofile_att_iterate_data() writes in front of the trace data on
// pre-GFX10 GPUs. The bit-fields and `raw` alias the same storage; the
// bit-field widths add up to 64.
union att_header_packet_t {
  struct {
    uint64_t legacy_version : 13;
    uint64_t gfx9_version2 : 3;
    uint64_t DSIMDM : 4;  // receives the SIMD argument of getHeaderPacket()
    uint64_t DCU : 5;     // receives the CU argument of getHeaderPacket()
    uint64_t DSA : 1;
    uint64_t SEID : 6;    // receives the SE argument of getHeaderPacket()
    uint64_t reserved2 : 32;
  };
  uint64_t raw;
};
|
||||
|
||||
// Channel ids passed as the last argument of SqttBuilder::InsertMarker() when
// emitting code-object markers (header, address, size and id words).
enum att_marker_state {
  ATT_MARKER_HEADER_CHANNEL = 0,
  ATT_MARKER_SIZE_LO_CHANNEL = 1,
  ATT_MARKER_ADDR_LO_CHANNEL = 2,
  ATT_MARKER_ADDR_HI_CHANNEL = 3,
  ATT_MARKER_SIZE_HI_CHANNEL = 4,
  ATT_MARKER_ID_LO_CHANNEL = 5,
  ATT_MARKER_ID_HI_CHANNEL = 6,
  ATT_MARKER_WAIT_FOR_HEADER = 32
};
|
||||
|
||||
// 32-bit payload of the marker sent on ATT_MARKER_HEADER_CHANNEL. The
// bit-fields and `raw` alias the same storage.
union aqlprofile_att_header_marker_t {
  struct {
    // 0 if code object is being loaded, 1 for unload
    uint32_t isUnload : 1;
    // Has this code object been loaded before thread trace started?
    uint32_t bFromStart : 1;
    // Legacy code object ID, if it fits in 30 bits.
    uint32_t legacy_id : 30;
  };
  uint32_t raw;
};
|
||||
|
||||
// Build the header packet for one shader engine: SE, CU and SIMD are stored in
// the SEID, DCU and DSIMDM fields; every field not listed below stays zero.
inline att_header_packet_t getHeaderPacket(int SE, int CU, int SIMD) {
  att_header_packet_t packet;
  packet.raw = 0;

  // Caller-supplied location of the trace.
  packet.SEID = SE;
  packet.DCU = CU;
  packet.DSIMDM = SIMD;
  packet.DSA = 0;

  // Fixed version tags.
  packet.legacy_version = 0x11;  // The thread trace viewer only sees gfx9 for 0x11
  packet.gfx9_version2 = 4;

  return packet;
}
|
||||
|
||||
namespace aql_profile_v2 {
|
||||
|
||||
// Copy the thread-trace (SQTT) data of every traced shader engine into host
// memory and pass it to `callback`.
//
// handle   - handle whose memory manager must be a TraceMemoryManager,
//            otherwise HSA_STATUS_ERROR_INVALID_ARGUMENT is returned.
// callback - invoked once per shader engine whose target CU is >= 0 with
//            (se index, data pointer, data size in bytes, userdata). Its return
//            value is not inspected.
//
// Returns HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_EXCEPTION when an SE reports
// a memory error, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when a buffer is
// reported full or a write pointer lies at or beyond the SE capacity. Data is
// still delivered to the callback in the error cases.
hsa_status_t _internal_aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
                                                   aqlprofile_att_data_callback_t callback,
                                                   void* userdata) {
  hsa_status_t status = HSA_STATUS_SUCCESS;

  auto shared_memorymgr = MemoryManager::GetManager(handle.handle);
  TraceMemoryManager* memorymgr = dynamic_cast<TraceMemoryManager*>(shared_memorymgr.get());
  if (!memorymgr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(memorymgr->GetAgent());
  pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
  const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
  auto* control_ptr = memorymgr->GetTraceControlBuf<pm4_builder::TraceControl>();

  // Translate the per-SE status words into the returned status. A memory error
  // always wins; buffer-full is only recorded if nothing failed before.
  for (size_t se = 0; se < se_number_total; se++) {
    if (control_ptr[se].status & sqttbuilder->GetUTCErrorMask()) {
      ERR_LOGGING << "SQTT memory error received, SE(" << se << ")";
      status = HSA_STATUS_ERROR_EXCEPTION;
    } else if (control_ptr[se].status & sqttbuilder->GetBufferFullMask()) {
      ERR2_LOGGING << "SQTT data buffer full, SE(" << se << ")";
      if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  std::vector<size_t> sample_sizes(se_number_total, 0);
  size_t max_sample_size = 0;

  // Pass 1: the sample sizes are returned in the control buffer.
  // NOTE(review): sizes are computed and validated for every SE, including
  // those with GetTargetCU() < 0 that pass 2 skips; the unused `bMaskedIn`
  // local removed from here suggests a skip may have been intended - confirm.
  for (uint64_t se_index = 0; se_index < se_number_total; se_index++) {
    const uint64_t sample_capacity = memorymgr->config.GetCapacity(se_index);
    void* sample_ptr = reinterpret_cast<void*>(memorymgr->config.GetSEBaseAddr(se_index));

    // WPTR specifies the index in thread trace buffer where next token will be
    // written by hardware. The index is incremented by size of 32 bytes.
    const size_t wptr_mask = sqttbuilder->GetWritePtrMask();
    size_t sample_size = (control_ptr[se_index].wptr & wptr_mask) * sqttbuilder->GetWritePtrBlk();

    // GFX11 hardware bug workaround
    if (pm4_factory->GetGpuId() == aql_profile::GFX11_GPU_ID) {
      sample_size = sample_size - reinterpret_cast<uint64_t>(sample_ptr);
      sample_size &= (1ull << 29) - 1;
    }

    if (sample_size >= sample_capacity) {
      ERR_LOGGING << "SQTT data out of bounds, sample_id(" << se_index << ") size(" << sample_size
                  << "/" << sample_capacity << ")";
      sample_size = sample_capacity;  // never copy past the SE buffer
      if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    sample_sizes.at(se_index) = sample_size;
    max_sample_size = std::max(sample_size, max_sample_size);
  }

  // Staging buffer reused for every SE. The element count is the largest
  // sample in size_t units plus sizeof(att_header_packet_t) extra elements,
  // which leaves room for the optional header and the rounded-down remainder.
  std::vector<size_t> cpu_sample(max_sample_size / sizeof(size_t) + sizeof(att_header_packet_t), 0);

  // Pass 2: copy each traced SE into the staging buffer and deliver it.
  for (uint64_t se_index = 0; se_index < se_number_total; se_index++) {
    const int target_cu = memorymgr->config.GetTargetCU(se_index);
    if (target_cu < 0) continue;

    void* sample_ptr = reinterpret_cast<void*>(memorymgr->config.GetSEBaseAddr(se_index));
    const size_t sample_size = sample_sizes.at(se_index);
    size_t sample_size_plus_header = sample_size;

    char* sample_data_ptr = reinterpret_cast<char*>(cpu_sample.data());
    if (pm4_factory->GetGpuId() < aql_profile::GFX10_GPU_ID) {
      // GPU ids below GFX10 get a header packet in front of the trace data.
      auto* header = reinterpret_cast<att_header_packet_t*>(cpu_sample.data());
      *header = getHeaderPacket(se_index, target_cu, memorymgr->GetSimdMask());
      sample_data_ptr += sizeof(att_header_packet_t);
      sample_size_plus_header = sample_size + sizeof(att_header_packet_t);
    }

    memorymgr->CopyMemory(static_cast<void*>(sample_data_ptr), sample_ptr, sample_size);
    callback(se_index, static_cast<void*>(cpu_sample.data()), sample_size_plus_header, userdata);
  }

  return status;
}
|
||||
|
||||
// Build the AQL start/stop packets for a thread-trace (ATT) session.
//
// Steps: apply the profile parameters to the trace configuration, allocate the
// control and output buffers through a TraceMemoryManager, register that
// manager, generate the SQTT begin/end command streams, copy them into one
// command buffer with copy_fn, and wrap each stream into an AQL PM4-IB packet.
//
// handle  - receives the handle of the registered memory manager.
// packets - receives start_packet and stop_packet.
//
// Returns HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an
// unknown parameter name or more than 8 perf counters. Out-of-range CU target,
// mask or token-mask values throw aql_profile::aql_profile_exc_val<uint32_t>.
hsa_status_t _internal_aqlprofile_att_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_att_control_aql_packets_t* packets,
    aqlprofile_att_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t copy_fn,
    void* userdata) {
  pm4_builder::CmdBuffer start_cmd;
  pm4_builder::CmdBuffer stop_cmd;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile.agent);

  auto memorymgr =
      std::make_shared<TraceMemoryManager>(profile.agent, alloc_cb, dealloc_cb, copy_fn, userdata);

  auto& trace_config = memorymgr->config;

  // Defaults; any of them may be overridden by the parameters below.
  trace_config.vmIdMask = 0;
  trace_config.simd_sel = 0xF;
  trace_config.perfMASK = ~0u;
  trace_config.se_mask = 0x11111111;

  const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
  size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;

  if (profile.parameters) {
    const auto* params_end = profile.parameters + profile.parameter_count;
    for (const auto* p = profile.parameters; p < params_end; p++) {
      switch (p->parameter_name) {
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK:
          // Drop bits for shader engines that do not exist.
          trace_config.se_mask = p->value & ((1ull << se_number_total) - 1);
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
          if (p->value > 15)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
          trace_config.targetCu = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
          trace_config.vmIdMask = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
          if ((p->value & 0x50) != 0)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
          trace_config.deprecated_mask = p->value;
          trace_config.targetCu = p->value & 0xF;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
          // NOTE(review): the check covers bits [31:24] while the message says
          // [31:25]; confirm which one is intended.
          if ((p->value & 0xFF000000) != 0)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
                p->value);
          trace_config.deprecated_tokenMask = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
          trace_config.deprecated_tokenMask2 = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
          trace_config.sampleRate = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
          trace_config.concurrent = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
          trace_config.simd_sel = p->value & 0xF;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE:
          trace_config.occupancy_mode = p->value ? 1 : 0;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
          buffer_size = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
          trace_config.perfMASK = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
          trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
          // At most 8 perf counters are accepted.
          if (trace_config.perfcounters.size() >= 8) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
          trace_config.perfcounters.push_back({p->counter_id, p->simd_mask});
          break;
        default:
          ERR_LOGGING << "Bad trace parameter name (" << p->parameter_name << ")";
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    }
  }

  const size_t control_size = sizeof(pm4_builder::TraceControl) * se_number_total;

  memorymgr->CreateTraceControlBuf(control_size + THREAD_TRACE_PREFIX_SIZE);
  memorymgr->CreateOutputBuf(buffer_size);
  MemoryManager::RegisterManager(memorymgr);

  auto* control_ptr = memorymgr->GetTraceControlBuf<pm4_builder::TraceControl>();

  trace_config.control_buffer_ptr = control_ptr;
  trace_config.control_buffer_size = control_size;
  trace_config.data_buffer_ptr = memorymgr->GetOutputBuf();
  trace_config.data_buffer_size = memorymgr->GetOutputBufSize();

  // The unused `se_per_xcc` local was removed here; its only effect was an
  // integer division by GetXccNumber().
  pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();

  // Generate start commands
  sqtt_builder->Begin(&start_cmd, &trace_config);
  // Generate stop commands
  sqtt_builder->End(&stop_cmd, &trace_config);

  // Copy generated commands: both streams share one command buffer, each
  // occupying a slot of its aligned size.
  const size_t start_size = aql_profile::CommandBufferMgr::Align(start_cmd.Size());
  const size_t stop_size = aql_profile::CommandBufferMgr::Align(stop_cmd.Size());
  memorymgr->CreateCmdBuf(start_size + stop_size);

  handle->handle = memorymgr->GetHandler();
  pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
  uint8_t* cmdbuf = reinterpret_cast<uint8_t*>(memorymgr->GetCmdBuf());

  copy_fn(cmdbuf, start_cmd.Data(), start_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, start_cmd.Size(), cmd_writer, &packets->start_packet);
  cmdbuf += start_size;
  copy_fn(cmdbuf, stop_cmd.Data(), stop_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, stop_cmd.Size(), cmd_writer, &packets->stop_packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
hsa_status_t _internal_aqlprofile_att_codeobj_marker(
|
||||
hsa_ext_amd_aql_pm4_packet_t* packet, aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_codeobj_data_t data, aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb, void* userdata) {
|
||||
static auto* mut = new std::shared_mutex{};
|
||||
static auto* factory_cache = new std::map<uint64_t, aql_profile::Pm4Factory*>{};
|
||||
|
||||
auto _slk = std::shared_lock{*mut};
|
||||
|
||||
if (factory_cache->find(data.agent.handle) == factory_cache->end()) {
|
||||
_slk.unlock();
|
||||
{
|
||||
auto _unique = std::unique_lock{*mut};
|
||||
factory_cache->emplace(data.agent.handle, aql_profile::Pm4Factory::Create(data.agent));
|
||||
}
|
||||
_slk.lock();
|
||||
}
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = factory_cache->at(data.agent.handle);
|
||||
pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
pm4_builder::CmdBuffer commands;
|
||||
|
||||
if (!data.isUnload) {
|
||||
sqttbuilder->InsertMarker(&commands, uint32_t(data.addr), ATT_MARKER_ADDR_LO_CHANNEL);
|
||||
sqttbuilder->InsertMarker(&commands, data.addr >> 32, ATT_MARKER_ADDR_HI_CHANNEL);
|
||||
sqttbuilder->InsertMarker(&commands, uint32_t(data.size), ATT_MARKER_SIZE_LO_CHANNEL);
|
||||
sqttbuilder->InsertMarker(&commands, data.size >> 32, ATT_MARKER_SIZE_HI_CHANNEL);
|
||||
}
|
||||
|
||||
aqlprofile_att_header_marker_t header{};
|
||||
header.bFromStart = data.fromStart;
|
||||
header.isUnload = data.isUnload;
|
||||
|
||||
if (data.id >= (1 << 30)) {
|
||||
sqttbuilder->InsertMarker(&commands, uint32_t(data.id), ATT_MARKER_ID_LO_CHANNEL);
|
||||
sqttbuilder->InsertMarker(&commands, data.id >> 32, ATT_MARKER_ID_HI_CHANNEL);
|
||||
} else
|
||||
header.legacy_id = data.id;
|
||||
|
||||
sqttbuilder->InsertMarker(&commands, header.raw, ATT_MARKER_HEADER_CHANNEL);
|
||||
|
||||
auto memorymgr = std::make_shared<CodeobjMemoryManager>(data.agent, alloc_cb, dealloc_cb,
|
||||
commands.Size(), userdata);
|
||||
MemoryManager::RegisterManager(memorymgr);
|
||||
handle->handle = memorymgr->GetHandler();
|
||||
void* cmdbuffer = memorymgr->cmd_buffer.get();
|
||||
|
||||
memcpy(cmdbuffer, commands.Data(), commands.Size());
|
||||
aql_profile::PopulateAql(cmdbuffer, commands.Size(), cmd_writer, packet);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace aql_profile_v2
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
PUBLIC_API hsa_status_t aqlprofile_att_codeobj_marker(
|
||||
hsa_ext_amd_aql_pm4_packet_t* packet, aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_codeobj_data_t data, aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb, void* userdata) {
|
||||
try {
|
||||
return aql_profile_v2::_internal_aqlprofile_att_codeobj_marker(packet, handle, data, alloc_cb,
|
||||
dealloc_cb, userdata);
|
||||
} catch (hsa_status_t err) {
|
||||
ERR_LOGGING << err;
|
||||
return err;
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
} catch (...) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// C entry point: iterate over the collected thread-trace data.
// Forwards to the internal implementation and converts anything it throws into
// a status code: a thrown hsa_status_t is logged and returned as-is, a
// std::exception is logged, and every other exception yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
                                                    aqlprofile_att_data_callback_t callback,
                                                    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_att_iterate_data(handle, callback, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    return thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
  } catch (...) {
    // Unknown exception type: nothing to log.
  }
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
// C entry point: create the thread-trace start/stop AQL packets.
// Forwards to the internal implementation and converts anything it throws into
// a status code: a thrown hsa_status_t is logged and returned as-is, a
// std::exception is logged, and every other exception yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_att_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_att_control_aql_packets_t* packets,
    aqlprofile_att_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t copy_fn,
    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_att_create_packets(
        handle, packets, profile, alloc_cb, dealloc_cb, copy_fn, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    return thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
  } catch (...) {
    // Unknown exception type: nothing to log.
  }
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
// C entry point: delete the memory manager registered under `handle`.
// Exceptions never cross the C boundary; any exception thrown by
// MemoryManager::DeleteManager() is silently dropped.
PUBLIC_API void aqlprofile_att_delete_packets(aqlprofile_handle_t handle) {
  try {
    MemoryManager::DeleteManager(handle.handle);
  } catch (...) {
    // Intentionally ignored: the function has no way to report failure.
  }
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,73 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include "linux/registers/vega20_ip_offset.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "util/soc15_common.h"
|
||||
|
||||
// Return the Vega20 register base-offset table.
//
// The table is built on first use (a function-local static initialized by a
// lambda), allocated with `new` and never freed; every call returns the same
// pointer. For each HW IP block, the segment bases of the first MAX_INSTANCE
// instances are copied into reg_offset[hwip][instance].
const reg_base_offset_table* vega20_reg_base_init() {
  // The destination table must be at least as large as the source layout.
  static_assert(HWIP_MAX_INSTANCE >= MAX_INSTANCE,
                "HWIP_MAX_INSTANCE must be greater than MAX_INSTANCE");
  static_assert(HWIP_MAX_SEGMENT >= MAX_SEGMENT,
                "HWIP_MAX_SEGMENT must be greater than MAX_SEGMENT");

  static const auto* vega20_reg_table = []() {
    auto* table = new reg_base_offset_table();

    // Copy every segment base of one IP block into the table.
    auto fill_block = [&](amd_hw_ip_block_type hwip, const auto& base) {
      for (uint32_t inst = 0; inst < MAX_INSTANCE; ++inst) {
        auto dst = std::begin(table->reg_offset[hwip][inst]);
        for (const auto& segment_base : base.instance[inst].segment) {
          *dst = segment_base;
          ++dst;
        }
      }
    };

    // Graphics / memory hubs
    fill_block(GC_HWIP, GC_BASE);
    fill_block(HDP_HWIP, HDP_BASE);
    fill_block(MMHUB_HWIP, MMHUB_BASE);
    fill_block(ATHUB_HWIP, ATHUB_BASE);
    fill_block(NBIO_HWIP, NBIO_BASE);
    fill_block(MP0_HWIP, MP0_BASE);
    fill_block(MP1_HWIP, MP1_BASE);
    fill_block(UVD_HWIP, UVD_BASE);
    fill_block(VCE_HWIP, VCE_BASE);
    fill_block(DF_HWIP, DF_BASE);
    fill_block(DCE_HWIP, DCE_BASE);
    fill_block(OSSSYS_HWIP, OSSSYS_BASE);
    fill_block(SDMA0_HWIP, SDMA0_BASE);
    fill_block(SDMA1_HWIP, SDMA1_BASE);
    fill_block(SMUIO_HWIP, SMUIO_BASE);
    // NOTE(review): NBIF is filled from NBIO_BASE, exactly as in the original
    // code; presumably intentional - confirm.
    fill_block(NBIF_HWIP, NBIO_BASE);
    fill_block(THM_HWIP, THM_BASE);
    fill_block(CLK_HWIP, CLK_BASE);
    fill_block(UMC_HWIP, UMC_BASE);
    fill_block(RSMU_HWIP, RSMU_BASE);

    return table;
  }();

  return vega20_reg_table;
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_DEF_H_
|
||||
#define _GFX10_DEF_H_
|
||||
|
||||
#include "linux/navi10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_10_3_0_offset.h"
|
||||
#include "linux/registers/gc/gc_10_3_0_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx10/gfx10_block_info.h"
|
||||
#include "gfxip/gfx10/gfx10_primitives.h"
|
||||
#include "gfxip/gfx10/gfx10_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx10;
|
||||
#endif // _GFX10_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_DEF_H_
|
||||
#define _GFX11_DEF_H_
|
||||
|
||||
#include "linux/soc21_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_11_0_0_offset.h"
|
||||
#include "linux/registers/gc/gc_11_0_0_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx11/gfx11_block_info.h"
|
||||
#include "gfxip/gfx11/gfx11_primitives.h"
|
||||
#include "gfxip/gfx11/gfx11_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx11;
|
||||
#endif // _GFX11_DEF_H_
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX12_DEF_H_
|
||||
#define _GFX12_DEF_H_
|
||||
|
||||
#include "linux/soc24_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_12_0_0_offset.h"
|
||||
#include "linux/registers/gc/gc_12_0_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx12/gfx12_block_info.h"
|
||||
using namespace gfxip::gfx12;
|
||||
using namespace gfxip::gfx12::gfx1201;
|
||||
#include "gfxip/gfx12/gfx12_primitives.h"
|
||||
#include "gfxip/gfx12/gfx12_block_table.h"
|
||||
|
||||
#endif // _GFX12_DEF_H_
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX908_DEF_H_
|
||||
#define _GFX908_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX908_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX90A_DEF_H_
|
||||
#define _GFX90A_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX90A_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX940_DEF_H_
|
||||
#define _GFX940_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX940_DEF_H_
|
||||
Some files were not shown because too many files have changed in this diff Show More
Viittaa uudessa ongelmassa
Block a user