Initial Commit
Contributors:
Ammar ELWazir <aelwazir@amd.com>
AravindanC <aravindan.cheruvally@amd.com>
Benjamin Welton <bewelton@amd.com>
Ma, Bing <Bing.Ma@amd.com>
Chun Yang <chun.yang@amd.com>
Cole Nelson <cole.nelson@amd.com>
Ethan Stewart <ethan.stewart@amd.com>
Evgeny <evgeny.shcherbakov@amd.com>
Freddy Paul <Freddy.paul@amd.com>
Giovanni Baraldi <gbaraldi@amd.com>
Gopesh Bhardwaj <Gopesh.Bhardwaj@amd.com>
Icarus Sparry <icarus.sparry@amd.com>
itrowbri <Ian.Trowbridge@amd.com>
James Edwards <JamesAdrian.Edwards@amd.com>
jatang <jatang@amd.com>
Jeremy Newton <Jeremy.Newton@amd.com>
Jonathan Kim <jonathan.kim@amd.com>
Kent Russell <kent.russell@amd.com>
Kiumars Sabeti <kiumars.sabeti@amd.com>
Lang Yu <lang.yu@amd.com>
Laurent Morichetti <laurent.morichetti@amd.com>
Mallya, Ameya Keshava <AmeyaKeshava.Mallya@amd.com>
Manjunath Jakaraddi <manjunath.jakaraddi@amd.com>
Mark Laws <markdavid.laws@amd.com>
Mohan Kumar Mithur <Mohan.KumarMithur@amd.com>
Nicholas Curtis <nicurtis@amd.com>
Nirmal Unnikrishnan <Nirmal.Unnikrishnan@amd.com>
Parag Bhandari <parag.bhandari@amd.com>
Ranjith Ramakrishnan <Ranjith.Ramakrishnan@amd.com>
Robert Gregory <Robert.Gregory@amd.com>
Saravanan Solaiyappan <saravanan.solaiyappan@amd.com>
Saurabh Verma <saurabh.verma@amd.com>
Srihari Uttanur <srihari.u@amd.com>
Srinivasan Subramanian <srinivasan.subramanian@amd.com>
Sriraksha Nagaraj <Sriraksha.Nagaraj@amd.com>
Sushma Vaddireddy <svaddire@amd.com>
Xianwei Zhang <Xianwei.Zhang@amd.com>
[ROCm/aqlprofile commit: 1ed169e30c]
Este cometimento está contido em:
cometido por
Ammar ELWazir
ascendente
b3d67ffe0d
cometimento
7c4369bde4
@@ -0,0 +1,205 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveMacros: None
|
||||
AlignConsecutiveAssignments: None
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignEscapedNewlines: Left
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: true
|
||||
SplitEmptyRecord: true
|
||||
SplitEmptyNamespace: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeConceptDeclarations: true
|
||||
BreakBeforeBraces: Attach
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BreakConstructorInitializers: BeforeColon
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 100
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: true
|
||||
DisableFormat: false
|
||||
EmptyLineBeforeAccessModifier: LogicalBlock
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: true
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<ext/.*\.h>'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*\.h>'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseLabels: true
|
||||
IndentCaseBlocks: false
|
||||
IndentGotoLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentExternBlock: AfterExternBlock
|
||||
IndentRequires: false
|
||||
IndentWidth: 2
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBinPackProtocolList: Never
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
RawStringFormats:
|
||||
- Language: Cpp
|
||||
Delimiters:
|
||||
- cc
|
||||
- CC
|
||||
- cpp
|
||||
- Cpp
|
||||
- CPP
|
||||
- 'c++'
|
||||
- 'C++'
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
- Language: TextProto
|
||||
Delimiters:
|
||||
- pb
|
||||
- PB
|
||||
- proto
|
||||
- PROTO
|
||||
EnclosingFunctions:
|
||||
- EqualsProto
|
||||
- EquivToProto
|
||||
- PARSE_PARTIAL_TEXT_PROTO
|
||||
- PARSE_TEST_PROTO
|
||||
- PARSE_TEXT_PROTO
|
||||
- ParseTextOrDie
|
||||
- ParseTextProtoOrDie
|
||||
- ParseTestProto
|
||||
- ParsePartialTestProto
|
||||
CanonicalDelimiter: pb
|
||||
BasedOnStyle: google
|
||||
ReflowComments: true
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: false
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
BitFieldColonSpacing: Both
|
||||
Standard: Auto
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 8
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
...
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions" # See documentation for possible values
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/docs/sphinx" # Location of package manifests
|
||||
open-pull-requests-limit: 10
|
||||
schedule:
|
||||
interval: "daily"
|
||||
versioning-strategy: increase
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
disabled: false
|
||||
scmId: gh-emu-rocm
|
||||
branchesToScan:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
@@ -0,0 +1,91 @@
|
||||
name: "CodeQL Advanced"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "amd-staging" ]
|
||||
pull_request:
|
||||
branches: [ "amd-staging" ]
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
env:
|
||||
EXCLUDED_PATHS: ""
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||
# - https://gh.io/supported-runners-and-hardware-resources
|
||||
# - https://gh.io/using-larger-runners (GitHub.com only)
|
||||
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
||||
runs-on: gpuless-emu-runner-set
|
||||
permissions:
|
||||
# required for all workflows
|
||||
security-events: write
|
||||
|
||||
# required to fetch internal or private CodeQL packs
|
||||
packages: read
|
||||
|
||||
# only required for workflows in private repositories
|
||||
actions: read
|
||||
contents: read
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- language: c-cpp
|
||||
build-mode: manual
|
||||
- language: python
|
||||
build-mode: none
|
||||
- language: actions
|
||||
build-mode: none
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- if: matrix.build-mode == 'manual'
|
||||
name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
apt-get update
|
||||
apt-get install -y build-essential cmake g++-11 g++-12 python3-pip libdw-dev rocm-llvm-dev
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: ${{ matrix.build-mode }}
|
||||
queries: security-extended
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
|
||||
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||
# queries: security-extended,security-and-quality
|
||||
|
||||
# If the analyze step fails for one of the languages you are analyzing with
|
||||
# "We were unable to automatically build your code", modify the matrix above
|
||||
# to set the build mode to "manual" for that language. Then modify this step
|
||||
# to build your code.
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
- if: matrix.build-mode == 'manual'
|
||||
name: Configure and Build
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run: |
|
||||
cmake -B /tmp/build -DGPU_TARGETS='gfx906,gfx90a,gfx942,gfx1101,gfx1201' -DCMAKE_PREFIX_PATH=/opt/rocm
|
||||
cmake --build /tmp/build --target all --parallel 16
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
@@ -0,0 +1,146 @@
|
||||
name: Continuous Integration
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ amd-staging, amd-mainline, amd-npi ]
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'source/docs/**'
|
||||
- 'CODEOWNERS'
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- '*.md'
|
||||
- 'source/docs/**'
|
||||
- 'CODEOWNERS'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
|
||||
# NOTE(review): values in an `env:` mapping are presumably not shell-expanded, so "$PATH" may be taken literally here instead of appending the runner's PATH -- confirm.
PATH: "/usr/bin:$PATH"
|
||||
navi3_EXCLUDE_TESTS_REGEX: ""
|
||||
vega20_EXCLUDE_TESTS_REGEX: ""
|
||||
mi200_EXCLUDE_TESTS_REGEX: ""
|
||||
mi300_EXCLUDE_TESTS_REGEX: ""
|
||||
mi300a_EXCLUDE_TESTS_REGEX: ""
|
||||
mi325_EXCLUDE_TESTS_REGEX: ""
|
||||
navi4_EXCLUDE_TESTS_REGEX: ""
|
||||
navi3_EXCLUDE_LABEL_REGEX: ""
|
||||
vega20_EXCLUDE_LABEL_REGEX: ""
|
||||
mi200_EXCLUDE_LABEL_REGEX: ""
|
||||
mi300_EXCLUDE_LABEL_REGEX: ""
|
||||
mi300a_EXCLUDE_LABEL_REGEX: ""
|
||||
mi325_EXCLUDE_LABEL_REGEX: ""
|
||||
navi4_EXCLUDE_LABEL_REGEX: ""
|
||||
|
||||
jobs:
|
||||
core-deb:
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ['navi4', 'mi300a', 'mi200', 'navi3']
|
||||
os: ['ubuntu-22.04']
|
||||
build-type: ['RelWithDebInfo']
|
||||
|
||||
runs-on: ${{ matrix.runner }}${{ github.ref == 'refs/heads/amd-npi' && '-npi' || '' }}-emu-runner-set
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
# define this for containers
|
||||
env:
|
||||
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
|
||||
CORE_EXT_RUNNER: mi300a
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
apt-get update
|
||||
apt-get install -y build-essential cmake g++-11 g++-12 python3-pip
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 --slave /usr/bin/gcov gcov /usr/bin/gcov-12
|
||||
|
||||
- name: List Files
|
||||
shell: bash
|
||||
run: |
|
||||
echo "PATH: ${PATH}"
|
||||
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
|
||||
which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
|
||||
for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done
|
||||
cat /opt/rocm/.info/version
|
||||
ls -la
|
||||
pwd
|
||||
|
||||
- name: Configure, Build, and Test
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run:
|
||||
LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH ctest --output-on-failure -V -DCTEST_SOURCE_DIRECTORY="$(pwd)"
|
||||
-DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
|
||||
-DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.runner }}-core
|
||||
-DCMAKE_CTEST_ARGUMENTS=""
|
||||
-DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
|
||||
-S ./dashboard.cmake
|
||||
|
||||
core-rpm:
|
||||
if: github.ref != 'refs/heads/amd-npi'
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ['mi300']
|
||||
os: ['rhel-emu', 'sles-emu']
|
||||
build-type: ['RelWithDebInfo']
|
||||
ci-flags: ['--linter clang-tidy']
|
||||
|
||||
runs-on: ${{ matrix.os }}-runner-set
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
# define this for containers
|
||||
env:
|
||||
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
|
||||
CORE_EXT_RUNNER: mi300
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
timeout-minutes: 10
|
||||
shell: bash
|
||||
run: |
|
||||
git config --global --add safe.directory '*'
|
||||
|
||||
- name: List Files
|
||||
shell: bash
|
||||
run: |
|
||||
echo "PATH: ${PATH}"
|
||||
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
|
||||
which-realpath() { echo -e "\n$1 resolves to $(realpath $(which $1))"; echo "$($(which $1) --version &> /dev/stdout | head -n 1)"; }
|
||||
for i in python3 git cmake ctest gcc g++ gcov; do which-realpath $i; done
|
||||
cat /opt/rocm/.info/version
|
||||
ls -la
|
||||
pwd
|
||||
|
||||
- name: Configure, Build, and Test
|
||||
timeout-minutes: 30
|
||||
shell: bash
|
||||
run:
|
||||
sudo LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH ctest --output-on-failure -V -DCTEST_SOURCE_DIRECTORY="$(pwd)"
|
||||
-DCTEST_BINARY_DIRECTORY="$(pwd)/build" -DAQLPROFILE_BUILD_NUM_JOBS="16" -DCTEST_SITE="${RUNNER_HOSTNAME}"
|
||||
-DCTEST_BUILD_NAME=PR_${{ github.ref_name }}_${{ github.repository }}-${{ matrix.os }}-${{ matrix.runner }}-core
|
||||
-DCMAKE_CTEST_ARGUMENTS=""
|
||||
-DAQLPROFILE_EXTRA_CONFIGURE_ARGS=""
|
||||
-S ./dashboard.cmake
|
||||
@@ -0,0 +1,15 @@
|
||||
name: Rocm Validation Suite KWS
|
||||
on:
|
||||
push:
|
||||
branches: [amd-staging]
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
kws:
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{github.event.pull_request.number}}
|
||||
base_branch: ${{github.base_ref}}
|
||||
@@ -0,0 +1,25 @@
|
||||
name: ROCm CI Caller
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, amd-npi, release/rocm-rel-*]
|
||||
types: [opened, reopened, synchronize]
|
||||
push:
|
||||
branches: [amd-mainline]
|
||||
workflow_dispatch:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
call-workflow:
|
||||
if: ${{ github.event_name != 'issue_comment' || github.event.comment.body == '!verify' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
input_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
input_pr_num: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 0 }}
|
||||
input_pr_url: ${{ github.event_name == 'pull_request' && github.event.pull_request.html_url || '' }}
|
||||
input_pr_title: ${{ github.event_name == 'pull_request' && github.event.pull_request.title || '' }}
|
||||
repository_name: ${{ github.repository }}
|
||||
base_ref: ${{ github.event_name == 'pull_request' && github.base_ref || github.ref }}
|
||||
trigger_event_type: ${{ github.event_name }}
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-mainline to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/aqlprofile.git"
|
||||
source_branch: "amd-mainline"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/aqlprofile.git"
|
||||
destination_branch: "amd-mainline"
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-staging to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-staging ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/aqlprofile.git"
|
||||
source_branch: "amd-staging"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/aqlprofile.git"
|
||||
destination_branch: "amd-staging"
|
||||
@@ -0,0 +1,2 @@
|
||||
build
|
||||
.cache
|
||||
@@ -0,0 +1,188 @@
|
||||
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.0)
|
||||
|
||||
## Set module name and project name.
|
||||
set ( AQLPROFILE_NAME "hsa-amd-aqlprofile" )
|
||||
set ( AQLPROFILE_TARGET "${AQLPROFILE_NAME}64" )
|
||||
set ( AQLPROFILE_LIBRARY "lib${AQLPROFILE_TARGET}" )
|
||||
project ( ${AQLPROFILE_NAME} )
|
||||
|
||||
include(GNUInstallDirs)
|
||||
## Adding default path cmake modules
|
||||
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
|
||||
## Include common cmake modules
|
||||
include ( utils )
|
||||
## Set build environment
|
||||
include ( env )
|
||||
|
||||
## Default the library install directory to "lib".
## NOTE(review): include(GNUInstallDirs) runs earlier in this file; if it has already
## cached CMAKE_INSTALL_LIBDIR, this cache set (no FORCE) may have no effect -- confirm
## the intended ordering.
set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory")
|
||||
|
||||
## Setup the package version.
|
||||
get_version ( "1.0.0" )
|
||||
message ( "-- BUILD-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
|
||||
|
||||
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
|
||||
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
|
||||
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
|
||||
set ( LIB_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( LIB_VERSION_MINOR ${VERSION_MINOR} )
|
||||
## Select the library patch level: use ROCM_PATCH_VERSION when it is set to a true
## value, otherwise fall back to the package VERSION_PATCH.
## The variable is tested by name; expanding it inside if() would make CMake
## re-evaluate its value as a condition (or as another variable's name).
if ( ROCM_PATCH_VERSION )
  set ( LIB_VERSION_PATCH ${ROCM_PATCH_VERSION} )
else()
  set ( LIB_VERSION_PATCH ${VERSION_PATCH} )
endif()
|
||||
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
|
||||
|
||||
## Set target and root/lib/test directory
|
||||
set ( TARGET_NAME "${AQLPROFILE_TARGET}" )
|
||||
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
|
||||
set ( LIB_DIR "${ROOT_DIR}/src" )
|
||||
set ( TEST_DIR "${ROOT_DIR}/test" )
|
||||
|
||||
## Build library
|
||||
include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${AQLPROFILE_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *aqlprofile*.so )
|
||||
endif ()
|
||||
|
||||
## Build tests
|
||||
enable_testing()
|
||||
include(CTest)
|
||||
set ( TEST_BINARY_DIR ${PROJECT_BINARY_DIR}/test )
|
||||
add_subdirectory ( ${TEST_DIR} ${TEST_BINARY_DIR} )
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
set ( DEST_NAME ${AQLPROFILE_NAME} )
|
||||
install ( TARGETS ${AQLPROFILE_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime )
|
||||
install ( TARGETS ${AQLPROFILE_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan )
|
||||
|
||||
option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" OFF)
|
||||
|
||||
if(FILE_REORG_BACKWARD_COMPATIBILITY)
|
||||
include(aqlprof-backward-compat.cmake)
|
||||
endif()
|
||||
|
||||
## Add the packaging directives for the runtime library.
|
||||
if ( ENABLE_ASAN_PACKAGING )
|
||||
set ( CPACK_PACKAGE_NAME ${AQLPROFILE_NAME}-asan )
|
||||
# ASAN Package requires only asan component with libraries and license file
|
||||
set ( CPACK_COMPONENTS_ALL asan )
|
||||
else()
|
||||
set ( CPACK_PACKAGE_NAME ${AQLPROFILE_NAME} )
|
||||
set ( CPACK_COMPONENTS_ALL runtime tests )
|
||||
endif()
|
||||
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
|
||||
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
set ( CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.rocm-profiler.support@amd.com>" )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}" )
|
||||
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AQLPROFILE library for AMD HSA runtime API extension support" )
|
||||
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
|
||||
|
||||
|
||||
set(CPACK_STGZ_HEADER_FILE ${PROJECT_SOURCE_DIR}/cmake_modules/CPack.STGZ_Header.sh.in)
|
||||
set(CPACK_STGZ_INCLUDE_SUBDIR OFF)
|
||||
|
||||
if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
|
||||
endif()
|
||||
|
||||
## Debian package specific variables
|
||||
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-AqlProfile-AMD-extension" )
|
||||
## Process the Debian install/remove scripts to update the CPACK variables
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
|
||||
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "DEBIAN/postinst;DEBIAN/prerm" )
|
||||
|
||||
## Process the Rpm install/remove scripts to update the CPACK variables
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
|
||||
## RPM package specific variables
|
||||
## RPM/post is generated from RPM/post.in above. It is registered as the post-install
## scriptlet so it runs once the package files are in place, matching the Debian
## postinst script configured earlier in this file.
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
## RPM/postun is generated from RPM/postun.in and runs after uninstall.
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )
|
||||
|
||||
## Debian release tag: start from "local" and let the CPACK_DEBIAN_PACKAGE_RELEASE
## environment variable replace it when that variable is defined.
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
if ( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
  set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
endif()
|
||||
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
|
||||
|
||||
## RPM package specific variables
|
||||
## RPM release tag: start from "local" and let the CPACK_RPM_PACKAGE_RELEASE
## environment variable replace it when that variable is defined.
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
  set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
endif()
|
||||
## Keep the RPM license tag in sync with the repository LICENSE file (MIT License).
set( CPACK_RPM_PACKAGE_LICENSE "MIT" )
|
||||
|
||||
# Disable build-id links (as done for rocprofiler), since they create an RPM transaction error
|
||||
set ( CPACK_RPM_SPEC_MORE_DEFINE "%define _build_id_links none
|
||||
%global __strip ${CPACK_STRIP_EXECUTABLE}
|
||||
%global __objdump ${CPACK_OBJDUMP_EXECUTABLE}
|
||||
%global __objcopy ${CPACK_OBJCOPY_EXECUTABLE}
|
||||
%global __readelf ${CPACK_READELF_EXECUTABLE}")
|
||||
|
||||
## 'dist' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process( COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE )
|
||||
|
||||
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
|
||||
string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
|
||||
endif()
|
||||
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
|
||||
if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX )
|
||||
set ( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}" )
|
||||
endif ( )
|
||||
|
||||
# Enable Component Mode & install settings.
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${AQLPROFILE_NAME}")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${AQLPROFILE_NAME}-asan")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${AQLPROFILE_NAME}-tests")
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${AQLPROFILE_NAME}")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_NAME "${AQLPROFILE_NAME}-asan")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_NAME "${AQLPROFILE_NAME}-tests")
|
||||
# Add dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON
|
||||
if(ROCM_DEP_ROCMCORE)
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "rocm-core")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "rocm-core")
|
||||
endif()
|
||||
|
||||
include ( CPack )
|
||||
|
||||
cpack_add_component(
|
||||
runtime
|
||||
DISPLAY_NAME "Runtime"
|
||||
DESCRIPTION "Dynamic libraries for the AQLProfile")
|
||||
|
||||
## ASAN component: carries the "asan" install rule for the library.
## It declares no DEPENDS; a component must not list itself as its own dependency.
cpack_add_component(
  asan
  DISPLAY_NAME "ASAN"
  DESCRIPTION "ASAN libraries for the AQLProfile")
|
||||
|
||||
cpack_add_component(
|
||||
tests
|
||||
DISPLAY_NAME "Tests"
|
||||
DESCRIPTION "Tests for the AQLProfile"
|
||||
DEPENDS runtime)
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
# Write the installed library directory to an ld.so.conf.d entry and run ldconfig.
# Does nothing unless the package was built with ENABLE_LDCONFIG=ON.
do_ldconfig() {
    if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
        # Quote the path so a prefix containing spaces or glob characters is written verbatim.
        echo "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@" > /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
        ldconfig
    fi
}
|
||||
|
||||
case "$1" in
|
||||
( configure )
|
||||
do_ldconfig
|
||||
;;
|
||||
( abort-upgrade | abort-remove | abort-deconfigure )
|
||||
echo "$1"
|
||||
;;
|
||||
( * )
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
# Delete this package's ld.so.conf.d entry and run ldconfig.
# Returns immediately (success) unless the package was built with ENABLE_LDCONFIG=ON.
rm_ldconfig() {
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    rm -f /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
    ldconfig
}
|
||||
|
||||
case "$1" in
|
||||
( remove | upgrade )
|
||||
rm_ldconfig
|
||||
;;
|
||||
( purge )
|
||||
;;
|
||||
( * )
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,5 @@
|
||||
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
|
||||
# Write the installed library directory to an ld.so.conf.d entry and run ldconfig.
# '=' is used instead of '==' because this scriptlet carries no shebang and so should
# not rely on bash-only test syntax; the path is quoted so it is written verbatim.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
    echo "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@" > /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
    ldconfig
fi
|
||||
@@ -0,0 +1,6 @@
|
||||
# Post-uninstall step: drop the linker configuration written at install time.
# The second term of the condition is a placeholder that originates from
# ENABLE_LDCONFIG = ON/OFF at package build.
# NOTE(review): $1 is expanded unquoted, so the numeric test relies on the
# package manager always passing an integer argument - confirm.
if [ $1 -le 1 ] && [ "@ENABLE_LDCONFIG@" == "ON" ]; then
# perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
rm -f /etc/ld.so.conf.d/libhsa-amd-aqlprofile64.conf
# Refresh the linker cache after removing the entry.
ldconfig
fi
|
||||
@@ -0,0 +1,67 @@
|
||||
HSA extension AMD AQL profile library.
|
||||
Provides AQL packet helper methods for performance counters (PMC) and SQ thread traces (SQTT).
|
||||
|
||||
Library supports GFX9 APIs.
|
||||
The library source tree:
|
||||
- doc - Documentation, the API specification and the presentation
|
||||
- <hsa-runtime>/inc/hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API
|
||||
- src - AMD AQL profile library sources
|
||||
- core - AQL API sources
|
||||
- pm4 - cmd/pmc/sqtt pm4 builders
|
||||
- def - Generated GFXIP definition headers
|
||||
- test - library test suite
|
||||
- ctrl - Test control
|
||||
- util - Test utils
|
||||
- simple_convolution - Simple convolution test kernel
|
||||
|
||||
Build environment:
|
||||
|
||||
$ export CMAKE_PREFIX_PATH=<path to hsa-runtime includes>:<path to hsa-runtime library>
|
||||
$ export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
$ export CMAKE_DEBUG_TRACE=1 # 1 to enable debug tracing
|
||||
|
||||
To build with the currently installed ROCm:
|
||||
|
||||
$ export CMAKE_PREFIX_PATH=/opt/rocm/lib:/opt/rocm/include/hsa
|
||||
|
||||
$ cd .../aqlprofile
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake ..
|
||||
$ make
|
||||
|
||||
To regenerate src/def headers:
|
||||
|
||||
The 'clang' compiler must be used:
|
||||
$ export CXX=/usr/bin/clang++
|
||||
$ export CC=/usr/bin/clang
|
||||
|
||||
Use the 'mygen' make target to regenerate the headers from the full set of gfxip headers:
|
||||
$ make mygen
|
||||
|
||||
To reset the generated headers:
|
||||
$ make mygenreset
|
||||
|
||||
To run the test:
|
||||
|
||||
$ cd ../aqlprofile/build
|
||||
$ export LD_LIBRARY_PATH=$PWD
|
||||
$ run.sh
|
||||
|
||||
To enable error message logging to '/tmp/aql_profile_log.txt':
|
||||
|
||||
$ export HSA_VEN_AMD_AQLPROFILE_LOG=1
|
||||
|
||||
To enable verbose tracing:
|
||||
|
||||
$ export AQLPROFILE_TRACE=1
|
||||
|
||||
To recompile kernel object:
|
||||
|
||||
$ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/opencl-c.h -nogpulib -Xclang -mlink-bitcode-file -Xclang /opt/rocm/amdgcn/bitcode/opencl.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang /opt/rocm/amdgcn/bitcode/ockl.amdgcn.bc -target amdgcn-amd-amdhsa -mcpu=gfx906 vector_add_kernel.cl -o vector_add_kernel.so
|
||||
|
||||
With newer device-libs layout, use this recompile command:
|
||||
$ /opt/rocm/opencl/bin/clang -cl-std=CL2.0 -include /opt/rocm/opencl/include/opencl-c.h --hip-device-lib-path=/opt/rocm/amdgcn/bitcode -target amdgcn-amd-amdhsa -mcpu=gfx906 vector_add_kernel.cl -o vector_add_kernel.so
|
||||
|
||||
### ROCm 5.7
|
||||
Added support for GFX10/GFX11
|
||||
@@ -0,0 +1,27 @@
|
||||
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.8)
|
||||
|
||||
set(AQLPROF_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(AQLPROF_WRAPPER_DIR ${AQLPROF_BUILD_DIR}/wrapper_dir)
|
||||
set(AQLPROF_WRAPPER_LIB_DIR ${AQLPROF_WRAPPER_DIR}/lib)
|
||||
|
||||
# Create, inside the wrapper lib directory, one symlink per library file name
# (<lib>.so, <lib>.so.<major>, <lib>.so.<full version>). Each link is produced
# by its own always-built custom target and points at the relative path
# ../../${CMAKE_INSTALL_LIBDIR}/<file name>.
function(create_library_symlink)
    file(MAKE_DIRECTORY ${AQLPROF_WRAPPER_LIB_DIR})

    set(_unversioned_name "${AQLPROFILE_LIBRARY}.so")

    foreach(file_name IN ITEMS
            "${_unversioned_name}"
            "${_unversioned_name}.${LIB_VERSION_MAJOR}"
            "${_unversioned_name}.${LIB_VERSION_STRING}")
        set(_link_source ../../${CMAKE_INSTALL_LIBDIR}/${file_name})
        set(_link_path ${AQLPROF_WRAPPER_LIB_DIR}/${file_name})
        add_custom_target(link_${file_name} ALL
            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
            COMMAND ${CMAKE_COMMAND} -E create_symlink ${_link_source} ${_link_path})
    endforeach()
endfunction()
|
||||
|
||||
# Create symlink to library files
|
||||
create_library_symlink()
|
||||
install(DIRECTORY ${AQLPROF_WRAPPER_LIB_DIR} DESTINATION ${AQLPROFILE_NAME} COMPONENT ${AQLPROFILE_LIBRARY})
|
||||
Ficheiro executável
+82
@@ -0,0 +1,82 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
|
||||
|
||||
# Directory part of the path this script was invoked with.
SRC_DIR=$(dirname "$0")
# Component name; not referenced again in the visible part of this script.
COMPONENT="aqlprofile"
# Use the caller's ROCM_PATH, falling back to /opt/rocm when unset or empty.
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
# Linker flags that add $ROCM_PATH/lib and $ROCM_PATH/lib64 as the run path.
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
# Print the list of supported command-line options, then terminate the script
# with exit status 1.
usage() {
    cat <<'USAGE_TEXT'
AQLProfile Build Script Usage:

To run ./build.sh PARAMs, PARAMs can be the following:
-h | --help For showing this message
-b | --build For compiling
-cb | --clean-build For full clean build
USAGE_TEXT
    exit 1
}
|
||||
|
||||
# Consume leading option arguments. Parsing stops at the first argument that
# does not start with '-'; help and unknown options print the usage text and
# leave with status 1.
while true; do
    case "$1" in
        -h | --help)
            usage
            exit 1
            ;;
        -b | --build)
            # Incremental build: keep an existing build directory.
            TO_CLEAN=no
            shift
            ;;
        -cb | --clean-build)
            # Clean build: the build directory is removed first.
            TO_CLEAN=yes
            shift
            ;;
        -*)
            echo -e "Wrong option \"$1\", Please use the following options:\n"
            usage
            exit 1
            ;;
        *)
            break
            ;;
    esac
done
|
||||
|
||||
umask 022

# Fill in a default for every setting the caller did not provide through the
# environment (or, for TO_CLEAN, through the command-line options).
if [ -z "$AQLPROFILE_ROOT" ]; then AQLPROFILE_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=build; fi
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="RelWithDebInfo"; fi
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
# An explicit ROCM_RPATH replaces the default run path flags.
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151"; fi

# Turn the source root into an absolute path (quoted so that paths containing
# spaces survive).
AQLPROFILE_ROOT=$(cd "$AQLPROFILE_ROOT" && echo "$PWD")

# Path expansions are quoted so that a BUILD_DIR containing whitespace or glob
# characters cannot make `rm -rf` act on unintended paths.
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf "$BUILD_DIR"; fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

# NOTE(review): CPACK_GENERATOR is passed twice below; presumably the later
# "STGZ" definition is the one that takes effect, which would make CPACKGEN a
# no-op. Both definitions are kept as they were - confirm the intended value.
cmake \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
    -DCMAKE_BUILD_TYPE="${BUILD_TYPE:-RelWithDebInfo}" \
    -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
    -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
    -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
    -DCPACK_PACKAGING_INSTALL_PREFIX="$PACKAGE_ROOT" \
    -DCPACK_GENERATOR="${CPACKGEN:-DEB;RPM}" \
    -DCMAKE_INSTALL_RPATH="${ROCM_RPATH}" \
    -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
    -DCPACK_GENERATOR="STGZ" \
    -DGPU_TARGETS="$GPU_LIST" \
    -DCPACK_OBJCOPY_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objcopy" \
    -DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
    -DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
    -DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
    "$AQLPROFILE_ROOT"

popd

# `cmake --build "$BUILD_DIR"` already runs the native build tool in the
# directory configured above, so no extra `-C <dir>` is passed. The previous
# `-C $AQLPROFILE_ROOT/$BUILD_DIR` pointed at a different directory whenever
# the script was started outside the source root or BUILD_DIR was absolute.
MAKE_OPTS="-j"

cmake --build "$BUILD_DIR" -- $MAKE_OPTS all mytest
cmake --build "$BUILD_DIR" -- $MAKE_OPTS test
cmake --build "$BUILD_DIR" -- $MAKE_OPTS package

exit 0
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/bin/sh
|
||||
# Display usage: print the installer's command-line help (with $0 expanded to
# the invoked script path) and terminate with exit status 1.
cpack_usage()
{
cat <<EOF
Usage: $0 [options]
Options: [defaults in brackets after descriptions]
--help print this message
--prefix=dir directory in which to install
EOF
exit 1
}
|
||||
# Print the message given as the first argument and terminate the installer
# with exit status 1. The argument is expanded unquoted, so it undergoes word
# splitting before being echoed.
cpack_echo_exit()
{
echo $1
exit 1
}
|
||||
# Display version: print a one-line banner with the package name, package
# version and vendor. The @...@ tokens are template placeholders.
cpack_version()
{
echo "@CPACK_PACKAGE_NAME@ Installer Version: @CPACK_PACKAGE_VERSION@, Copyright (c) @CPACK_PACKAGE_VENDOR@"
}
|
||||
# Helper for Windows-style paths: write the first argument to stdout with
# every backslash replaced by a forward slash.
cpack_fix_slashes ()
{
  echo "$1" | sed 's|\\|/|g'
}
|
||||
# Option defaults.
# NOTE(review): cpack_include_subdir starts as the non-empty value FALSE, so the
# check after the argument loop always sets interactive=FALSE - confirm that a
# fully non-interactive installer is intended.
interactive=TRUE
cpack_skip_license=FALSE
cpack_include_subdir=FALSE
# Scan the arguments (the placeholder presumably becomes "$@" once the template
# is configured). Matching is done by prefix with grep.
for a in "$@CPACK_AT_SIGN@"; do
# --prefix=<dir>: remember the target directory, with backslashes normalised.
if echo $a | grep "^--prefix=" > /dev/null 2> /dev/null; then
cpack_prefix_dir=`echo $a | sed "s/^--prefix=//"`
cpack_prefix_dir=`cpack_fix_slashes "${cpack_prefix_dir}"`
fi
# --help: cpack_usage prints the help text and exits.
if echo $a | grep "^--help" > /dev/null 2> /dev/null; then
cpack_usage
fi
# --version: print the version banner and exit with status 2.
if echo $a | grep "^--version" > /dev/null 2> /dev/null; then
cpack_version
exit 2
fi
done
# Prompts are disabled when a sub-directory choice is already set or the
# license is to be skipped.
if [ "x${cpack_include_subdir}x" != "xx" -o "x${cpack_skip_license}x" = "xTRUEx" ]
then
interactive=FALSE
fi
|
||||
# Announce the installer and pick the extraction directory: the current
# working directory unless --prefix supplied one.
cpack_version
echo "This is a self-extracting archive."
toplevel="`pwd`"
if [ "x${cpack_prefix_dir}x" != "xx" ]
then
toplevel="${cpack_prefix_dir}"
fi
echo "The archive will be extracted to: ${toplevel}"
# Interactive prompts (license acceptance and sub-directory choice).
# NOTE(review): with cpack_include_subdir initialised to FALSE earlier in this
# file, interactive appears to be FALSE whenever this point is reached - confirm.
if [ "x${interactive}x" = "xTRUEx" ]
then
echo ""
echo "If you want to stop extracting, please press <ctrl-C>."
if [ "x${cpack_skip_license}x" != "xTRUEx" ]
then
more << '____cpack__here_doc____'
@CPACK_RESOURCE_FILE_LICENSE_CONTENT@
____cpack__here_doc____
echo
echo "Do you accept the license? [yN]: "
read line leftover
# Only an answer starting with y/Y accepts; anything else aborts with status 1.
case ${line} in
y* | Y*)
cpack_license_accepted=TRUE;;
*)
echo "License not accepted. Exiting ..."
exit 1;;
esac
fi
if [ "x${cpack_include_subdir}x" = "xx" ]
then
echo "By default the @CPACK_PACKAGE_NAME@ will be installed in:"
echo " \"${toplevel}/@CPACK_PACKAGE_FILE_NAME@\""
echo "Do you want to include the subdirectory @CPACK_PACKAGE_FILE_NAME@?"
echo "Saying no will install in: \"${toplevel}\" [Yn]: "
read line leftover
# Default is to include the sub-directory; only an answer starting with n/N
# turns it off.
cpack_include_subdir=TRUE
case ${line} in
n* | N*)
cpack_include_subdir=FALSE
esac
fi
fi
# When the sub-directory was requested, extract into <toplevel>/<package file
# name> and make sure that directory exists.
if [ "x${cpack_include_subdir}x" = "xTRUEx" ]
then
toplevel="${toplevel}/@CPACK_PACKAGE_FILE_NAME@"
mkdir -p "${toplevel}"
fi
|
||||
# Report the final target directory and unpack the archive that follows this
# header inside the same file.
echo
echo "Using target directory: ${toplevel}"
echo "Extracting, please wait..."
echo ""
# take the archive portion of this file and pipe it to tar
# the NUMERIC parameter in this command should be one more
# than the number of lines in this header file
# there are tails which don't understand the "-n" argument, e.g. on SunOS
# OTOH there are tails which complain when not using the "-n" argument (e.g. GNU)
# so at first try to tail some file to see if tail fails if used with "-n"
# if so, don't use "-n"
use_new_tail_syntax="-n"
tail $use_new_tail_syntax +1 "$0" > /dev/null 2> /dev/null || use_new_tail_syntax=""
# Skip the header lines, gunzip the remainder and untar it inside the target
# directory; cpack_echo_exit runs if the pipeline reports failure.
tail $use_new_tail_syntax +###CPACK_HEADER_LENGTH### "$0" | gunzip | (cd "${toplevel}" && tar xf -) || cpack_echo_exit "Problem unpacking the @CPACK_PACKAGE_FILE_NAME@"
echo "Unpacking finished successfully"
exit 0
|
||||
#-----------------------------------------------------------
|
||||
# Start of TAR.GZ file
|
||||
#-----------------------------------------------------------;
|
||||
@@ -0,0 +1,88 @@
|
||||
## Build is not supported on the Windows platform
|
||||
if ( WIN32 )
|
||||
message ( FATAL_ERROR "Windows build is not supported." )
|
||||
endif ()
|
||||
|
||||
## Compiler Preprocessor definitions.
|
||||
add_definitions ( -DAMD_INTERNAL_BUILD )
|
||||
add_definitions ( -DHSA_LARGE_MODEL= )
|
||||
add_definitions ( -DHSA_DEPRECATED= )
|
||||
add_definitions ( -DLITTLEENDIAN_CPU=1 )
|
||||
|
||||
## Linux Compiler options
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
|
||||
|
||||
add_definitions(-DNEW_TRACE_API=1)
|
||||
|
||||
## CLANG options
|
||||
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
|
||||
endif()
|
||||
|
||||
## Enable debug trace
|
||||
if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
|
||||
add_definitions ( -DDEBUG_TRACE=1 )
|
||||
endif()
|
||||
|
||||
## Enable direct loading of AQL-profile HSA extension
|
||||
if ( DEFINED ENV{CMAKE_LD_AQLPROFILE} )
|
||||
add_definitions (-DROCP_LD_AQLPROFILE=1)
|
||||
endif()
|
||||
|
||||
## Build type
|
||||
if ( NOT DEFINED CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "" )
|
||||
if ( DEFINED ENV{CMAKE_BUILD_TYPE} )
|
||||
set ( CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE} )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
## Installation prefix path
|
||||
if ( NOT DEFINED CMAKE_PREFIX_PATH AND DEFINED ENV{CMAKE_PREFIX_PATH} )
|
||||
set ( CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH} )
|
||||
endif()
|
||||
set ( ENV{CMAKE_PREFIX_PATH} ${CMAKE_PREFIX_PATH} )
|
||||
|
||||
## Extend Compiler flags based on build type
|
||||
string ( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE )
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL debug )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
|
||||
set ( CMAKE_BUILD_TYPE "debug" )
|
||||
else ()
|
||||
set ( CMAKE_BUILD_TYPE "release" )
|
||||
endif ()
|
||||
|
||||
## Extend Compiler flags based on Processor architecture
## The variable is expanded inside quotes so that an empty
## CMAKE_SYSTEM_PROCESSOR yields a false comparison instead of a malformed
## if() expression. NBIT/NBITSTR stay unset for any other architecture.
if ( "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" )
  set ( NBIT 64 )
  set ( NBITSTR "64" )
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" )
elseif ( "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86" )
  set ( NBIT 32 )
  set ( NBITSTR "" )
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
endif ()
|
||||
|
||||
## Find hsa-runtime
|
||||
find_package(hsa-runtime64 REQUIRED HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
|
||||
|
||||
# find KFD thunk
|
||||
find_package(hsakmt REQUIRED HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
|
||||
|
||||
## Basic Tool Chain Information
|
||||
message ( "----------------NBIT: ${NBIT}" )
|
||||
message ( "-----------BuildType: ${CMAKE_BUILD_TYPE}" )
|
||||
message ( "------------Compiler: ${CMAKE_CXX_COMPILER}" )
|
||||
message ( "----Compiler-Version: ${CMAKE_CXX_COMPILER_VERSION}" )
|
||||
message ( "------------API-path: ${API_PATH}" )
|
||||
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
|
||||
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
|
||||
message ( "-CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}" )
|
||||
message ( "-CMAKE_CXX_COMPILER_VERSION: ${CMAKE_CXX_COMPILER_VERSION}" )
|
||||
message ( "---------GPU_TARGETS: ${GPU_TARGETS}" )
|
||||
@@ -0,0 +1,76 @@
|
||||
|
||||
|
||||
## Splits VERSION_STRING into its numeric components.
## Sets in the caller's scope:
##   VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH - the first, second and
##       third runs of digits found in the string (each only if present)
##   VERSION_BUILD  - the text after the first '-' (or the inherited value,
##       when one is already defined and the string has no '-')
##   VERSION_STRING - the extracted components joined with '.'
function( parse_version VERSION_STRING )

    # Text following the first dash, if there is one, is the build suffix.
    string ( FIND ${VERSION_STRING} "-" DASH_POSITION )
    if ( NOT ${DASH_POSITION} LESS 0 )
        math ( EXPR SUFFIX_START "${DASH_POSITION} + 1" )
        string ( SUBSTRING ${VERSION_STRING} ${SUFFIX_START} -1 VERSION_BUILD )
    endif ()

    # Every run of digits, in order of appearance.
    string ( REGEX MATCHALL "[0123456789]+" NUMBER_LIST ${VERSION_STRING} )
    list ( LENGTH NUMBER_LIST NUMBER_COUNT )

    # Hand out up to three numbers as MAJOR, MINOR and PATCH while building
    # the dotted version string.
    set ( COMPONENT_INDEX 0 )
    foreach ( COMPONENT MAJOR MINOR PATCH )
        if ( ${NUMBER_COUNT} GREATER ${COMPONENT_INDEX} )
            list ( GET NUMBER_LIST ${COMPONENT_INDEX} COMPONENT_VALUE )
            set ( VERSION_${COMPONENT} ${COMPONENT_VALUE} PARENT_SCOPE )
            if ( ${COMPONENT_INDEX} EQUAL 0 )
                set ( TEMP_VERSION_STRING "${COMPONENT_VALUE}" )
            else ()
                set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${COMPONENT_VALUE}" )
            endif ()
        endif ()
        math ( EXPR COMPONENT_INDEX "${COMPONENT_INDEX} + 1" )
    endforeach ()

    if ( DEFINED VERSION_BUILD )
        set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
    endif ()

    set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )

endfunction ()
|
||||
|
||||
## Gets the current version of the repository
## using versioning tags and git describe.
## Passes back a packaging version string
## and a library version string.
##
## DEFAULT_VERSION_STRING is parsed first and supplies the result whenever git
## is unavailable or `git describe` does not succeed. On success the described
## tag is parsed instead. Results are returned to the caller's scope through
## VERSION_STRING, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH and
## VERSION_BUILD.
function ( get_version DEFAULT_VERSION_STRING )

    parse_version ( ${DEFAULT_VERSION_STRING} )

    find_program ( GIT NAMES git )

    if ( GIT )

        # The program and each of its arguments must be separate
        # execute_process() arguments: a single quoted string is treated as
        # the executable name, and no shell is involved to interpret a
        # redirection. ERROR_QUIET takes the place of `2>/dev/null`.
        execute_process ( COMMAND ${GIT} describe --dirty --long --match "[0-9]*"
                          WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                          OUTPUT_VARIABLE GIT_TAG_STRING
                          OUTPUT_STRIP_TRAILING_WHITESPACE
                          ERROR_QUIET
                          RESULT_VARIABLE RESULT )

        # RESULT may hold an error message rather than a number, so it is
        # compared as a quoted string. An empty description is ignored.
        if ( "${RESULT}" STREQUAL "0" AND NOT "${GIT_TAG_STRING}" STREQUAL "" )

            parse_version ( ${GIT_TAG_STRING} )

        endif ()

    endif ()

    set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
    set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
    set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
    set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
    set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )

endfunction()
|
||||
@@ -0,0 +1,81 @@
|
||||
# Wrapper around ctest_submit(). All arguments supplied by the caller (for
# example PARTS ... and RETURN_VALUE <var>) are forwarded, so that each call
# submits the parts it names and its return variable is actually set.
macro(dashboard_submit)
    ctest_submit(${ARGN})
endmacro()
|
||||
|
||||
set(CTEST_PROJECT_NAME "aqlprofile-emu")
|
||||
set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC")
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "cdash.cdash.svc.cluster.local:8080")
|
||||
set(CTEST_DROP_LOCATION "/submit.php?project=${CTEST_PROJECT_NAME}")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||
|
||||
set(CTEST_UPDATE_TYPE git)
|
||||
set(CTEST_UPDATE_VERSION_ONLY TRUE)
|
||||
set(CTEST_GIT_COMMAND git)
|
||||
set(CTEST_GIT_INIT_SUBMODULES FALSE)
|
||||
|
||||
set(CTEST_OUTPUT_ON_FAILURE TRUE)
|
||||
set(CTEST_USE_LAUNCHERS TRUE)
|
||||
set(CTEST_VERBOSE ON)
|
||||
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_PASSED_TEST_OUTPUT_SIZE "51200")
|
||||
|
||||
if(NOT DEFINED CTEST_SOURCE_DIRECTORY)
|
||||
set(CTEST_SOURCE_DIRECTORY ".")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CTEST_BINARY_DIRECTORY)
|
||||
set(CTEST_BINARY_DIRECTORY "./build")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED AQLPROFILE_EXTRA_CONFIGURE_ARGS)
|
||||
set(AQLPROFILE_EXTRA_CONFIGURE_ARGS "")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED AQLPROFILE_BUILD_NUM_JOBS)
|
||||
set(AQLPROFILE_BUILD_NUM_JOBS "16")
|
||||
endif()
|
||||
|
||||
# Configure command run by ctest_configure(). The ROCm location comes from
# ROCM_PATH (which defaults to /opt/rocm earlier in this script) instead of a
# hard-coded path, so overriding ROCM_PATH affects the prefix and install paths.
set(CTEST_CONFIGURE_COMMAND "cmake -B ${CTEST_BINARY_DIRECTORY} -DCMAKE_BUILD_TYPE='RelWithDebInfo' -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_INSTALL_PREFIX=${ROCM_PATH} -DCPACK_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} -DCPACK_GENERATOR='DEB;RPM;STGZ' -DGPU_TARGETS='gfx906,gfx90a,gfx942,gfx1101,gfx1201' ${AQLPROFILE_EXTRA_CONFIGURE_ARGS} ${CTEST_SOURCE_DIRECTORY}")
|
||||
set(CTEST_BUILD_COMMAND "cmake --build \"${CTEST_BINARY_DIRECTORY}\" -- -j ${AQLPROFILE_BUILD_NUM_JOBS} all mytest")
|
||||
|
||||
if(NOT DEFINED CTEST_SITE)
|
||||
set(CTEST_SITE "${HOSTNAME}")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CTEST_BUILD_NAME)
|
||||
set(CTEST_BUILD_NAME "aqlprofile-amd-staging-ubuntu-${RUNNER_HOSTNAME}-core")
|
||||
endif()
|
||||
|
||||
# handle_error(<message> <result-variable-name>)
# If the variable named by the second argument does not equal 0, call
# dashboard_submit for the Done part and stop the script with a FATAL_ERROR
# that contains <message> and the offending value.
macro(handle_error _message _ret)
# ${${_ret}} dereferences the variable whose name was passed in.
if(NOT ${${_ret}} EQUAL 0)
dashboard_submit(PARTS Done RETURN_VALUE _submit_ret)
message(FATAL_ERROR "${_message} failed: ${${_ret}}")
endif()
endmacro()
|
||||
|
||||
# Dashboard pipeline: start, update, configure, build and test. After each
# stage the collected parts are submitted and the stage's return value is
# checked; handle_error aborts the run on a non-zero value.
ctest_start(Continuous)

ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _update_ret)
# Report a failure of this stage under its own name ("Update"), not as a
# configure failure.
# NOTE(review): ctest_update documents its return value as the number of files
# updated (or -1 on error); the non-zero check presumably relies on
# CTEST_UPDATE_VERSION_ONLY never updating files - confirm.
handle_error("Update" _update_ret)

ctest_configure(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _configure_ret)
dashboard_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret)

handle_error("Configure" _configure_ret)

ctest_build(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _build_ret)
dashboard_submit(PARTS Build RETURN_VALUE _submit_ret)

handle_error("Build" _build_ret)

ctest_test(SOURCE "${CTEST_SOURCE_DIRECTORY}" BUILD "${CTEST_BINARY_DIRECTORY}" RETURN_VALUE _test_ret)
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)

handle_error("Testing" _test_ret)

dashboard_submit(PARTS Done RETURN_VALUE _submit_ret)
|
||||
@@ -0,0 +1,211 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_BLOCKINFO_H_
|
||||
#define _GFX10_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
// Defines GFX10-specific block info, such as the GC cache blocks.
// All blocks common with GFX9 are inherited from the GFX9 space.
|
||||
// Enumeration of GFX10 hardware counter blocks.
// Enumerators carry no explicit values, so they are numbered consecutively
// from 0 in declaration order; inserting or reordering entries changes the
// numeric value of every later entry.
enum CounterBlockId {
CbCounterBlockId,
CpcCounterBlockId,
CpfCounterBlockId,
CpgCounterBlockId,
DbCounterBlockId,
GdsCounterBlockId,
GrbmCounterBlockId,
GrbmSeCounterBlockId,
IaCounterBlockId,
PaScCounterBlockId,
PaSuCounterBlockId,
SpiCounterBlockId,
SqCounterBlockId,
SqGsCounterBlockId,
SqVsCounterBlockId,
SqPsCounterBlockId,
SqHsCounterBlockId,
SqCsCounterBlockId,
SxCounterBlockId,
TaCounterBlockId,
TcaCounterBlockId,
TccCounterBlockId,
TcsCounterBlockId,
TdCounterBlockId,
VgtCounterBlockId,
WdCounterBlockId,

// MC blocks
GceaCounterBlockId,
AtcCounterBlockId,
AtcL2CounterBlockId,
McVmL2CounterBlockId,
RpbCounterBlockId,
RmiCounterBlockId,
Gl1aCounterBlockId,
Gl1cCounterBlockId,
Gl2aCounterBlockId,
Gl2cCounterBlockId,
GcrCounterBlockId,
GusCounterBlockId,

// SDMA block
SdmaCounterBlockId,
// UMC block
UmcCounterBlockId,

// Counters retrieved by KFD
IommuV2CounterBlockId,
KernelDriverCounterBlockId,

CpPipeStatsCounterBlockId,
HwInfoCounterBlockId,

// Aliases for the first and last real block id (not additional blocks).
FirstCounterBlockId = CbCounterBlockId,
LastCounterBlockId = HwInfoCounterBlockId,
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
// SPM global block ids; every enumerator has an explicit value, 0 through 7.
enum SpmGlobalBlockId {
SPM_GLOBAL_BLOCK_NAME_CPG = 0,
SPM_GLOBAL_BLOCK_NAME_CPC = 1,
SPM_GLOBAL_BLOCK_NAME_CPF = 2,
SPM_GLOBAL_BLOCK_NAME_GDS = 3,
SPM_GLOBAL_BLOCK_NAME_TCC = 4,
SPM_GLOBAL_BLOCK_NAME_TCA = 5,
SPM_GLOBAL_BLOCK_NAME_IA = 6,
SPM_GLOBAL_BLOCK_NAME_TCS = 7,
};
|
||||
|
||||
// SPM shader engine block ids; every enumerator has an explicit value,
// 0 through 10.
enum SpmSeBlockId {
SPM_SE_BLOCK_NAME_CB = 0,
SPM_SE_BLOCK_NAME_DB = 1,
SPM_SE_BLOCK_NAME_PA = 2,
SPM_SE_BLOCK_NAME_SX = 3,
SPM_SE_BLOCK_NAME_SC = 4,
SPM_SE_BLOCK_NAME_TA = 5,
SPM_SE_BLOCK_NAME_TD = 6,
SPM_SE_BLOCK_NAME_TCP = 7,
SPM_SE_BLOCK_NAME_SPI = 8,
SPM_SE_BLOCK_NAME_SQG = 9,
SPM_SE_BLOCK_NAME_VGT = 10,
};
|
||||
|
||||
// Number of block instances
|
||||
static const uint32_t CbCounterBlockNumInstances = 4;
|
||||
static const uint32_t DbCounterBlockNumInstances = 4;
|
||||
static const uint32_t TaCounterBlockNumInstances = 16;
|
||||
static const uint32_t TdCounterBlockNumInstances = 16;
|
||||
static const uint32_t TcpCounterBlockNumInstances = 16;
|
||||
static const uint32_t TcaCounterBlockNumInstances = 2;
|
||||
static const uint32_t TccCounterBlockNumInstances = 16;
|
||||
static const uint32_t SdmaCounterBlockNumInstances = 2;
|
||||
// MI100 has 8 SDMA instances
|
||||
static const uint32_t SdmaCounterBlockMaxInstances = 8;
|
||||
static const uint32_t UmcCounterBlockMaxInstances = 32;
|
||||
static const uint32_t RmiCounterBlockNumInstances = 8;
|
||||
static const uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
static const uint32_t CbCounterBlockNumCounters = 4;
|
||||
static const uint32_t CpcCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpfCounterBlockNumCounters = 2;
|
||||
static const uint32_t CpgCounterBlockNumCounters = 2;
|
||||
static const uint32_t DbCounterBlockNumCounters = 4;
|
||||
static const uint32_t GdsCounterBlockNumCounters = 4;
|
||||
static const uint32_t GrbmCounterBlockNumCounters = 2;
|
||||
static const uint32_t GrbmSeCounterBlockNumCounters = 4;
|
||||
static const uint32_t IaCounterBlockNumCounters = 4;
|
||||
static const uint32_t PaSuCounterBlockNumCounters = 4;
|
||||
static const uint32_t PaScCounterBlockNumCounters = 8;
|
||||
static const uint32_t RlcCounterBlockNumCounters = 2;
|
||||
static const uint32_t SdmaCounterBlockNumCounters = 2;
|
||||
static const uint32_t UmcCounterBlockNumCounters = 5;
|
||||
static const uint32_t SpiCounterBlockNumCounters = 6;
|
||||
static const uint32_t SqCounterBlockNumCounters = 8;
|
||||
static const uint32_t SxCounterBlockNumCounters = 4;
|
||||
static const uint32_t TaCounterBlockNumCounters = 2;
|
||||
static const uint32_t TcaCounterBlockNumCounters = 4;
|
||||
static const uint32_t TccCounterBlockNumCounters = 4;
|
||||
static const uint32_t TcpCounterBlockNumCounters = 4;
|
||||
static const uint32_t TdCounterBlockNumCounters = 2;
|
||||
static const uint32_t VgtCounterBlockNumCounters = 4;
|
||||
static const uint32_t WdCounterBlockNumCounters = 4;
|
||||
static const uint32_t GceaCounterBlockNumCounters = 2;
|
||||
static const uint32_t AtcCounterBlockNumCounters = 4;
|
||||
static const uint32_t AtcL2CounterBlockNumCounters = 2;
|
||||
static const uint32_t McVmL2CounterBlockNumCounters = 8;
|
||||
static const uint32_t RpbCounterBlockNumCounters = 4;
|
||||
static const uint32_t RmiCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl1cCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2aCounterBlockNumCounters = 4;
|
||||
static const uint32_t Gl2cCounterBlockNumCounters = 4;
|
||||
static const uint32_t GcrCounterBlockNumCounters = 2;
|
||||
static const uint32_t GusCounterBlockNumCounters = 2;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent = CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_ME2_DC1_SPI_BUSY;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CPF_UTCL2IU_STALL;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_CPG_UTCL2IU_STALL;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_GWS_BYPASS;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
// static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
// static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SC_SC_SPI_EVENT;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent = SX_PERF_SEL_DB3_SIZE;
|
||||
// static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
// static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
// static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
// static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
// static const uint32_t TdCounterBlockMaxEvent = TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt;
// static const uint32_t VgtCounterBlockMaxEvent = vgt_perf_sclk_te11_vld;
// static const uint32_t WdCounterBlockMaxEvent = wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent = RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 24;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 83;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 91;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 254;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 142;
|
||||
static const uint32_t GusCounterBlockMaxEvent = 89;
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_BLOCKINFO_H_
|
||||
@@ -0,0 +1,425 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_BLOCKTABLE_H_
|
||||
#define _GFX10_BLOCKTABLE_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
|
||||
/*
|
||||
* CPC
|
||||
*/
|
||||
static const CounterRegInfo CpcCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmCPC_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* CPF
|
||||
*/
|
||||
static const CounterRegInfo CpfCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmCPF_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GDS
|
||||
*/
|
||||
static const CounterRegInfo GdsCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGDS_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GRBM
|
||||
*/
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGRBM_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GRBM_SE
|
||||
*/
|
||||
static const CounterRegInfo GrbmSeCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE0_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE1_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE2_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGRBM_SE3_PERFCOUNTER_HI)}};
|
||||
|
||||
/*
|
||||
* SPI
|
||||
*/
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER3_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER4_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_LO), REG_32B_ADDR(GC, 0, mmSPI_PERFCOUNTER5_HI)}};
|
||||
|
||||
/*
|
||||
* SQ
|
||||
*/
|
||||
static const CounterRegInfo SqCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_SELECT), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL),
|
||||
REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_LO), REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_HI)}};
|
||||
|
||||
/*
|
||||
* SX
|
||||
*/
|
||||
static const CounterRegInfo SxCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmSX_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GCEA
|
||||
*/
|
||||
static const CounterRegInfo GceaCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER0_CFG),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER1_CFG),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_RSLT_CNTL), REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_LO),
|
||||
REG_32B_ADDR(GC, 0, mmGCEA_PERFCOUNTER_HI)}};
|
||||
|
||||
// Define GFX10 specific blocks table entries like GC caches blocks
|
||||
/*
|
||||
* GCR
|
||||
*/
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_SELECT), REG_32B_ADDR(GC, 0, mmGCR_GENERAL_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_SELECT), REG_32B_ADDR(GC, 0, mmGCR_GENERAL_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGCR_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GL1A
|
||||
*/
|
||||
static const CounterRegInfo Gl1aCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL1A_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GL1C
|
||||
*/
|
||||
static const CounterRegInfo Gl1cCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL1C_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GL2A
|
||||
*/
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL2A_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GL2C
|
||||
*/
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER1_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER2_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_LO), REG_32B_ADDR(GC, 0, mmGL2C_PERFCOUNTER3_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* GUS
|
||||
*/
|
||||
static const CounterRegInfo GusCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER0_CFG), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_LO), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER1_CFG), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL),
|
||||
REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER2_LO), REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER2_HI)},
|
||||
};
|
||||
|
||||
/*
|
||||
* TA
|
||||
*/
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {
|
||||
{REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER0_HI)},
|
||||
{REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_SELECT), REG_32B_NULL,
|
||||
REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, mmTA_PERFCOUNTER1_HI)}};
|
||||
|
||||
// Counter block CPC
|
||||
static const GpuBlockInfo CpcCounterBlockInfo = {
|
||||
"CPC",
|
||||
CpcCounterBlockId,
|
||||
1,
|
||||
CpcCounterBlockMaxEvent,
|
||||
CpcCounterBlockNumCounters,
|
||||
CpcCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_CPC_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*CpcBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPC};
|
||||
// Counter block CPF
|
||||
static const GpuBlockInfo CpfCounterBlockInfo = {
|
||||
"CPF",
|
||||
CpfCounterBlockId,
|
||||
1,
|
||||
CpfCounterBlockMaxEvent,
|
||||
CpfCounterBlockNumCounters,
|
||||
CpfCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_CPF_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*CpfBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_CPF};
|
||||
// Counter block GDS
|
||||
static const GpuBlockInfo GdsCounterBlockInfo = {
|
||||
"GDS",
|
||||
GdsCounterBlockId,
|
||||
1,
|
||||
GdsCounterBlockMaxEvent,
|
||||
GdsCounterBlockNumCounters,
|
||||
GdsCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GDS_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,
|
||||
NULL /*GdsBlockDelayInfo*/,
|
||||
SPM_GLOBAL_BLOCK_NAME_GDS};
|
||||
// Counter block GRBM
|
||||
static const GpuBlockInfo GrbmCounterBlockInfo = {
|
||||
"GRBM",
|
||||
GrbmCounterBlockId,
|
||||
1,
|
||||
GrbmCounterBlockMaxEvent,
|
||||
GrbmCounterBlockNumCounters,
|
||||
GrbmCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GRBM_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockGRBMAttr};
|
||||
// Counter block GRBMSE
|
||||
static const GpuBlockInfo GrbmSeCounterBlockInfo = {
|
||||
"GRBM_SE",
|
||||
GrbmSeCounterBlockId,
|
||||
1,
|
||||
GrbmSeCounterBlockMaxEvent,
|
||||
GrbmSeCounterBlockNumCounters,
|
||||
GrbmSeCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_GRBM_SE0_PERFCOUNTER_SELECT,
|
||||
CounterBlockDfltAttr};
|
||||
// Counter block SPI
|
||||
static const GpuBlockInfo SpiCounterBlockInfo = {
|
||||
"SPI",
|
||||
SpiCounterBlockId,
|
||||
1,
|
||||
SpiCounterBlockMaxEvent,
|
||||
SpiCounterBlockNumCounters,
|
||||
SpiCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_SPI_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSPIAttr,
|
||||
NULL /*SpiBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_SPI};
|
||||
// Counter block SQ
|
||||
static const GpuBlockInfo SqCounterBlockInfo = {"SQ",
|
||||
SqCounterBlockId,
|
||||
1,
|
||||
SqCounterBlockMaxEvent,
|
||||
SqCounterBlockNumCounters,
|
||||
SqCounterRegAddr,
|
||||
gfx10_cntx_prim::sq_select_value,
|
||||
CounterBlockSeAttr | CounterBlockSqAttr,
|
||||
NULL,
|
||||
SPM_SE_BLOCK_NAME_SQG};
|
||||
// Counter block SX
|
||||
static const GpuBlockInfo SxCounterBlockInfo = {
|
||||
"SX",
|
||||
SxCounterBlockId,
|
||||
1,
|
||||
SxCounterBlockMaxEvent,
|
||||
SxCounterBlockNumCounters,
|
||||
SxCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_SX_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockCleanAttr,
|
||||
NULL /*SxBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_SX};
|
||||
// Counter block GCEA
|
||||
static const GpuBlockInfo GceaCounterBlockInfo = {
|
||||
"GCEA",
|
||||
GceaCounterBlockId,
|
||||
GceaCounterBlockNumInstances,
|
||||
GceaCounterBlockMaxEvent,
|
||||
GceaCounterBlockNumCounters,
|
||||
GceaCounterRegAddr,
|
||||
gfx10_cntx_prim::mc_select_value_GCEA_PERFCOUNTER0_CFG,
|
||||
CounterBlockMcAttr};
|
||||
// Counter block GL1A
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {
|
||||
"GL1A",
|
||||
Gl1aCounterBlockId,
|
||||
8,
|
||||
Gl1aCounterBlockMaxEvent,
|
||||
Gl1aCounterBlockNumCounters,
|
||||
Gl1aCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL1C
|
||||
static const GpuBlockInfo Gl1cCounterBlockInfo = {
|
||||
"GL1C",
|
||||
Gl1cCounterBlockId,
|
||||
8,
|
||||
Gl1cCounterBlockMaxEvent,
|
||||
Gl1cCounterBlockNumCounters,
|
||||
Gl1cCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL2A
|
||||
static const GpuBlockInfo Gl2aCounterBlockInfo = {
|
||||
"GL2A",
|
||||
Gl2aCounterBlockId,
|
||||
32,
|
||||
Gl2aCounterBlockMaxEvent,
|
||||
Gl2aCounterBlockNumCounters,
|
||||
Gl2aCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GL2C
|
||||
static const GpuBlockInfo Gl2cCounterBlockInfo = {
|
||||
"GL2C",
|
||||
Gl2cCounterBlockId,
|
||||
32,
|
||||
Gl2cCounterBlockMaxEvent,
|
||||
Gl2cCounterBlockNumCounters,
|
||||
Gl2cCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GCR
|
||||
static const GpuBlockInfo GcrCounterBlockInfo = {
|
||||
"GCR",
|
||||
GcrCounterBlockId,
|
||||
1,
|
||||
GcrCounterBlockMaxEvent,
|
||||
GcrCounterBlockNumCounters,
|
||||
GcrCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockTcAttr};
|
||||
// Counter block GUS
|
||||
static const GpuBlockInfo GusCounterBlockInfo = {
|
||||
"GUS",
|
||||
GusCounterBlockId,
|
||||
1,
|
||||
GusCounterBlockMaxEvent,
|
||||
GusCounterBlockNumCounters,
|
||||
GusCounterRegAddr,
|
||||
gfx10_cntx_prim::mc_select_value_RPB_PERFCOUNTER0_CFG,
|
||||
CounterBlockGusAttr};
|
||||
// Counter block TA
|
||||
static const GpuBlockInfo TaCounterBlockInfo = {
|
||||
"TA",
|
||||
TaCounterBlockId,
|
||||
TaCounterBlockNumInstances,
|
||||
235 /*TaCounterBlockMaxEvent*/,
|
||||
TaCounterBlockNumCounters,
|
||||
TaCounterRegAddr,
|
||||
gfx10_cntx_prim::select_value_TA_PERFCOUNTER0_SELECT,
|
||||
CounterBlockSeAttr | CounterBlockTcAttr,
|
||||
NULL /*TaBlockDelayInfo*/,
|
||||
SPM_SE_BLOCK_NAME_TA};
|
||||
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,685 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_PRIMITIVES_H_
|
||||
#define _GFX10_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// NOTE(review): presumably advertises that this header provides the SQ
// thread-trace (SQTT) primitives; confirm against the code that tests it.
#define SQTT_PRIM_ENABLED 1
|
||||
|
||||
// GCR_CNTL field constants, taken from gfx10_mask.h.
#define GCR_CNTL__SEQ_FORWARD 0x00010000L
#define GCR_CNTL__SEQ_MASK    0x00030000L
#define GCR_CNTL__GL2_WB_MASK 0x00008000L
|
||||
|
||||
// COPY_DATA selectors, taken from gfx10_pm4defs.h.
#define COPY_DATA_SEL_REG 0                   ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4  ///< Privileged memory performance counter
#define COPY_DATA_SEL_COUNT_1DW 0             ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter select-register value lambdas.
// select_value(reg_name) expands to a captureless lambda that places
// counter_des.id into the PERF_SEL field of reg_name and returns it as uint32_t.
#define select_value(reg_name)                                     \
  [](const counter_des_t& counter_des) -> uint32_t {               \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
  }
|
||||
// mc_select_value(reg_name) expands to a captureless lambda that returns, as
// uint32_t, the OR of three reg_name fields: PERF_SEL = counter_des.id,
// PERF_MODE = PERFMON_COUNTER_MODE_ACCUM and ENABLE = 1.
#define mc_select_value(reg_name)                                              \
  [](const counter_des_t& counter_des) -> uint32_t {                           \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |            \
           SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \
           SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                            \
  }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx10 {
|
||||
|
||||
class gfx10_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 10;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
// Addresses of the GRBM index, perf-count enable, perfmon control and
// thread-trace enable registers in the GC block.
static constexpr Register GRBM_GFX_INDEX_ADDR =
    REG_32B_ADDR(GC, 0, mmGRBM_GFX_INDEX);
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
    REG_32B_ADDR(GC, 0, mmCOMPUTE_PERFCOUNT_ENABLE);
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_PERFMON_CLK_CNTL);
static constexpr Register CP_PERFMON_CNTL_ADDR =
    REG_32B_ADDR(GC, 0, mmCP_PERFMON_CNTL);
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
    REG_32B_ADDR(GC, 0, mmCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
// Bit masks for the MC-style PERFCOUNTER_RSLT_CNTL register.
// constexpr (implicitly inline in C++17) so the constants can be bound to a
// const reference without needing an out-of-class definition.
static constexpr uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
static constexpr uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SQ perf-counter and thread-trace register addresses.
// Entries written as `{}` are value-initialised Register objects.
// NOTE(review): presumably those registers have no gfx10 counterpart - confirm.
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR{};
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
// The BASE / SIZE names map onto the BUF0_BASE / BUF0_SIZE registers.
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BUF0_BASE);
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BUF0_SIZE);
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CTRL);
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
|
||||
// Thread-trace high-water value. constexpr (implicitly inline in C++17) so it
// can be bound to a const reference without an out-of-class definition.
static constexpr uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
|
||||
// Thread-trace status, dropped-counter and write-pointer register addresses.
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
// The CNTR name maps onto the DROPPED_CNTR register.
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_DROPPED_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_WPTR);
|
||||
// SQ_THREAD_TRACE_STATUS register with its offset rebased by subtracting
// UCONFIG_SPACE_START; computed at compile time by an immediately invoked lambda.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = [] {
  Register status_reg = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
  status_reg.offset -= UCONFIG_SPACE_START;
  return status_reg;
}();
|
||||
// Thread-trace buffer alignment shift (1 << 12 = 4096). constexpr (implicitly
// inline in C++17) so it can be bound to a const reference without an
// out-of-class definition.
static constexpr uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
// Address of the GUS perf-counter result-control register.
static constexpr Register GUS_PERFCOUNTER_RSLT_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmGUS_PERFCOUNTER_RSLT_CNTL);
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM register addresses: control, ring buffer, segment size and the
// global / SE muxsel address-data pairs.
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_MC_CNTL);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_SEGMENT_SIZE);
// Value-initialised: no register address is assigned for the CORE1 variant here.
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);
|
||||
// SPM sample layout constants.
// NOTE(review): TIMESTAMP_SIZE16 is presumably counted in 16-bit units - confirm.
// constexpr (implicitly inline in C++17) so the constants can be bound to a
// const reference without needing an out-of-class definition.
static constexpr uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
static constexpr uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Addresses of the four SQ thread-trace USERDATA registers.
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// Always returns REG_32B_NULL; the index argument is ignored.
static Register sqtt_perfcounter_addr(uint32_t /*index*/) {
  return REG_32B_NULL;
}
|
||||
|
||||
// 16-bit value viewable either as a raw word (`data`) or as three bit-fields
// (`gfx`): a 6-bit counter, a 5-bit block and a 5-bit instance.
// NOTE(review): presumably the SPM muxsel encoding - confirm against its users.
union mux_info_t {
  uint16_t data;  // raw 16-bit view
  struct {
    uint16_t counter : 6;
    uint16_t block : 5;
    uint16_t instance : 5;
  } gfx;  // field view
};
|
||||
|
||||
// SQ block identifiers: the counter-block id, and the literal id 9 used for SPM.
// constexpr (implicitly inline in C++17) so the constants can be bound to a
// const reference without needing an out-of-class definition.
static constexpr uint32_t SQ_BLOCK_ID = SqCounterBlockId;
static constexpr uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the low 32 bits of a 64-bit value.
static uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v & 0xFFFFFFFFull);
}
|
||||
// Returns the high 32 bits of a 64-bit value.
static uint32_t High32(const uint64_t& v) {
  const uint64_t upper = v >> 32;
  return static_cast<uint32_t>(upper);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index{0};
|
||||
grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t&, const uint32_t&, const uint32_t&) {
  // Stub: all three arguments are ignored and the result is always 0.
  return uint32_t{0};
}
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t&, const uint32_t&,
                                                const uint32_t&, const uint32_t&) {
  // Stub: all four arguments are ignored and the result is always 0.
  return uint32_t{0};
}
|
||||
|
||||
// CP_PERFMON_CNTL value used to reset the counters: all bits clear.
static uint32_t cp_perfmon_cntl_reset_value() {
  return uint32_t{0};
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE set to 1 and all other
// fields zero.
static uint32_t cp_perfmon_cntl_start_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE set to 2 and all other
// fields zero.
static uint32_t cp_perfmon_cntl_stop_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
}
|
||||
|
||||
// CP_PERFMON_CNTL value to sample counters for reading (PERFMON_STATE = 1 with sampling enabled)
|
||||
// Returns a CP_PERFMON_CNTL value with PERFMON_STATE set to 1 (the same state
// the start value uses) and PERFMON_SAMPLE_ENABLE set to 1.
static uint32_t cp_perfmon_cntl_read_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
         SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
// Returns a COMPUTE_PERFCOUNT_ENABLE value with PERFCOUNT_ENABLE set to 1.
static uint32_t cp_perfcount_enable_value() {
  return SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
// Returns a COMPUTE_PERFCOUNT_ENABLE value with PERFCOUNT_ENABLE set to 0.
static uint32_t cp_perfcount_disable_value() {
  return SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
// Builds an SQ_PERFCOUNTER0_SELECT value for the given counter: PERF_SEL is
// set to counter_des.id and the SQC bank mask is set to 0xF.
static uint32_t sq_select_value(const counter_des_t& counter_des) {
  uint32_t sq_perfcounter0_select =
      SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
#if defined(SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK__SHIFT)
  sq_perfcounter0_select |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF);
#else
  // The register headers do not describe SQC_BANK_MASK here, so the bits
  // 0xF000 are OR-ed in directly.
  sq_perfcounter0_select |= 0xF000;
#endif
  return sq_perfcounter0_select;
}
|
||||
|
||||
// SPM variant of the SQ select value: PERF_SEL is set to counter_des.id,
// SPM_MODE is set to 3 and the SQC bank mask is set to 0xF.
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
  uint32_t sq_perfcounter0_select =
      SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
      SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3);  // PERFMON_SPM_MODE_32BIT_CLAMP
#if defined(SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK__SHIFT)
  sq_perfcounter0_select |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF);
#else
  // The register headers do not describe SQC_BANK_MASK here, so the bits
  // 0xF000 are OR-ed in directly.
  sq_perfcounter0_select |= 0xF000;
#endif
  return sq_perfcounter0_select;
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx10
|
||||
// Stub: the counter descriptor is ignored and zero is returned.
static uint32_t sq_mask_value(const counter_des_t&) {
  return uint32_t{0};
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter_ctrl{0};
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} else if (block_id == SqVsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} else if (block_id == SqPsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// Checks the combined attribute bits of a counter set. When SQ_CONFLICT_CHECK
// is 1, the process is aborted if both CounterBlockSqAttr and
// CounterBlockTcAttr are present in counters_vec_attr. Otherwise this is a
// no-op.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t conflict_mask = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & conflict_mask) == conflict_mask) abort();
#else
  (void)counters_vec_attr;  // unused when the check is compiled out
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
// Returns an SQ_PERFCOUNTER_CTRL value with every stage enable field set to 1:
// PS_EN, VS_EN, GS_EN, ES_EN, HS_EN, LS_EN and CS_EN.
static uint32_t sq_control_enable_value() {
  return SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
         SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
}
|
||||
// Stub: always returns zero.
static uint32_t sq_control2_enable_value() {
  return uint32_t{0};
}
|
||||
// Stub: always returns zero.
static uint32_t sq_control2_disable_value() {
  return uint32_t{0};
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// Returns the counter descriptor's index field as the MC config value.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  const uint32_t channel = counter_des.index;
  return channel;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
// Returns the MC result-control value used for reset: the CLEAR_ALL mask.
static uint32_t mc_reset_value() {
  const uint32_t clear_all = MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
  return clear_all;
}
|
||||
// Returns the MC result-control value used for start: the ENABLE_ANY mask.
static uint32_t mc_start_value() {
  const uint32_t enable_any = MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
  return enable_any;
}
|
||||
|
||||
// Counter Select Register value templates
|
||||
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value(SX_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GDS_PERFCOUNTER0_SELECT =
|
||||
select_value(GDS_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
// Builds an SPM select value using the TCP_PERFCOUNTER0_SELECT field layout:
// PERF_SEL is set to counter_des.id and CNTR_MODE is set to 3.
static uint32_t spm_select_value(const counter_des_t& counter_des) {
  return SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
         SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);  // PERFMON_SPM_MODE_32BIT_CLAMP
}
|
||||
// SPM select value for the "even" slot: PERF_SEL is set to counter_des.id and
// CNTR_MODE is set to 3, using the TCP_PERFCOUNTER0_SELECT field layout.
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
  return SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
         SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);  // PERFMON_SPM_MODE_32BIT_CLAMP
}
|
||||
// SPM select value for the "odd" slot: the counter id goes into PERF_SEL1
// (not PERF_SEL) and CNTR_MODE is set to 3, using the TCP_PERFCOUNTER0_SELECT
// field layout.
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
  return SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
         SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3);  // PERFMON_SPM_MODE_32BIT_CLAMP
}
|
||||
// Fills a mux_info_t from a counter descriptor:
//   gfx.counter  <- counter_des.index
//   gfx.block    <- counter_des.block_info->spm_block_id
//   gfx.instance <- counter_des.block_des.index
// block_info is dereferenced without a null check.
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t mxinfo{0};
  mxinfo.gfx.counter = counter_des.index;
  mxinfo.gfx.block = counter_des.block_info->spm_block_id;
  mxinfo.gfx.instance = counter_des.block_des.index;
  return mxinfo;
}
|
||||
// Fills a mux_info_t directly from explicit counter, block and instance
// values (stored in the gfx.counter, gfx.block and gfx.instance members).
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t mxinfo{0};
  mxinfo.gfx.counter = counter;
  mxinfo.gfx.block = block;
  mxinfo.gfx.instance = instance;
  return mxinfo;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// GUS primitives
|
||||
// Returns a GUS_PERFCOUNTER_RSLT_CNTL value with CLEAR_ALL set to 1.
static uint32_t gus_disable_clear_value() {
  return SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, CLEAR_ALL, 0x1);
}
|
||||
|
||||
// Returns a GUS_PERFCOUNTER_RSLT_CNTL value with ENABLE_ANY set to 1.
static uint32_t gus_start_value() {
  return SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, ENABLE_ANY, 0x1);
}
|
||||
|
||||
// Builds a GUS_PERFCOUNTER0_CFG value: PERF_SEL is set to counter_des.id and
// ENABLE is set to 1.
static uint32_t gus_select_value(const counter_des_t& counter_des) {
  return SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id) |
         SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, ENABLE, 0x1);
}
|
||||
|
||||
// Returns the GUS result-control stop value: every field is left at zero.
static uint32_t gus_stop_value() {
  const uint32_t gus_perfcounter_rslt_cntl{0};
  return gus_perfcounter_rslt_cntl;
}
|
||||
|
||||
// SDMA primitives
|
||||
// Stub: always returns zero.
static uint32_t sdma_enable_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Stub: always returns zero.
static uint32_t sdma_disable_clear_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Stub: counter_des is not read and zero is returned.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  return uint32_t{0};
}
|
||||
|
||||
// Stub: counter_des is not read and zero is returned.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  return uint32_t{0};
}
|
||||
|
||||
// SPM trace routines
|
||||
// Returns an RLC_SPM_MC_CNTL value with RLC_SPM_VMID set to 15.
static uint32_t rlc_spm_mc_cntl_value() {
  return SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
}
|
||||
// Returns a CP_PERFMON_CNTL value with SPM_PERFMON_STATE set to 1.
static uint32_t cp_perfmon_cntl_spm_start_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
}
|
||||
// Returns a CP_PERFMON_CNTL value with SPM_PERFMON_STATE set to 2.
static uint32_t cp_perfmon_cntl_spm_stop_value() {
  return SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
}
|
||||
// Stub: none of the four arguments is read and zero is returned.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  return 0;
}
|
||||
// Returns an RLC_SPM_PERFMON_CNTL value with PERFMON_SAMPLE_INTERVAL set to
// sampling_rate.
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
}
|
||||
// Builds an RLC_SPM_PERFMON_SEGMENT_SIZE value.
//   global_count - written to GLOBAL_NUM_LINE
//   se_count     - written to SE0_NUM_LINE, SE1_NUM_LINE and SE2_NUM_LINE
// PERFMON_SEGMENT_SIZE is global_count + 4 * se_count. Only three per-SE
// fields are written although the total accounts for four.
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
                                                   const uint32_t& se_count) {
  const uint32_t global_nlines = global_count;
  const uint32_t se_nlines = se_count;
  const uint32_t segment_size = (global_nlines + (4 * se_nlines));

  return SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_LINE, global_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
         SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
}
|
||||
|
||||
// Stub: se_count is not read and zero is returned.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  return uint32_t{0};
}
|
||||
|
||||
// Enable all of the WTYPEs
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
// Builds an SQ_THREAD_TRACE_MASK value when SQTT_PRIM_ENABLED is set:
// SIMD_SEL <- simd, WGP_SEL <- wgp, SA_SEL <- 0, WTYPE_INCLUDE <- 1 << 6.
// The vmid argument is not used. Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE, 1 << 6);
#else
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx10
|
||||
// Stub: always returns zero.
static uint32_t sqtt_perf_mask_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Bit used with the REG_INCLUDE field of SQ_THREAD_TRACE_TOKEN_MASK.
static const uint32_t SQTT_TOKEN_REG_USERDATA = 1 << 3;
// Bits used with the TOKEN_EXCLUDE field of SQ_THREAD_TRACE_TOKEN_MASK.
// SQTT_TOKEN_WVRDY has the same numeric value as SQTT_TOKEN_REG_USERDATA, but
// the two are applied to different register fields.
static const uint32_t SQTT_TOKEN_VALU = 1 << 2;
static const uint32_t SQTT_TOKEN_WVRDY = 1 << 3;
static const uint32_t SQTT_TOKEN_WAVE = 1 << 4;
static const uint32_t SQTT_TOKEN_REG = 1 << 5;
static const uint32_t SQTT_TOKEN_IMMED = 1 << 6;
static const uint32_t SQTT_TOKEN_INST = 1 << 8;
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value for full tracing when
// SQTT_PRIM_ENABLED is set: REG_INCLUDE gets SQTT_TOKEN_REG_USERDATA, and
// TOKEN_EXCLUDE gets every bit of 0x7FF except VALU, WVRDY, WAVE, REG, IMMED
// and INST (the XOR clears those bits). Without SQTT_PRIM_ENABLED the result
// is 0.
static uint32_t sqtt_token_mask_on_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                            (SQTT_TOKEN_VALU | SQTT_TOKEN_WVRDY | SQTT_TOKEN_WAVE | SQTT_TOKEN_REG |
                             SQTT_TOKEN_IMMED | SQTT_TOKEN_INST) ^
                                0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value that excludes all tokens when
// SQTT_PRIM_ENABLED is set: INST_EXCLUDE <- 0x3 and TOKEN_EXCLUDE <- 0x7FF.
// Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_token_mask_off_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Builds the SQ_THREAD_TRACE_TOKEN_MASK value for occupancy tracing when
// SQTT_PRIM_ENABLED is set: REG_INCLUDE gets SQTT_TOKEN_REG_USERDATA,
// INST_EXCLUDE gets 0x3, and TOKEN_EXCLUDE gets every bit of 0x7FF except WAVE
// and REG. Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_token_mask_occupancy_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                            (SQTT_TOKEN_WAVE | SQTT_TOKEN_REG) ^ 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx10
|
||||
// Stub: always returns zero.
static uint32_t sqtt_token_mask2_value() {
  return uint32_t{0};
}
|
||||
// Stub: neither register value is inspected and the answer is always false.
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
// Stub: always returns zero.
static uint32_t sqtt_mode_off_value() {
  return uint32_t{0};
}
|
||||
// Thread trace mode ON value
|
||||
// Stub: always returns zero.
static uint32_t sqtt_mode_on_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
// Builds the SQ_THREAD_TRACE_BUF0_BASE value when SQTT_PRIM_ENABLED is set:
// BASE_LO receives the low 32 bits of base_addr shifted right by
// TT_BUFF_ALIGN_SHIFT. Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE, BASE_LO,
                            Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
#else
  return 0;
#endif
}
|
||||
|
||||
// Stub: base_addr is not read and zero is returned.
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
  return uint32_t{0};
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
// Builds the SQ_THREAD_TRACE_BUF0_SIZE value when SQTT_PRIM_ENABLED is set:
// SIZE receives size_val shifted right by TT_BUFF_ALIGN_SHIFT and BASE_HI
// receives base_hi. Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, BASE_HI, base_hi);
#else
  return 0;
#endif
}
|
||||
|
||||
// Stub: size_val is not read and zero is returned.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  return uint32_t{0};
}
|
||||
|
||||
// Stub: enableSqgEvents is not read and zero is returned.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  return uint32_t{0};
}
|
||||
|
||||
// Stub: always returns zero.
static uint32_t sqtt_zero_size_value() {
  return uint32_t{0};
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
// Builds the SQ_THREAD_TRACE_CTRL value when SQTT_PRIM_ENABLED is set.
// MODE is SQ_TT_MODE_ON or SQ_TT_MODE_OFF depending on `on`; every other
// field is a fixed constant (HIWATER 5, UTIL_TIMER 1, RT_FREQ 2,
// DRAW_EVENT_EN 1, the three *_STALL_EN fields 1, REG_DROP_ON_STALL 1,
// LOWATER_OFFSET 4, AUTO_FLUSH_MODE 1).
// Without SQTT_PRIM_ENABLED the result is 0.
static uint32_t sqtt_ctrl_value(bool on) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RT_FREQ, 2) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, REG_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, REG_DROP_ON_STALL, 1) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
#else
  return 0;
#endif
}
|
||||
|
||||
// SPM primitives
|
||||
// Returns the fixed 16-bit muxsel value 0xF0F0 used for the SPM timestamp.
static uint16_t spm_timestamp_muxsel() {
  constexpr uint16_t kTimestampMuxsel = 0xF0F0;
  return kTimestampMuxsel;
}
|
||||
|
||||
// Bit masks applied to thread trace status values.
enum ESQTT_STATUS_MASK {
  // Mask to check if memory error was received
  TT_CONTROL_UTC_ERR_MASK = 0x1000000,
  // TODO: Navi has 2 full bits on status2, one for each buffer
  // (left as 0 here, so masking with it never reports "full")
  TT_CONTROL_FULL_MASK = 0x0,
  // Low 29 bits
  TT_WRITE_PTR_MASK = 0x1FFFFFFF
};
|
||||
|
||||
// Returns a mask with only bit 25 set (the busy bit).
static uint32_t sqtt_busy_mask() {
  constexpr uint32_t kBusyBit = 25;
  return 1u << kBusyBit;
}
|
||||
|
||||
// Returns a mask covering 8 consecutive bits starting at bit 2 (bits 2..9),
// one bit per pipe.
static uint32_t sqtt_pending_mask() {
  constexpr uint32_t kPipeStart = 2;
  constexpr uint32_t kNumPipes = 8;
  // (1 << end) - (1 << start) sets exactly the bits in [start, end).
  return (1u << (kNumPipes + kPipeStart)) - (1u << kPipeStart);
}
|
||||
};
|
||||
|
||||
} // namespace gfx10
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX10_PRIMITIVES_H_
|
||||
@@ -0,0 +1,217 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_BLOCKINFO_H_
|
||||
#define _GFX11_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
// To define GFX11 specific blocks info like GC caches blocks
|
||||
// All blocks shared with GFX9 are inherited from the GFX9 space
|
||||
// Enumeration of Gfx11 hardware counter blocks
|
||||
// Identifiers of the hardware counter blocks. Values are assigned implicitly
// in declaration order starting at 0. Commented-out entries do not consume a
// value.
enum CounterBlockId {
  CbCounterBlockId,
  CpcCounterBlockId,
  CpfCounterBlockId,
  CpgCounterBlockId,
  DbCounterBlockId,
  GdsCounterBlockId,
  GrbmCounterBlockId,
  GrbmSeCounterBlockId,
  // IaCounterBlockId,
  // PaScCounterBlockId,
  // PaSuCounterBlockId,
  SpiCounterBlockId,
  SqCounterBlockId,
  SqGsCounterBlockId,
  // SqVsCounterBlockId,
  SqPsCounterBlockId,
  SqHsCounterBlockId,
  SqCsCounterBlockId,
  SxCounterBlockId,
  TaCounterBlockId,
  // TcaCounterBlockId,
  // TccCounterBlockId,
  // TcsCounterBlockId,
  TdCounterBlockId,
  // VgtCounterBlockId,
  // WdCounterBlockId,

  // MC blocks
  GceaCounterBlockId,
  // AtcCounterBlockId,
  // AtcL2CounterBlockId,
  // McVmL2CounterBlockId,
  RpbCounterBlockId,
  RmiCounterBlockId,
  Gl1aCounterBlockId,
  Gl1cCounterBlockId,
  Gl2aCounterBlockId,
  Gl2cCounterBlockId,
  GcrCounterBlockId,
  GusCounterBlockId,

  // SDMA block
  Sdma0CounterBlockId,
  Sdma1CounterBlockId,
  // UMC block
  UmcCounterBlockId,

  // Counters retrieved by KFD
  IommuV2CounterBlockId,
  KernelDriverCounterBlockId,

  CpPipeStatsCounterBlockId,
  TcpCounterBlockId,
  HwInfoCounterBlockId,

  // Inclusive bounds of the valid id range.
  FirstCounterBlockId = CbCounterBlockId,
  LastCounterBlockId = HwInfoCounterBlockId,
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
// SPM block ids for the global (non shader-engine) blocks.
enum SpmGlobalBlockId {
  SPM_GLOBAL_BLOCK_NAME_CPG = 0,
  SPM_GLOBAL_BLOCK_NAME_CPC = 1,
  SPM_GLOBAL_BLOCK_NAME_CPF = 2,
  SPM_GLOBAL_BLOCK_NAME_GDS = 3,
  SPM_GLOBAL_BLOCK_NAME_TCC = 4,
  SPM_GLOBAL_BLOCK_NAME_TCA = 5,
  SPM_GLOBAL_BLOCK_NAME_IA = 6,
  SPM_GLOBAL_BLOCK_NAME_TCS = 7,
};
|
||||
|
||||
// SPM block ids for the per shader-engine blocks.
enum SpmSeBlockId {
  SPM_SE_BLOCK_NAME_CB = 0,
  SPM_SE_BLOCK_NAME_DB = 1,
  SPM_SE_BLOCK_NAME_PA = 2,
  SPM_SE_BLOCK_NAME_SX = 3,
  SPM_SE_BLOCK_NAME_SC = 4,
  SPM_SE_BLOCK_NAME_TA = 5,
  SPM_SE_BLOCK_NAME_TD = 6,
  SPM_SE_BLOCK_NAME_TCP = 7,
  SPM_SE_BLOCK_NAME_SPI = 8,
  SPM_SE_BLOCK_NAME_SQG = 9,
  SPM_SE_BLOCK_NAME_VGT = 10,
};
|
||||
|
||||
// Number of block instances
|
||||
// Per-block instance counts. The *MaxInstances constants are upper bounds,
// not exact counts.
static const uint32_t CbCounterBlockNumInstances = 4;
static const uint32_t DbCounterBlockNumInstances = 4;
static const uint32_t TaCounterBlockNumInstances = 16;
static const uint32_t TdCounterBlockNumInstances = 16;
static const uint32_t TcpCounterBlockNumInstances = 16;
static const uint32_t TcaCounterBlockNumInstances = 2;
static const uint32_t TccCounterBlockNumInstances = 16;
static const uint32_t SdmaCounterBlockNumInstances = 2;
// MI100 has 8 SDMA instances
static const uint32_t SdmaCounterBlockMaxInstances = 8;
static const uint32_t UmcCounterBlockMaxInstances = 32;
static const uint32_t RmiCounterBlockNumInstances = 8;
static const uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
// Number of counter registers available in each block.
static const uint32_t CbCounterBlockNumCounters = 4;
static const uint32_t CpcCounterBlockNumCounters = 2;
static const uint32_t CpfCounterBlockNumCounters = 2;
static const uint32_t CpgCounterBlockNumCounters = 2;
static const uint32_t DbCounterBlockNumCounters = 4;
static const uint32_t GdsCounterBlockNumCounters = 4;
static const uint32_t GrbmCounterBlockNumCounters = 2;
static const uint32_t GrbmSeCounterBlockNumCounters = 4;
static const uint32_t IaCounterBlockNumCounters = 4;
static const uint32_t PaSuCounterBlockNumCounters = 4;
static const uint32_t PaScCounterBlockNumCounters = 8;
static const uint32_t RlcCounterBlockNumCounters = 2;
static const uint32_t SdmaCounterBlockNumCounters = 2;
static const uint32_t UmcCounterBlockNumCounters = 5;
static const uint32_t SpiCounterBlockNumCounters = 6;
static const uint32_t SqCounterBlockNumCounters = 8;
static const uint32_t SxCounterBlockNumCounters = 4;
static const uint32_t TaCounterBlockNumCounters = 2;
static const uint32_t TcaCounterBlockNumCounters = 4;
static const uint32_t TccCounterBlockNumCounters = 4;
static const uint32_t TcpCounterBlockNumCounters = 4;
static const uint32_t TdCounterBlockNumCounters = 2;
static const uint32_t VgtCounterBlockNumCounters = 4;
static const uint32_t WdCounterBlockNumCounters = 4;
static const uint32_t GceaCounterBlockNumCounters = 2;
static const uint32_t AtcCounterBlockNumCounters = 4;
static const uint32_t AtcL2CounterBlockNumCounters = 2;
static const uint32_t McVmL2CounterBlockNumCounters = 8;
static const uint32_t RpbCounterBlockNumCounters = 4;
static const uint32_t RmiCounterBlockNumCounters = 4;
static const uint32_t Gl1aCounterBlockNumCounters = 4;
static const uint32_t Gl1cCounterBlockNumCounters = 4;
static const uint32_t Gl2aCounterBlockNumCounters = 4;
static const uint32_t Gl2cCounterBlockNumCounters = 4;
static const uint32_t GcrCounterBlockNumCounters = 2;
static const uint32_t GusCounterBlockNumCounters = 2;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent =
|
||||
CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK; // CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_MEC_THREAD3;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_OREO_Events_stalls;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
// static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
// static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_SPI_WAVE_STALLED_BY_SPI; // SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = 15; // SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SPI_PERF_BUSY; // SC_SC_SPI_EVENT;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQ_PERF_SEL_NONE2; // SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent =
|
||||
SX_PERF_SEL_DB3_4X2_DISCARD; // SX_PERF_SEL_DB3_SIZE;
|
||||
// static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
// static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
// static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
// static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
// static const uint32_t TdCounterBlockMaxEvent =
|
||||
// TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt; static const uint32_t VgtCounterBlockMaxEvent =
|
||||
// vgt_perf_sclk_te11_vld; static const uint32_t WdCounterBlockMaxEvent =
|
||||
// wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent =
|
||||
RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3; // RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = 61;
|
||||
static const uint32_t Gl1aCounterBlockMaxEvent = 24;
|
||||
static const uint32_t Gl1cCounterBlockMaxEvent = 84;
|
||||
static const uint32_t Gl2aCounterBlockMaxEvent = 108;
|
||||
static const uint32_t Gl2cCounterBlockMaxEvent = 259;
|
||||
static const uint32_t GcrCounterBlockMaxEvent = 155;
|
||||
static const uint32_t GusCounterBlockMaxEvent = 176;
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_BLOCKINFO_H_
|
||||
@@ -0,0 +1,441 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_BLOCKTABLE_H_
|
||||
#define _GFX11_BLOCKTABLE_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
|
||||
/*
|
||||
* CPC CORRECT
|
||||
*/
|
||||
// Register addresses for the two CPC performance counters. Each entry lists,
// in order: the SELECT register, REG_32B_NULL in the second slot, then the LO
// and HI result registers.
static const CounterRegInfo CpcCounterRegAddr[] = {
    {REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regCPC_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* CPF CORRECT
|
||||
*/
|
||||
// Register addresses for the two CPF performance counters. Each entry lists,
// in order: the SELECT register, REG_32B_NULL in the second slot, then the LO
// and HI result registers.
static const CounterRegInfo CpfCounterRegAddr[] = {
    {REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_LO), REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_SELECT), REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_LO), REG_32B_ADDR(GC, 0, regCPF_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GDS CORRECT
|
||||
*/
|
||||
static const CounterRegInfo GdsCounterRegAddr[] = {
    // One entry per GDS counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regGDS_PERFCOUNTER3_HI)}};
|
||||
/*
|
||||
* GRBM CORRECT
|
||||
*/
|
||||
static const CounterRegInfo GrbmCounterRegAddr[] = {
    // One entry per GRBM counter. Each entry lists, in order: the *_SELECT
    // register, a control register (REG_32B_NULL here), then *_LO and *_HI.
    {REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGRBM_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* GRBM_SE CORRECT
|
||||
*/
|
||||
static const CounterRegInfo GrbmSeCounterRegAddr[] = {
    // One entry per GRBM_SEn register set, n = 0..6. Each entry lists, in
    // order: the *_SELECT register, a control register (REG_32B_NULL here),
    // then *_LO and *_HI.
    {REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE0_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE1_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE2_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE3_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE4_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE5_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGRBM_SE6_PERFCOUNTER_HI)}};
|
||||
|
||||
/*
|
||||
* SPI CORRECT
|
||||
*/
|
||||
static const CounterRegInfo SpiCounterRegAddr[] = {
    // One entry per SPI counter (0..5). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER3_HI)},
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER4_HI)},
    {REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_LO),
     REG_32B_ADDR(GC, 0, regSPI_PERFCOUNTER5_HI)}};
|
||||
/*
|
||||
* SQ CORRECT
|
||||
*/
|
||||
static const CounterRegInfo SqCounterRegAddr[] = {
    // Eight SQ entries. Entry i pairs select register number 2*i
    // (0, 2, ..., 14) with result register regSQ_PERFCOUNTER<i>_LO. Every
    // entry shares regSQ_PERFCOUNTER_CTRL as its control register and puts
    // REG_32B_NULL in the last (HI) slot.
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER0_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER1_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER2_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER3_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER8_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER4_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER10_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER5_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER12_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER6_LO),
     REG_32B_NULL},
    {REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER14_SELECT),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),
     REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER7_LO),
     REG_32B_NULL}};
|
||||
/*
|
||||
* SX CORRECT
|
||||
*/
|
||||
static const CounterRegInfo SxCounterRegAddr[] = {
    // One entry per SX counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regSX_PERFCOUNTER3_HI)}};
|
||||
|
||||
/*
|
||||
* GCEA
|
||||
*/
|
||||
static const CounterRegInfo GceaCounterRegAddr[] = {
    // Two GCEA entries that differ only in their *_CFG register
    // (PERFCOUNTER0_CFG / PERFCOUNTER1_CFG). Both share the same
    // PERFCOUNTER_RSLT_CNTL control register and the same unnumbered
    // PERFCOUNTER_LO / PERFCOUNTER_HI result registers.
    {REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER0_CFG),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER1_CFG),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGCEA_PERFCOUNTER_HI)}};
|
||||
|
||||
// Define GFX10 specific blocks table entries like GC caches blocks
|
||||
/*
|
||||
* GCR CORRECT
|
||||
*/
|
||||
static const CounterRegInfo GcrCounterRegAddr[] = {
    // One entry per GCR counter. Unlike most tables in this file, the second
    // slot is populated: both entries name regGCR_GENERAL_CNTL there.
    {REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_SELECT),
     REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_SELECT),
     REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGCR_PERFCOUNTER1_HI)}};
|
||||
|
||||
/*
|
||||
* TCP
|
||||
*/
|
||||
static const CounterRegInfo TcpCounterRegAddr[] = {
    // One entry per TCP counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regTCP_PERFCOUNTER3_HI)}};
|
||||
/*
|
||||
* GL1A CORRECT
|
||||
*/
|
||||
static const CounterRegInfo Gl1aCounterRegAddr[] = {
    // One entry per GL1A counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regGL1A_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL1C CORRECT
|
||||
*/
|
||||
static const CounterRegInfo Gl1cCounterRegAddr[] = {
    // One entry per GL1C counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regGL1C_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL2A CORRECT
|
||||
*/
|
||||
static const CounterRegInfo Gl2aCounterRegAddr[] = {
    // One entry per GL2A counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regGL2A_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GL2C CORRECT
|
||||
*/
|
||||
static const CounterRegInfo Gl2cCounterRegAddr[] = {
    // One entry per GL2C counter (0..3). Each entry lists, in order: the
    // *_SELECT register, a control register (REG_32B_NULL here), *_LO, *_HI.
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER1_HI)},
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER2_HI)},
    {REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_LO),
     REG_32B_ADDR(GC, 0, regGL2C_PERFCOUNTER3_HI)},
};
|
||||
|
||||
/*
|
||||
* GUS ????? need more investigations
|
||||
*/
|
||||
static const CounterRegInfo GusCounterRegAddr[] = {
    // Two GUS entries keyed by PERFCOUNTER0_CFG / PERFCOUNTER1_CFG, sharing
    // regGUS_PERFCOUNTER_RSLT_CNTL as the control register.
    // NOTE(review): entry 0 reads the unnumbered PERFCOUNTER_LO/HI pair while
    // entry 1 reads PERFCOUNTER2_LO/HI; the GCEA table uses the unnumbered
    // pair for both of its entries. Confirm this asymmetry is intended.
    {REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER0_CFG),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_LO),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_HI)},
    {REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER1_CFG),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER2_LO),
     REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER2_HI)},
};
|
||||
|
||||
/*
|
||||
* TA CORRECT
|
||||
*/
|
||||
static const CounterRegInfo TaCounterRegAddr[] = {
    // One entry per TA counter. Each entry lists, in order: the *_SELECT
    // register, a control register (REG_32B_NULL here), then *_LO and *_HI.
    {REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_LO),
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER0_HI)},
    {REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_SELECT),
     REG_32B_NULL,
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_LO),
     REG_32B_ADDR(GC, 0, regTA_PERFCOUNTER1_HI)}};
|
||||
|
||||
// Counter block CPC
|
||||
static const GpuBlockInfo CpcCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "CPC",                       // block name
    CpcCounterBlockId,           // block id
    1,                           // instance-count slot
    CpcCounterBlockMaxEvent,     // max event
    CpcCounterBlockNumCounters,  // number of counters
    CpcCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_CPC_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,       // attribute flags
    NULL /*CpcBlockDelayInfo*/,
    SPM_GLOBAL_BLOCK_NAME_CPC};
|
||||
// Counter block CPF
|
||||
static const GpuBlockInfo CpfCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "CPF",                       // block name
    CpfCounterBlockId,           // block id
    1,                           // instance-count slot
    CpfCounterBlockMaxEvent,     // max event
    CpfCounterBlockNumCounters,  // number of counters
    CpfCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_CPF_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,       // attribute flags
    NULL /*CpfBlockDelayInfo*/,
    SPM_GLOBAL_BLOCK_NAME_CPF};
|
||||
// Counter block GDS
|
||||
static const GpuBlockInfo GdsCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "GDS",                       // block name
    GdsCounterBlockId,           // block id
    1,                           // instance-count slot
    GdsCounterBlockMaxEvent,     // max event
    GdsCounterBlockNumCounters,  // number of counters
    GdsCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_GDS_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockDfltAttr | CounterBlockSpmGlobalAttr,       // attribute flags
    NULL /*GdsBlockDelayInfo*/,
    SPM_GLOBAL_BLOCK_NAME_GDS};
|
||||
// Counter block GRBM
|
||||
static const GpuBlockInfo GrbmCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration. The trailing slots that other descriptors
    // fill (delay info, SPM block name) are not listed here.
    "GRBM",                       // block name
    GrbmCounterBlockId,           // block id
    1,                            // instance-count slot
    GrbmCounterBlockMaxEvent,     // max event
    GrbmCounterBlockNumCounters,  // number of counters
    GrbmCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_GRBM_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockDfltAttr | CounterBlockGRBMAttr};            // attribute flags
|
||||
// Counter block GRBMSE
|
||||
static const GpuBlockInfo GrbmSeCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration. The trailing slots that other descriptors
    // fill (delay info, SPM block name) are not listed here.
    "GRBM_SE",                      // block name
    GrbmSeCounterBlockId,           // block id
    1,                              // instance-count slot
    GrbmSeCounterBlockMaxEvent,     // max event
    GrbmSeCounterBlockNumCounters,  // number of counters
    GrbmSeCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_GRBM_SE0_PERFCOUNTER_SELECT,  // select-value helper
    CounterBlockDfltAttr};                                      // attribute flags
|
||||
// Counter block SPI
|
||||
static const GpuBlockInfo SpiCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "SPI",                       // block name
    SpiCounterBlockId,           // block id
    1,                           // instance-count slot
    SpiCounterBlockMaxEvent,     // max event
    SpiCounterBlockNumCounters,  // number of counters
    SpiCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_SPI_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockSeAttr | CounterBlockSPIAttr,               // attribute flags
    NULL /*SpiBlockDelayInfo*/,
    SPM_SE_BLOCK_NAME_SPI};
|
||||
// Counter block SQ
|
||||
static const GpuBlockInfo SqCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration. SQ uses the dedicated sq_select_value helper
    // and the SQG SPM block name.
    "SQ",                       // block name
    SqCounterBlockId,           // block id
    1,                          // instance-count slot
    SqCounterBlockMaxEvent,     // max event
    SqCounterBlockNumCounters,  // number of counters
    SqCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::sq_select_value,                              // select-value helper
    CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr,  // attribute flags
    NULL,
    SPM_SE_BLOCK_NAME_SQG};
|
||||
// Counter block SX
|
||||
static const GpuBlockInfo SxCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "SX",                       // block name
    SxCounterBlockId,           // block id
    1,                          // instance-count slot
    SxCounterBlockMaxEvent,     // max event
    SxCounterBlockNumCounters,  // number of counters
    SxCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_SX_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockSeAttr | CounterBlockCleanAttr,            // attribute flags
    NULL /*SxBlockDelayInfo*/,
    SPM_SE_BLOCK_NAME_SX};
|
||||
// Counter block GCEA
|
||||
static const GpuBlockInfo GceaCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration. GCEA takes its instance count from a named
    // constant and uses the mc_ flavour of the select-value helper.
    "GCEA",                        // block name
    GceaCounterBlockId,            // block id
    GceaCounterBlockNumInstances,  // instance count
    GceaCounterBlockMaxEvent,      // max event
    GceaCounterBlockNumCounters,   // number of counters
    GceaCounterRegAddr,            // per-counter register table
    gfx11_cntx_prim::mc_select_value_GCEA_PERFCOUNTER0_CFG,  // select-value helper
    CounterBlockMcAttr};                                     // attribute flags
|
||||
// Counter block TCP
|
||||
static const GpuBlockInfo TcpCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    "TCP",                       // block name
    TcpCounterBlockId,           // block id
    16,                          // instance-count slot (literal)
    TcpCounterBlockMaxEvent,     // max event
    TcpCounterBlockNumCounters,  // number of counters
    TcpCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,            // select-value helper
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr};  // attribute flags
|
||||
// Counter block GL1A
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is the TCP one, not a
    // GL1A-specific helper -- confirm that sharing it is intentional.
    "GL1A",                       // block name
    Gl1aCounterBlockId,           // block id
    4,                            // instance-count slot (literal)
    Gl1aCounterBlockMaxEvent,     // max event
    Gl1aCounterBlockNumCounters,  // number of counters
    Gl1aCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL1C
|
||||
static const GpuBlockInfo Gl1cCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is the TCP one, not a
    // GL1C-specific helper -- confirm that sharing it is intentional.
    "GL1C",                       // block name
    Gl1cCounterBlockId,           // block id
    4,                            // instance-count slot (literal)
    Gl1cCounterBlockMaxEvent,     // max event
    Gl1cCounterBlockNumCounters,  // number of counters
    Gl1cCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockTcAttr};
|
||||
// Counter block GL2A
|
||||
static const GpuBlockInfo Gl2aCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is the TCP one, not a
    // GL2A-specific helper -- confirm that sharing it is intentional.
    "GL2A",                       // block name
    Gl2aCounterBlockId,           // block id
    32,                           // instance-count slot (literal)
    Gl2aCounterBlockMaxEvent,     // max event
    Gl2aCounterBlockNumCounters,  // number of counters
    Gl2aCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockTcAttr};
|
||||
// Counter block GL2C
|
||||
static const GpuBlockInfo Gl2cCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is the TCP one, not a
    // GL2C-specific helper -- confirm that sharing it is intentional.
    "GL2C",                       // block name
    Gl2cCounterBlockId,           // block id
    32,                           // instance-count slot (literal)
    Gl2cCounterBlockMaxEvent,     // max event
    Gl2cCounterBlockNumCounters,  // number of counters
    Gl2cCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockDfltAttr | CounterBlockTcAttr};
|
||||
// Counter block GCR
|
||||
static const GpuBlockInfo GcrCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is the TCP one, not a
    // GCR-specific helper -- confirm that sharing it is intentional.
    "GCR",                       // block name
    GcrCounterBlockId,           // block id
    1,                           // instance-count slot
    GcrCounterBlockMaxEvent,     // max event
    GcrCounterBlockNumCounters,  // number of counters
    GcrCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
    CounterBlockTcAttr};
|
||||
// Counter block GUS
|
||||
static const GpuBlockInfo GusCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the select-value helper is named after RPB_PERFCOUNTER0_CFG
    // rather than a GUS register -- confirm that sharing it is intentional.
    "GUS",                       // block name
    GusCounterBlockId,           // block id
    1,                           // instance-count slot
    GusCounterBlockMaxEvent,     // max event
    GusCounterBlockNumCounters,  // number of counters
    GusCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::mc_select_value_RPB_PERFCOUNTER0_CFG,
    CounterBlockGusAttr};
|
||||
// Counter block TA
|
||||
static const GpuBlockInfo TaCounterBlockInfo = {
    // Slot labels follow the initializer names; confirm against the
    // GpuBlockInfo declaration.
    // NOTE(review): the max-event slot is the literal 235 with the named
    // constant left in a comment -- confirm which value is authoritative.
    "TA",                       // block name
    TaCounterBlockId,           // block id
    TaCounterBlockNumInstances, // instance count
    235 /*TaCounterBlockMaxEvent*/,
    TaCounterBlockNumCounters,  // number of counters
    TaCounterRegAddr,           // per-counter register table
    gfx11_cntx_prim::select_value_TA_PERFCOUNTER0_SELECT,  // select-value helper
    CounterBlockSeAttr | CounterBlockTcAttr,               // attribute flags
    NULL /*TaBlockDelayInfo*/,
    SPM_SE_BLOCK_NAME_TA};
|
||||
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,699 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_PRIMITIVES_H_
|
||||
#define _GFX11_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
// Taken from gfx11_mask.h
|
||||
// GCR_CNTL
|
||||
// SEQ_FORWARD (bit 16) is a value inside the two-bit SEQ field selected by
// SEQ_MASK (bits 16-17); GL2_WB_MASK is the single bit 15.
#define GCR_CNTL__SEQ_FORWARD  0x00010000L
#define GCR_CNTL__SEQ_MASK     0x00030000L
#define GCR_CNTL__GL2_WB_MASK  0x00008000L
|
||||
|
||||
// Taken from gfx11_pm4defs.h
|
||||
#define COPY_DATA_SEL_REG                   0  ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER  4  ///< Privileged memory performance counter
#define COPY_DATA_SEL_COUNT_1DW             0  ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
// Expands to a lambda that takes a counter descriptor and returns the select
// register value for reg_name: counter_des.id placed in the PERF_SEL field.
#define select_value(reg_name)                                        \
  [](const counter_des_t& counter_des) -> uint32_t {                  \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id);    \
  }
|
||||
// Expands to a lambda that takes a counter descriptor and returns the config
// register value for reg_name: counter_des.id in PERF_SEL, PERF_MODE set to
// PERFMON_COUNTER_MODE_ACCUM, and ENABLE set to 1.
#define mc_select_value(reg_name)                                               \
  [](const counter_des_t& counter_des) -> uint32_t {                            \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |             \
           SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \
           SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                             \
  }
|
||||
|
||||
#define SQTT_PRIM_ENABLED 1
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx11 {
|
||||
|
||||
class gfx11_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 11;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, regGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL);
|
||||
|
||||
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SQ perf-counter and thread-trace register addresses. Members written as
// `NAME{}` are value-initialized rather than bound to a register.
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL2);
// NOTE(review): raw literal instead of a REG_32B_ADDR lookup -- confirm 0xD9E1.
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR = Register(0xD9E1);
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE);
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_SIZE);
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_CTRL);
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
static const uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_DROPPED_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_WPTR);
|
||||
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = []() {
  // Same register as SQ_THREAD_TRACE_STATUS, with its offset rebased by
  // subtracting UCONFIG_SPACE_START.
  Register status_reg = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
  status_reg.offset -= UCONFIG_SPACE_START;
  return status_reg;
}();
|
||||
static const uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
static constexpr Register GUS_PERFCOUNTER_RSLT_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regGUS_PERFCOUNTER_RSLT_CNTL);
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM register addresses. SEGMENT_SIZE_CORE1 is value-initialized rather
// than bound to a register.
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, regRLC_SPM_MC_CNTL);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Addresses of the four SQ_THREAD_TRACE_USERDATA registers.
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
    REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
static Register sqtt_perfcounter_addr(uint32_t index) {
  // The index is ignored: every call yields REG_32B_NULL.
  (void)index;
  return REG_32B_NULL;
}
|
||||
|
||||
union mux_info_t {
  // Whole selector viewed as one 16-bit value.
  uint16_t data;
  // The same 16 bits split into counter (6), block (5) and instance (5).
  struct {
    uint16_t counter : 6;
    uint16_t block : 5;
    uint16_t instance : 5;
  } gfx;
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = SqCounterBlockId;
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
static uint32_t Low32(const uint64_t& v) {
  // Truncating conversion keeps bits 0..31.
  return static_cast<uint32_t>(v);
}
|
||||
static uint32_t High32(const uint64_t& v) {
  // Shift bits 32..63 down, then truncate.
  return static_cast<uint32_t>(v >> 32);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM BlockInstance indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, wgp_index << 2);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_index << 2) | (instance_index << 1)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to reset counters
|
||||
static uint32_t cp_perfmon_cntl_reset_value() {
  // Reset is the all-zero register value.
  return 0;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to read/sample counters (PERFMON_STATE = 1 with PERFMON_SAMPLE_ENABLE set)
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t cp_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return cp_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t cp_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return cp_perfcount_enable;
|
||||
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_cntr_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_cntr_sel;
|
||||
}
|
||||
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_cntr_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_cntr_sel;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx11
|
||||
// Returns an all-ones 32-bit mask; the counter descriptor argument is ignored.
static uint32_t sq_mask_value(const counter_des_t&) {
  const uint32_t all_bits_set = 0xFFFFFFFF;
  return all_bits_set;
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_cntr_ctrl{0};
|
||||
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} /* else if (block_id == SqVsCounterBlockId) {
|
||||
sq_cntr_ctrl =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} */
|
||||
else if (block_id == SqPsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// Aborts the process when the attribute bits of the requested counters contain
// both CounterBlockSqAttr and CounterBlockTcAttr.
//   counters_vec_attr - OR of the attribute bits of all requested counters
//                       (NOTE(review): inferred from the name; confirm at the caller)
// The check is compiled in only when SQ_CONFLICT_CHECK == 1; otherwise this is a no-op.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t sq_and_tc = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & sq_and_tc) == sq_and_tc) abort();
#else
  (void)counters_vec_attr;  // unused when the conflict check is compiled out
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
static uint32_t sq_control2_enable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, true) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
static uint32_t sq_control2_disable_value() {
|
||||
uint32_t sq_cntr_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, false) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_cntr_ctrl;
|
||||
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// Returns the MC channel value for a counter, taken from `counter_des.index`.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  const uint32_t channel = counter_des.index;
  return channel;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
// Returns the MC result-control value that clears all counters
// (MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM).
static uint32_t mc_reset_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
}
|
||||
// Returns the MC result-control value that enables counting
// (MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM).
static uint32_t mc_start_value() {
  return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
}
|
||||
|
||||
// Counter Select Register value templates
|
||||
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value(SX_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GDS_PERFCOUNTER0_SELECT =
|
||||
select_value(GDS_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return select;
|
||||
}
|
||||
|
||||
// Builds the SPM mux-RAM entry for a counter descriptor.
//   counter_des.index                    -> gfx.counter
//   counter_des.block_info->spm_block_id -> gfx.block
//   counter_des.block_des.index          -> gfx.instance
// The entry is initialised with {0} before those fields are assigned.
// `counter_des.block_info` is dereferenced without a null check.
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t entry{0};
  entry.gfx.counter = counter_des.index;
  entry.gfx.block = counter_des.block_info->spm_block_id;
  entry.gfx.instance = counter_des.block_des.index;
  return entry;
}
|
||||
// Builds the SPM mux-RAM entry from explicit field values.
//   counter  -> gfx.counter
//   block    -> gfx.block
//   instance -> gfx.instance
// The entry is initialised with {0} before those fields are assigned.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t entry{0};
  entry.gfx.counter = counter;
  entry.gfx.block = block;
  entry.gfx.instance = instance;
  return entry;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// GUS primitives
|
||||
static uint32_t gus_disable_clear_value() {
|
||||
uint32_t gus_perfcounter_rslt_cntl =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, CLEAR_ALL, 0x1);
|
||||
return gus_perfcounter_rslt_cntl;
|
||||
}
|
||||
|
||||
static uint32_t gus_start_value() {
|
||||
uint32_t gus_perfcounter_rslt_cntl =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER_RSLT_CNTL, ENABLE_ANY, 0x1);
|
||||
return gus_perfcounter_rslt_cntl;
|
||||
}
|
||||
|
||||
static uint32_t gus_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t gus0_perfcounter_cfg =
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(GUS_PERFCOUNTER0_CFG, ENABLE, 0x1);
|
||||
return gus0_perfcounter_cfg;
|
||||
}
|
||||
|
||||
// Returns the GUS result-control stop value, which is zero (no field set).
static uint32_t gus_stop_value() {
  return 0u;
}
|
||||
|
||||
// SDMA primitives
|
||||
// SDMA disable/clear value: always 0 in this implementation.
static uint32_t sdma_disable_clear_value() {
  return 0u;
}
|
||||
|
||||
// SDMA enable value: always 0 in this implementation.
static uint32_t sdma_enable_value() {
  return 0u;
}
|
||||
|
||||
// SDMA select value: always 0; the counter descriptor is not consulted.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// SDMA stop value: always 0; the counter descriptor is not consulted.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  return 0u;
}
|
||||
|
||||
// SPM trace routines
|
||||
static uint32_t rlc_spm_mc_cntl_value() {
|
||||
uint32_t rlc_spm_mc_cntl = SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
|
||||
return rlc_spm_mc_cntl;
|
||||
}
|
||||
|
||||
static uint32_t cp_perfmon_cntl_spm_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
static uint32_t cp_perfmon_cntl_spm_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Stub: always returns 0. None of the arguments (value, counter_des, block,
// hi) are read by this implementation.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  return 0u;
}
|
||||
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
|
||||
uint32_t value =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
|
||||
return value;
|
||||
}
|
||||
|
||||
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
|
||||
const uint32_t& se_count) {
|
||||
const uint32_t global_nlines = global_count;
|
||||
const uint32_t se_nlines = se_count;
|
||||
const uint32_t segment_size = (global_nlines + (4 * se_nlines));
|
||||
uint32_t value =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, TOTAL_NUM_SEGMENT, segment_size) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_SEGMENT, global_nlines);
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
|
||||
// SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
|
||||
return value;
|
||||
}
|
||||
|
||||
// Stub: always returns 0; se_count is not read.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  return 0u;
}
|
||||
|
||||
// Enable all of the WTYPEs
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
// Returns the SQ_THREAD_TRACE_MASK value.
//   wgp  - placed in the WGP_SEL field
//   simd - placed in the SIMD_SEL field
//   vmid - not read by this implementation
// SA_SEL is fixed to 0, WTYPE_INCLUDE to (1 << 6) and
// EXCLUDE_NONDETAIL_SHADERDATA to 1.
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE, 1 << 6) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, EXCLUDE_NONDETAIL_SHADERDATA, 1);
#else
  return 0;
#endif
}
|
||||
|
||||
// Bit used with the REG_INCLUDE field of SQ_THREAD_TRACE_TOKEN_MASK.
// It shares the value (1 << 3) with SQTT_TOKEN_WVRDY below; the two are
// applied to different register fields in this file.
static const uint32_t SQTT_TOKEN_REG_USERDATA = 1 << 3;

// Token-type bits combined for the TOKEN_EXCLUDE field of
// SQ_THREAD_TRACE_TOKEN_MASK. Bits 0, 1 and 7 have no named constant here.
static const uint32_t SQTT_TOKEN_VALU = 1 << 2;
static const uint32_t SQTT_TOKEN_WVRDY = 1 << 3;
static const uint32_t SQTT_TOKEN_WAVE = 1 << 4;
static const uint32_t SQTT_TOKEN_REG = 1 << 5;
static const uint32_t SQTT_TOKEN_IMMED = 1 << 6;
static const uint32_t SQTT_TOKEN_INST = 1 << 8;
|
||||
|
||||
// not supported in gfx11
|
||||
// Thread-trace perf mask: always 0 in this implementation.
static uint32_t sqtt_perf_mask_value() {
  return 0u;
}
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is on.
//   REG_EXCLUDE   = 0x3
//   REG_INCLUDE   = SQTT_TOKEN_REG_USERDATA
//   TOKEN_EXCLUDE = every bit of 0x7FF except VALU, WVRDY, WAVE, REG, IMMED
//                   and INST (the wanted set is XOR-ed with 0x7FF)
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_token_mask_on_value() {
#if SQTT_PRIM_ENABLED
  const uint32_t kept_tokens = SQTT_TOKEN_VALU | SQTT_TOKEN_WVRDY | SQTT_TOKEN_WAVE |
                               SQTT_TOKEN_REG | SQTT_TOKEN_IMMED | SQTT_TOKEN_INST;
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, kept_tokens ^ 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value used while tracing is off.
//   REG_EXCLUDE   = 0x3
//   INST_EXCLUDE  = 0x3
//   TOKEN_EXCLUDE = 0x7FF (every token bit excluded)
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_token_mask_off_value() {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value for the occupancy configuration.
//   REG_INCLUDE   = SQTT_TOKEN_REG_USERDATA
//   INST_EXCLUDE  = 0x3
//   TOKEN_EXCLUDE = every bit of 0x7FF except WAVE and REG
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_token_mask_occupancy_value() {
#if SQTT_PRIM_ENABLED
  const uint32_t kept_tokens = SQTT_TOKEN_WAVE | SQTT_TOKEN_REG;
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, SQTT_TOKEN_REG_USERDATA) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, kept_tokens ^ 0x7FF);
#else
  return 0;
#endif
}
|
||||
|
||||
// not supported in gfx11
|
||||
// Second thread-trace token mask: always 0 in this implementation.
static uint32_t sqtt_token_mask2_value() {
  return 0u;
}
|
||||
|
||||
// Check if stalling is supported
|
||||
// Reports whether stalling is enabled for the given mask values.
// This implementation always answers false; neither argument is inspected.
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
// Thread-trace mode OFF value: always 0 in this implementation.
static uint32_t sqtt_mode_off_value() {
  return 0u;
}
|
||||
// Thread trace mode ON value
|
||||
// Thread-trace mode ON value: always 0 in this implementation
// (the mode is written through sqtt_ctrl_value() instead).
static uint32_t sqtt_mode_on_value() {
  return 0u;
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
// Returns the SQ_THREAD_TRACE_BUF0_BASE value for a trace buffer address.
//   base_addr - buffer address; it is shifted right by TT_BUFF_ALIGN_SHIFT and
//               the low 32 bits of the result go into the BASE_LO field
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_BASE, BASE_LO,
                            Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
#else
  return 0;
#endif
}
|
||||
|
||||
// High part of the trace buffer base: always 0 here; base_addr is not read.
// (sqtt_buffer_size_value() carries a BASE_HI field instead.)
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
  return 0u;
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
// Returns the SQ_THREAD_TRACE_BUF0_SIZE value.
//   size_val - buffer size; shifted right by TT_BUFF_ALIGN_SHIFT before being
//              placed in the SIZE field
//   base_hi  - placed unchanged in the BASE_HI field
// When SQTT_PRIM_ENABLED is false or undefined the function returns 0.
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
#if SQTT_PRIM_ENABLED
  return SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT) |
         SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, BASE_HI, base_hi);
#else
  return 0;
#endif
}
|
||||
|
||||
// Stub: always returns 0; size_val is not read.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  return 0u;
}
|
||||
|
||||
// Stub: always returns 0 regardless of enableSqgEvents.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  return 0u;
}
|
||||
|
||||
// Zero buffer-size value: always 0.
static uint32_t sqtt_zero_size_value() {
  return 0u;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
static uint32_t sqtt_ctrl_value(bool on) {
|
||||
uint32_t sq_thread_trace_ctrl =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RT_FREQ, 2) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
|
||||
return sq_thread_trace_ctrl;
|
||||
}
|
||||
|
||||
// SPM primitives
|
||||
// Returns the fixed 16-bit mux-select code 0xF0F0 used for the SPM timestamp.
static uint16_t spm_timestamp_muxsel() {
  const uint16_t timestamp_code = 0xF0F0;
  return timestamp_code;
}
|
||||
|
||||
// Masks applied to thread-trace status values.
enum ESQTT_STATUS_MASK {
  // Mask to check if memory error was received
  TT_CONTROL_UTC_ERR_MASK = 0x1000000,
  // TODO: Navi has 2 full bits on status2, one for each buffer
  // Currently 0, so AND-ing any value with this mask always yields 0.
  TT_CONTROL_FULL_MASK = 0x0,
  // Low 29 bits.
  TT_WRITE_PTR_MASK = 0x1FFFFFFF
};
|
||||
|
||||
// Returns a mask with only bit 25 set (the busy bit).
static uint32_t sqtt_busy_mask() {
  const uint32_t kBusyBit = 25;
  return 1u << kBusyBit;
}
|
||||
|
||||
// Returns a mask covering 8 consecutive bits starting at bit 2
// (bits 2..9, i.e. 0x3FC), one per pipe.
static uint32_t sqtt_pending_mask() {
  const uint32_t kFirstPipeBit = 2;
  const uint32_t kPipeCount = 8;
  // (1 << end) - (1 << start) sets every bit in [start, end).
  return (1u << (kPipeCount + kFirstPipeBit)) - (1u << kFirstPipeBit);
}
|
||||
};
|
||||
|
||||
} // namespace gfx11
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX11_PRIMITIVES_H_
|
||||
@@ -0,0 +1,285 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_BLOCKINFO_H_
|
||||
#define _GFX12_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
#define __BLOCK_ID(block) HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_##block
|
||||
// Private PMC Counter BlockId is defined here
|
||||
// Public PMC Counter BlockId is defined in hsa_ven_amd_aqlprofile.h
|
||||
enum CounterBlockId {
|
||||
__BLOCK_ID(RLC) = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER,
|
||||
__BLOCK_ID(CPG),
|
||||
__BLOCK_ID(GRBMH),
|
||||
__BLOCK_ID(GRBMA),
|
||||
__BLOCK_ID(SQG),
|
||||
|
||||
// mem blocks
|
||||
__BLOCK_ID(CHA),
|
||||
__BLOCK_ID(CHC),
|
||||
__BLOCK_ID(GLARBA),
|
||||
__BLOCK_ID(GLARBC),
|
||||
__BLOCK_ID(GC_CANE),
|
||||
__BLOCK_ID(GC_FFBM),
|
||||
__BLOCK_ID(GC_NHTTLB),
|
||||
__BLOCK_ID(GC_L2TLB),
|
||||
__BLOCK_ID(GC_UTCL1),
|
||||
__BLOCK_ID(GC_UTCL2),
|
||||
__BLOCK_ID(GC_VML2),
|
||||
|
||||
__BLOCK_ID(GCEA_SE),
|
||||
|
||||
// New SDMA Perfmon interface, comparing to the original SDMA PerfCnt. gfx12
|
||||
// supports both and they should provide the same counter events. We might
|
||||
// remove SDMA PerfCnt support in aqlprofile in the future since it is easier
|
||||
// to program Perfmon
|
||||
__BLOCK_ID(SDMA_PM),
|
||||
|
||||
// Counters retrieved by KFD
|
||||
IommuV2CounterBlockId,
|
||||
KernelDriverCounterBlockId,
|
||||
|
||||
CpPipeStatsCounterBlockId,
|
||||
HwInfoCounterBlockId,
|
||||
|
||||
LastCounterBlockId = HwInfoCounterBlockId,
|
||||
};
|
||||
|
||||
// Define SPM Counter BlockId
|
||||
// SPM ids of the global blocks. Values are consecutive starting at 0 (CPG)
// and ending at SPM_GLOBAL_BLOCK_NAME_LAST (CANE); enumerator order defines
// the numeric values.
enum SpmGlobalBlockId {
  SPM_GLOBAL_BLOCK_NAME_FIRST = 0,
  SPM_GLOBAL_BLOCK_NAME_CPG = SPM_GLOBAL_BLOCK_NAME_FIRST,
  SPM_GLOBAL_BLOCK_NAME_CPC,
  SPM_GLOBAL_BLOCK_NAME_CPF,
  SPM_GLOBAL_BLOCK_NAME_GDS,
  SPM_GLOBAL_BLOCK_NAME_GCR,
  SPM_GLOBAL_BLOCK_NAME_PH,
  SPM_GLOBAL_BLOCK_NAME_GE1,
  SPM_GLOBAL_BLOCK_NAME_GL2A,
  SPM_GLOBAL_BLOCK_NAME_GL2C,
  SPM_GLOBAL_BLOCK_NAME_SDMA,
  SPM_GLOBAL_BLOCK_NAME_GUS,
  SPM_GLOBAL_BLOCK_NAME_EA,
  SPM_GLOBAL_BLOCK_NAME_CHA,
  SPM_GLOBAL_BLOCK_NAME_CHC,
  SPM_GLOBAL_BLOCK_NAME_CHCG,
  SPM_GLOBAL_BLOCK_NAME_ATCL2,
  SPM_GLOBAL_BLOCK_NAME_VML2,
  SPM_GLOBAL_BLOCK_NAME_GE2_SE,
  SPM_GLOBAL_BLOCK_NAME_GE2_DIST,
  SPM_GLOBAL_BLOCK_NAME_FFBM,
  SPM_GLOBAL_BLOCK_NAME_CANE,
  SPM_GLOBAL_BLOCK_NAME_LAST = SPM_GLOBAL_BLOCK_NAME_CANE,
};
|
||||
|
||||
// SPM ids of the per-SE blocks. Values are consecutive starting at 0 (CB)
// and ending at SPM_SE_BLOCK_NAME_LAST (UTCL1); enumerator order defines the
// numeric values.
enum SpmSeBlockId {
  SPM_SE_BLOCK_NAME_FIRST = 0,
  SPM_SE_BLOCK_NAME_CB = SPM_SE_BLOCK_NAME_FIRST,
  SPM_SE_BLOCK_NAME_DB,
  SPM_SE_BLOCK_NAME_PA,
  SPM_SE_BLOCK_NAME_SX,
  SPM_SE_BLOCK_NAME_SC,
  SPM_SE_BLOCK_NAME_TA,
  SPM_SE_BLOCK_NAME_TD,
  SPM_SE_BLOCK_NAME_TCP,
  SPM_SE_BLOCK_NAME_SPI,
  SPM_SE_BLOCK_NAME_SQG,
  SPM_SE_BLOCK_NAME_GL1A,
  SPM_SE_BLOCK_NAME_RMI,
  SPM_SE_BLOCK_NAME_GL1C,
  SPM_SE_BLOCK_NAME_GL1CG,
  SPM_SE_BLOCK_NAME_CBR,
  SPM_SE_BLOCK_NAME_DBR,
  SPM_SE_BLOCK_NAME_GL1H,
  SPM_SE_BLOCK_NAME_SQC,
  SPM_SE_BLOCK_NAME_PC,
  SPM_SE_BLOCK_NAME_EA,
  SPM_SE_BLOCK_NAME_GE,
  SPM_SE_BLOCK_NAME_GL2A,
  SPM_SE_BLOCK_NAME_GL2C,
  SPM_SE_BLOCK_NAME_WGS,
  SPM_SE_BLOCK_NAME_GL1XA,
  SPM_SE_BLOCK_NAME_GL1XC,
  SPM_SE_BLOCK_NAME_UTCL1,
  SPM_SE_BLOCK_NAME_LAST = SPM_SE_BLOCK_NAME_UTCL1,
};
|
||||
|
||||
namespace gfx1201 {
// IP versions for Radeon RX 9070
// ip_block : gc_12_0_1
// ip_block : athub_4_1_0
// ip_block : umc_8_14_0
// ip_block : df_4_15_1
// ip_block : pcie_6_1_0

// Number of block instances
// Reference: global_features.h (from gfxip header file package)
//            rspm_config.pm (from design configuration files)
// The following default values are generated from Radeon RX 9070, the first product of the
// RDNA 4 lineup. It could change for other products, and the change will be made in
// [PRODUCT_NAME]_factory.h
//
// NOTE(review): the PA/SC blocks are spelled Su/Sc in this group, Su/PaSc in the
// NumCounters group and PaSu/Sc in the MaxEvent group; Ph has no NumInstances entry.
// Names are kept as-is because other files may reference them.
static const uint32_t GrbmCounterBlockNumInstances = 1;
static const uint32_t RlcCounterBlockNumInstances = 1;
static const uint32_t CpgCounterBlockNumInstances = 1;
static const uint32_t CpcCounterBlockNumInstances = 1;
static const uint32_t CpfCounterBlockNumInstances = 1;
static const uint32_t GcrCounterBlockNumInstances = 1;
static const uint32_t Ge1CounterBlockNumInstances = 1;
static const uint32_t Gl2aCounterBlockNumInstances = 4;   // GFX_CPWD__NUM_GL2A_PER_CPWD
static const uint32_t Gl2cCounterBlockNumInstances = 32;  // GFX_CPWD__NUM_GL2C_PER_CPWD
static const uint32_t GceaCounterBlockNumInstances = 36;  // GFX_CPWD__NUM_EA_PER_CPWD
static const uint32_t ChaCounterBlockNumInstances = 1;
static const uint32_t ChcCounterBlockNumInstances = 4;  // GFX_CPWD__NUM_CHC
static const uint32_t Ge2DistCounterBlockNumInstances = 1;
static const uint32_t SdmaCounterBlockNumInstances = 2;  // GFX_CPWD__NUM_SDMA_PER_CPWD
static const uint32_t GcVml2CounterBlockNumInstances = 1;
static const uint32_t GcMcVml2CounterBlockNumInstances = 1;
static const uint32_t GcUtcl2CounterBlockNumInstances = 1;
static const uint32_t GrbmhCounterBlockNumInstances = 1;
static const uint32_t CbCounterBlockNumInstances = 2;  // GFX_SE__NUM_RB_PER_SA
static const uint32_t DbCounterBlockNumInstances = 2;  // GFX_SE__NUM_RB_PER_SA
static const uint32_t SuCounterBlockNumInstances = 1;  // GFX_SE__NUM_PA_PER_SE
static const uint32_t SxCounterBlockNumInstances = 1;
static const uint32_t ScCounterBlockNumInstances = 2;   // GFX_SE__NUM_PACKER_PER_SA
static const uint32_t TaCounterBlockNumInstances = 2;   // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t TdCounterBlockNumInstances = 2;   // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t TcpCounterBlockNumInstances = 2;  // GFX_SE__NUM_ROWS_PER_WGP
static const uint32_t SpiCounterBlockNumInstances = 1;
static const uint32_t SqgCounterBlockNumInstances = 1;
static const uint32_t Gl1aCounterBlockNumInstances = 1;
static const uint32_t RmiCounterBlockNumInstances = 2;   // GFX_SE__NUM_RMI_PER_SA
static const uint32_t Gl1cCounterBlockNumInstances = 4;  // GFX_SE__NUM_GL1C_PER_SA
static const uint32_t SqcCounterBlockNumInstances = 1;
static const uint32_t PcCounterBlockNumInstances = 1;
static const uint32_t GceaSeCounterBlockNumInstances = 4;
static const uint32_t GeCounterBlockNumInstances = 1;
static const uint32_t WgsCounterBlockNumInstances = 1;
static const uint32_t Gl1xaCounterBlockNumInstances = 1;
static const uint32_t Gl1xcCounterBlockNumInstances = 4;  // GFX_SE__NUM_GL1C_PER_SA
static const uint32_t GcUtcl1CounterBlockNumInstances = 2;

static const uint32_t SdmaCounterBlockMaxInstances = 8;
static const uint32_t UmcCounterBlockMaxInstances = 32;

// Number of block counter registers - Auto-generated from chip_offset_byte.h, edit with extra
// caution.
// Reference: chip_offset_byte.h (from gfxip header file package)
// The following default values are generated from Radeon RX 9070, the first product of the
// RDNA 4 lineup. It could change for other products, and the change will be made in
// [PRODUCT_NAME]_factory.h
//
static const uint32_t GrbmCounterBlockNumCounters = 2;
static const uint32_t RlcCounterBlockNumCounters = 2;
static const uint32_t CpgCounterBlockNumCounters = 2;
static const uint32_t CpcCounterBlockNumCounters = 2;
static const uint32_t CpfCounterBlockNumCounters = 2;
static const uint32_t GcrCounterBlockNumCounters = 2;
static const uint32_t PhCounterBlockNumCounters = 8;
static const uint32_t Ge1CounterBlockNumCounters = 4;
static const uint32_t Gl2aCounterBlockNumCounters = 4;
static const uint32_t Gl2cCounterBlockNumCounters = 4;
static const uint32_t GceaCounterBlockNumCounters = 2;
static const uint32_t ChaCounterBlockNumCounters = 4;
static const uint32_t ChcCounterBlockNumCounters = 4;
static const uint32_t Ge2DistCounterBlockNumCounters = 4;
static const uint32_t SdmaCounterBlockNumCounters = 2;
static const uint32_t GcVml2CounterBlockNumCounters = 2;
static const uint32_t GcMcVml2CounterBlockNumCounters = 1;
static const uint32_t GcUtcl2CounterBlockNumCounters = 1;
static const uint32_t GrbmhCounterBlockNumCounters = 2;
static const uint32_t CbCounterBlockNumCounters = 4;
static const uint32_t DbCounterBlockNumCounters = 4;
static const uint32_t SuCounterBlockNumCounters = 4;
static const uint32_t SxCounterBlockNumCounters = 4;
static const uint32_t PaScCounterBlockNumCounters = 8;
static const uint32_t TaCounterBlockNumCounters = 2;
static const uint32_t TdCounterBlockNumCounters = 2;
static const uint32_t TcpCounterBlockNumCounters = 4;
static const uint32_t SpiCounterBlockNumCounters = 6;
static const uint32_t SqgCounterBlockNumCounters = 8;
static const uint32_t Gl1aCounterBlockNumCounters = 4;
static const uint32_t RmiCounterBlockNumCounters = 4;
static const uint32_t Gl1cCounterBlockNumCounters = 4;
static const uint32_t SqcCounterBlockNumCounters = 16;
static const uint32_t PcCounterBlockNumCounters = 4;
static const uint32_t GceaSeCounterBlockNumCounters = 2;
static const uint32_t GeCounterBlockNumCounters = 4;
static const uint32_t WgsCounterBlockNumCounters = 2;
static const uint32_t Gl1xaCounterBlockNumCounters = 4;
static const uint32_t Gl1xcCounterBlockNumCounters = 4;
static const uint32_t GcUtcl1CounterBlockNumCounters = 4;

// Block counters max event value - Auto-generated from chip_enum.h, edit with extra caution
// Reference: chip_enum.h (from gfxip header file package)
// The following default values are generated from Radeon RX 9070, the first product of the
// RDNA 4 lineup. It could change for other products, and the change will be made in
// [PRODUCT_NAME]_factory.h
//
static const uint32_t GrbmCounterBlockMaxEvent = 51;
static const uint32_t RlcCounterBlockMaxEvent = 6;
static const uint32_t CpgCounterBlockMaxEvent = 30;
static const uint32_t CpcCounterBlockMaxEvent = 55;
static const uint32_t CpfCounterBlockMaxEvent = 4;
static const uint32_t GcrCounterBlockMaxEvent = 151;
static const uint32_t PhCounterBlockMaxEvent = 1023;
static const uint32_t Ge1CounterBlockMaxEvent = 54;
static const uint32_t Gl2aCounterBlockMaxEvent = 114;
static const uint32_t Gl2cCounterBlockMaxEvent = 249;
static const uint32_t GceaCounterBlockMaxEvent = 32;
static const uint32_t ChaCounterBlockMaxEvent = 25;
static const uint32_t ChcCounterBlockMaxEvent = 94;
static const uint32_t Ge2DistCounterBlockMaxEvent = 188;
static const uint32_t SdmaCounterBlockMaxEvent = 125;
static const uint32_t GcVml2CounterBlockMaxEvent = 90;
static const uint32_t GcMcVml2CounterBlockMaxEvent =
    1;  // This is handled by GCMC_VM_L2_PERFCOUNTER0_CFG
static const uint32_t GcUtcl2CounterBlockMaxEvent = 36;
static const uint32_t GrbmhCounterBlockMaxEvent = 25;
static const uint32_t CbCounterBlockMaxEvent = 315;
static const uint32_t DbCounterBlockMaxEvent = 441;
static const uint32_t PaSuCounterBlockMaxEvent = 828;
static const uint32_t SxCounterBlockMaxEvent = 81;
static const uint32_t ScCounterBlockMaxEvent = 821;
static const uint32_t TaCounterBlockMaxEvent = 254;
static const uint32_t TdCounterBlockMaxEvent = 271;
static const uint32_t TcpCounterBlockMaxEvent = 99;
static const uint32_t SpiCounterBlockMaxEvent = 318;
static const uint32_t SqgCounterBlockMaxEvent = 45;
static const uint32_t Gl1aCounterBlockMaxEvent = 21;
static const uint32_t RmiCounterBlockMaxEvent = 138;
static const uint32_t Gl1cCounterBlockMaxEvent = 121;
static const uint32_t SqcCounterBlockMaxEvent = 511;
static const uint32_t PcCounterBlockMaxEvent = 164;
static const uint32_t GceaSeCounterBlockMaxEvent = 32;
static const uint32_t GeCounterBlockMaxEvent = 103;
static const uint32_t WgsCounterBlockMaxEvent = 4;
static const uint32_t Gl1xaCounterBlockMaxEvent = 21;
static const uint32_t Gl1xcCounterBlockMaxEvent = 109;
static const uint32_t GcUtcl1CounterBlockMaxEvent = 71;
}  // namespace gfx1201
|
||||
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_BLOCKINFO_H_
|
||||
@@ -0,0 +1,158 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_BLOCKTABLE_H_
|
||||
#define _GFX12_BLOCKTABLE_H_
|
||||
|
||||
// Expands to one brace-enclosed register-info initializer for counter slot INDEX of BLOCK.
// In order: the reg<BLOCK>_PERFCOUNTER<INDEX>_SELECT address, the caller-supplied CTRL value,
// then the matching _LO and _HI addresses.
#define REG_INFO_WITH_CTRL(BLOCK, CTRL, INDEX)                       \
  {                                                                  \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_SELECT),   \
    CTRL,                                                            \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_LO),       \
    REG_32B_ADDR(GC, 0, reg##BLOCK##_PERFCOUNTER##INDEX##_HI)        \
  }

// REG_INFO_WITH_CTRL_<N> yields a comma-separated list of N initializers (slots 0..N-1);
// each level appends one slot to the previous level's list.
#define REG_INFO_WITH_CTRL_1(BLOCK, CTRL) REG_INFO_WITH_CTRL(BLOCK, CTRL, 0)
#define REG_INFO_WITH_CTRL_2(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_1(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 1)
#define REG_INFO_WITH_CTRL_3(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_2(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 2)
#define REG_INFO_WITH_CTRL_4(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_3(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 3)
#define REG_INFO_WITH_CTRL_5(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_4(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 4)
#define REG_INFO_WITH_CTRL_6(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_5(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 5)
#define REG_INFO_WITH_CTRL_7(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_6(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 6)
#define REG_INFO_WITH_CTRL_8(BLOCK, CTRL) \
  REG_INFO_WITH_CTRL_7(BLOCK, CTRL), REG_INFO_WITH_CTRL(BLOCK, CTRL, 7)

// REG_INFO_<N>: same lists, with REG_32B_NULL passed in the CTRL position.
#define REG_INFO_1(BLOCK) REG_INFO_WITH_CTRL_1(BLOCK, REG_32B_NULL)
#define REG_INFO_2(BLOCK) REG_INFO_WITH_CTRL_2(BLOCK, REG_32B_NULL)
#define REG_INFO_3(BLOCK) REG_INFO_WITH_CTRL_3(BLOCK, REG_32B_NULL)
#define REG_INFO_4(BLOCK) REG_INFO_WITH_CTRL_4(BLOCK, REG_32B_NULL)
#define REG_INFO_5(BLOCK) REG_INFO_WITH_CTRL_5(BLOCK, REG_32B_NULL)
#define REG_INFO_6(BLOCK) REG_INFO_WITH_CTRL_6(BLOCK, REG_32B_NULL)
#define REG_INFO_7(BLOCK) REG_INFO_WITH_CTRL_7(BLOCK, REG_32B_NULL)
#define REG_INFO_8(BLOCK) REG_INFO_WITH_CTRL_8(BLOCK, REG_32B_NULL)
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
namespace gfx1201 {
|
||||
// Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution
|
||||
// One table per block; the REG_INFO_<N> suffix is the number of counter slots listed.
static const CounterRegInfo GrbmCounterRegAddr[] = {REG_INFO_2(GRBM)};
static const CounterRegInfo RlcCounterRegAddr[] = {REG_INFO_2(RLC)};
static const CounterRegInfo CpgCounterRegAddr[] = {REG_INFO_2(CPG)};
static const CounterRegInfo CpcCounterRegAddr[] = {REG_INFO_2(CPC)};
static const CounterRegInfo CpfCounterRegAddr[] = {REG_INFO_2(CPF)};
static const CounterRegInfo GcrCounterRegAddr[] = {
    REG_INFO_WITH_CTRL_2(GCR, REG_32B_ADDR(GC, 0, regGCR_GENERAL_CNTL))};
static const CounterRegInfo PaPhCounterRegAddr[] = {REG_INFO_8(PA_PH)};
static const CounterRegInfo Ge1CounterRegAddr[] = {REG_INFO_4(GE1)};
static const CounterRegInfo Gl2aCounterRegAddr[] = {REG_INFO_4(GL2A)};
static const CounterRegInfo Gl2cCounterRegAddr[] = {REG_INFO_4(GL2C)};
static const CounterRegInfo GceaCounterRegAddr[] = {REG_INFO_2(GC_EA_CPWD)};
static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)};
static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)};
static const CounterRegInfo Ge2CounterRegAddr[] = {REG_INFO_4(GE2_DIST)};
// Two slots for each of the SDMA0 and SDMA1 register sets.
static const CounterRegInfo SdmaCounterRegAddr[] = {REG_INFO_2(SDMA0), REG_INFO_2(SDMA1)};
// Disabled macro-generated forms, kept for reference:
// static const CounterRegInfo GcVml2CounterRegAddr[] = {REG_INFO_2(GCVML2)};
// static const CounterRegInfo GcMcVml2CounterRegAddr[] = {REG_INFO_1(GCMC_VM_L2)};
// static const CounterRegInfo GcUtcl2CounterRegAddr[] = {REG_INFO_1(GCUTCL2)};
static const CounterRegInfo GrbmhCounterRegAddr[] = {REG_INFO_2(GRBMH)};
static const CounterRegInfo CbCounterRegAddr[] = {REG_INFO_4(CB)};
static const CounterRegInfo DbCounterRegAddr[] = {REG_INFO_4(DB)};
static const CounterRegInfo PaSuCounterRegAddr[] = {REG_INFO_4(PA_SU)};
static const CounterRegInfo SxCounterRegAddr[] = {REG_INFO_4(SX)};
static const CounterRegInfo PaScCounterRegAddr[] = {REG_INFO_8(PA_SC)};
static const CounterRegInfo TaCounterRegAddr[] = {REG_INFO_2(TA)};
static const CounterRegInfo TdCounterRegAddr[] = {REG_INFO_2(TD)};
static const CounterRegInfo TcpCounterRegAddr[] = {REG_INFO_4(TCP)};
static const CounterRegInfo SpiCounterRegAddr[] = {REG_INFO_6(SPI)};
static const CounterRegInfo SqgCounterRegAddr[] = {
    REG_INFO_WITH_CTRL_8(SQG, REG_32B_ADDR(GC, 0, regSQG_PERFCOUNTER_CTRL))};
static const CounterRegInfo Gl1aCounterRegAddr[] = {REG_INFO_4(GL1A)};
static const CounterRegInfo RmiCounterRegAddr[] = {REG_INFO_4(RMI)};
static const CounterRegInfo Gl1cCounterRegAddr[] = {REG_INFO_4(GL1C)};
// static const CounterRegInfo SqcCounterRegAddr[] = {REG_INFO_WITH_CTRL_16(SQ, regSQ_PERFCOUNTER_CTRL)};
static const CounterRegInfo PcCounterRegAddr[] = {REG_INFO_4(PC)};
static const CounterRegInfo GeCounterRegAddr[] = {REG_INFO_4(GE2_SE)};
static const CounterRegInfo GceaSeCounterRegAddr[] = {REG_INFO_2(GC_EA_SE)};
// static const CounterRegInfo WgsCounterRegAddr[] = {REG_INFO_2(WGS)};
static const CounterRegInfo Gl1xaCounterRegAddr[] = {REG_INFO_4(GL1XA)};
static const CounterRegInfo Gl1xcCounterRegAddr[] = {REG_INFO_4(GL1XC)};
static const CounterRegInfo GcUtcl1CounterRegAddr[] = {REG_INFO_4(UTCL1)};
|
||||
|
||||
// Special handling of SQC:
|
||||
// SQC only supports 32bit PMC, only regSQ_PERFCOUNTER#even_number#_SELECT is
|
||||
// used by PMC. regSQ_PERFCOUNTER#odd_number#_SELECT is used only by SPM
|
||||
// Local helper: one SQ entry pairing regSQ_PERFCOUNTER<SEL_IDX>_SELECT with
// regSQ_PERFCOUNTER<CNT_IDX>_LO, regSQ_PERFCOUNTER_CTRL in the control position, and no
// high-dword register (REG_32B_NULL).
#define SQC_PMC_REG_INFO(SEL_IDX, CNT_IDX)                         \
  {                                                                \
    REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER##SEL_IDX##_SELECT),      \
    REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL),                   \
    REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER##CNT_IDX##_LO),          \
    REG_32B_NULL                                                   \
  }
// Even-numbered select registers 0..14 are paired with result registers 0..7.
static const CounterRegInfo SqcCounterRegAddr[] = {
    SQC_PMC_REG_INFO(0, 0),  SQC_PMC_REG_INFO(2, 1),  SQC_PMC_REG_INFO(4, 2),
    SQC_PMC_REG_INFO(6, 3),  SQC_PMC_REG_INFO(8, 4),  SQC_PMC_REG_INFO(10, 5),
    SQC_PMC_REG_INFO(12, 6), SQC_PMC_REG_INFO(14, 7)};
#undef SQC_PMC_REG_INFO
|
||||
|
||||
// Special handling of GCVML2:
|
||||
// Two entries built from the regGCVML2_PERFCOUNTER2_<n>_{SELECT,LO,HI} registers; the
// control position is REG_32B_NULL.
static const CounterRegInfo GcVml2CounterRegAddr[] = {
    {
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_SELECT),
        REG_32B_NULL,
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_LO),
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_0_HI),
    },
    {
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_SELECT),
        REG_32B_NULL,
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_LO),
        REG_32B_ADDR(GC, 0, regGCVML2_PERFCOUNTER2_1_HI),
    },
};
|
||||
|
||||
// Special handling of GCMC_VM_L2:
|
||||
// Single entry: the PERFCOUNTER0_CFG register in the select position, PERFCOUNTER_RSLT_CNTL
// in the control position, and the shared PERFCOUNTER_LO/HI result registers.
static const CounterRegInfo GcMcVml2CounterRegAddr[] = {
    {
        REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER0_CFG),
        REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL),
        REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_LO),
        REG_32B_ADDR(GC, 0, regGCMC_VM_L2_PERFCOUNTER_HI),
    },
};
|
||||
|
||||
// Special handling of GCUTCL2: Not sure if this is SPM-only
|
||||
// Single entry: the PERFCOUNTER0_CFG register in the select position, PERFCOUNTER_RSLT_CNTL
// in the control position, and the shared PERFCOUNTER_LO/HI result registers.
static const CounterRegInfo GcUtcl2CounterRegAddr[] = {
    {
        REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER0_CFG),
        REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_RSLT_CNTL),
        REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_LO),
        REG_32B_ADDR(GC, 0, regGCUTCL2_PERFCOUNTER_HI),
    },
};
|
||||
|
||||
// Global blocks: ATCL2 CHA CHC CPC CPF CPG EA FFBM GCR GL2A GL2C GRBM RLC SDMA VML2 UTCL2
|
||||
// (Graphics only - not supported in ROCm): GE1 GE2_DIST PH
|
||||
// (Graphics only): CPG is for graphics, but it is not physically removed for compute products
|
||||
// (Not enabled for gfx12): CHCG GDS GUS
|
||||
// Global block descriptors. Full entries list, in order: name, block id, instance count,
// max event, counter count, register table, select-value callback, attribute flags, and
// (for the entries that supply them) a NULL slot followed by the SPM block name.
static const GpuBlockInfo GcAtcl2CounterBlockInfo = {"ATCL2", __BLOCK_ID(ATCL2)};  // Placeholder now
static const GpuBlockInfo ChaCounterBlockInfo = {
    "CHA", __BLOCK_ID(CHA), ChaCounterBlockNumInstances, ChaCounterBlockMaxEvent,
    ChaCounterBlockNumCounters, ChaCounterRegAddr, gfx12_cntx_prim::select_value_Cha,
    CounterBlockTcAttr};
static const GpuBlockInfo ChcCounterBlockInfo = {
    "CHC", __BLOCK_ID(CHC), ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent,
    ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value_Chc,
    CounterBlockTcAttr};
static const GpuBlockInfo CpcCounterBlockInfo = {
    "CPC", __BLOCK_ID(CPC), CpcCounterBlockNumInstances, CpcCounterBlockMaxEvent,
    CpcCounterBlockNumCounters, CpcCounterRegAddr, gfx12_cntx_prim::select_value_Cpc,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPC};
static const GpuBlockInfo CpfCounterBlockInfo = {
    "CPF", __BLOCK_ID(CPF), CpfCounterBlockNumInstances, CpfCounterBlockMaxEvent,
    CpfCounterBlockNumCounters, CpfCounterRegAddr, gfx12_cntx_prim::select_value_Cpf,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPF};
static const GpuBlockInfo CpgCounterBlockInfo = {
    "CPG", __BLOCK_ID(CPG), CpgCounterBlockNumInstances, CpgCounterBlockMaxEvent,
    CpgCounterBlockNumCounters, CpgCounterRegAddr, gfx12_cntx_prim::select_value_Cpg,
    CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_CPG};
static const GpuBlockInfo GceaCounterBlockInfo = {
    "GCEA", __BLOCK_ID(GCEA), GceaCounterBlockNumInstances, GceaCounterBlockMaxEvent,
    GceaCounterBlockNumCounters, GceaCounterRegAddr, gfx12_cntx_prim::select_value_Gcea,
    0};
static const GpuBlockInfo GcFfbmCounterBlockInfo = {"GC_FFBM", __BLOCK_ID(GC_FFBM)};  // Placeholder now
static const GpuBlockInfo GcrCounterBlockInfo = {
    "GCR", __BLOCK_ID(GCR), GcrCounterBlockNumInstances, GcrCounterBlockMaxEvent,
    GcrCounterBlockNumCounters, GcrCounterRegAddr, gfx12_cntx_prim::select_value_Gcr,
    CounterBlockTcAttr};
static const GpuBlockInfo Gl2aCounterBlockInfo = {
    "GL2A", __BLOCK_ID(GL2A), Gl2aCounterBlockNumInstances, Gl2aCounterBlockMaxEvent,
    Gl2aCounterBlockNumCounters, Gl2aCounterRegAddr, gfx12_cntx_prim::select_value_Gl2a,
    CounterBlockTcAttr};
static const GpuBlockInfo Gl2cCounterBlockInfo = {
    "GL2C", __BLOCK_ID(GL2C), Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent,
    Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value_Gl2c,
    CounterBlockTcAttr};
static const GpuBlockInfo GrbmCounterBlockInfo = {
    "GRBM", __BLOCK_ID(GRBM), GrbmCounterBlockNumInstances, GrbmCounterBlockMaxEvent,
    GrbmCounterBlockNumCounters, GrbmCounterRegAddr, gfx12_cntx_prim::select_value_Grbm,
    CounterBlockGRBMAttr};
static const GpuBlockInfo RlcCounterBlockInfo = {
    "RLC", __BLOCK_ID(RLC), RlcCounterBlockNumInstances, RlcCounterBlockMaxEvent,
    RlcCounterBlockNumCounters, RlcCounterRegAddr, gfx12_cntx_prim::select_value_Rlc,
    0};
static const GpuBlockInfo SdmaPmCounterBlockInfo = {
    "SDMA_PM", __BLOCK_ID(SDMA_PM), SdmaCounterBlockNumInstances, SdmaCounterBlockMaxEvent,
    SdmaCounterBlockNumCounters, SdmaCounterRegAddr, gfx12_cntx_prim::select_value_SdmaPm,
    CounterBlockExplInstAttr|CounterBlockSpmGlobalAttr, NULL, SPM_GLOBAL_BLOCK_NAME_SDMA};
static const GpuBlockInfo GcVml2CounterBlockInfo = {"GC_VML2", __BLOCK_ID(GC_VML2)};  // Placeholder now
static const GpuBlockInfo GcUtcl2CounterBlockInfo = {"GC_UTCL2", __BLOCK_ID(GC_UTCL2)};  // Placeholder now
|
||||
// SE blocks: EA_SE GL2A GL2C GRBMH SPI SQG UTCL1
|
||||
// (Graphics only - not supported in ROCm): GE GL1XA GL1XC PA PC WGS
|
||||
// Descriptors for the blocks that carry CounterBlockSeAttr only (no SA or WGP attribute).
static const GpuBlockInfo GceaSeCounterBlockInfo = {
    "GCEA_SE", __BLOCK_ID(GCEA_SE), GceaSeCounterBlockNumInstances, GceaSeCounterBlockMaxEvent,
    GceaSeCounterBlockNumCounters, GceaSeCounterRegAddr, gfx12_cntx_prim::select_value_GceaSe,
    CounterBlockSeAttr};
static const GpuBlockInfo GrbmhCounterBlockInfo = {
    "GRBMH", __BLOCK_ID(GRBMH), GrbmhCounterBlockNumInstances, GrbmhCounterBlockMaxEvent,
    GrbmhCounterBlockNumCounters, GrbmhCounterRegAddr, gfx12_cntx_prim::select_value_Grbmh,
    CounterBlockSeAttr};
static const GpuBlockInfo SpiCounterBlockInfo = {
    "SPI", __BLOCK_ID(SPI), SpiCounterBlockNumInstances, SpiCounterBlockMaxEvent,
    SpiCounterBlockNumCounters, SpiCounterRegAddr, gfx12_cntx_prim::select_value_Spi,
    CounterBlockSeAttr|CounterBlockSPIAttr, NULL, SPM_SE_BLOCK_NAME_SPI};
// SQG uses the shared sq_select_value callback rather than a per-block one.
static const GpuBlockInfo SqgCounterBlockInfo = {
    "SQG", __BLOCK_ID(SQG), SqgCounterBlockNumInstances, SqgCounterBlockMaxEvent,
    SqgCounterBlockNumCounters, SqgCounterRegAddr, gfx12_cntx_prim::sq_select_value,
    CounterBlockSeAttr|CounterBlockSqAttr, NULL, SPM_SE_BLOCK_NAME_SQG};
static const GpuBlockInfo GcUtcl1CounterBlockInfo = {
    "GC_UTCL1", __BLOCK_ID(GC_UTCL1), GcUtcl1CounterBlockNumInstances, GcUtcl1CounterBlockMaxEvent,
    GcUtcl1CounterBlockNumCounters, GcUtcl1CounterRegAddr, gfx12_cntx_prim::select_value_GcUtcl1,
    CounterBlockSeAttr, NULL, SPM_SE_BLOCK_NAME_UTCL1};
|
||||
// SA blocks: GL1A GL1C
|
||||
// (Graphics only - not supported in ROCm): CB DB SC SX
|
||||
// (Not enabled for gfx12): GL1CG
|
||||
// GL1A and GL1C share the same attribute set: SE | SA | TC.
static const GpuBlockInfo Gl1aCounterBlockInfo = {
    "GL1A", __BLOCK_ID(GL1A), Gl1aCounterBlockNumInstances, Gl1aCounterBlockMaxEvent,
    Gl1aCounterBlockNumCounters, Gl1aCounterRegAddr, gfx12_cntx_prim::select_value_Gl1a,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
static const GpuBlockInfo Gl1cCounterBlockInfo = {
    "GL1C", __BLOCK_ID(GL1C), Gl1cCounterBlockNumInstances, Gl1cCounterBlockMaxEvent,
    Gl1cCounterBlockNumCounters, Gl1cCounterRegAddr, gfx12_cntx_prim::select_value_Gl1c,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockTcAttr};
|
||||
// WGP blocks: SQC TA TCP TD
|
||||
// Descriptors carrying SE | SA | WGP attributes. The SQC entry is registered under the
// name "SQ" and reuses the shared sq_select_value callback.
static const GpuBlockInfo SqcCounterBlockInfo = {
    "SQ", __BLOCK_ID(SQ), SqcCounterBlockNumInstances, SqcCounterBlockMaxEvent,
    SqcCounterBlockNumCounters, SqcCounterRegAddr, gfx12_cntx_prim::sq_select_value,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockSqAttr,
    NULL, SPM_SE_BLOCK_NAME_SQC};
static const GpuBlockInfo TaCounterBlockInfo = {
    "TA", __BLOCK_ID(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent,
    TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value_Ta,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
static const GpuBlockInfo TdCounterBlockInfo = {
    "TD", __BLOCK_ID(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent,
    TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value_Td,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
static const GpuBlockInfo TcpCounterBlockInfo = {
    "TCP", __BLOCK_ID(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent,
    TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value_Tcp,
    CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr,
    NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
|
||||
} // namespace gfx1201
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_BLOCKTABLE_H_
|
||||
@@ -0,0 +1,651 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef _GFX12_PRIMITIVES_H_
|
||||
#define _GFX12_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
// taken from gfx12_pm4defs.h
|
||||
#define COPY_DATA_SEL_REG                  0  ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4  ///< Privileged memory performance counter
#define COPY_DATA_SEL_COUNT_1DW            0  ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
// Yields a capture-less lambda that places counter_des.id into the PERF_SEL field of
// reg_name and returns the result as uint32_t.
#define select_value(reg_name)                                        \
  [](const counter_des_t& counter_des) -> uint32_t {                  \
    return SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id);    \
  }
// Same as select_value, but the target field is named PERFCOUNTER_SELECT.
#define select_value_t2(reg_name)                                               \
  [](const counter_des_t& counter_des) -> uint32_t {                            \
    return SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id);    \
  }
// Yields a lambda that ignores the descriptor and always returns 0.
#define select_value_blank() \
  [](const counter_des_t&) -> uint32_t { return 0; }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx12 {
|
||||
|
||||
class gfx12_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 12;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, regGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL_1);
|
||||
|
||||
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE);
|
||||
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
static constexpr Register SPI_SQG_EVENT_CTL_ADDR = REG_32B_ADDR(GC, 0, regSPI_SQG_EVENT_CTL);
|
||||
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL);
|
||||
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_PERFCOUNTER_CTRL2);
|
||||
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR = Register(0xD9E1);
|
||||
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_MASK);
|
||||
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_TOKEN_MASK);
|
||||
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE_LO);
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_BASE_HI);
|
||||
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_BUF0_SIZE);
|
||||
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR{};
|
||||
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_CTRL);
|
||||
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR{};
|
||||
static const uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;
|
||||
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
|
||||
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_DROPPED_CNTR);
|
||||
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_WPTR);
|
||||
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = []() {
|
||||
Register reg = REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_STATUS);
|
||||
reg.offset -= UCONFIG_SPACE_START;
|
||||
return reg;
|
||||
}();
|
||||
static const uint32_t TT_BUFF_ALIGN_SHIFT = 12;
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_CNTL);
|
||||
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, regRLC_SPM_MC_CNTL);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_LO);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_BASE_HI);
|
||||
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_RING_SIZE);
|
||||
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE);
|
||||
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR{};
|
||||
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_ADDR);
|
||||
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_GLOBAL_MUXSEL_DATA);
|
||||
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_ADDR);
|
||||
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
|
||||
REG_32B_ADDR(GC, 0, regRLC_SPM_SE_MUXSEL_DATA);
|
||||
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
|
||||
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_0);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_1);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_2);
|
||||
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
|
||||
REG_32B_ADDR(GC, 0, regSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
static const uint32_t NUM_WGP1_PER_SA = 0;
|
||||
static const uint32_t NUM_ROWS_PER_WGP = 2;
|
||||
|
||||
static Register sqtt_perfcounter_addr(uint32_t index) {
  // The index is not consulted: every call yields REG_32B_NULL.
  (void)index;
  return REG_32B_NULL;
}
|
||||
|
||||
// 16-bit mux selector word, accessible either whole (`data`) or through the `gfx`
// bit-field view (6 + 5 + 5 bits).
union mux_info_t {
  uint16_t data;  // whole word
  struct {
    uint16_t counter : 6;   // counter field
    uint16_t block : 5;     // block field
    uint16_t instance : 5;  // instance field
  } gfx;
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = __BLOCK_ID(SQ);
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = SPM_SE_BLOCK_NAME_SQG;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
static uint32_t Low32(const uint64_t& v) {
  // Truncating conversion keeps bits [31:0].
  return static_cast<uint32_t>(v);
}
|
||||
static uint32_t High32(const uint64_t& v) {
  // Shift bits [63:32] down, then truncate to 32 bits.
  const uint64_t upper = v >> 32;
  return static_cast<uint32_t>(upper);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM BlockInstance indexing (SE and SA broadcast)
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sa_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
// Hardcode wgp_side to 0 now because we don't have a product with wgp1 configuration
|
||||
uint32_t wgp_side = 0;
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index,
|
||||
const uint32_t& sa_index,
|
||||
const uint32_t& wgp_index) {
|
||||
// Hardcode wgp_side to 0 now because we don't have a product with wgp1 configuration
|
||||
uint32_t wgp_side = 0;
|
||||
assert(instance_index < NUM_ROWS_PER_WGP);
|
||||
uint32_t grbm_gfx_index =
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SA_INDEX, sa_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, ((wgp_side<<6) | (wgp_index << 2) | (instance_index << 1)));
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to reset counters
|
||||
static uint32_t cp_perfmon_cntl_reset_value() {
  // The reset value has no field set.
  return uint32_t{0};
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 2) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL_1 value to read/sample counters (PERFMON_STATE stays at the start value)
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_perfcounter0_sel;
|
||||
}
|
||||
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_sel =
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_perfcounter0_sel;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value - not used in gfx12
|
||||
// Returns an all-ones 32-bit mask; the counter descriptor argument is ignored.
static uint32_t sq_mask_value(const counter_des_t&) {
  constexpr uint32_t kAllBits = 0xFFFFFFFF;
  return kAllBits;
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// Aborts the process when counters_vec_attr has both CounterBlockSqAttr and
// CounterBlockTcAttr set. The check is compiled in only when
// SQ_CONFLICT_CHECK == 1; otherwise the function does nothing.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t mask = CounterBlockSqAttr | CounterBlockTcAttr;
  const bool conflict = ((counters_vec_attr & mask) == mask);
  if (conflict) abort();
#else
  (void)counters_vec_attr;  // unused when the conflict check is compiled out
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
// SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
static uint32_t sq_control2_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl2 = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, true) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_perfcounter_ctrl2;
|
||||
}
|
||||
static uint32_t sq_control2_disable_value() {
|
||||
uint32_t sq_perfcounter_ctrl2 = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, FORCE_EN, false) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL2, VMID_EN, 0xFFFF);
|
||||
return sq_perfcounter_ctrl2;
|
||||
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// Returns the descriptor's index field as the MC channel value.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  const uint32_t channel = counter_des.index;
  return channel;
}
|
||||
|
||||
// MC registers values
|
||||
// Returns the MC_PERFCOUNTER_RSLT_CNTL clear-all mask constant.
static uint32_t mc_reset_value() {
  const uint32_t clear_all = MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM;
  return clear_all;
}
|
||||
// Returns the MC_PERFCOUNTER_RSLT_CNTL enable-any mask constant.
static uint32_t mc_start_value() {
  const uint32_t enable_any = MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM;
  return enable_any;
}
|
||||
|
||||
static auto constexpr select_value_Cha= select_value(CHA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Chc= select_value(CHC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpc= select_value(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpf= select_value(CPF_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Cpg= select_value(CPG_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gcea= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gcr= select_value(GCR_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2a= select_value(GL2A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl2c= select_value(GL2C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Grbm= select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Rlc= select_value_t2(RLC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SdmaPm= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcVml2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GcUtcl2= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_GceaSe= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Grbmh= select_value(GRBMH_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Spi= select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GcUtcl1= select_value_blank(); // register not present
|
||||
static auto constexpr select_value_Gl1a= select_value(GL1A_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Gl1c= select_value(GL1C_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Ta= select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Td= select_value(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_Tcp= select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SX_PERFCOUNTER0_SELECT = select_value_blank();
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcp_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCP_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcp_perfcounter0_select;
|
||||
}
|
||||
// Fills a mux_info_t from a counter descriptor:
//   gfx.counter  <- counter_des.index
//   gfx.block    <- counter_des.block_info->spm_block_id
//   gfx.instance <- counter_des.block_des.index
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t mxinfo{0};
  mxinfo.gfx.counter = counter_des.index;
  mxinfo.gfx.block = counter_des.block_info->spm_block_id;
  mxinfo.gfx.instance = counter_des.block_des.index;
  return mxinfo;
}
|
||||
// Fills a mux_info_t directly from explicit counter, block and instance values.
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t mxinfo{0};
  mxinfo.gfx.counter = counter;
  mxinfo.gfx.block = block;
  mxinfo.gfx.instance = instance;
  return mxinfo;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// SDMA primitives
|
||||
// SDMA enable value; always zero in this implementation.
static uint32_t sdma_enable_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// SDMA disable/clear value; always zero in this implementation.
static uint32_t sdma_disable_clear_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// SDMA counter select value; always zero, independent of counter_des.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// SDMA stop value; always zero, independent of counter_des.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// SPM trace routines
|
||||
// Returns an RLC_SPM_MC_CNTL value with RLC_SPM_VMID set to 15.
static uint32_t rlc_spm_mc_cntl_value() {
  const uint32_t rlc_spm_mc_cntl = SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
  return rlc_spm_mc_cntl;
}
|
||||
// Returns a CP_PERFMON_CNTL_1 value with SPM_PERFMON_STATE set to 1.
static uint32_t cp_perfmon_cntl_spm_start_value() {
  const uint32_t cp_perfmon_cntl =
      SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 1);
  return cp_perfmon_cntl;
}
|
||||
// Returns a CP_PERFMON_CNTL_1 value with SPM_PERFMON_STATE set to 2.
static uint32_t cp_perfmon_cntl_spm_stop_value() {
  const uint32_t cp_perfmon_cntl =
      SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 2);
  return cp_perfmon_cntl;
}
|
||||
// Muxsel data value; this implementation ignores all arguments and returns 0.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  return 0;
}
|
||||
// Returns an RLC_SPM_PERFMON_CNTL value whose PERFMON_SAMPLE_INTERVAL field
// holds sampling_rate.
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
  const uint32_t rlc_spm_perfmon_cntl =
      SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
  return rlc_spm_perfmon_cntl;
}
|
||||
// Returns an RLC_SPM_PERFMON_SEGMENT_SIZE value.
//   TOTAL_NUM_SEGMENT  <- global_count + 4 * se_count
//   GLOBAL_NUM_SEGMENT <- global_count
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
                                                   const uint32_t& se_count) {
  const uint32_t global_nlines = global_count;
  const uint32_t se_nlines = se_count;
  const uint32_t segment_size = (global_nlines + (4 * se_nlines));
  const uint32_t rlc_spm_perfmon_segment_size =
      SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, TOTAL_NUM_SEGMENT, segment_size) |
      SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_SEGMENT, global_nlines);
  // rlc_spm_perfmon_segment_size = SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE,
  // se_nlines) |
  //   SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
  //   SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
  //   SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
  return rlc_spm_perfmon_segment_size;
}
|
||||
|
||||
// Core-1 segment size value; always zero, independent of se_count.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Enable all of the WTYPEs
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
// Returns an SQ_THREAD_TRACE_MASK value.
//   SIMD_SEL <- simd
//   WGP_SEL  <- wgp
//   SA_SEL   <- 0
//   WTYPE_INCLUDE <- 1 << 6
//   EXCLUDE_NONDETAIL_SHADERDATA <- 1
// The vmid argument is not used by this implementation.
static uint32_t sqtt_mask_value(uint32_t wgp, uint32_t simd, uint32_t vmid) {
  const uint32_t sq_thread_trace_mask =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_SEL, simd) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WGP_SEL, wgp) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SA_SEL, 0x0) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, WTYPE_INCLUDE,
                         1 << 6) |  // SQ_TT_WTYPE_INCLUDE_CS_BIT
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, EXCLUDE_NONDETAIL_SHADERDATA, 1);
  // sq_thread_trace_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK,
  // EXCLUDE_NONDETAIL_WAVESTART_EXT, 1) |
  //   SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, EXCLUDE_NONDETAIL_ALLOC, 1);
  return sq_thread_trace_mask;
}
|
||||
// not supported in gfx12
|
||||
// Thread-trace perf mask value; always zero in this implementation.
static uint32_t sqtt_perf_mask_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value used when tracing is on.
//   REG_DETAIL_ALL <- 1
//   REG_EXCLUDE    <- 0x3
//   REG_INCLUDE    <- SQDEC | SHDEC | GFXUDEC | CONTEXT | COMP token bits
//   TOKEN_EXCLUDE  <- VMEMEXEC and ALUEXEC exclude bits
//   EXCLUDE_BARRIER_WAIT <- 1
static uint32_t sqtt_token_mask_on_value() {
  const uint32_t sq_thread_trace_token_mask =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_DETAIL_ALL, 1) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x3) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE,
                         (SQ_TT_TOKEN_MASK_SQDEC_BIT | SQ_TT_TOKEN_MASK_SHDEC_BIT |
                          SQ_TT_TOKEN_MASK_GFXUDEC_BIT | SQ_TT_TOKEN_MASK_CONTEXT_BIT |
                          SQ_TT_TOKEN_MASK_COMP_BIT)) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE,
                         ((1 << SQ_TT_TOKEN_EXCLUDE_VMEMEXEC_SHIFT) |
                          (1 << SQ_TT_TOKEN_EXCLUDE_ALUEXEC_SHIFT))) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, EXCLUDE_BARRIER_WAIT,
                         1);  // See DEGFX12-10117
  return sq_thread_trace_token_mask;
}
|
||||
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value used when tracing is off:
// REG_EXCLUDE = 0x7, INST_EXCLUDE = 0x3, TOKEN_EXCLUDE = 0x7FF.
static uint32_t sqtt_token_mask_off_value() {
  const uint32_t sq_thread_trace_token_mask =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_EXCLUDE, 0x7) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
  return sq_thread_trace_token_mask;
}
|
||||
|
||||
// Returns the SQ_THREAD_TRACE_TOKEN_MASK value for the occupancy configuration:
// REG_INCLUDE = 0x8, INST_EXCLUDE = 0x3, TOKEN_EXCLUDE = 0x7FF.
static uint32_t sqtt_token_mask_occupancy_value() {
  const uint32_t sq_thread_trace_token_mask =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_INCLUDE, 0x8) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, INST_EXCLUDE, 0x3) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_EXCLUDE, 0x7FF);
  return sq_thread_trace_token_mask;
}
|
||||
|
||||
// not supported in gfx12
|
||||
// Second token mask value; always zero in this implementation.
static uint32_t sqtt_token_mask2_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Check if stalling is supported
|
||||
// Reports whether stalling is enabled for the given mask values. This
// implementation ignores both arguments and always reports false.
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
  return false;
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
// Thread-trace mode OFF register value; always zero in this implementation.
static uint32_t sqtt_mode_off_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
// Thread trace mode ON value
|
||||
// Thread-trace mode ON register value; always zero in this implementation.
static uint32_t sqtt_mode_on_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
// Returns an SQ_THREAD_TRACE_BUF0_BASE_LO value whose BASE_LO field holds the
// low 32 bits of base_addr shifted right by TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
  const uint32_t sq_thread_trace_buf0_base_lo = SET_REG_FIELD_BITS(
      SQ_THREAD_TRACE_BUF0_BASE_LO, BASE_LO, Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
  return sq_thread_trace_buf0_base_lo;
}
|
||||
|
||||
// Returns an SQ_THREAD_TRACE_BUF0_BASE_HI value whose BASE_HI field holds the
// high 32 bits of base_addr shifted right by TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
  const uint32_t sq_thread_trace_buf0_base_hi = SET_REG_FIELD_BITS(
      SQ_THREAD_TRACE_BUF0_BASE_HI, BASE_HI, High32(base_addr >> TT_BUFF_ALIGN_SHIFT));
  return sq_thread_trace_buf0_base_hi;
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
// Returns an SQ_THREAD_TRACE_BUF0_SIZE value whose SIZE field holds size_val
// shifted right by TT_BUFF_ALIGN_SHIFT.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  const uint32_t sq_thread_trace_buf0_size =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BUF0_SIZE, SIZE, size_val >> TT_BUFF_ALIGN_SHIFT);
  return sq_thread_trace_buf0_size;
}
|
||||
|
||||
// Returns an SPI_SQG_EVENT_CTL value with both ENABLE_SQG_TOP_EVENTS and
// ENABLE_SQG_BOP_EVENTS set to enableSqgEvents.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  // Local renamed so it no longer shadows the function's own name.
  const uint32_t event_ctl =
      SET_REG_FIELD_BITS(SPI_SQG_EVENT_CTL, ENABLE_SQG_TOP_EVENTS, enableSqgEvents) |
      SET_REG_FIELD_BITS(SPI_SQG_EVENT_CTL, ENABLE_SQG_BOP_EVENTS, enableSqgEvents);
  return event_ctl;
}
|
||||
|
||||
// Combined buffer size value; always zero, independent of both arguments.
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Zero-size buffer value; always zero.
static uint32_t sqtt_zero_size_value() {
  constexpr uint32_t kValue = 0;
  return kValue;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
// Returns an SQ_THREAD_TRACE_CTRL value.
//   MODE <- SQ_TT_MODE_ON when `on` is true, otherwise SQ_TT_MODE_OFF
// All other fields are fixed: HIWATER = 5, UTIL_TIMER = 1, DRAW_EVENT_EN = 1,
// SPI_STALL_EN = 1, SQ_STALL_EN = 1, LOWATER_OFFSET = 4, AUTO_FLUSH_MODE = 1.
static uint32_t sqtt_ctrl_value(bool on) {
  const uint32_t sq_thread_trace_ctrl =
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, MODE, on ? SQ_TT_MODE_ON : SQ_TT_MODE_OFF) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, HIWATER, 5) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, UTIL_TIMER, 1) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, DRAW_EVENT_EN, 1) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SPI_STALL_EN, 1) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, SQ_STALL_EN, 1) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, LOWATER_OFFSET, 4) |
      SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, AUTO_FLUSH_MODE, 1);
  return sq_thread_trace_ctrl;
}
|
||||
|
||||
// SPM primitives
|
||||
// Returns the muxsel code used for the SPM timestamp entry (0xF0F0).
static uint16_t spm_timestamp_muxsel() {
  constexpr uint16_t kTimestampMuxsel = 0xF0F0;
  return kTimestampMuxsel;
}
|
||||
|
||||
// Bit masks applied to thread-trace status values.
enum ESQTT_STATUS_MASK {
  // Mask to check if memory error was received
  TT_CONTROL_UTC_ERR_MASK = 0x1000000,
  // TODO: Navi has 2 full bits on status2, one for each buffer
  TT_CONTROL_FULL_MASK = 0x0,
  TT_WRITE_PTR_MASK = 0x1FFFFFFF
};
|
||||
|
||||
// Returns a mask with only bit 25 set, used as the thread-trace busy bit.
static uint32_t sqtt_busy_mask() {
  const uint32_t BUSY_BIT = 25;
  return 1u << BUSY_BIT;
}
|
||||
|
||||
// Returns a mask covering NUM_PIPES consecutive bits starting at bit
// PIPE_START, i.e. bits 2 through 9 (0x3FC).
static uint32_t sqtt_pending_mask() {
  const uint32_t PIPE_START = 2;
  const uint32_t NUM_PIPES = 8;
  return (1u << (NUM_PIPES + PIPE_START)) - (1u << PIPE_START);
}
|
||||
};
|
||||
|
||||
} // namespace gfx12
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX12_PRIMITIVES_H_
|
||||
@@ -0,0 +1,201 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_BLOCKINFO_H_
|
||||
#define _GFX9_BLOCKINFO_H_
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx9 {
|
||||
|
||||
// Enumeration of Gfx9 hardware counter blocks
|
||||
// Identifiers of the Gfx9 hardware counter blocks. Values are assigned
// sequentially from zero; FirstCounterBlockId and LastCounterBlockId alias the
// first and last real entries.
enum CounterBlockId {
  CbCounterBlockId,
  CpcCounterBlockId,
  CpfCounterBlockId,
  CpgCounterBlockId,
  DbCounterBlockId,
  GdsCounterBlockId,
  GrbmCounterBlockId,
  GrbmSeCounterBlockId,
  IaCounterBlockId,
  PaScCounterBlockId,
  PaSuCounterBlockId,
  SpiCounterBlockId,
  SqCounterBlockId,
  SqGsCounterBlockId,
  SqVsCounterBlockId,
  SqPsCounterBlockId,
  SqHsCounterBlockId,
  SqCsCounterBlockId,
  SxCounterBlockId,
  TaCounterBlockId,
  TcaCounterBlockId,
  TccCounterBlockId,
  TcpCounterBlockId,
  TcsCounterBlockId,
  TdCounterBlockId,
  VgtCounterBlockId,
  WdCounterBlockId,

  // MC blocks
  GceaCounterBlockId,
  AtcCounterBlockId,
  AtcL2CounterBlockId,
  McVmL2CounterBlockId,
  RpbCounterBlockId,
  RmiCounterBlockId,

  // SDMA block
  SdmaCounterBlockId,
  // UMC block
  UmcCounterBlockId,

  // Counters retrieved by KFD
  IommuV2CounterBlockId,
  KernelDriverCounterBlockId,

  CpPipeStatsCounterBlockId,
  HwInfoCounterBlockId,

  FirstCounterBlockId = CbCounterBlockId,
  LastCounterBlockId = HwInfoCounterBlockId,
};
|
||||
|
||||
/*
|
||||
* SPM global and shader engine block IDs
|
||||
*/
|
||||
// SPM block IDs for the global (non shader-engine) blocks.
enum SpmGlobalBlockId {
  SPM_GLOBAL_BLOCK_NAME_CPG = 0,
  SPM_GLOBAL_BLOCK_NAME_CPC = 1,
  SPM_GLOBAL_BLOCK_NAME_CPF = 2,
  SPM_GLOBAL_BLOCK_NAME_GDS = 3,
  SPM_GLOBAL_BLOCK_NAME_TCC = 4,
  SPM_GLOBAL_BLOCK_NAME_TCA = 5,
  SPM_GLOBAL_BLOCK_NAME_IA = 6,
  SPM_GLOBAL_BLOCK_NAME_TCS = 7,
};
|
||||
|
||||
// SPM block IDs for the per shader-engine blocks.
enum SpmSeBlockId {
  SPM_SE_BLOCK_NAME_CB = 0,
  SPM_SE_BLOCK_NAME_DB = 1,
  SPM_SE_BLOCK_NAME_PA = 2,
  SPM_SE_BLOCK_NAME_SX = 3,
  SPM_SE_BLOCK_NAME_SC = 4,
  SPM_SE_BLOCK_NAME_TA = 5,
  SPM_SE_BLOCK_NAME_TD = 6,
  SPM_SE_BLOCK_NAME_TCP = 7,
  SPM_SE_BLOCK_NAME_SPI = 8,
  SPM_SE_BLOCK_NAME_SQG = 9,
  SPM_SE_BLOCK_NAME_VGT = 10,
};
|
||||
|
||||
// Number of block instances
|
||||
// Instance counts for the blocks that have more than one instance.
static const uint32_t CbCounterBlockNumInstances = 4;
static const uint32_t DbCounterBlockNumInstances = 4;
static const uint32_t TaCounterBlockNumInstances = 16;
static const uint32_t TdCounterBlockNumInstances = 16;
static const uint32_t TcpCounterBlockNumInstances = 16;
static const uint32_t TcaCounterBlockNumInstances = 2;
static const uint32_t TccCounterBlockNumInstances = 16;
static const uint32_t SdmaCounterBlockNumInstances = 2;
static const uint32_t UmcCounterBlockNumInstances = 32;

// MI100 has 8 SDMA instances
static const uint32_t SdmaCounterBlockMaxInstances = 8;
static const uint32_t UmcCounterBlockMaxInstances = 32;
static const uint32_t RmiCounterBlockNumInstances = 8;
static const uint32_t GceaCounterBlockNumInstances = 16;
|
||||
|
||||
// Number of block counter registers
|
||||
// Number of counter registers available in each block.
static const uint32_t CbCounterBlockNumCounters = 4;
static const uint32_t CpcCounterBlockNumCounters = 2;
static const uint32_t CpfCounterBlockNumCounters = 2;
static const uint32_t CpgCounterBlockNumCounters = 2;
static const uint32_t DbCounterBlockNumCounters = 4;
static const uint32_t GdsCounterBlockNumCounters = 4;
static const uint32_t GrbmCounterBlockNumCounters = 2;
static const uint32_t GrbmSeCounterBlockNumCounters = 4;
static const uint32_t IaCounterBlockNumCounters = 4;
static const uint32_t PaSuCounterBlockNumCounters = 4;
static const uint32_t PaScCounterBlockNumCounters = 8;
static const uint32_t RlcCounterBlockNumCounters = 2;
static const uint32_t SdmaCounterBlockNumCounters = 2;
static const uint32_t UmcCounterBlockNumCounters = 0;
static const uint32_t SpiCounterBlockNumCounters = 6;
static const uint32_t SqCounterBlockNumCounters = 8;
static const uint32_t SxCounterBlockNumCounters = 4;
static const uint32_t TaCounterBlockNumCounters = 2;
static const uint32_t TcaCounterBlockNumCounters = 4;
static const uint32_t TccCounterBlockNumCounters = 4;
static const uint32_t TcpCounterBlockNumCounters = 4;
static const uint32_t TdCounterBlockNumCounters = 2;
static const uint32_t VgtCounterBlockNumCounters = 4;
static const uint32_t WdCounterBlockNumCounters = 4;
static const uint32_t GceaCounterBlockNumCounters = 2;
static const uint32_t AtcCounterBlockNumCounters = 4;
static const uint32_t AtcL2CounterBlockNumCounters = 2;
#ifndef _mi300_OFFSET_HEADER
static const uint32_t McVmL2CounterBlockNumCounters = 8;
#else
// MI300 bumped this to 16
static const uint32_t McVmL2CounterBlockNumCounters = 16;
#endif
static const uint32_t RpbCounterBlockNumCounters = 4;
static const uint32_t RmiCounterBlockNumCounters = 4;
|
||||
|
||||
// Block counters max event value
|
||||
static const uint32_t CbCounterBlockMaxEvent = CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD;
|
||||
static const uint32_t CpcCounterBlockMaxEvent = CPC_PERF_SEL_ME2_DC1_SPI_BUSY;
|
||||
static const uint32_t CpfCounterBlockMaxEvent = CPF_PERF_SEL_CPF_UTCL2IU_STALL;
|
||||
static const uint32_t CpgCounterBlockMaxEvent = CPG_PERF_SEL_CPG_UTCL2IU_STALL;
|
||||
static const uint32_t DbCounterBlockMaxEvent = DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels;
|
||||
static const uint32_t GdsCounterBlockMaxEvent = GDS_PERF_SEL_GWS_BYPASS;
|
||||
static const uint32_t GrbmCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t GrbmSeCounterBlockMaxEvent = GRBM_PERF_SEL_CPAXI_BUSY;
|
||||
static const uint32_t IaCounterBlockMaxEvent = ia_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t PaSuCounterBlockMaxEvent = PERF_CLIENT_UTCL1_INFLIGHT;
|
||||
static const uint32_t PaScCounterBlockMaxEvent =
|
||||
SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND;
|
||||
static const uint32_t RlcCounterBlockMaxEvent = 7;
|
||||
static const uint32_t SdmaCounterBlockMaxEvent = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER;
|
||||
static const uint32_t UmcCounterBlockMaxEvent = 255;
|
||||
static const uint32_t SpiCounterBlockMaxEvent = SPI_PERF_VWC_CSC_WR;
|
||||
static const uint32_t SqCounterBlockMaxEvent = SQC_PERF_SEL_DUMMY_LAST;
|
||||
static const uint32_t SxCounterBlockMaxEvent = SX_PERF_SEL_DB3_SIZE;
|
||||
static const uint32_t TaCounterBlockMaxEvent = TA_PERF_SEL_first_xnack_on_phase3;
|
||||
static const uint32_t TcaCounterBlockMaxEvent = TCA_PERF_SEL_CROSSBAR_STALL_TCC7;
|
||||
static const uint32_t TccCounterBlockMaxEvent = TCC_PERF_SEL_CLIENT127_REQ;
|
||||
static const uint32_t TcpCounterBlockMaxEvent = TCP_PERF_SEL_TCC_DCC_REQ;
|
||||
static const uint32_t TdCounterBlockMaxEvent = TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt;
|
||||
static const uint32_t VgtCounterBlockMaxEvent = vgt_perf_sclk_te11_vld;
|
||||
static const uint32_t WdCounterBlockMaxEvent = wd_perf_utcl1_stall_utcl2_event;
|
||||
static const uint32_t GceaCounterBlockMaxEvent = 76;
|
||||
static const uint32_t AtcCounterBlockMaxEvent = 23;
|
||||
static const uint32_t AtcL2CounterBlockMaxEvent = 7;
|
||||
static const uint32_t RpbCounterBlockMaxEvent = 62;
|
||||
static const uint32_t McVmL2CounterBlockMaxEvent = 20;
|
||||
static const uint32_t RmiCounterBlockMaxEvent = RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3;
|
||||
|
||||
} // namespace gfx9
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX9_BLOCKINFO_H_
|
||||
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
@@ -0,0 +1,727 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_PRIMITIVES_H_
|
||||
#define _GFX9_PRIMITIVES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
// COPY_DATA selector constants.
#define COPY_DATA_SEL_REG 0  ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4
#define COPY_DATA_SEL_COUNT_1DW 0  ///< Copy 1 word (32 bits)
|
||||
|
||||
// Counter Select Register value lambdas
|
||||
// Expands to a lambda taking a counter_des_t and returning a reg_name value
// whose PERF_SEL field holds counter_des.id.
#define select_value(reg_name)                                                \
  [](const counter_des_t& counter_des) {                                      \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id); \
    return select;                                                            \
  }
|
||||
// Same as select_value, but writes counter_des.id into the PERFCOUNTER_SELECT
// field of reg_name.
#define select_value_t2(reg_name)                                                       \
  [](const counter_des_t& counter_des) {                                                \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, PERFCOUNTER_SELECT, counter_des.id); \
    return select;                                                                      \
  }
|
||||
// Same as select_value, but writes counter_des.id into the CNTR_SEL0 field of
// reg_name.
#define select_value_t3(reg_name)                                              \
  [](const counter_des_t& counter_des) {                                       \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, CNTR_SEL0, counter_des.id); \
    return select;                                                             \
  }
|
||||
// Expands to a lambda building an MC counter select value for reg_name:
// PERF_SEL holds counter_des.id, PERF_MODE is PERFMON_COUNTER_MODE_ACCUM and
// ENABLE is 1.
#define mc_select_value(reg_name)                                                         \
  [](const counter_des_t& counter_des) {                                                  \
    uint32_t select = SET_REG_FIELD_BITS(reg_name, PERF_SEL, counter_des.id) |            \
                      SET_REG_FIELD_BITS(reg_name, PERF_MODE, PERFMON_COUNTER_MODE_ACCUM) | \
                      SET_REG_FIELD_BITS(reg_name, ENABLE, 1);                            \
    return select;                                                                        \
  }
|
||||
|
||||
namespace gfxip {
|
||||
namespace gfx9 {
|
||||
|
||||
class gfx9_cntx_prim {
|
||||
public:
|
||||
static const uint32_t GFXIP_LEVEL = 9;
|
||||
static const uint32_t NUMBER_OF_BLOCKS = LastCounterBlockId + 1;
|
||||
static constexpr Register GRBM_GFX_INDEX_ADDR = REG_32B_ADDR(GC, 0, mmGRBM_GFX_INDEX);
|
||||
static constexpr Register COMPUTE_PERFCOUNT_ENABLE_ADDR =
|
||||
REG_32B_ADDR(GC, 0, mmCOMPUTE_PERFCOUNT_ENABLE);
|
||||
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmRLC_PERFMON_CLK_CNTL);
|
||||
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, mmCP_PERFMON_CNTL);
|
||||
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM = 0x01000000L;
|
||||
static const uint32_t MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM = 0x02000000L;
|
||||
|
||||
// SQ perf-counter and thread-trace register addresses.
// Entries declared with `{}` are value-initialized; no mm* register is bound to them here.
static constexpr Register SPI_SQG_EVENT_CTL_ADDR{};
static constexpr Register SQ_PERFCOUNTER_CTRL_ADDR = REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_CTRL);
static constexpr Register SQ_PERFCOUNTER_CTRL2_ADDR{};
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR{};
static constexpr Register SQ_PERFCOUNTER_MASK_ADDR = REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER_MASK);

// Thread-trace mask / token / mode registers.
static constexpr Register SQ_THREAD_TRACE_MASK_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MASK);
static constexpr Register SQ_THREAD_TRACE_PERF_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_PERF_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK);
static constexpr Register SQ_THREAD_TRACE_TOKEN_MASK2_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_TOKEN_MASK2);
static constexpr Register SQ_THREAD_TRACE_MODE_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_MODE);

// Thread-trace buffer registers (the BUF0 variants are value-initialized).
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_LO_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_BASE_HI_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BUF0_SIZE_ADDR{};
static constexpr Register SQ_THREAD_TRACE_BASE_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BASE);
static constexpr Register SQ_THREAD_TRACE_BASE2_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_BASE2);
static constexpr Register SQ_THREAD_TRACE_SIZE_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_SIZE);
static constexpr Register SQ_THREAD_TRACE_CTRL_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CTRL);

// High-water register address and the constant associated with it.
static constexpr Register SQ_THREAD_TRACE_HIWATER_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_HIWATER);
static const uint32_t SQ_THREAD_TRACE_HIWATER_VAL = 0x6;

// Thread-trace status / counter / write-pointer registers.
static constexpr Register SQ_THREAD_TRACE_STATUS_ADDR =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
static constexpr Register SQ_THREAD_TRACE_CNTR_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_CNTR);
static constexpr Register SQ_THREAD_TRACE_WPTR_ADDR = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_WPTR);
|
||||
// SQ_THREAD_TRACE_STATUS address with UCONFIG_SPACE_START subtracted from its offset,
// computed once at compile time by an immediately-invoked lambda.
static constexpr Register SQ_THREAD_TRACE_STATUS_OFFSET = []() {
  Register status = REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_STATUS);
  status.offset = status.offset - UCONFIG_SPACE_START;
  return status;
}();
|
||||
// Right-shift applied to thread-trace buffer addresses and sizes (2^12 = 4096-byte units).
static const uint32_t TT_BUFF_ALIGN_SHIFT = 12u;
|
||||
|
||||
static const uint32_t SDMA_COUNTER_BLOCK_NUM_INSTANCES = SdmaCounterBlockMaxInstances;
|
||||
static const uint32_t UMC_COUNTER_BLOCK_NUM_INSTANCES = UmcCounterBlockMaxInstances;
|
||||
|
||||
// RLC SPM control and ring-buffer register addresses.
static constexpr Register RLC_SPM_PERFMON_CNTL__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_CNTL);
static constexpr Register RLC_SPM_MC_CNTL__ADDR = REG_32B_ADDR(GC, 0, mmRLC_SPM_MC_CNTL);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_LO__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_LO);
static constexpr Register RLC_SPM_PERFMON_RING_BASE_HI__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI);
static constexpr Register RLC_SPM_PERFMON_RING_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE);
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_PERFMON_SEGMENT_SIZE);

// CORE1 segment-size register: use the header definition when it exists,
// otherwise fall back to the hard-coded value 0xDCAF.
#if defined(regRLC_SPM_PERFMON_SEGMENT_SIZE_CORE1)
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR =
    REG_32B_ADDR(GC, 0, regRLC_SPM_PERFMON_SEGMENT_SIZE_CORE1);
#else
static constexpr Register RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__ADDR = Register(0xDCAF);
#endif

// Global and per-SE mux-select address/data register pairs.
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);
static constexpr Register RLC_SPM_GLOBAL_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_DATA);
static constexpr Register RLC_SPM_SE_MUXSEL_ADDR__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);
static constexpr Register RLC_SPM_SE_MUXSEL_DATA__ADDR =
    REG_32B_ADDR(GC, 0, mmRLC_SPM_SE_MUXSEL_DATA);

// Mux RAM layout constants: entries per line, and timestamp size in 16-bit units
// (unit inferred from the name — TODO confirm).
static const uint32_t RLC_SPM_COUNTERS_PER_LINE = 16;
static const uint32_t RLC_SPM_TIMESTAMP_SIZE16 = 4;
|
||||
|
||||
// Addresses of the four SQ_THREAD_TRACE_USERDATA registers.
static constexpr Register SQ_THREAD_TRACE_USERDATA_0 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_0);
static constexpr Register SQ_THREAD_TRACE_USERDATA_1 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_1);
static constexpr Register SQ_THREAD_TRACE_USERDATA_2 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_2);
static constexpr Register SQ_THREAD_TRACE_USERDATA_3 =
    REG_32B_ADDR(GC, 0, mmSQ_THREAD_TRACE_USERDATA_3);
|
||||
|
||||
// Returns the SQ_PERFCOUNTER<n>_SELECT register address for slot `index`.
// Only the low four bits of `index` are used, so the lookup wraps at 16 entries.
static Register sqtt_perfcounter_addr(uint32_t index) {
  constexpr uint32_t kSlotMask = 0xF;
  static const Register kSelectRegs[16] = {
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER0_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER1_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER2_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER3_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER4_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER5_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER6_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER7_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER8_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER9_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER10_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER11_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER12_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER13_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER14_SELECT),
      REG_32B_ADDR(GC, 0, mmSQ_PERFCOUNTER15_SELECT),
  };
  return kSelectRegs[index & kSlotMask];
}
|
||||
|
||||
// 16-bit mux-select word, accessible either as a raw value or as bit-fields.
union mux_info_t {
  uint16_t data;  // whole word
  struct {
    uint16_t counter : 6;   // counter field (6 bits)
    uint16_t block : 5;     // block field (5 bits)
    uint16_t instance : 5;  // instance field (5 bits)
  } gfx;
};
|
||||
|
||||
static const uint32_t SQ_BLOCK_ID = SqCounterBlockId;
|
||||
static const uint32_t SQ_BLOCK_SPM_ID = 9;
|
||||
|
||||
static const uint32_t COPY_DATA_SEL_REG_PRM = COPY_DATA_SEL_REG;
|
||||
static const uint32_t COPY_DATA_SEL_SRC_SYS_PERF_COUNTER_PRM = COPY_DATA_SEL_SRC_SYS_PERF_COUNTER;
|
||||
static const uint32_t COPY_DATA_SEL_COUNT_1DW_PRM = COPY_DATA_SEL_COUNT_1DW;
|
||||
|
||||
// Returns the low 32 bits of `v`. constexpr so it can be used in constant expressions.
static constexpr uint32_t Low32(const uint64_t& v) {
  return static_cast<uint32_t>(v & 0xFFFFFFFFull);
}
|
||||
// Returns the high 32 bits of `v`. constexpr so it can be used in constant expressions.
static constexpr uint32_t High32(const uint64_t& v) {
  return static_cast<uint32_t>(v >> 32);
}
|
||||
|
||||
// SPM delay functions for global instance
|
||||
static uint32_t get_spm_global_delay(const counter_des_t& counter_des,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
return block_info->delay_info[instance_index].val - 1;
|
||||
}
|
||||
|
||||
// SPM delay functions for se instance
|
||||
static uint32_t get_spm_se_delay(const counter_des_t& counter_des, const uint32_t& se_index,
|
||||
const uint32_t& instance_index) {
|
||||
const auto* block_info = counter_des.block_info;
|
||||
int delay_index = se_index * block_info->instance_count + instance_index;
|
||||
return block_info->delay_info[delay_index].val - 1;
|
||||
}
|
||||
|
||||
// GRBM broadcasting mode
|
||||
static uint32_t grbm_broadcast_value() {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM BlockInstance indexing
|
||||
static uint32_t grbm_inst_index_value(const uint32_t& instance_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE indexing
|
||||
static uint32_t grbm_se_index_value(const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH indexing
|
||||
static uint32_t grbm_se_sh_index_value(const uint32_t& se_index, const uint32_t& sh_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_INDEX, sh_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SH/SE/BlockInstance indexing
|
||||
static uint32_t grbm_inst_se_sh_index_value(const uint32_t& instance_index,
|
||||
const uint32_t& se_index, const uint32_t& sh_index) {
|
||||
uint32_t grbm_gfx_index = SET_REG_FIELD_BITS(GRBM_GFX_INDEX, INSTANCE_INDEX, instance_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SE_INDEX, se_index) |
|
||||
SET_REG_FIELD_BITS(GRBM_GFX_INDEX, SH_INDEX, sh_index);
|
||||
return grbm_gfx_index;
|
||||
}
|
||||
|
||||
// GRBM SE/SH/WGP indexing
|
||||
// Stub in this primitives set: all three arguments are ignored and 0 is returned.
static uint32_t grbm_se_sh_wgp_index_value(const uint32_t&, const uint32_t&, const uint32_t&) {
  return 0u;
}
|
||||
// GRBM SE/SH/WGP/BlockInstance indexing
|
||||
// Stub in this primitives set: all four arguments are ignored and 0 is returned.
static uint32_t grbm_inst_se_sh_wgp_index_value(const uint32_t&, const uint32_t&,
                                                const uint32_t&, const uint32_t&) {
  return 0u;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to reset counters
|
||||
// CP_PERFMON_CNTL reset value: every field zero.
static uint32_t cp_perfmon_cntl_reset_value() {
  return 0u;
}
|
||||
|
||||
// CP_PERFMON_CNTL value to start counters
|
||||
static uint32_t cp_perfmon_cntl_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to stop/freeze counters
|
||||
static uint32_t cp_perfmon_cntl_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// CP_PERFMON_CNTL value to sample/read counters
|
||||
static uint32_t cp_perfmon_cntl_read_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
|
||||
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to enable counting
|
||||
static uint32_t cp_perfcount_enable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 1);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// Compute Perfcount Enable register value to disable counting
|
||||
static uint32_t cp_perfcount_disable_value() {
|
||||
uint32_t compute_perfcount_enable =
|
||||
SET_REG_FIELD_BITS(COMPUTE_PERFCOUNT_ENABLE, PERFCOUNT_ENABLE, 0);
|
||||
return compute_perfcount_enable;
|
||||
}
|
||||
|
||||
// SQ Block primitives
|
||||
|
||||
// SQ Counter Select Register value
|
||||
static uint32_t sq_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SIMD_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_CLIENT_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id);
|
||||
return sq_perfcounter0_select;
|
||||
}
|
||||
static uint32_t sq_spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t sq_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SIMD_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_BANK_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SQC_CLIENT_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER0_SELECT, SPM_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return sq_perfcounter0_select;
|
||||
}
|
||||
|
||||
// SQ Counter Mask Register value
|
||||
static uint32_t sq_mask_value(const counter_des_t&) {
|
||||
uint32_t sq_perfcounter_mask = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_MASK, SH0_MASK, 0xFFFF) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_MASK, SH1_MASK, 0xFFFF);
|
||||
return sq_perfcounter_mask;
|
||||
}
|
||||
|
||||
// SQ Counter Control Register value
|
||||
static uint32_t sq_control_value(const counter_des_t& counter_des) {
|
||||
const uint32_t block_id = counter_des.block_des.id;
|
||||
uint32_t sq_perfcounter_ctrl{0};
|
||||
if (block_id == SqCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
} else if (block_id == SqGsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1);
|
||||
} else if (block_id == SqVsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1);
|
||||
} else if (block_id == SqPsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1);
|
||||
} else if (block_id == SqHsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1);
|
||||
} else if (block_id == SqCsCounterBlockId) {
|
||||
sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
}
|
||||
#if defined(SQ_PERFCOUNTER_CTRL__VMID_MASK__SHIFT)
|
||||
sq_perfcounter_ctrl |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VMID_MASK, 0xFFFF);
|
||||
#else
|
||||
sq_perfcounter_ctrl |= 0xFFFF0000;
|
||||
#endif
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
|
||||
// SQ validate counter attributes
|
||||
// When SQ_CONFLICT_CHECK == 1, aborts if the attribute set contains both the SQ and the
// texture-cache attribute bits. Otherwise this is a no-op.
static void validate_counters(uint32_t counters_vec_attr) {
#if SQ_CONFLICT_CHECK == 1
  const uint32_t conflict_mask = CounterBlockSqAttr | CounterBlockTcAttr;
  if ((counters_vec_attr & conflict_mask) == conflict_mask) {
    abort();
  }
#endif
}
|
||||
|
||||
// SQ Counter Control enable performance counter in graphics pipeline stages
|
||||
static uint32_t sq_control_enable_value() {
|
||||
uint32_t sq_perfcounter_ctrl = SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, PS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, GS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, ES_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, HS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, LS_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, CS_EN, 0x1);
|
||||
#if defined(SQ_PERFCOUNTER_CTRL__VMID_MASK__SHIFT)
|
||||
sq_perfcounter_ctrl |= SET_REG_FIELD_BITS(SQ_PERFCOUNTER_CTRL, VMID_MASK, 0xFFFF);
|
||||
#else
|
||||
sq_perfcounter_ctrl |= 0xFFFF0000;
|
||||
#endif
|
||||
return sq_perfcounter_ctrl;
|
||||
}
|
||||
// Stub: always returns 0 (SQ_PERFCOUNTER_CTRL2_ADDR is value-initialized in this class).
static uint32_t sq_control2_enable_value() {
  return 0u;
}
|
||||
// Stub: always returns 0 (SQ_PERFCOUNTER_CTRL2_ADDR is value-initialized in this class).
static uint32_t sq_control2_disable_value() {
  return 0u;
}
|
||||
|
||||
// MC Block primitives
|
||||
|
||||
// MC Channel value
|
||||
// The MC config value is simply the counter's index.
static uint32_t mc_config_value(const counter_des_t& counter_des) {
  return counter_des.index;
}
|
||||
|
||||
// MC registers values
|
||||
static auto constexpr mc_select_value_MC_VM_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(MC_VM_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_L2_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_L2_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_ATC_PERFCOUNTER0_CFG =
|
||||
mc_select_value(ATC_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_GCEA_PERFCOUNTER0_CFG =
|
||||
mc_select_value(GCEA_PERFCOUNTER0_CFG);
|
||||
static auto constexpr mc_select_value_RPB_PERFCOUNTER0_CFG =
|
||||
mc_select_value(RPB_PERFCOUNTER0_CFG);
|
||||
|
||||
static uint32_t mc_reset_value() { return MC_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK_PRM; }
|
||||
static uint32_t mc_start_value() { return MC_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK_PRM; }
|
||||
|
||||
static auto constexpr select_value_CB_PERFCOUNTER0_SELECT = select_value(CB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_DB_PERFCOUNTER0_SELECT = select_value(DB_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_PERFCOUNTER0_SELECT =
|
||||
select_value(GRBM_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_GRBM_SE0_PERFCOUNTER_SELECT =
|
||||
select_value(GRBM_SE0_PERFCOUNTER_SELECT);
|
||||
static auto constexpr select_value_PA_SU_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SU_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_PA_SC_PERFCOUNTER0_SELECT =
|
||||
select_value(PA_SC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_SPI_PERFCOUNTER0_SELECT =
|
||||
select_value(SPI_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TA_PERFCOUNTER0_SELECT = select_value(TA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCA_PERFCOUNTER0_SELECT =
|
||||
select_value(TCA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCC_PERFCOUNTER0_SELECT =
|
||||
select_value(TCC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TD_PERFCOUNTER0_SELECT = select_value(TD_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_TCP_PERFCOUNTER0_SELECT =
|
||||
select_value(TCP_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_VGT_PERFCOUNTER0_SELECT =
|
||||
select_value(VGT_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_IA_PERFCOUNTER0_SELECT = select_value(IA_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_WD_PERFCOUNTER0_SELECT = select_value(WD_PERFCOUNTER0_SELECT);
|
||||
|
||||
// static auto constexpr select_value_SX_PERFCOUNTER0_SELECT =
|
||||
// select_value_t2(SX_PERFCOUNTER0_SELECT); static auto constexpr
|
||||
// select_value_GDS_PERFCOUNTER0_SELECT = select_value_t2(GDS_PERFCOUNTER0_SELECT);
|
||||
|
||||
// SX and GDS select values are stubbed: the callables ignore the counter and return 0.
static constexpr auto select_value_SX_PERFCOUNTER0_SELECT = [](const counter_des_t&) {
  return uint32_t{0};
};
static constexpr auto select_value_GDS_PERFCOUNTER0_SELECT = [](const counter_des_t&) {
  return uint32_t{0};
};
|
||||
|
||||
static auto constexpr select_value_CPC_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPC_PERFCOUNTER0_SELECT);
|
||||
static auto constexpr select_value_CPF_PERFCOUNTER0_SELECT =
|
||||
select_value_t3(CPF_PERFCOUNTER0_SELECT);
|
||||
|
||||
static uint32_t spm_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 3); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
static uint32_t spm_even_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 1); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
static uint32_t spm_odd_select_value(const counter_des_t& counter_des) {
|
||||
uint32_t tcc_perfcounter0_select =
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, PERF_SEL1, counter_des.id) |
|
||||
SET_REG_FIELD_BITS(TCC_PERFCOUNTER0_SELECT, CNTR_MODE, 1); // PERFMON_SPM_MODE_32BIT_CLAMP
|
||||
return tcc_perfcounter0_select;
|
||||
}
|
||||
// Builds the mux RAM entry for a counter: counter index, the block's SPM block id,
// and the block instance index. Each value is truncated to its bit-field width.
static mux_info_t spm_mux_ram_value(const counter_des_t& counter_des) {
  mux_info_t entry{0};
  entry.gfx.instance = counter_des.block_des.index;
  entry.gfx.block = counter_des.block_info->spm_block_id;
  entry.gfx.counter = counter_des.index;
  return entry;
}
|
||||
// Builds a mux RAM entry from explicit counter / block / instance values.
// Each value is truncated to its bit-field width (6 / 5 / 5 bits).
static mux_info_t spm_mux_ram_value(uint16_t counter, uint16_t block, uint16_t instance) {
  mux_info_t entry{0};
  entry.gfx.instance = instance;
  entry.gfx.block = block;
  entry.gfx.counter = counter;
  return entry;
}
|
||||
static uint32_t spm_mux_ram_idx_incr(uint32_t idx) {
|
||||
uint32_t incr_idx = ++idx;
|
||||
if (!(incr_idx % RLC_SPM_COUNTERS_PER_LINE)) incr_idx += RLC_SPM_COUNTERS_PER_LINE;
|
||||
return incr_idx;
|
||||
}
|
||||
|
||||
// SDMA primitives
|
||||
// Stub: always returns 0.
static uint32_t sdma_disable_clear_value() {
  return 0u;
}
|
||||
|
||||
// Stub: always returns 0.
static uint32_t sdma_enable_value() {
  return 0u;
}
|
||||
|
||||
// Stub: the counter descriptor is ignored and 0 is returned.
static uint32_t sdma_select_value(const counter_des_t& counter_des) {
  (void)counter_des;
  return 0u;
}
|
||||
|
||||
// Stub: the counter descriptor is ignored and 0 is returned.
static uint32_t sdma_stop_value(const counter_des_t& counter_des) {
  (void)counter_des;
  return 0u;
}
|
||||
|
||||
// SPM trace routines
|
||||
static uint32_t rlc_spm_mc_cntl_value() {
|
||||
uint32_t rlc_spm_mc_cntl = SET_REG_FIELD_BITS(RLC_SPM_MC_CNTL, RLC_SPM_VMID, 15);
|
||||
return rlc_spm_mc_cntl;
|
||||
}
|
||||
static uint32_t cp_perfmon_cntl_spm_start_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
static uint32_t cp_perfmon_cntl_spm_stop_value() {
|
||||
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
|
||||
return cp_perfmon_cntl;
|
||||
}
|
||||
|
||||
// Stub: all four arguments are ignored and 0 is returned.
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
                                    const uint32_t& block, const uint32_t& hi) {
  (void)value;
  (void)counter_des;
  (void)block;
  (void)hi;
  return 0u;
}
|
||||
|
||||
static uint32_t rlc_spm_perfmon_cntl_value(const uint32_t& sampling_rate) {
|
||||
uint32_t rlc_spm_perfmon_cntl =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_CNTL, PERFMON_SAMPLE_INTERVAL, sampling_rate);
|
||||
return rlc_spm_perfmon_cntl;
|
||||
}
|
||||
static uint32_t rlc_spm_perfmon_segment_size_value(const uint32_t& global_count,
|
||||
const uint32_t& se_count) {
|
||||
const uint32_t global_nlines = global_count;
|
||||
const uint32_t se_nlines = se_count;
|
||||
const uint32_t segment_size = (global_nlines + (4 * se_nlines));
|
||||
uint32_t rlc_spm_perfmon_segment_size =
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, GLOBAL_NUM_LINE, global_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE0_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE1_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, SE2_NUM_LINE, se_nlines) |
|
||||
SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE, PERFMON_SEGMENT_SIZE, segment_size);
|
||||
return rlc_spm_perfmon_segment_size;
|
||||
}
|
||||
|
||||
// RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1 value: segment size of 4 * se_count lines, with
// se_count lines for each of SE4..SE7. Returns 0 when the register header does not
// define the CORE1 fields.
static uint32_t rlc_spm_perfmon_segment_size_core1_value(const uint32_t& se_count) {
  uint32_t value = 0;
#if defined(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1__PERFMON_SEGMENT_SIZE_CORE1__SHIFT)
  const uint32_t total_lines = 4 * se_count;
  value |= SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, PERFMON_SEGMENT_SIZE_CORE1,
                              total_lines);
  value |= SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE4_NUM_LINE, se_count);
  value |= SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE5_NUM_LINE, se_count);
  value |= SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE6_NUM_LINE, se_count);
  value |= SET_REG_FIELD_BITS(RLC_SPM_PERFMON_SEGMENT_SIZE_CORE1, SE7_NUM_LINE, se_count);
#else
  (void)se_count;
#endif
  return value;
}
|
||||
|
||||
// Enable Thread Trace for all VM Id's
|
||||
// Enable all of the SIMD's of the compute unit
|
||||
// Enable Compute Unit (CU) at index Zero to be used for fine-grained data
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
//
|
||||
// @note: REG_STALL_EN, SPI_STALL_EN and SQ_STALL_EN bits are enabled below. They
|
||||
// are useful if we wish to program buffer throttling.
|
||||
//
|
||||
static uint32_t sqtt_mask_value(uint32_t targetCu, uint32_t simd, uint32_t vmIdMask) {
|
||||
uint32_t sq_thread_trace_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SH_SEL, 0x0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SIMD_EN, simd) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, CU_SEL, targetCu) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SQ_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SPI_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, REG_STALL_EN, 0x1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, VM_ID_MASK, vmIdMask);
|
||||
return sq_thread_trace_mask;
|
||||
}
|
||||
|
||||
// Mask of compute units to get thread trace data from
|
||||
static uint32_t sqtt_perf_mask_value() {
|
||||
uint32_t sq_thread_trace_perf_mask =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_PERF_MASK, SH0_MASK, 0xFFFF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_PERF_MASK, SH1_MASK, 0xFFFF);
|
||||
return sq_thread_trace_perf_mask;
|
||||
}
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
|
||||
// One-hot token bits OR-ed together into the TOKEN_MASK field of SQ_THREAD_TRACE_TOKEN_MASK.
static const uint32_t SQTT_TOKEN_MISC = 0x0001;         // bit 0
static const uint32_t SQTT_TOKEN_TIME = 0x0002;         // bit 1
static const uint32_t SQTT_TOKEN_REG = 0x0004;          // bit 2
static const uint32_t SQTT_TOKEN_WAVE_START = 0x0008;   // bit 3
static const uint32_t SQTT_TOKEN_REG_CS = 0x0020;       // bit 5
static const uint32_t SQTT_TOKEN_WAVE_END = 0x0040;     // bit 6
static const uint32_t SQTT_TOKEN_INST = 0x0400;         // bit 10
static const uint32_t SQTT_TOKEN_INST_PC = 0x0800;      // bit 11
static const uint32_t SQTT_TOKEN_USERDATA = 0x1000;     // bit 12
static const uint32_t SQTT_TOKEN_ISSUE = 0x2000;        // bit 13
static const uint32_t SQTT_TOKEN_REG_CS_PRIV = 0x8000;  // bit 15
|
||||
|
||||
static uint32_t sqtt_token_mask_on_value() {
|
||||
uint32_t sq_thread_trace_token_mask;
|
||||
uint32_t sq_thread_trace_token_mask_token_mask =
|
||||
SQTT_TOKEN_MISC | SQTT_TOKEN_TIME | SQTT_TOKEN_REG | SQTT_TOKEN_WAVE_START |
|
||||
SQTT_TOKEN_WAVE_END | SQTT_TOKEN_INST | SQTT_TOKEN_INST_PC | SQTT_TOKEN_USERDATA |
|
||||
SQTT_TOKEN_ISSUE | SQTT_TOKEN_REG_CS | SQTT_TOKEN_REG_CS_PRIV;
|
||||
|
||||
sq_thread_trace_token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK,
|
||||
sq_thread_trace_token_mask_token_mask);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
static uint32_t sqtt_token_mask_off_value() {
|
||||
uint32_t sq_thread_trace_token_mask =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0x0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK, 0xF);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
static uint32_t sqtt_token_mask_occupancy_value() {
|
||||
uint32_t sq_thread_trace_token_mask;
|
||||
uint32_t sq_thread_trace_token_mask_token_mask =
|
||||
SQTT_TOKEN_MISC | SQTT_TOKEN_TIME | SQTT_TOKEN_REG | SQTT_TOKEN_WAVE_START |
|
||||
SQTT_TOKEN_WAVE_END | SQTT_TOKEN_REG_CS_PRIV | SQTT_TOKEN_REG_CS | SQTT_TOKEN_USERDATA;
|
||||
|
||||
sq_thread_trace_token_mask = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_MASK, 0xF) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, TOKEN_MASK,
|
||||
sq_thread_trace_token_mask_token_mask);
|
||||
return sq_thread_trace_token_mask;
|
||||
}
|
||||
|
||||
// Indicate the different TT tokens that specify instruction operations to be logged
|
||||
// Disabling specifically instruction operations updating Program Counter (PC).
|
||||
// @note: The field is defined in the spec incorrectly as a 16-bit value
|
||||
static uint32_t sqtt_token_mask2_value() {
|
||||
uint32_t sq_thread_trace_token_mask2 =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK2, INST_MASK, 0xFFFFFFFF);
|
||||
return sq_thread_trace_token_mask2;
|
||||
}
|
||||
|
||||
// Check if stalling is supported
|
||||
static bool sqtt_stalling_enabled(const uint32_t& mask_val, const uint32_t& token_mask_val) {
|
||||
return GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SQ_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, SPI_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_MASK, REG_STALL_EN, mask_val) ||
|
||||
GET_REG_FIELD_BITS(SQ_THREAD_TRACE_TOKEN_MASK, REG_DROP_ON_STALL, token_mask_val);
|
||||
}
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
// Thread trace mode OFF value
|
||||
static uint32_t sqtt_mode_off_value() {
|
||||
uint32_t sq_thread_trace_mode =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, WRAP, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, CAPTURE_MODE, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MASK_CS, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, AUTOFLUSH_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MODE, SQ_THREAD_TRACE_MODE_OFF);
|
||||
return sq_thread_trace_mode;
|
||||
}
|
||||
// Thread trace mode ON value
|
||||
static uint32_t sqtt_mode_on_value() {
|
||||
uint32_t sq_thread_trace_mode =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, WRAP, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, CAPTURE_MODE, 0) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MASK_CS, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, AUTOFLUSH_EN, 1) |
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_MODE, MODE, SQ_THREAD_TRACE_MODE_ON);
|
||||
return sq_thread_trace_mode;
|
||||
}
|
||||
|
||||
// Base address of buffer to use for thread trace
|
||||
static uint32_t sqtt_base_value_lo(const uint64_t& base_addr) {
|
||||
uint32_t sq_thread_trace_base =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BASE, ADDR, Low32(base_addr >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_base;
|
||||
}
|
||||
static uint32_t sqtt_base_value_hi(const uint64_t& base_addr) {
|
||||
uint32_t sq_thread_trace_base = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_BASE2, ADDR_HI,
|
||||
High32(base_addr >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_base;
|
||||
}
|
||||
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
static uint32_t sqtt_buffer_size_value(uint32_t size_val, uint32_t base_hi) {
|
||||
uint32_t sq_thread_trace_size =
|
||||
SET_REG_FIELD_BITS(SQ_THREAD_TRACE_SIZE, SIZE, (size_val >> TT_BUFF_ALIGN_SHIFT));
|
||||
return sq_thread_trace_size;
|
||||
}
|
||||
|
||||
// Stub: `size_val` is ignored and 0 is returned.
static uint32_t sqtt_buffer0_size_value(uint32_t size_val) {
  (void)size_val;
  return 0u;
}
|
||||
|
||||
// Stub: `enableSqgEvents` is ignored and 0 is returned.
static uint32_t spi_sqg_event_ctl(bool enableSqgEvents) {
  (void)enableSqgEvents;
  return 0u;
}
|
||||
|
||||
// Always returns 0.
static uint32_t sqtt_zero_size_value() {
  return 0u;
}
|
||||
|
||||
// Thread trace ctrl register value
|
||||
static uint32_t sqtt_ctrl_value(bool on) {
|
||||
uint32_t sq_thread_trace_ctrl = SET_REG_FIELD_BITS(SQ_THREAD_TRACE_CTRL, RESET_BUFFER, 1);
|
||||
return sq_thread_trace_ctrl;
|
||||
}
|
||||
|
||||
// SPM primitives
|
||||
// Mux-select word used for timestamp entries: the constant 0xF0F0.
static uint16_t spm_timestamp_muxsel() {
  const uint16_t kTimestampMuxsel = 0xF0F0;
  return kTimestampMuxsel;
}
|
||||
|
||||
// Bit masks for thread-trace status / write-pointer values.
// The underlying type is pinned to uint32_t because TT_CONTROL_FULL_MASK (bit 31)
// does not fit in a signed int.
enum ESQTT_STATUS_MASK : uint32_t {
  // Mask to check if memory error was received
  TT_CONTROL_UTC_ERR_MASK = 0x10000000,
  // Mask to check if SQTT buffer is wrapped
  TT_CONTROL_FULL_MASK = 0x80000000,
  // Mask selecting the low 30 bits
  TT_WRITE_PTR_MASK = 0x3FFFFFFF
};
|
||||
|
||||
// Mask with only bit 30 set (the busy bit).
static uint32_t sqtt_busy_mask() {
  const uint32_t kBusyBit = 30;
  const uint32_t mask = 1u << kBusyBit;
  return mask;
}
|
||||
|
||||
// Mask with the low NUM_PIPES (8) bits set, one bit per pipe.
static uint32_t sqtt_pending_mask() {
  const uint32_t kNumPipes = 8;
  const uint32_t one_past_top = 1u << kNumPipes;
  return one_past_top - 1;
}
|
||||
};
|
||||
|
||||
} // namespace gfx9
|
||||
} // namespace gfxip
|
||||
|
||||
#endif // _GFX9_PRIMITIVES_H_
|
||||
@@ -0,0 +1,109 @@
|
||||
#ifndef _GPU_BLOCKINFO_H_
|
||||
#define _GPU_BLOCKINFO_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Counter Block attributes
|
||||
// Counter block attribute flags. Every value is a single bit except
// CounterBlockCpmonAttr, which is the multi-bit value 0x1f.
enum CounterBlockAttr {
  CounterBlockDfltAttr = 1 << 0,        // Default block attribute
  CounterBlockSeAttr = 1 << 1,          // Per ShaderEngine blocks
  CounterBlockSqAttr = 1 << 2,          // SQ blocks
  CounterBlockCleanAttr = 1 << 3,       // Need to clean counter registers
  CounterBlockMcAttr = 1 << 4,          // MC Block
  CounterBlockCpmonAttr = 0x1f,         // CP PERFMON controllable blocks
  CounterBlockSdmaAttr = 1 << 8,        // SDMA block
  CounterBlockTcAttr = 1 << 10,         // Texture cache
  CounterBlockExplInstAttr = 1 << 11,   // Explicitly indexed blocks
  CounterBlockSpmGlobalAttr = 1 << 12,  // SPM blocks (global)
  CounterBlockSpmSeAttr = 1 << 13,      // SPM blocks (per SE)
  CounterBlockGusAttr = 1 << 14,        // GUS block
  CounterBlockGRBMAttr = 1 << 15,       // GRBM block
  CounterBlockUmcAttr = 1 << 16,        // UMC blocks
  CounterBlockSaAttr = 1 << 17,         // SE and SA-dependent blocks
  CounterBlockAidAttr = 1 << 18,        // MI300 AID blocks
  CounterBlockSPIAttr = 1 << 19,        // SPI counter
  CounterBlockWgpAttr = 1 << 20,        // Blocks within WGP
};
|
||||
|
||||
// Register addresses corresponding to each counter: one entry groups the
// select, control and low/high data register addresses of a single counter.
struct CounterRegInfo {
  // counter select register address
  uint32_t select_addr;
  // counter control register address
  uint32_t control_addr;
  // counter register address low
  uint32_t register_addr_lo;
  // counter register address high
  uint32_t register_addr_hi;
};
|
||||
|
||||
// Block delay info: a (reg, val) pair referenced through
// GpuBlockInfo::delay_info.
// NOTE(review): presumably `reg` is a register address and `val` the value to
// program into it - confirm against the code that consumes delay_info.
struct BlockDelayInfo {
  uint32_t reg;
  uint32_t val;
};
|
||||
|
||||
struct counter_des_t;
|
||||
|
||||
// GPU Block info definition
|
||||
struct GpuBlockInfo {
|
||||
// Unique string identifier of the block.
|
||||
const char* name;
|
||||
// Block ID
|
||||
uint32_t id;
|
||||
// Maximum number of block instances in the group per shader array
|
||||
uint32_t instance_count;
|
||||
// Maximum counter event ID
|
||||
uint32_t event_id_max;
|
||||
// Maximum number of counters that can be enabled at once
|
||||
uint32_t counter_count;
|
||||
// Counter registers addresses
|
||||
const CounterRegInfo* counter_reg_info;
|
||||
// Counter select value function
|
||||
uint32_t (*select_value)(const counter_des_t&);
|
||||
// Block attributes mask
|
||||
uint32_t attr;
|
||||
// Block delay info
|
||||
const BlockDelayInfo* delay_info;
|
||||
// SPM block id
|
||||
uint32_t spm_block_id;
|
||||
};
|
||||
|
||||
// Block descriptor: identifies a block by its id and an index.
// NOTE(review): `index` presumably selects the block instance - confirm.
struct block_des_t {
  uint32_t id;
  uint32_t index;
};
|
||||
|
||||
// block_des_t less then functor
|
||||
struct lt_block_des {
|
||||
bool operator()(const block_des_t& a1, const block_des_t& a2) const {
|
||||
return (a1.id < a2.id) || ((a1.id == a2.id) && (a1.index < a2.index));
|
||||
}
|
||||
};
|
||||
|
||||
// Counter descriptor
|
||||
struct counter_des_t {
|
||||
uint32_t id;
|
||||
uint32_t index;
|
||||
block_des_t block_des;
|
||||
const GpuBlockInfo* block_info;
|
||||
};
|
||||
|
||||
#endif // _GPU_BLOCKINFO_H_
|
||||
@@ -0,0 +1,58 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef INC_AQL_PROFILE_H_
|
||||
#define INC_AQL_PROFILE_H_
|
||||
|
||||
#if 0
|
||||
// Profiling parameters
// All parameters are generic and if not applicable for a specific
// profile configuration then error status will be returned.
typedef enum {
  // SQTT applicable parameters
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5
} hsa_ven_amd_aqlprofile_parameter_name_t;
|
||||
|
||||
// Profile attributes: selects what get_info returns.
typedef enum {
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0,
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1,
  // get_info returns PMC uint64_t value in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2,
  // get_info returns SQTT buffer ptr/size in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA = 3,
  // get_info returns number of block counters
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4,
  // get_info returns block id, instances by name string using _id_query_t
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5,
  // get_info returns size/pointer for counters enable command buffer
  HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6,
} hsa_ven_amd_aqlprofile_info_type_t;
|
||||
#endif
|
||||
|
||||
#endif // INC_AQL_PROFILE_H_
|
||||
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
@@ -0,0 +1,678 @@
|
||||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NVD_H
|
||||
#define NVD_H
|
||||
|
||||
/**
|
||||
* Navi's PM4 definitions
|
||||
*/
|
||||
/* PM4 packet type codes; the type occupies bits [31:30] of a packet header. */
#define PACKET_TYPE0 0
#define PACKET_TYPE1 1
#define PACKET_TYPE2 2
#define PACKET_TYPE3 3

/* Field extractors for a packet header dword `h`. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: register offset in bits [15:0], count in bits [29:16].
 * Built in unsigned arithmetic so the result is never a negative int. */
#define PACKET0(reg, n) (((unsigned)PACKET_TYPE0 << 30) | \
                         ((unsigned)(reg) & 0xFFFF) |     \
                         (((unsigned)(n) & 0x3FFF) << 16))

/* Type-2 packet. PACKET2() relies on a REG_SET macro that is not defined in
 * this part of the file. */
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)

#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/* Type-3 header: opcode in bits [15:8], count in bits [29:16].
 * The type code is cast to unsigned before the shift: PACKET_TYPE3 << 30 on a
 * plain int would shift into the sign bit. */
#define PACKET3(op, n) (((unsigned)PACKET_TYPE3 << 30) | \
                        (((unsigned)(op) & 0xFF) << 8) | \
                        (((unsigned)(n) & 0x3FFF) << 16))

/* Type-3 header with bit 1 additionally set. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1u << 1))
|
||||
|
||||
/* Packet 3 types */
|
||||
#define PACKET3_NOP 0x10
|
||||
#define PACKET3_SET_BASE 0x11
|
||||
#define PACKET3_BASE_INDEX(x) ((x) << 0)
|
||||
#define CE_PARTITION_BASE 3
|
||||
#define PACKET3_CLEAR_STATE 0x12
|
||||
#define PACKET3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PACKET3_DISPATCH_DIRECT 0x15
|
||||
#define PACKET3_DISPATCH_INDIRECT 0x16
|
||||
#define PACKET3_INDIRECT_BUFFER_END 0x17
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
|
||||
#define PACKET3_ATOMIC_GDS 0x1D
|
||||
#define PACKET3_ATOMIC_MEM 0x1E
|
||||
#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
|
||||
#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_OCCLUSION_QUERY 0x1F
|
||||
#define PACKET3_SET_PREDICATION 0x20
|
||||
#define PACKET3_REG_RMW 0x21
|
||||
#define PACKET3_COND_EXEC 0x22
|
||||
#define PACKET3_PRED_EXEC 0x23
|
||||
#define PACKET3_DRAW_INDIRECT 0x24
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PACKET3_INDEX_BASE 0x26
|
||||
#define PACKET3_DRAW_INDEX_2 0x27
|
||||
#define PACKET3_CONTEXT_CONTROL 0x28
|
||||
#define PACKET3_INDEX_TYPE 0x2A
|
||||
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PACKET3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PACKET3_NUM_INSTANCES 0x2F
|
||||
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
|
||||
#define PACKET3_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
|
||||
#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
#define WRITE_DATA_ENGINE_SEL(x) ((unsigned)(x) << 30) /* unsigned: selector 2 reaches bit 31 */
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
|
||||
#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
|
||||
#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
|
||||
#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
/* Field values are unsigned literals: the SEL_* values occupy bits [31:29],
 * which would shift into the sign bit of a plain int. */
#  define PACKET3_SEM_USE_MAILBOX (0x1u << 16)
#  define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1u << 20) /* 0 = increment, 1 = write 1 */
#  define PACKET3_SEM_SEL_SIGNAL (0x6u << 29)
#  define PACKET3_SEM_SEL_WAIT (0x7u << 29)
|
||||
#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
|
||||
#define PACKET3_COPY_DW 0x3B
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
|
||||
#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
|
||||
#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
|
||||
#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
|
||||
#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
|
||||
#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
|
||||
#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
|
||||
#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
|
||||
#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
|
||||
#define PACKET3_CP_DMA 0x41
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_SURFACE_SYNC 0x43
|
||||
#define PACKET3_ME_INITIALIZE 0x44
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
#define PACKET3_EVENT_WRITE 0x46
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
*/
|
||||
#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
|
||||
#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
|
||||
#define PACKET3_EVENT_WRITE_EOP 0x47
|
||||
#define PACKET3_EVENT_WRITE_EOS 0x48
|
||||
#define PACKET3_RELEASE_MEM 0x49
|
||||
#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
|
||||
#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
|
||||
#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
|
||||
#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
|
||||
#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - cache_policy__me_release_mem__lru
|
||||
* 1 - cache_policy__me_release_mem__stream
|
||||
* 2 - cache_policy__me_release_mem__noa
|
||||
* 3 - cache_policy__me_release_mem__bypass
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
|
||||
|
||||
#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((unsigned)(x) << 29) /* unsigned: selector 4 reaches bit 31 */
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
* 2 - send 64bit data
|
||||
* 3 - send 64bit GPU counter value
|
||||
* 4 - send 64bit sys counter value
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
|
||||
/* 0 - none
|
||||
* 1 - interrupt only (DATA_SEL = 0)
|
||||
* 2 - interrupt when data write is confirmed
|
||||
*/
|
||||
#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
|
||||
/* 0 - MC
|
||||
* 1 - TC/L2
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PACKET3_PREAMBLE_CNTL 0x4A
|
||||
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
|
||||
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
|
||||
#define PACKET3_DMA_DATA 0x50
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. SRC_ADDR_LO or DATA [31:0]
|
||||
* 4. SRC_ADDR_HI [31:0]
|
||||
* 5. DST_ADDR_LO [31:0]
|
||||
* 6. DST_ADDR_HI [7:0]
|
||||
* 7. COMMAND [31:26] | BYTE_COUNT [25:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
|
||||
/* 0 - ME
|
||||
* 1 - PFP
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
|
||||
/* 0 - DST_ADDR using DAS
|
||||
* 1 - GDS
|
||||
* 3 - DST_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
|
||||
/* 0 - SRC_ADDR using SAS
|
||||
* 1 - GDS
|
||||
* 2 - DATA
|
||||
* 3 - SRC_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CP_SYNC (1u << 31) /* bit 31; unsigned so the value is not a negative int */
|
||||
/* COMMAND */
|
||||
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||
#define PACKET3_CONTEXT_REG_RMW 0x51
|
||||
#define PACKET3_GFX_CNTX_UPDATE 0x52
|
||||
#define PACKET3_BLK_CNTX_UPDATE 0x53
|
||||
#define PACKET3_INCR_UPDT_STATE 0x55
|
||||
#define PACKET3_ACQUIRE_MEM 0x58
|
||||
/* 1. HEADER
 * 2. COHER_CNTL [30:0], ENGINE_SEL [31:31]
 * 3. COHER_SIZE [31:0]
 * 4. COHER_SIZE_HI [7:0]
 * 5. COHER_BASE_LO [31:0]
 * 6. COHER_BASE_HI [23:0]
 * 7. POLL_INTERVAL [15:0]
 * 8. GCR_CNTL [18:0]
 */
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
|
||||
/*
|
||||
* 0:NOP
|
||||
* 1:ALL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:reserved
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
|
||||
/*
|
||||
* 0:ALL
|
||||
* 1:VOL
|
||||
* 2:RANGE
|
||||
* 3:FIRST_LAST
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
|
||||
/*
|
||||
* 0: PARALLEL
|
||||
* 1: FORWARD
|
||||
* 2: REVERSE
|
||||
*/
|
||||
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
|
||||
#define PACKET3_REWIND 0x59
|
||||
#define PACKET3_INTERRUPT 0x5A
|
||||
#define PACKET3_GEN_PDEPTE 0x5B
|
||||
#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
|
||||
#define PACKET3_PRIME_UTCL2 0x5D
|
||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||
#define PACKET3_LOAD_SH_REG 0x5F
|
||||
#define PACKET3_LOAD_CONFIG_REG 0x60
|
||||
#define PACKET3_LOAD_CONTEXT_REG 0x61
|
||||
#define PACKET3_LOAD_COMPUTE_STATE 0x62
|
||||
#define PACKET3_LOAD_SH_REG_INDEX 0x63
|
||||
#define PACKET3_SET_CONFIG_REG 0x68
|
||||
#define PACKET3_SET_CONFIG_REG_START 0x00002000
|
||||
#define PACKET3_SET_CONFIG_REG_END 0x00002c00
|
||||
#define PACKET3_SET_CONTEXT_REG 0x69
|
||||
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
|
||||
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
|
||||
#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
|
||||
#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
|
||||
#define PACKET3_SET_SH_REG_DI 0x72
|
||||
#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
|
||||
#define PACKET3_SET_SH_REG_DI_MULTI 0x74
|
||||
#define PACKET3_GFX_PIPE_LOCK 0x75
|
||||
#define PACKET3_SET_SH_REG 0x76
|
||||
#define PACKET3_SET_SH_REG_START 0x00002c00
|
||||
#define PACKET3_SET_SH_REG_END 0x00003000
|
||||
#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
|
||||
#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
|
||||
#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
|
||||
#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
|
||||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
|
||||
#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
|
||||
#define PACKET3_FORWARD_HEADER 0x7C
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
#define PACKET3_WRITE_CONST_RAM 0x81
|
||||
#define PACKET3_DUMP_CONST_RAM 0x83
|
||||
#define PACKET3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
|
||||
#define PACKET3_DISPATCH_DRAW 0x8D
|
||||
#define PACKET3_DISPATCH_DRAW_ACE 0x8D
|
||||
#define PACKET3_GET_LOD_STATS 0x8E
|
||||
#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
|
||||
#define PACKET3_FRAME_CONTROL 0x90
|
||||
# define FRAME_TMZ (1 << 0)
|
||||
# define FRAME_CMD(x) ((x) << 28)
|
||||
/*
|
||||
* x=0: tmz_begin
|
||||
* x=1: tmz_end
|
||||
*/
|
||||
#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
|
||||
#define PACKET3_WAIT_REG_MEM64 0x93
|
||||
#define PACKET3_COND_PREEMPT 0x94
|
||||
#define PACKET3_HDP_FLUSH 0x95
|
||||
#define PACKET3_COPY_DATA_RB 0x96
|
||||
#define PACKET3_INVALIDATE_TLBS 0x98
|
||||
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
|
||||
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
|
||||
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
|
||||
# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_AQL_PACKET 0x99
|
||||
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
|
||||
#define PACKET3_SET_SH_REG_INDEX 0x9B
|
||||
#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
|
||||
#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
|
||||
#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
|
||||
#define PACKET3_SET_RESOURCES 0xA0
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. QUEUE_MASK_LO [31:0]
|
||||
* 4. QUEUE_MASK_HI [31:0]
|
||||
* 5. GWS_MASK_LO [31:0]
|
||||
* 6. GWS_MASK_HI [31:0]
|
||||
* 7. OAC_MASK [15:0]
|
||||
* 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
*/
|
||||
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
|
||||
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
|
||||
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_MAP_PROCESS 0xA1
|
||||
#define PACKET3_MAP_QUEUES 0xA2
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. MQD_ADDR_LO [31:0]
|
||||
* 5. MQD_ADDR_HI [31:0]
|
||||
* 6. WPTR_ADDR_LO [31:0]
|
||||
* 7. WPTR_ADDR_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
|
||||
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
|
||||
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
|
||||
# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
|
||||
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2 */
|
||||
# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
|
||||
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
#define PACKET3_UNMAP_QUEUES 0xA3
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. CONTROL3
|
||||
* 5. CONTROL4
|
||||
* 6. CONTROL5
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
|
||||
/* 0 - PREEMPT_QUEUES
|
||||
* 1 - RESET_QUEUES
|
||||
* 2 - DISABLE_PROCESS_QUEUES
|
||||
* 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
*/
|
||||
# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
|
||||
/* CONTROL3a */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
|
||||
/* CONTROL3b */
|
||||
# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
|
||||
/* CONTROL4 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
|
||||
/* CONTROL5 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
|
||||
#define PACKET3_QUERY_STATUS 0xA4
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. ADDR_LO [31:0]
|
||||
* 5. ADDR_HI [31:0]
|
||||
* 6. DATA_LO [31:0]
|
||||
* 7. DATA_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
|
||||
# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
|
||||
# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
|
||||
#define PACKET3_RUN_LIST 0xA5
|
||||
#define PACKET3_MAP_PROCESS_VM 0xA6
|
||||
|
||||
#define PACKET3_RUN_CLEANER_SHADER 0xD2
|
||||
/* 1. header
|
||||
* 2. RESERVED [31:0]
|
||||
*/
|
||||
|
||||
/* GFX11 */
|
||||
#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
|
||||
# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,583 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef SOC15_H
|
||||
#define SOC15_H
|
||||
|
||||
#define GFX9_NUM_GFX_RINGS 1
|
||||
#define GFX9_NUM_COMPUTE_RINGS 8
|
||||
|
||||
/*
|
||||
* PM4
|
||||
*/
|
||||
#define PACKET_TYPE0 0
|
||||
#define PACKET_TYPE1 1
|
||||
#define PACKET_TYPE2 2
|
||||
#define PACKET_TYPE3 3
|
||||
|
||||
/* Packet type field: bits [31:30] of header word h (2-bit mask after the shift). */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & ((1 << 2) - 1))
|
||||
/* Count field: bits [29:16] of header word h (14-bit mask after the shift). */
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & ((1 << 14) - 1))
|
||||
/* Register field of a type-0 header: the low 16 bits, [15:0], of h. */
#define CP_PACKET0_GET_REG(h) ((h) & ((1 << 16) - 1))
|
||||
/* Opcode field of a type-3 header: bits [15:8] of h (8-bit mask after the shift). */
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & ((1 << 8) - 1))
|
||||
/*
 * Build a type-0 PM4 packet header word.
 *   reg - register field, masked to 16 bits and placed in bits [15:0]
 *   n   - count field, masked to 14 bits and placed in bits [29:16]
 * The packet type (PACKET_TYPE0) occupies bits [31:30].
 * The definition is kept as one contiguous macro: every continuation line
 * must directly follow the backslash of the previous one.
 */
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |		\
			 (((n) & 0x3FFF) << 16))
|
||||
#define CP_PACKET2 0x80000000
|
||||
#define PACKET2_PAD_SHIFT 0
|
||||
#define PACKET2_PAD_MASK (0x3fffffff << 0)
|
||||
|
||||
#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
|
||||
|
||||
/*
 * Build a type-3 PM4 packet header word.
 *   op - opcode, masked to 8 bits and placed in bits [15:8]
 *   n  - count field, masked to 14 bits and placed in bits [29:16]
 * The packet type is shifted as unsigned: PACKET_TYPE3 << 30 sets bit 31,
 * which does not fit in a signed int and would produce a negative header
 * that sign-extends when widened to a 64-bit value.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |			\
			 (((n) & 0x3FFF) << 16))
|
||||
|
||||
/* Same header word as PACKET3(op, n), with bit 1 additionally set. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
|
||||
|
||||
#define PACKETJ_CONDITION_CHECK0 0
|
||||
#define PACKETJ_CONDITION_CHECK1 1
|
||||
#define PACKETJ_CONDITION_CHECK2 2
|
||||
#define PACKETJ_CONDITION_CHECK3 3
|
||||
#define PACKETJ_CONDITION_CHECK4 4
|
||||
#define PACKETJ_CONDITION_CHECK5 5
|
||||
#define PACKETJ_CONDITION_CHECK6 6
|
||||
#define PACKETJ_CONDITION_CHECK7 7
|
||||
|
||||
#define PACKETJ_TYPE0 0
|
||||
#define PACKETJ_TYPE1 1
|
||||
#define PACKETJ_TYPE2 2
|
||||
#define PACKETJ_TYPE3 3
|
||||
#define PACKETJ_TYPE4 4
|
||||
#define PACKETJ_TYPE5 5
|
||||
#define PACKETJ_TYPE6 6
|
||||
#define PACKETJ_TYPE7 7
|
||||
|
||||
/*
 * Build a PACKETJ header word from its four fields.
 *   reg  - masked to 18 bits, placed in bits [17:0]
 *   r    - masked to 6 bits,  placed in bits [23:18]
 *   cond - masked to 4 bits,  placed in bits [27:24]
 *   type - masked to 4 bits,  placed in bits [31:28]
 * Each argument is parenthesized before masking so that an expression
 * argument (e.g. "a | b") is masked as a whole instead of leaking into
 * neighbouring fields through operator precedence.
 */
#define PACKETJ(reg, r, cond, type)	((((reg) & 0x3FFFF)) |		\
					 (((r) & 0x3F) << 18) |		\
					 (((cond) & 0xF) << 24) |	\
					 (((type) & 0xF) << 28))
|
||||
|
||||
#define CP_PACKETJ_NOP 0x60000000
|
||||
#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
|
||||
#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
|
||||
#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
|
||||
#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
|
||||
|
||||
/* Packet 3 types */
|
||||
#define PACKET3_NOP 0x10
|
||||
#define PACKET3_SET_BASE 0x11
|
||||
#define PACKET3_BASE_INDEX(x) ((x) << 0)
|
||||
#define CE_PARTITION_BASE 3
|
||||
#define PACKET3_CLEAR_STATE 0x12
|
||||
#define PACKET3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PACKET3_DISPATCH_DIRECT 0x15
|
||||
#define PACKET3_DISPATCH_INDIRECT 0x16
|
||||
#define PACKET3_ATOMIC_GDS 0x1D
|
||||
#define PACKET3_ATOMIC_MEM 0x1E
|
||||
#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
|
||||
#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
|
||||
#define PACKET3_OCCLUSION_QUERY 0x1F
|
||||
#define PACKET3_SET_PREDICATION 0x20
|
||||
#define PACKET3_REG_RMW 0x21
|
||||
#define PACKET3_COND_EXEC 0x22
|
||||
#define PACKET3_PRED_EXEC 0x23
|
||||
#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
|
||||
#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
|
||||
#define PACKET3_DRAW_INDIRECT 0x24
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PACKET3_INDEX_BASE 0x26
|
||||
#define PACKET3_DRAW_INDEX_2 0x27
|
||||
#define PACKET3_CONTEXT_CONTROL 0x28
|
||||
#define PACKET3_INDEX_TYPE 0x2A
|
||||
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PACKET3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PACKET3_NUM_INSTANCES 0x2F
|
||||
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PACKET3_INDIRECT_BUFFER_CONST 0x33
|
||||
#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
|
||||
#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
|
||||
#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
|
||||
#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
|
||||
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
|
||||
# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
|
||||
/* Unsigned literal: 0x6 << 29 sets bit 31, which does not fit in a signed int. */
# define PACKET3_SEM_SEL_SIGNAL (0x6u << 29)
|
||||
/* Unsigned literal: 0x7 << 29 sets bit 31, which does not fit in a signed int. */
# define PACKET3_SEM_SEL_WAIT (0x7u << 29)
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
|
||||
#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
|
||||
#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
|
||||
#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
|
||||
#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
|
||||
#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
|
||||
#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
|
||||
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
|
||||
#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
|
||||
#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
|
||||
#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
|
||||
#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
|
||||
#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
|
||||
#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
|
||||
#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
|
||||
#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
|
||||
#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
|
||||
#define PACKET3_COPY_DATA__DST_SEL__GDS 3
|
||||
#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
|
||||
#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
|
||||
#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
|
||||
#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
|
||||
#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
|
||||
#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_COND_WRITE 0x45
|
||||
#define PACKET3_EVENT_WRITE 0x46
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
*/
|
||||
#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
|
||||
#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
|
||||
#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
|
||||
#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
|
||||
#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
|
||||
#define PACKET3_RELEASE_MEM 0x49
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
|
||||
#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
|
||||
#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
|
||||
#define EOP_TCL1_ACTION_EN (1 << 16)
|
||||
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
|
||||
#define EOP_TC_NC_ACTION_EN (1 << 19)
|
||||
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
|
||||
#define EOP_EXEC (1 << 28) /* For Trailing Fence */
|
||||
|
||||
#define DATA_SEL(x) ((x) << 29)
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
* 2 - send 64bit data
|
||||
* 3 - send 64bit GPU counter value
|
||||
* 4 - send 64bit sys counter value
|
||||
*/
|
||||
#define INT_SEL(x) ((x) << 24)
|
||||
/* 0 - none
|
||||
* 1 - interrupt only (DATA_SEL = 0)
|
||||
* 2 - interrupt when data write is confirmed
|
||||
*/
|
||||
#define DST_SEL(x) ((x) << 16)
|
||||
/* 0 - MC
|
||||
* 1 - TC/L2
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PACKET3_PREAMBLE_CNTL 0x4A
|
||||
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
|
||||
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
|
||||
#define PACKET3_DMA_DATA 0x50
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. SRC_ADDR_LO or DATA [31:0]
|
||||
* 4. SRC_ADDR_HI [31:0]
|
||||
* 5. DST_ADDR_LO [31:0]
|
||||
* 6. DST_ADDR_HI [7:0]
|
||||
* 7. COMMAND [30:21] | BYTE_COUNT [20:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
|
||||
/* 0 - ME
|
||||
* 1 - PFP
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
|
||||
/* 0 - DST_ADDR using DAS
|
||||
* 1 - GDS
|
||||
* 3 - DST_ADDR using L2
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
|
||||
/* 0 - SRC_ADDR using SAS
|
||||
* 1 - GDS
|
||||
* 2 - DATA
|
||||
* 3 - SRC_ADDR using L2
|
||||
*/
|
||||
/* Bit 31 of the CONTROL word; unsigned literal because 1 << 31 shifts into the sign bit of int. */
# define PACKET3_DMA_DATA_CP_SYNC (1u << 31)
|
||||
/* COMMAND */
|
||||
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
|
||||
/* 0 - memory
|
||||
* 1 - register
|
||||
*/
|
||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||
#define PACKET3_ACQUIRE_MEM 0x58
|
||||
/* 1. HEADER
|
||||
* 2. COHER_CNTL [30:0]
|
||||
* 2.1 ENGINE_SEL [31:31]
|
||||
* 3. COHER_SIZE [31:0]
|
||||
* 4. COHER_SIZE_HI [7:0]
|
||||
* 5. COHER_BASE_LO [31:0]
|
||||
* 6. COHER_BASE_HI [23:0]
|
||||
* 7. POLL_INTERVAL [15:0]
|
||||
*/
|
||||
/* COHER_CNTL fields for CP_COHER_CNTL */
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
|
||||
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
|
||||
#define PACKET3_REWIND 0x59
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
|
||||
#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
|
||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||
#define PACKET3_LOAD_SH_REG 0x5F
|
||||
#define PACKET3_LOAD_CONFIG_REG 0x60
|
||||
#define PACKET3_LOAD_CONTEXT_REG 0x61
|
||||
#define PACKET3_SET_CONFIG_REG 0x68
|
||||
#define PACKET3_SET_CONFIG_REG_START 0x00002000
|
||||
#define PACKET3_SET_CONFIG_REG_END 0x00002c00
|
||||
#define PACKET3_SET_CONTEXT_REG 0x69
|
||||
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
|
||||
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
|
||||
#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
|
||||
#define PACKET3_SET_SH_REG 0x76
|
||||
#define PACKET3_SET_SH_REG_START 0x00002c00
|
||||
#define PACKET3_SET_SH_REG_END 0x00003000
|
||||
#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
|
||||
#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
|
||||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
|
||||
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
|
||||
#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
#define PACKET3_WRITE_CONST_RAM 0x81
|
||||
#define PACKET3_DUMP_CONST_RAM 0x83
|
||||
#define PACKET3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
#define PACKET3_FRAME_CONTROL 0x90
|
||||
# define FRAME_TMZ (1 << 0)
|
||||
# define FRAME_CMD(x) ((x) << 28)
|
||||
/*
|
||||
* x=0: tmz_begin
|
||||
* x=1: tmz_end
|
||||
*/
|
||||
|
||||
#define PACKET3_INVALIDATE_TLBS 0x98
|
||||
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
|
||||
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
|
||||
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
|
||||
# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_SET_RESOURCES 0xA0
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. QUEUE_MASK_LO [31:0]
|
||||
* 4. QUEUE_MASK_HI [31:0]
|
||||
* 5. GWS_MASK_LO [31:0]
|
||||
* 6. GWS_MASK_HI [31:0]
|
||||
* 7. OAC_MASK [15:0]
|
||||
* 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
|
||||
*/
|
||||
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
|
||||
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
|
||||
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
|
||||
#define PACKET3_MAP_QUEUES 0xA2
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. MQD_ADDR_LO [31:0]
|
||||
* 5. MQD_ADDR_HI [31:0]
|
||||
* 6. WPTR_ADDR_LO [31:0]
|
||||
* 7. WPTR_ADDR_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
|
||||
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
|
||||
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
|
||||
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
|
||||
# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
|
||||
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2 */
|
||||
# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
|
||||
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
#define PACKET3_UNMAP_QUEUES 0xA3
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. CONTROL3
|
||||
* 5. CONTROL4
|
||||
* 6. CONTROL5
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
|
||||
/* 0 - PREEMPT_QUEUES
|
||||
* 1 - RESET_QUEUES
|
||||
* 2 - DISABLE_PROCESS_QUEUES
|
||||
* 3 - PREEMPT_QUEUES_NO_UNMAP
|
||||
*/
|
||||
# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
|
||||
# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
|
||||
# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
|
||||
/* CONTROL3a */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
|
||||
/* CONTROL3b */
|
||||
# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
|
||||
/* CONTROL4 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
|
||||
/* CONTROL5 */
|
||||
# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
|
||||
#define PACKET3_QUERY_STATUS 0xA4
|
||||
/* 1. header
|
||||
* 2. CONTROL
|
||||
* 3. CONTROL2
|
||||
* 4. ADDR_LO [31:0]
|
||||
* 5. ADDR_HI [31:0]
|
||||
* 6. DATA_LO [31:0]
|
||||
* 7. DATA_HI [31:0]
|
||||
*/
|
||||
/* CONTROL */
|
||||
# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
|
||||
# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
|
||||
# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
|
||||
/* CONTROL2a */
|
||||
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
|
||||
/* CONTROL2b */
|
||||
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
|
||||
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
|
||||
|
||||
#define PACKET3_RUN_CLEANER_SHADER 0xD2
|
||||
/* 1. header
|
||||
* 2. RESERVED [31:0]
|
||||
*/
|
||||
|
||||
#define VCE_CMD_NO_OP 0x00000000
|
||||
#define VCE_CMD_END 0x00000001
|
||||
#define VCE_CMD_IB 0x00000002
|
||||
#define VCE_CMD_FENCE 0x00000003
|
||||
#define VCE_CMD_TRAP 0x00000004
|
||||
#define VCE_CMD_IB_AUTO 0x00000005
|
||||
#define VCE_CMD_SEMAPHORE 0x00000006
|
||||
|
||||
#define VCE_CMD_IB_VM 0x00000102
|
||||
#define VCE_CMD_WAIT_GE 0x00000106
|
||||
#define VCE_CMD_UPDATE_PTB 0x00000107
|
||||
#define VCE_CMD_FLUSH_TLB 0x00000108
|
||||
#define VCE_CMD_REG_WRITE 0x00000109
|
||||
#define VCE_CMD_REG_WAIT 0x0000010a
|
||||
|
||||
#define HEVC_ENC_CMD_NO_OP 0x00000000
|
||||
#define HEVC_ENC_CMD_END 0x00000001
|
||||
#define HEVC_ENC_CMD_FENCE 0x00000003
|
||||
#define HEVC_ENC_CMD_TRAP 0x00000004
|
||||
#define HEVC_ENC_CMD_IB_VM 0x00000102
|
||||
#define HEVC_ENC_CMD_REG_WRITE 0x00000109
|
||||
#define HEVC_ENC_CMD_REG_WAIT 0x0000010a
|
||||
|
||||
#endif
|
||||
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
@@ -0,0 +1,453 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef _athub_1_0_OFFSET_HEADER
|
||||
#define _athub_1_0_OFFSET_HEADER
|
||||
|
||||
|
||||
|
||||
// addressBlock: athub_atsdec
|
||||
// base address: 0x3080
|
||||
#define mmATC_ATS_CNTL 0x0000
|
||||
#define mmATC_ATS_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_STATUS 0x0003
|
||||
#define mmATC_ATS_STATUS_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_CNTL 0x0004
|
||||
#define mmATC_ATS_FAULT_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO 0x0005
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_ADDR 0x0006
|
||||
#define mmATC_ATS_FAULT_STATUS_ADDR_BASE_IDX 0
|
||||
#define mmATC_ATS_DEFAULT_PAGE_LOW 0x0007
|
||||
#define mmATC_ATS_DEFAULT_PAGE_LOW_BASE_IDX 0
|
||||
#define mmATC_TRANS_FAULT_RSPCNTRL 0x0008
|
||||
#define mmATC_TRANS_FAULT_RSPCNTRL_BASE_IDX 0
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO2 0x0009
|
||||
#define mmATC_ATS_FAULT_STATUS_INFO2_BASE_IDX 0
|
||||
#define mmATHUB_MISC_CNTL 0x000a
|
||||
#define mmATHUB_MISC_CNTL_BASE_IDX 0
|
||||
#define mmATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x000b
|
||||
#define mmATC_VMID_PASID_MAPPING_UPDATE_STATUS_BASE_IDX 0
|
||||
#define mmATC_VMID0_PASID_MAPPING 0x000c
|
||||
#define mmATC_VMID0_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID1_PASID_MAPPING 0x000d
|
||||
#define mmATC_VMID1_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID2_PASID_MAPPING 0x000e
|
||||
#define mmATC_VMID2_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID3_PASID_MAPPING 0x000f
|
||||
#define mmATC_VMID3_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID4_PASID_MAPPING 0x0010
|
||||
#define mmATC_VMID4_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID5_PASID_MAPPING 0x0011
|
||||
#define mmATC_VMID5_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID6_PASID_MAPPING 0x0012
|
||||
#define mmATC_VMID6_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID7_PASID_MAPPING 0x0013
|
||||
#define mmATC_VMID7_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID8_PASID_MAPPING 0x0014
|
||||
#define mmATC_VMID8_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID9_PASID_MAPPING 0x0015
|
||||
#define mmATC_VMID9_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID10_PASID_MAPPING 0x0016
|
||||
#define mmATC_VMID10_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID11_PASID_MAPPING 0x0017
|
||||
#define mmATC_VMID11_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID12_PASID_MAPPING 0x0018
|
||||
#define mmATC_VMID12_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID13_PASID_MAPPING 0x0019
|
||||
#define mmATC_VMID13_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID14_PASID_MAPPING 0x001a
|
||||
#define mmATC_VMID14_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID15_PASID_MAPPING 0x001b
|
||||
#define mmATC_VMID15_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_STATUS 0x001c
|
||||
#define mmATC_ATS_VMID_STATUS_BASE_IDX 0
|
||||
#define mmATC_ATS_GFX_ATCL2_STATUS 0x001d
|
||||
#define mmATC_ATS_GFX_ATCL2_STATUS_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER0_CFG 0x001e
|
||||
#define mmATC_PERFCOUNTER0_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER1_CFG 0x001f
|
||||
#define mmATC_PERFCOUNTER1_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER2_CFG 0x0020
|
||||
#define mmATC_PERFCOUNTER2_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER3_CFG 0x0021
|
||||
#define mmATC_PERFCOUNTER3_CFG_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_RSLT_CNTL 0x0022
|
||||
#define mmATC_PERFCOUNTER_RSLT_CNTL_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_LO 0x0023
|
||||
#define mmATC_PERFCOUNTER_LO_BASE_IDX 0
|
||||
#define mmATC_PERFCOUNTER_HI 0x0024
|
||||
#define mmATC_PERFCOUNTER_HI_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL 0x0025
|
||||
#define mmATHUB_PCIE_ATS_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_PASID_CNTL 0x0026
|
||||
#define mmATHUB_PCIE_PASID_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_PAGE_REQ_CNTL 0x0027
|
||||
#define mmATHUB_PCIE_PAGE_REQ_CNTL_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_OUTSTAND_PAGE_REQ_ALLOC 0x0028
|
||||
#define mmATHUB_PCIE_OUTSTAND_PAGE_REQ_ALLOC_BASE_IDX 0
|
||||
#define mmATHUB_COMMAND 0x0029
|
||||
#define mmATHUB_COMMAND_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_0 0x002a
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_0_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_1 0x002b
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_1_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_2 0x002c
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_2_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_3 0x002d
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_3_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_4 0x002e
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_4_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_5 0x002f
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_5_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_6 0x0030
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_6_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_7 0x0031
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_7_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_8 0x0032
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_8_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_9 0x0033
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_9_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_10 0x0034
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_10_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_11 0x0035
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_11_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_12 0x0036
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_12_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_13 0x0037
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_13_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_14 0x0038
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_14_BASE_IDX 0
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_15 0x0039
|
||||
#define mmATHUB_PCIE_ATS_CNTL_VF_15_BASE_IDX 0
|
||||
#define mmATHUB_MEM_POWER_LS 0x003a
|
||||
#define mmATHUB_MEM_POWER_LS_BASE_IDX 0
|
||||
#define mmATS_IH_CREDIT 0x003b
|
||||
#define mmATS_IH_CREDIT_BASE_IDX 0
|
||||
#define mmATHUB_IH_CREDIT 0x003c
|
||||
#define mmATHUB_IH_CREDIT_BASE_IDX 0
|
||||
#define mmATC_VMID16_PASID_MAPPING 0x003d
|
||||
#define mmATC_VMID16_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID17_PASID_MAPPING 0x003e
|
||||
#define mmATC_VMID17_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID18_PASID_MAPPING 0x003f
|
||||
#define mmATC_VMID18_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID19_PASID_MAPPING 0x0040
|
||||
#define mmATC_VMID19_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID20_PASID_MAPPING 0x0041
|
||||
#define mmATC_VMID20_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID21_PASID_MAPPING 0x0042
|
||||
#define mmATC_VMID21_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID22_PASID_MAPPING 0x0043
|
||||
#define mmATC_VMID22_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID23_PASID_MAPPING 0x0044
|
||||
#define mmATC_VMID23_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID24_PASID_MAPPING 0x0045
|
||||
#define mmATC_VMID24_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID25_PASID_MAPPING 0x0046
|
||||
#define mmATC_VMID25_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID26_PASID_MAPPING 0x0047
|
||||
#define mmATC_VMID26_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID27_PASID_MAPPING 0x0048
|
||||
#define mmATC_VMID27_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID28_PASID_MAPPING 0x0049
|
||||
#define mmATC_VMID28_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID29_PASID_MAPPING 0x004a
|
||||
#define mmATC_VMID29_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID30_PASID_MAPPING 0x004b
|
||||
#define mmATC_VMID30_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_VMID31_PASID_MAPPING 0x004c
|
||||
#define mmATC_VMID31_PASID_MAPPING_BASE_IDX 0
|
||||
#define mmATC_ATS_MMHUB_ATCL2_STATUS 0x004d
|
||||
#define mmATC_ATS_MMHUB_ATCL2_STATUS_BASE_IDX 0
|
||||
#define mmATHUB_SHARED_VIRT_RESET_REQ 0x004e
|
||||
#define mmATHUB_SHARED_VIRT_RESET_REQ_BASE_IDX 0
|
||||
#define mmATHUB_SHARED_ACTIVE_FCN_ID 0x004f
|
||||
#define mmATHUB_SHARED_ACTIVE_FCN_ID_BASE_IDX 0
|
||||
#define mmATC_ATS_SDPPORT_CNTL 0x0050
|
||||
#define mmATC_ATS_SDPPORT_CNTL_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_GFX_STAT 0x0052
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_GFX_STAT_BASE_IDX 0
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_MMHUB_STAT 0x0053
|
||||
#define mmATC_ATS_VMID_SNAPSHOT_MMHUB_STAT_BASE_IDX 0
|
||||
|
||||
|
||||
// addressBlock: athub_xpbdec
|
||||
// base address: 0x31f0
|
||||
#define mmXPB_RTR_SRC_APRTR0 0x005c
|
||||
#define mmXPB_RTR_SRC_APRTR0_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR1 0x005d
|
||||
#define mmXPB_RTR_SRC_APRTR1_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR2 0x005e
|
||||
#define mmXPB_RTR_SRC_APRTR2_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR3 0x005f
|
||||
#define mmXPB_RTR_SRC_APRTR3_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR4 0x0060
|
||||
#define mmXPB_RTR_SRC_APRTR4_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR5 0x0061
|
||||
#define mmXPB_RTR_SRC_APRTR5_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR6 0x0062
|
||||
#define mmXPB_RTR_SRC_APRTR6_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR7 0x0063
|
||||
#define mmXPB_RTR_SRC_APRTR7_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR8 0x0064
|
||||
#define mmXPB_RTR_SRC_APRTR8_BASE_IDX 0
|
||||
#define mmXPB_RTR_SRC_APRTR9 0x0065
|
||||
#define mmXPB_RTR_SRC_APRTR9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR0 0x0066
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR1 0x0067
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR2 0x0068
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR3 0x0069
|
||||
#define mmXPB_XDMA_RTR_SRC_APRTR3_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP0 0x006a
|
||||
#define mmXPB_RTR_DEST_MAP0_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP1 0x006b
|
||||
#define mmXPB_RTR_DEST_MAP1_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP2 0x006c
|
||||
#define mmXPB_RTR_DEST_MAP2_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP3 0x006d
|
||||
#define mmXPB_RTR_DEST_MAP3_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP4 0x006e
|
||||
#define mmXPB_RTR_DEST_MAP4_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP5 0x006f
|
||||
#define mmXPB_RTR_DEST_MAP5_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP6 0x0070
|
||||
#define mmXPB_RTR_DEST_MAP6_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP7 0x0071
|
||||
#define mmXPB_RTR_DEST_MAP7_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP8 0x0072
|
||||
#define mmXPB_RTR_DEST_MAP8_BASE_IDX 0
|
||||
#define mmXPB_RTR_DEST_MAP9 0x0073
|
||||
#define mmXPB_RTR_DEST_MAP9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP0 0x0074
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP1 0x0075
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP2 0x0076
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP3 0x0077
|
||||
#define mmXPB_XDMA_RTR_DEST_MAP3_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG0 0x0078
|
||||
#define mmXPB_CLG_CFG0_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG1 0x0079
|
||||
#define mmXPB_CLG_CFG1_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG2 0x007a
|
||||
#define mmXPB_CLG_CFG2_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG3 0x007b
|
||||
#define mmXPB_CLG_CFG3_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG4 0x007c
|
||||
#define mmXPB_CLG_CFG4_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG5 0x007d
|
||||
#define mmXPB_CLG_CFG5_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG6 0x007e
|
||||
#define mmXPB_CLG_CFG6_BASE_IDX 0
|
||||
#define mmXPB_CLG_CFG7 0x007f
|
||||
#define mmXPB_CLG_CFG7_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA 0x0080
|
||||
#define mmXPB_CLG_EXTRA_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_MSK 0x0081
|
||||
#define mmXPB_CLG_EXTRA_MSK_BASE_IDX 0
|
||||
#define mmXPB_LB_ADDR 0x0082
|
||||
#define mmXPB_LB_ADDR_BASE_IDX 0
|
||||
#define mmXPB_WCB_STS 0x0083
|
||||
#define mmXPB_WCB_STS_BASE_IDX 0
|
||||
#define mmXPB_HST_CFG 0x0084
|
||||
#define mmXPB_HST_CFG_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_CFG 0x0085
|
||||
#define mmXPB_P2P_BAR_CFG_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR0 0x0086
|
||||
#define mmXPB_P2P_BAR0_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR1 0x0087
|
||||
#define mmXPB_P2P_BAR1_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR2 0x0088
|
||||
#define mmXPB_P2P_BAR2_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR3 0x0089
|
||||
#define mmXPB_P2P_BAR3_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR4 0x008a
|
||||
#define mmXPB_P2P_BAR4_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR5 0x008b
|
||||
#define mmXPB_P2P_BAR5_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR6 0x008c
|
||||
#define mmXPB_P2P_BAR6_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR7 0x008d
|
||||
#define mmXPB_P2P_BAR7_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_SETUP 0x008e
|
||||
#define mmXPB_P2P_BAR_SETUP_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_DELTA_ABOVE 0x0090
|
||||
#define mmXPB_P2P_BAR_DELTA_ABOVE_BASE_IDX 0
|
||||
#define mmXPB_P2P_BAR_DELTA_BELOW 0x0091
|
||||
#define mmXPB_P2P_BAR_DELTA_BELOW_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR0 0x0092
|
||||
#define mmXPB_PEER_SYS_BAR0_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR1 0x0093
|
||||
#define mmXPB_PEER_SYS_BAR1_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR2 0x0094
|
||||
#define mmXPB_PEER_SYS_BAR2_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR3 0x0095
|
||||
#define mmXPB_PEER_SYS_BAR3_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR4 0x0096
|
||||
#define mmXPB_PEER_SYS_BAR4_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR5 0x0097
|
||||
#define mmXPB_PEER_SYS_BAR5_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR6 0x0098
|
||||
#define mmXPB_PEER_SYS_BAR6_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR7 0x0099
|
||||
#define mmXPB_PEER_SYS_BAR7_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR8 0x009a
|
||||
#define mmXPB_PEER_SYS_BAR8_BASE_IDX 0
|
||||
#define mmXPB_PEER_SYS_BAR9 0x009b
|
||||
#define mmXPB_PEER_SYS_BAR9_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR0 0x009c
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR0_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR1 0x009d
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR1_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR2 0x009e
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR2_BASE_IDX 0
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR3 0x009f
|
||||
#define mmXPB_XDMA_PEER_SYS_BAR3_BASE_IDX 0
|
||||
#define mmXPB_CLK_GAT 0x00a0
|
||||
#define mmXPB_CLK_GAT_BASE_IDX 0
|
||||
#define mmXPB_INTF_CFG 0x00a1
|
||||
#define mmXPB_INTF_CFG_BASE_IDX 0
|
||||
#define mmXPB_INTF_STS 0x00a2
|
||||
#define mmXPB_INTF_STS_BASE_IDX 0
|
||||
#define mmXPB_PIPE_STS 0x00a3
|
||||
#define mmXPB_PIPE_STS_BASE_IDX 0
|
||||
#define mmXPB_SUB_CTRL 0x00a4
|
||||
#define mmXPB_SUB_CTRL_BASE_IDX 0
|
||||
#define mmXPB_MAP_INVERT_FLUSH_NUM_LSB 0x00a5
|
||||
#define mmXPB_MAP_INVERT_FLUSH_NUM_LSB_BASE_IDX 0
|
||||
#define mmXPB_PERF_KNOBS 0x00a6
|
||||
#define mmXPB_PERF_KNOBS_BASE_IDX 0
|
||||
#define mmXPB_STICKY 0x00a7
|
||||
#define mmXPB_STICKY_BASE_IDX 0
|
||||
#define mmXPB_STICKY_W1C 0x00a8
|
||||
#define mmXPB_STICKY_W1C_BASE_IDX 0
|
||||
#define mmXPB_MISC_CFG 0x00a9
|
||||
#define mmXPB_MISC_CFG_BASE_IDX 0
|
||||
#define mmXPB_INTF_CFG2 0x00aa
|
||||
#define mmXPB_INTF_CFG2_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_RD 0x00ab
|
||||
#define mmXPB_CLG_EXTRA_RD_BASE_IDX 0
|
||||
#define mmXPB_CLG_EXTRA_MSK_RD 0x00ac
|
||||
#define mmXPB_CLG_EXTRA_MSK_RD_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_MATCH 0x00ad
|
||||
#define mmXPB_CLG_GFX_MATCH_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_MATCH_MSK 0x00ae
|
||||
#define mmXPB_CLG_GFX_MATCH_MSK_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_MATCH 0x00af
|
||||
#define mmXPB_CLG_MM_MATCH_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_MATCH_MSK 0x00b0
|
||||
#define mmXPB_CLG_MM_MATCH_MSK_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING0 0x00b1
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING0_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING1 0x00b2
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING1_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING2 0x00b3
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING2_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING3 0x00b4
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING3_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING4 0x00b5
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING4_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING5 0x00b6
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING5_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING6 0x00b7
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING6_BASE_IDX 0
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING7 0x00b8
|
||||
#define mmXPB_CLG_GFX_UNITID_MAPPING7_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING0 0x00b9
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING0_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING1 0x00ba
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING1_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING2 0x00bb
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING2_BASE_IDX 0
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING3 0x00bc
|
||||
#define mmXPB_CLG_MM_UNITID_MAPPING3_BASE_IDX 0
|
||||
|
||||
|
||||
// addressBlock: athub_rpbdec
|
||||
// base address: 0x33b0
|
||||
#define mmRPB_PASSPW_CONF 0x00cc
|
||||
#define mmRPB_PASSPW_CONF_BASE_IDX 0
|
||||
#define mmRPB_BLOCKLEVEL_CONF 0x00cd
|
||||
#define mmRPB_BLOCKLEVEL_CONF_BASE_IDX 0
|
||||
#define mmRPB_TAG_CONF 0x00cf
|
||||
#define mmRPB_TAG_CONF_BASE_IDX 0
|
||||
#define mmRPB_EFF_CNTL 0x00d1
|
||||
#define mmRPB_EFF_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ARB_CNTL 0x00d2
|
||||
#define mmRPB_ARB_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ARB_CNTL2 0x00d3
|
||||
#define mmRPB_ARB_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_BIF_CNTL 0x00d4
|
||||
#define mmRPB_BIF_CNTL_BASE_IDX 0
|
||||
#define mmRPB_WR_SWITCH_CNTL 0x00d5
|
||||
#define mmRPB_WR_SWITCH_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_SWITCH_CNTL 0x00d7
|
||||
#define mmRPB_RD_SWITCH_CNTL_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_WR 0x00d8
|
||||
#define mmRPB_CID_QUEUE_WR_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_RD 0x00d9
|
||||
#define mmRPB_CID_QUEUE_RD_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_EX 0x00dc
|
||||
#define mmRPB_CID_QUEUE_EX_BASE_IDX 0
|
||||
#define mmRPB_CID_QUEUE_EX_DATA 0x00dd
|
||||
#define mmRPB_CID_QUEUE_EX_DATA_BASE_IDX 0
|
||||
#define mmRPB_SWITCH_CNTL2 0x00de
|
||||
#define mmRPB_SWITCH_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_DEINTRLV_COMBINE_CNTL 0x00df
|
||||
#define mmRPB_DEINTRLV_COMBINE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_VC_SWITCH_RDWR 0x00e0
|
||||
#define mmRPB_VC_SWITCH_RDWR_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_LO 0x00e1
|
||||
#define mmRPB_PERFCOUNTER_LO_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_HI 0x00e2
|
||||
#define mmRPB_PERFCOUNTER_HI_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER0_CFG 0x00e3
|
||||
#define mmRPB_PERFCOUNTER0_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER1_CFG 0x00e4
|
||||
#define mmRPB_PERFCOUNTER1_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER2_CFG 0x00e5
|
||||
#define mmRPB_PERFCOUNTER2_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER3_CFG 0x00e6
|
||||
#define mmRPB_PERFCOUNTER3_CFG_BASE_IDX 0
|
||||
#define mmRPB_PERFCOUNTER_RSLT_CNTL 0x00e7
|
||||
#define mmRPB_PERFCOUNTER_RSLT_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_QUEUE_CNTL 0x00e9
|
||||
#define mmRPB_RD_QUEUE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_RD_QUEUE_CNTL2 0x00ea
|
||||
#define mmRPB_RD_QUEUE_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_WR_QUEUE_CNTL 0x00eb
|
||||
#define mmRPB_WR_QUEUE_CNTL_BASE_IDX 0
|
||||
#define mmRPB_WR_QUEUE_CNTL2 0x00ec
|
||||
#define mmRPB_WR_QUEUE_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_EA_QUEUE_WR 0x00ed
|
||||
#define mmRPB_EA_QUEUE_WR_BASE_IDX 0
|
||||
#define mmRPB_ATS_CNTL 0x00ee
|
||||
#define mmRPB_ATS_CNTL_BASE_IDX 0
|
||||
#define mmRPB_ATS_CNTL2 0x00ef
|
||||
#define mmRPB_ATS_CNTL2_BASE_IDX 0
|
||||
#define mmRPB_SDPPORT_CNTL 0x00f0
|
||||
#define mmRPB_SDPPORT_CNTL_BASE_IDX 0
|
||||
|
||||
#endif
|
||||
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande
Carregar diff
@@ -0,0 +1,76 @@
|
||||
#
|
||||
# Build dynamic Library object
|
||||
#
|
||||
set ( TARGET_LIB "${TARGET_NAME}" )
|
||||
set ( LIB_SRC
|
||||
${LIB_DIR}/core/aql_profile.cpp
|
||||
${LIB_DIR}/core/counters.cpp
|
||||
${LIB_DIR}/core/threadtrace.cpp
|
||||
${LIB_DIR}/core/spm_data.cpp
|
||||
${LIB_DIR}/core/populate_aql.cpp
|
||||
${LIB_DIR}/core/memorymanager.cpp
|
||||
${LIB_DIR}/core/pm4_factory.cpp
|
||||
${LIB_DIR}/core/gfx9_factory.cpp
|
||||
${LIB_DIR}/core/gfx908_factory.cpp
|
||||
${LIB_DIR}/core/gfx90a_factory.cpp
|
||||
${LIB_DIR}/core/gfx940_factory.cpp
|
||||
${LIB_DIR}/core/gfx10_factory.cpp
|
||||
${LIB_DIR}/core/gfx11_factory.cpp
|
||||
${LIB_DIR}/core/gfx12_factory.cpp
|
||||
${LIB_DIR}/core/vega20_reg_init.cpp
|
||||
${LIB_DIR}/core/parse_ip_discovery.cpp
|
||||
${LIB_DIR}/core/navi_reg_init.cpp
|
||||
${LIB_DIR}/core/ip_offset_table_init.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${API_PATH})
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE pthread hsa-runtime64::hsa-runtime64 )
|
||||
|
||||
## Generating definitions
|
||||
set ( SCRIPT_DIR "${ROOT_DIR}/script" )
|
||||
set ( GFXIP_DIR "${ROOT_DIR}/gfxip" )
|
||||
set ( DEF_DIR "${ROOT_DIR}/src/def" )
|
||||
set ( BINFO_TEMPL "${SCRIPT_DIR}/gpu_block_info.h" )
|
||||
set ( BINFO_DEF "${GFXIP_DIR}/gpu_block_info.h" )
|
||||
set ( BINFO_HEADER "${DEF_DIR}/gpu_block_info.h" )
|
||||
set ( GFX9_TEMPL "${SCRIPT_DIR}/gfx9_def.h" )
|
||||
set ( GFX9_DEF "${GFXIP_DIR}/gfx9_def.h" )
|
||||
set ( GFX9_HEADER "${DEF_DIR}/gfx9_def.h" )
|
||||
set ( GFX10_TEMPL "${SCRIPT_DIR}/gfx10_def.h" )
|
||||
set ( GFX10_DEF "${GFXIP_DIR}/gfx10_def.h" )
|
||||
set ( GFX10_HEADER "${DEF_DIR}/gfx10_def.h" )
|
||||
set ( GFX11_TEMPL "${SCRIPT_DIR}/gfx11_def.h" )
|
||||
set ( GFX11_DEF "${GFXIP_DIR}/gfx11_def.h" )
|
||||
set ( GFX11_HEADER "${DEF_DIR}/gfx11_def.h" )
|
||||
set ( GFX12_TEMPL "${SCRIPT_DIR}/gfx12_def.h" )
|
||||
set ( GFX12_DEF "${GFXIP_DIR}/gfx12_def.h" )
|
||||
set ( GFX12_HEADER "${DEF_DIR}/gfx12_def.h" )
|
||||
include_directories ( ${ROOT_DIR} )
|
||||
# mygenreset: restores the definition headers under ${DEF_DIR} (src/def) to
# their template state by copying each template from ${SCRIPT_DIR} (script)
# over the corresponding header: gpu_block_info.h and gfx9/10/11/12_def.h.
# Each copy runs through `sh -xc`, so the shell echoes the command it executes.
add_custom_target( mygenreset
|
||||
COMMAND sh -xc "cp ${BINFO_TEMPL} ${BINFO_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX9_TEMPL} ${GFX9_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX10_TEMPL} ${GFX10_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX11_TEMPL} ${GFX11_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX12_TEMPL} ${GFX12_HEADER}"
|
||||
)
|
||||
# mygen: regenerates the definition headers under ${DEF_DIR} (src/def).
# Steps, in the order the COMMANDs are listed:
#   1. Copy every template from ${SCRIPT_DIR} over its header (the same five
#      copies that mygenreset performs).
#   2. For each of gfx9/gfx10/gfx11/gfx12: write the template to the header
#      with every line that contains "<gfxN>_def" blanked out (sed), then run
#      script/errextr.sh with the header and the matching ${GFXIP_DIR}
#      definition file as its two arguments.
#   3. Rebuild gpu_block_info.h: the first line of the template, followed by
#      ${BINFO_DEF} with `_GPU_BLOCKINFO_H_` rewritten to
#      `SRC_DEF_GPU_BLOCK_INFO_H_` (first match on each line).
# NOTE(review): every header copied in step 1 is later truncated by a `>`
# redirection in step 2 or 3, so the initial copies look redundant -- confirm
# before removing them.
# NOTE(review): what errextr.sh does with its two arguments is not visible
# here -- see the script itself.
add_custom_target( mygen
|
||||
COMMAND sh -xc "cp ${BINFO_TEMPL} ${BINFO_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX9_TEMPL} ${GFX9_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX10_TEMPL} ${GFX10_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX11_TEMPL} ${GFX11_HEADER}"
|
||||
COMMAND sh -xc "cp ${GFX12_TEMPL} ${GFX12_HEADER}"
|
||||
COMMAND sh -xc "sed '/gfx9_def/ s/.*//' ${GFX9_TEMPL} >${GFX9_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX9_HEADER} ${GFX9_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx10_def/ s/.*//' ${GFX10_TEMPL} >${GFX10_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX10_HEADER} ${GFX10_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx11_def/ s/.*//' ${GFX11_TEMPL} >${GFX11_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX11_HEADER} ${GFX11_DEF}"
|
||||
COMMAND sh -xc "sed '/gfx12_def/ s/.*//' ${GFX12_TEMPL} >${GFX12_HEADER}"
|
||||
COMMAND sh -xc "${ROOT_DIR}/script/errextr.sh ${GFX12_HEADER} ${GFX12_DEF}"
|
||||
COMMAND sh -xc "head -n1 ${BINFO_TEMPL} >${BINFO_HEADER}"
|
||||
COMMAND sh -xc "sed 's/_GPU_BLOCKINFO_H_/SRC_DEF_GPU_BLOCK_INFO_H_/' ${BINFO_DEF} >>${BINFO_HEADER}"
|
||||
)
|
||||
|
||||
add_subdirectory(src/core)
|
||||
@@ -0,0 +1 @@
|
||||
add_subdirectory(include)
|
||||
@@ -0,0 +1,46 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#ifndef SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
#define SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
// Value of 'pm4_ib_format' field of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_FORMAT = 1;
|
||||
// Value of 'dw_count_remain' field of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_DW_COUNT_REMAIN = 10;
|
||||
// Size of 'reserved' array of amd_aql_pm4_ib_packet_t packet
|
||||
static const uint32_t AMD_AQL_PM4_IB_RESERVED_COUNT = 8;
|
||||
|
||||
// AQL vendor-specific packet that carries a PM4 IB command
|
||||
// Layout of the AQL vendor-specific packet used to submit a PM4 indirect
// buffer (IB) command. Field order and widths define the packet layout, so
// they must not be reordered or resized.
// NOTE(review): the fields add up to 64 bytes if hsa_signal_t is 8 bytes wide
// -- confirm against the HSA headers.
typedef struct {
|
||||
uint16_t header;  // packet header word; presumably the standard AQL header -- TODO confirm
|
||||
uint16_t pm4_ib_format;  // expected value: AMD_AQL_PM4_IB_FORMAT
|
||||
uint32_t pm4_ib_command[4];  // four dwords holding the PM4 IB command
|
||||
uint32_t dw_count_remain;  // expected value: AMD_AQL_PM4_IB_DW_COUNT_REMAIN
|
||||
uint32_t reserved[AMD_AQL_PM4_IB_RESERVED_COUNT];  // reserved dwords
|
||||
hsa_signal_t completion_signal;  // HSA signal handle; presumably signalled on completion -- TODO confirm
|
||||
} amd_aql_pm4_ib_packet_t;
|
||||
|
||||
#endif // SRC_CORE_AMD_AQL_PM4_IB_PACKET_H_
|
||||
@@ -0,0 +1,799 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/counter_dimensions.hpp"
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
|
||||
// GCC/Clang function attributes used for the library load/unload hooks.
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))

// Logs 'msg' through ERR_LOGGING and returns 'err' from the enclosing function
// when 'cond' is true.
// The body is wrapped in do { } while (0) so that 'ERR_CHECK(...);' expands to
// exactly one statement and stays valid in an unbraced if/else.
#define ERR_CHECK(cond, err, msg) \
  do {                            \
    if (cond) {                   \
      ERR_LOGGING << msg;         \
      return err;                 \
    }                             \
  } while (0)
|
||||
|
||||
// Getting SPM data using driver API
|
||||
namespace spm_kfd_namespace {
|
||||
hsa_status_t spm_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data);
|
||||
}
|
||||
|
||||
// Payload handed to the data callback in PC sampling mode.
struct pcsmp_callback_data_t {
  // sampled kernel name
  const char* kernel_name;
  // host buffer for tracing data
  void* data_buffer;
  // sample id
  uint64_t id;
  // sample cycle
  uint64_t cycle;
  // sample PC
  uint64_t pc;
};

// Last target compute unit requested through the thread trace parameters
// (written by hsa_ven_amd_aqlprofile_start()).
std::atomic<int> ATT_TARGET_CU{0};
|
||||
|
||||
namespace aql_profile {
|
||||
// Command buffer partitioning manager
|
||||
// Supports Pre/Post commands partitioning
|
||||
// and prefix control partition
|
||||
|
||||
static std::unordered_map<void*, pm4_builder::TraceConfig> configs;
|
||||
static std::mutex config_mut;
|
||||
|
||||
// Builds the counters vector for all events of 'profile'.
//
// Each event is assigned the next free counter register index of its block
// (block id + block index). Events for which pm4_builder::SPISkip() is true are
// added without consuming a register index and without the limit check.
// On GFX10, when the resulting vector is non-empty and carries no GRBM block
// attribute, a GRBM counter 0 entry is appended.
//
// Throws event_exception when an event exceeds the number of counter registers
// of its block.
static inline pm4_builder::counters_vector CountersVec(const profile_t* profile,
                                                       const Pm4Factory* pm4_factory) {
  pm4_builder::counters_vector counters;
  // Next free counter register index per block instance
  std::map<block_des_t, uint32_t, lt_block_des> next_reg;

  const hsa_ven_amd_aqlprofile_event_t* const events_end = profile->events + profile->event_count;
  for (const hsa_ven_amd_aqlprofile_event_t* event = profile->events; event < events_end;
       ++event) {
    const GpuBlockInfo* info = pm4_factory->GetBlockInfo(event);
    const block_des_t descr = {info->id, event->block_index};
    uint32_t& reg_index = next_reg.insert({descr, 0}).first->second;

    const bool skipped = pm4_builder::SPISkip(info->attr, event->counter_id);
    if (!skipped && reg_index >= info->counter_count) {
      throw event_exception("Event is out of block counter registers number limit, ", *event);
    }

    counters.push_back({event->counter_id, reg_index, descr, info});
    if (!skipped) ++reg_index;
  }

  // Only GFX10 profiles that are non-empty and lack a GRBM counter get the extra entry
  if (!pm4_factory->IsGFX10() || (counters.get_attr() & CounterBlockGRBMAttr) != 0 ||
      counters.empty()) {
    return counters;
  }

  event_t grbm_event{
      .block_name = HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, .block_index = 0, .counter_id = 0};
  const GpuBlockInfo* grbm_info = pm4_factory->GetBlockInfo(&grbm_event);
  if (grbm_info == nullptr) return counters;

  const block_des_t grbm_descr = {grbm_info->id, 0};
  uint32_t& grbm_reg = next_reg.insert({grbm_descr, 0}).first->second;
  counters.push_back({0, grbm_reg, grbm_descr, grbm_info});
  ++grbm_reg;

  return counters;
}
|
||||
|
||||
static inline bool IsEventMatch(const event_t& event1, const event_t& event2) {
|
||||
return (event1.block_name == event2.block_name) && (event1.block_index == event2.block_index) &&
|
||||
(event1.counter_id == event2.counter_id);
|
||||
}
|
||||
|
||||
// Data callback used by get_info(PMC_DATA).
//
// 'callback_data' points to the caller's hsa_ven_amd_aqlprofile_info_data_t query:
//   - its pmc_data.event selects which event to collect,
//   - sample_id == UINT32_MAX accumulates the results of all matching samples,
//   - any other sample_id copies the result of that one sample and stops the
//     iteration by returning HSA_STATUS_INFO_BREAK.
// Records that are not PMC data or do not match return HSA_STATUS_SUCCESS.
hsa_status_t DefaultPmcdataCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                    hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                    void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) return HSA_STATUS_SUCCESS;

  auto* query = reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (!IsEventMatch(info_data->pmc_data.event, query->pmc_data.event)) {
    return HSA_STATUS_SUCCESS;
  }

  if (query->sample_id == UINT32_MAX) {
    // Accumulate over every sample of the event
    query->pmc_data.result += info_data->pmc_data.result;
    return HSA_STATUS_SUCCESS;
  }

  if (query->sample_id != info_data->sample_id) return HSA_STATUS_SUCCESS;

  // Requested sample found
  query->pmc_data.result = info_data->pmc_data.result;
  return HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
// Data callback used by get_info(TRACE_DATA).
//
// 'callback_data' points to the caller's hsa_ven_amd_aqlprofile_info_data_t query.
// When a trace record with the queried sample_id is seen, its trace_data
// descriptor is copied into the query and HSA_STATUS_INFO_BREAK is returned;
// every other record returns HSA_STATUS_SUCCESS.
hsa_status_t DefaultTracedataCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                      hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                      void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA) return HSA_STATUS_SUCCESS;

  auto* query = reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (info_data->sample_id != query->sample_id) return HSA_STATUS_SUCCESS;

  query->trace_data = info_data->trace_data;
  return HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
Logger::mutex_t Logger::mutex_;
|
||||
Logger* Logger::instance_ = NULL;
|
||||
bool Pm4Factory::concurrent_create_mode_ = false;
|
||||
bool Pm4Factory::spm_kfd_mode_ = false;
|
||||
Pm4Factory::mutex_t Pm4Factory::mutex_;
|
||||
Pm4Factory::instances_t* Pm4Factory::instances_ = NULL;
|
||||
bool read_api_enabled = true;
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
const char* read_api_enabled_str = getenv("AQLPROFILE_READ_API");
|
||||
if (read_api_enabled_str != NULL) {
|
||||
if (atoi(read_api_enabled_str) == 0) read_api_enabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
Logger::Destroy();
|
||||
Pm4Factory::Destroy();
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Return library major/minor version
|
||||
PUBLIC_API uint32_t hsa_ven_amd_aqlprofile_version_major() { return HSA_AQLPROFILE_VERSION_MAJOR; }
|
||||
PUBLIC_API uint32_t hsa_ven_amd_aqlprofile_version_minor() { return HSA_AQLPROFILE_VERSION_MINOR; }
|
||||
|
||||
// Returns the last error message
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) {
|
||||
*str = aql_profile::Logger::LastMessage().c_str();
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
*result = false;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
if (pm4_factory->GetBlockInfo(event) != NULL) *result = true;
|
||||
} catch (aql_profile::event_exception& e) {
|
||||
INFO_LOGGING << e.what();
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_start_packet) {
|
||||
try {
|
||||
pm4_builder::CmdBuffer commands;
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile->command_buffer.ptr, UINT_MAX);
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
const pm4_builder::counters_vector countersVec = CountersVec(profile, pm4_factory);
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();
|
||||
|
||||
// Generate read commands
|
||||
auto data_size = pmc_builder->Read(&commands, countersVec, profile->output_buffer.ptr);
|
||||
if (!aql_profile::read_api_enabled) commands.Clear();
|
||||
cmd_buffer_mgr.SetRdSize(commands.Size());
|
||||
|
||||
// Copy generated read commands
|
||||
if (profile->command_buffer.ptr != NULL) {
|
||||
const aql_profile::descriptor_t rd_descr = cmd_buffer_mgr.GetRdDescr();
|
||||
memcpy(rd_descr.ptr, commands.Data(), commands.Size());
|
||||
commands.Clear();
|
||||
}
|
||||
|
||||
// Generate start commands
|
||||
pmc_builder->Start(&commands, countersVec);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
|
||||
// Generate stop commands
|
||||
if (!aql_profile::read_api_enabled)
|
||||
pmc_builder->Read(&commands, countersVec, profile->output_buffer.ptr);
|
||||
pmc_builder->Stop(&commands, countersVec);
|
||||
|
||||
if (profile->output_buffer.size < data_size) {
|
||||
profile->output_buffer.size = data_size;
|
||||
if (profile->output_buffer.ptr != NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE) {
|
||||
pm4_builder::TraceConfig trace_config{};
|
||||
const uint64_t se_number_total = pm4_factory->GetShaderEnginesNumber();
|
||||
|
||||
if (profile->parameters) {
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
switch (p->parameter_name) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK:
|
||||
trace_config.se_mask = p->value & ((1ull << se_number_total) - 1);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
|
||||
if (p->value > 15)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
|
||||
trace_config.targetCu = p->value;
|
||||
ATT_TARGET_CU.store(p->value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
|
||||
trace_config.vmIdMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
|
||||
if ((p->value & 0x50) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
|
||||
trace_config.deprecated_mask = p->value;
|
||||
trace_config.targetCu = p->value & 0xF;
|
||||
ATT_TARGET_CU.store(trace_config.targetCu);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
|
||||
if ((p->value & 0xFF000000) != 0)
|
||||
throw aql_profile::aql_profile_exc_val<uint32_t>(
|
||||
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
|
||||
p->value);
|
||||
trace_config.deprecated_tokenMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
|
||||
trace_config.deprecated_tokenMask2 = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
|
||||
trace_config.sampleRate = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
|
||||
trace_config.concurrent = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
|
||||
trace_config.simd_sel = p->value & 0xF;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE:
|
||||
trace_config.occupancy_mode = p->value ? 1 : 0;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
|
||||
trace_config.perfMASK = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
|
||||
trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
|
||||
if (trace_config.perfcounters.size() < 8)
|
||||
trace_config.perfcounters.push_back({p->value, 0xF});
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING << "Bad trace parameter name (" << p->parameter_name << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto control_size = sizeof(pm4_builder::TraceControl) * se_number_total;
|
||||
char* prefix_ptr = cmd_buffer_mgr.AddPrefix(control_size);
|
||||
auto* control_ptr = reinterpret_cast<pm4_builder::TraceControl*>(prefix_ptr);
|
||||
|
||||
trace_config.control_buffer_ptr = control_ptr;
|
||||
trace_config.control_buffer_size = control_size;
|
||||
trace_config.data_buffer_ptr = profile->output_buffer.ptr;
|
||||
trace_config.data_buffer_size = profile->output_buffer.size;
|
||||
|
||||
if (countersVec.size() == 0) {
|
||||
pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();
|
||||
// Generate start commands
|
||||
sqtt_builder->Begin(&commands, &trace_config);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
sqtt_builder->End(&commands, &trace_config);
|
||||
} else {
|
||||
const char* sz_sampling_rate = getenv("AQLPROFILE_SPM_SAMPLE_RATE");
|
||||
if (sz_sampling_rate != NULL) trace_config.sampleRate = atoi(sz_sampling_rate);
|
||||
|
||||
pm4_builder::SpmBuilder* spm_builder = pm4_factory->GetSpmBuilder();
|
||||
// Generate start commands
|
||||
spm_builder->Begin(&commands, &trace_config, countersVec);
|
||||
cmd_buffer_mgr.SetPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
spm_builder->End(&commands, &trace_config);
|
||||
}
|
||||
aql_profile::configs[profile->command_buffer.ptr] = trace_config;
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
cmd_buffer_mgr.Finalize(commands.Size());
|
||||
const uint32_t cmd_size = (cmd_buffer_mgr.GetSize() + 0x1800) & ~0xFFF;
|
||||
if (profile->command_buffer.size < cmd_size) {
|
||||
profile->command_buffer.size = cmd_size;
|
||||
if (profile->command_buffer.ptr != NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (profile->command_buffer.ptr != NULL) {
|
||||
// Copy generated commands
|
||||
const aql_profile::descriptor_t pre_descr = cmd_buffer_mgr.GetPreDescr();
|
||||
const aql_profile::descriptor_t post_descr = cmd_buffer_mgr.GetPostDescr();
|
||||
memcpy(pre_descr.ptr, commands.Data(), pre_descr.size);
|
||||
memcpy(post_descr.ptr, reinterpret_cast<const char*>(commands.Data()) + pre_descr.size,
|
||||
post_descr.size);
|
||||
// Populate start aql packet
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::PopulateAql(pre_descr.ptr, pre_descr.size, cmd_writer, aql_start_packet);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_stop_packet) {
|
||||
try {
|
||||
// Populate stop aql packet
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
const aql_profile::descriptor_t post_descr = cmd_buffer_mgr.GetPostDescr();
|
||||
aql_profile::PopulateAql(post_descr.ptr, post_descr.size, cmd_writer, aql_stop_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling read commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_read(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_read_packet) {
|
||||
if (!aql_profile::read_api_enabled) return HSA_STATUS_ERROR;
|
||||
try {
|
||||
// Populate read aql packet
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
|
||||
const aql_profile::descriptor_t rd_descr =
|
||||
(is_concurrent == false) ? cmd_buffer_mgr.GetRdDescr() : cmd_buffer_mgr.FetchRdDescr();
|
||||
aql_profile::PopulateAql(rd_descr.ptr, rd_descr.size, cmd_writer, aql_read_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Legacy devices, converting of the profiling AQL packet to PM4 packet blob
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// Method for getting the profile info
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
const uint32_t attr_op = (uint32_t)attribute;
|
||||
const uint32_t begin_op = (uint32_t)HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD;
|
||||
if (attr_op >= begin_op) attribute = (hsa_ven_amd_aqlprofile_info_type_t)begin_op;
|
||||
|
||||
if (profile == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'profile'";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
if (attribute != HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD) {
|
||||
if (value == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'value'";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
switch (attribute) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE:
|
||||
*(uint32_t*)value = 0x2000; // a current approximation as 4K is big enough
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE:
|
||||
*(uint32_t*)value = 0x1800; // a current approximation as 4K is big enough
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA:
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(value)->pmc_data.result = 0;
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::DefaultPmcdataCallback,
|
||||
value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA:
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::DefaultTracedataCallback,
|
||||
value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS:
|
||||
*reinterpret_cast<uint32_t*>(value) =
|
||||
pm4_factory->GetBlockInfo(&(profile->events[0]))->counter_count;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID: {
|
||||
hsa_ven_amd_aqlprofile_id_query_t* query =
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_id_query_t*>(value);
|
||||
const uint32_t block = pm4_factory->FindBlock(query->name);
|
||||
const GpuBlockInfo* info = pm4_factory->GetBlockInfo(block);
|
||||
status = (info == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
query->id = block;
|
||||
query->instance_count = info->instance_count;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD: {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
pm4_builder::CmdBuffer commands;
|
||||
|
||||
const uint32_t op = attr_op - begin_op;
|
||||
switch (op) {
|
||||
case 0:
|
||||
pmc_builder->Enable(&commands);
|
||||
break;
|
||||
case 1:
|
||||
pmc_builder->Disable(&commands);
|
||||
break;
|
||||
case 2:
|
||||
pmc_builder->WaitIdle(&commands);
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING << "get_info, not supported op (" << op << ")";
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (profile->command_buffer.ptr == NULL) {
|
||||
const_cast<hsa_ven_amd_aqlprofile_profile_t*>(profile)->command_buffer.size =
|
||||
commands.Size();
|
||||
break;
|
||||
}
|
||||
|
||||
if (profile->command_buffer.size != commands.Size()) {
|
||||
ERR_LOGGING << "get_info, wrong profile cmd size";
|
||||
status = HSA_STATUS_ERROR;
|
||||
break;
|
||||
}
|
||||
if (value == NULL) {
|
||||
ERR_LOGGING << "NULL argument 'value'";
|
||||
status = HSA_STATUS_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(profile->command_buffer.ptr, commands.Data(), profile->command_buffer.size);
|
||||
aql_profile::PopulateAql(profile->command_buffer.ptr, profile->command_buffer.size,
|
||||
cmd_writer, reinterpret_cast<aql_profile::packet_t*>(value));
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
ERR_LOGGING << "Invalid attribute (" << attribute << ")";
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Calls 'callback' with the id and the name of every entry of
// EventDimension::dimension_table, after EventDimension::init().
// Returns HSA_STATUS_ERROR if anything throws, HSA_STATUS_SUCCESS otherwise.
PUBLIC_API hsa_status_t
hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t callback) {
  hsa_status_t result = HSA_STATUS_SUCCESS;

  try {
    EventDimension::init();
    for (auto& [dim_name, dim_id] : EventDimension::dimension_table) {
      callback(dim_id, dim_name.c_str());
    }
  } catch (...) {
    result = HSA_STATUS_ERROR;
  }

  return result;
}
|
||||
|
||||
// Reports the coordinates of one sample of 'event' along every dimension of
// the event's block.
//
// The flat position passed to get_coordinates() is
// sample_id * <number of block instances> + event.block_index. 'callback' is
// then invoked once per dimension with the dimension position, id, extent,
// the coordinate along it, the dimension name and 'userdata'.
// Returns HSA_STATUS_ERROR when the block has no dimensions or anything
// throws, HSA_STATUS_SUCCESS otherwise.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
    hsa_agent_t agent, hsa_ven_amd_aqlprofile_event_t event, uint32_t sample_id,
    hsa_ven_amd_aqlprofile_coordinate_callback_t callback, void* userdata) {
  try {
    const EventAttribDimension& dims = EventAttribDimension::get(agent, event.block_name);
    if (!dims.get_num()) {
      return HSA_STATUS_ERROR;
    }

    // One coordinate per dimension
    std::vector<uint8_t> coordinates(dims.get_num());
    const auto flat_position = sample_id * dims.get_num_instances() + event.block_index;
    dims.get_coordinates(coordinates.data(), flat_position);

    for (size_t pos = 0; pos < dims.get_num(); ++pos) {
      EventDimension dimension = dims.get_dim(pos);
      callback(pos, dimension.id, dimension.extent, coordinates[pos], dimension.name.data(),
               userdata);
    }
  } catch (...) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
const uint32_t xcc_num = pm4_factory->GetXccNumber();
|
||||
const uint32_t se_number = pm4_factory->GetShaderEnginesNumber() / xcc_num;
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
uint64_t* samples = reinterpret_cast<uint64_t*>(profile->output_buffer.ptr);
|
||||
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
if ((char*)samples >= (char*)profile->output_buffer.ptr + profile->output_buffer.size)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
if (!(pm4_factory->GetBlockInfo(p)->attr & CounterBlockAidAttr)) continue;
|
||||
|
||||
// Process an MI300 UMC event for XCC 0 ONLY
|
||||
auto sample_id = p->block_index; // sample id is the event block_index or the UMCCH id
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = sample_id;
|
||||
sample_info.pmc_data.event = *p;
|
||||
sample_info.pmc_data.result = *samples;
|
||||
#if DEBUG_TRACE == 2
|
||||
printf(
|
||||
"DATA: sample index(%u) id(%u) bloc id(%u) index(%u) counter id(%u) "
|
||||
"res(%lu)\n",
|
||||
sample_index, sample_id, p->block_name, p->block_index, p->counter_id,
|
||||
samples[sample_index]);
|
||||
#endif
|
||||
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
samples++;
|
||||
}
|
||||
for (uint32_t xcc_index = 0; xcc_index < xcc_num; xcc_index++) {
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
// this check needs to be the first check as it takes care of a corner case
|
||||
// in which a UMC event is the last event in profile->events
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockAidAttr) continue;
|
||||
|
||||
if ((char*)samples > (char*)profile->output_buffer.ptr + profile->output_buffer.size)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
// non-MI300A-AID counter event.
|
||||
uint32_t block_samples_count = 1;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSqAttr && pm4_factory->IsGFX11())
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
|
||||
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = blk;
|
||||
sample_info.pmc_data.event = *p;
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: xcc(%u) id(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n",
|
||||
xcc_index, blk, p->block_name, p->block_index, p->counter_id, *samples);
|
||||
#endif
|
||||
|
||||
sample_info.pmc_data.result = *samples;
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
samples++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE) {
|
||||
uint32_t mode = 2;
|
||||
switch (profile->event_count) {
|
||||
case 0:
|
||||
mode = 0;
|
||||
break;
|
||||
case UINT32_MAX:
|
||||
const_cast<hsa_ven_amd_aqlprofile_profile_t*>(profile)->event_count = 0;
|
||||
mode = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (mode != 2) { // SQTT trace data, or SQTT pc sampling
|
||||
auto& trace_config = aql_profile::configs.at(profile->command_buffer.ptr);
|
||||
pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
|
||||
const uint64_t se_number_total = pm4_factory->GetShaderEnginesNumber();
|
||||
// Control buffer was allocated as the CmdBuffer prefix partition
|
||||
aql_profile::CommandBufferMgr cmd_buffer_mgr(profile);
|
||||
|
||||
auto* control_ptr =
|
||||
reinterpret_cast<pm4_builder::TraceControl*>(cmd_buffer_mgr.GetPrefix1());
|
||||
// Check if SQTT buffer was wrapped
|
||||
for (size_t se_index = 0; se_index < se_number_total; se_index++) {
|
||||
if (control_ptr[se_index].status & sqttbuilder->GetUTCErrorMask()) {
|
||||
ERR_LOGGING << "SQTT memory error received, SE(" << se_index << ")";
|
||||
status = HSA_STATUS_ERROR_EXCEPTION;
|
||||
} else if (control_ptr[se_index].status & sqttbuilder->GetBufferFullMask()) {
|
||||
ERR2_LOGGING << "SQTT data buffer full, SE(" << se_index << ")";
|
||||
if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
// The samples sizes are returned in the control buffer
|
||||
for (size_t se_index = 0; se_index < se_number_total; se_index++) {
|
||||
bool bMaskedIn = trace_config.GetTargetCU(se_index) >= 0;
|
||||
uint64_t sample_capacity = trace_config.GetCapacity(se_index);
|
||||
void* sample_ptr = reinterpret_cast<void*>(trace_config.GetSEBaseAddr(se_index));
|
||||
|
||||
// WPTR specifies the index in thread trace buffer where next token will be
|
||||
// written by hardware. The index is incremented by size of 32 bytes.
|
||||
size_t sample_size = (control_ptr[se_index].wptr & sqttbuilder->GetWritePtrMask()) *
|
||||
sqttbuilder->GetWritePtrBlk();
|
||||
|
||||
if (pm4_factory->GetGpuId() == aql_profile::GFX11_GPU_ID) {
|
||||
sample_size = sample_size - reinterpret_cast<uint64_t>(sample_ptr);
|
||||
sample_size &= (1ull << 29) - 1;
|
||||
}
|
||||
|
||||
if (sample_size >= sample_capacity) {
|
||||
ERR_LOGGING << "SQTT data out of bounds, sample_id(" << se_index << ") size("
|
||||
<< sample_size << "/" << sample_capacity << ")";
|
||||
sample_size = sample_capacity;
|
||||
if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
hsa_status_t call_status;
|
||||
if (mode == 0) { // SQTT trace
|
||||
if (bMaskedIn) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t info;
|
||||
info.sample_id = se_index;
|
||||
info.trace_data.ptr = sample_ptr;
|
||||
info.trace_data.size = sample_size;
|
||||
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &info, data);
|
||||
}
|
||||
} else { // PC sampling
|
||||
pcsmp_callback_data_t* pcsmp_data = reinterpret_cast<pcsmp_callback_data_t*>(data);
|
||||
pcsmp_data->id = se_index;
|
||||
pcsmp_data->cycle = 333;
|
||||
pcsmp_data->pc = 0x333;
|
||||
call_status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, NULL, data);
|
||||
}
|
||||
}
|
||||
} else { // SPM trace data
|
||||
if (pm4_factory->SpmKfdMode() == false) {
|
||||
const uint32_t tnumber = 1;
|
||||
void* sample_ptr = profile->output_buffer.ptr;
|
||||
const uint32_t sample_size = profile->output_buffer.size;
|
||||
const uint32_t sample_capacity = (profile->output_buffer.size / tnumber);
|
||||
|
||||
for (unsigned i = 0; i < tnumber; ++i) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.trace_data.ptr = sample_ptr;
|
||||
sample_info.trace_data.size = sample_size;
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
sample_ptr = reinterpret_cast<char*>(sample_ptr) + sample_capacity;
|
||||
}
|
||||
} else {
|
||||
status = spm_kfd_namespace::spm_iterate_data(profile, callback, data);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_marker_packet,
|
||||
uint32_t data, hsa_ven_amd_aqlprofile_att_marker_channel_t channel) {
|
||||
assert(profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE);
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();
|
||||
pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
|
||||
pm4_builder::CmdBuffer commands;
|
||||
|
||||
// Generate start commands
|
||||
auto status = sqtt_builder->InsertMarker(&commands, data, channel);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
aql_profile::descriptor_t& cmdbuffer = profile->command_buffer;
|
||||
|
||||
size_t cmd_size = cmdbuffer.size;
|
||||
cmdbuffer.size = commands.Size();
|
||||
|
||||
if (cmdbuffer.ptr == NULL) return HSA_STATUS_SUCCESS;
|
||||
if (cmd_size < commands.Size()) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
|
||||
// Populate stop aql packet
|
||||
memcpy(cmdbuffer.ptr, commands.Data(), commands.Size());
|
||||
aql_profile::PopulateAql(cmdbuffer.ptr, commands.Size(), cmd_writer, aql_marker_packet);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,67 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_AQL_PROFILE_H_
|
||||
#define SRC_CORE_AQL_PROFILE_H_
|
||||
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "include/aql_profile_v2.h"
|
||||
|
||||
#include "core/aql_profile_exception.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
|
||||
namespace pm4_builder {
|
||||
class CmdBuilder;
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
typedef hsa_ven_amd_aqlprofile_descriptor_t descriptor_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ven_amd_aqlprofile_info_type_t info_type_t;
|
||||
typedef hsa_ven_amd_aqlprofile_data_callback_t data_callback_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
|
||||
void PopulateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_builder::CmdBuilder* cmd_writer,
|
||||
packet_t* aql_packet);
|
||||
void* LegacyAqlAcquire(const packet_t* aql_packet, void* data);
|
||||
void* LegacyAqlRelease(const packet_t* aql_packet, void* data);
|
||||
void* LegacyPm4(const packet_t* aql_packet, void* data);
|
||||
|
||||
class event_exception : public aql_profile_exc_val<event_t> {
|
||||
public:
|
||||
event_exception(const std::string& m, const event_t& ev) : aql_profile_exc_val(m, ev) {}
|
||||
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Writes an event as "event( block(<block_name>.<block_index>), Id(<counter_id>))"
// and returns the stream so that insertions can be chained.
static std::ostream& operator<<(std::ostream& os, const aql_profile::event_t& ev) {
  return os << "event( block(" << ev.block_name << "." << ev.block_index << "), Id("
            << ev.counter_id << "))";
}
|
||||
|
||||
#endif // SRC_CORE_AQL_PROFILE_H_
|
||||
@@ -0,0 +1,57 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
#define SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Exception that carries a plain, caller-supplied message.
class aql_profile_exc_msg : public std::exception {
 public:
  // Stores a copy of `msg`; it is what what() reports.
  explicit aql_profile_exc_msg(const std::string& msg) : str_(msg) {}

  // Returns the stored message. The pointer refers to storage owned by this
  // object. `noexcept override` replaces the deprecated `throw()` specification
  // and lets the compiler verify that std::exception::what() is overridden.
  const char* what() const noexcept override { return str_.c_str(); }

 protected:
  std::string str_;
};
|
||||
|
||||
// Exception that carries a message together with a value of type T.
// The text reported by what() is "<msg>(<val>)", where <val> is produced by
// streaming `val` into a std::ostringstream, so T must be insertable into a
// std::ostream.
template <typename T>
class aql_profile_exc_val : public std::exception {
 public:
  aql_profile_exc_val(const std::string& msg, const T& val) {
    std::ostringstream oss;
    oss << msg << "(" << val << ")";
    str_ = oss.str();
  }

  // Returns the formatted message. The pointer refers to storage owned by this
  // object. `noexcept override` replaces the deprecated `throw()` specification
  // and lets the compiler verify that std::exception::what() is overridden.
  const char* what() const noexcept override { return str_.c_str(); }

 protected:
  std::string str_;
};
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // SRC_CORE_AQL_PROFILE_EXCEPTION_H_
|
||||
@@ -0,0 +1,187 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
|
||||
namespace aql_profile {
|
||||
class CommandBufferMgr {
|
||||
public:
|
||||
struct info_t {
|
||||
uint32_t prefix_size;
|
||||
uint32_t rdcmds_size;
|
||||
uint32_t rd2cmds_size;
|
||||
uint32_t is_rd_fetch2;
|
||||
uint32_t precmds_size;
|
||||
uint32_t postcmds_size;
|
||||
};
|
||||
|
||||
CommandBufferMgr(void* ptr, const uint32_t& size) { Init(descriptor_t{ptr, size}, false); }
|
||||
explicit CommandBufferMgr(const profile_t* profile) { Init(profile->command_buffer, true); }
|
||||
|
||||
char* GetPrefix() { return reinterpret_cast<char*>(buffer_.ptr); }
|
||||
char* GetPrefix1() { return reinterpret_cast<char*>(buffer_.ptr) + sizeof(info_t); }
|
||||
char* AddPrefix(const uint32_t& delta) {
|
||||
const uint32_t size = Align(delta);
|
||||
char* ptr = (buffer_.ptr != NULL) ? GetPrefix() + info_.prefix_size : NULL;
|
||||
info_.prefix_size += delta;
|
||||
buffer_.size -= (size < buffer_.size) ? size : buffer_.size;
|
||||
if (buffer_.size == 0)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::AddPrefix(): buffer size set to zero");
|
||||
return (buffer_.size != 0) ? ptr : NULL;
|
||||
}
|
||||
|
||||
bool SetRdSize(const uint32_t& rd_data_size) {
|
||||
const uint32_t size = Align(rd_data_size);
|
||||
const bool suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.rdcmds_size = rd_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetRdSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool SetRd2Size(const uint32_t& rd_data_size) {
|
||||
const uint32_t size = Align(rd_data_size);
|
||||
const bool suc = SetRdSize(Align(size));
|
||||
if (suc) {
|
||||
info_.rd2cmds_size = rd_data_size;
|
||||
info_.rdcmds_size = 2 * size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetRd2Size(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool SetPreSize(const uint32_t& pre_data_size) {
|
||||
const uint32_t size = Align(pre_data_size);
|
||||
const bool suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.precmds_size = pre_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::SetPreSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool Finalize(const uint32_t& data_size) {
|
||||
bool suc = (data_size > info_.precmds_size);
|
||||
if (suc) {
|
||||
const uint32_t post_data_size = data_size - info_.precmds_size;
|
||||
const uint32_t size = Align(post_data_size);
|
||||
suc = (size <= buffer_.size);
|
||||
if (suc) {
|
||||
info_.postcmds_size = post_data_size;
|
||||
buffer_.size -= size;
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::Finalize(): postcmd size is out of cmdbuffer");
|
||||
}
|
||||
if (!suc) throw aql_profile_exc_msg("CommandBufferMgr::Finalize(): postcmd size is zero");
|
||||
|
||||
if (info_slot_) *info_slot_ = info_;
|
||||
|
||||
return suc;
|
||||
}
|
||||
|
||||
uint32_t GetSize() const { return GetEndOffset(); }
|
||||
|
||||
descriptor_t GetRdDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset();
|
||||
descr.size = info_.rdcmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t FetchRdDescr() {
|
||||
descriptor_t descr;
|
||||
if (info_.is_rd_fetch2 == 0) {
|
||||
info_.is_rd_fetch2 = 1;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset();
|
||||
} else {
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetRdOffset() + (info_.rdcmds_size / 2);
|
||||
}
|
||||
descr.size = info_.rd2cmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t GetPreDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetPreOffset();
|
||||
descr.size = info_.precmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t GetPostDescr() const {
|
||||
descriptor_t descr;
|
||||
descr.ptr = reinterpret_cast<char*>(buffer_.ptr) + GetPostOffset();
|
||||
descr.size = info_.postcmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
static uint32_t Align(const uint32_t& size) { return (size + align_mask_) & ~align_mask_; }
|
||||
|
||||
private:
|
||||
void Init(const descriptor_t& buffer, const bool& import) {
|
||||
buffer_ = buffer;
|
||||
info_ = {};
|
||||
info_slot_ = NULL;
|
||||
|
||||
uint32_t prefix_size = sizeof(info_t);
|
||||
if (buffer_.ptr != NULL) {
|
||||
info_slot_ = reinterpret_cast<info_t*>(GetPrefix());
|
||||
if (import) {
|
||||
prefix_size = info_slot_->prefix_size;
|
||||
info_ = *info_slot_;
|
||||
info_.prefix_size = 0;
|
||||
}
|
||||
} else {
|
||||
buffer_.size = UINT_MAX;
|
||||
}
|
||||
AddPrefix(prefix_size);
|
||||
}
|
||||
|
||||
uint32_t GetRdOffset() const { return Align(info_.prefix_size); }
|
||||
uint32_t GetPreOffset() const { return GetRdOffset() + Align(info_.rdcmds_size); }
|
||||
uint32_t GetPostOffset() const { return GetPreOffset() + Align(info_.precmds_size); }
|
||||
uint32_t GetEndOffset() const { return GetPostOffset() + Align(info_.postcmds_size); }
|
||||
|
||||
static const uint32_t align_size_ = 0x100;
|
||||
static const uint32_t align_mask_ = align_size_ - 1;
|
||||
|
||||
descriptor_t buffer_;
|
||||
info_t info_;
|
||||
info_t* info_slot_;
|
||||
};
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,204 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
#include "def/gpu_block_info.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <array>
|
||||
|
||||
// One named axis of a counter block (for example "SE" or "INSTANCE") together
// with the number of positions along it.
//
// `name` is a non-owning view: the string passed to the constructor must
// outlive the EventDimension. The name must be one of the entries registered
// by init(); otherwise the lookup in `dimension_table` throws
// std::out_of_range.
struct EventDimension {
  EventDimension(const EventDimension& other) = default;
  // Initializers are listed in member declaration order (id, extent, name).
  EventDimension(std::string_view _name, size_t _extent)
      : id(dimension_table.at(std::string(_name))), extent(_extent), name(_name) {}

  uint64_t id;      // index of `name` in dimension_list
  uint64_t extent;  // number of positions along this dimension
  std::string_view name;

  static std::vector<std::string> dimension_list;
  static std::unordered_map<std::string, size_t> dimension_table;

  // Fills dimension_list / dimension_table on first use; later calls are
  // no-ops. The work runs inside the initializer of a function-local static so
  // that concurrent first calls from several threads cannot race on the shared
  // containers.
  static void init() {
    static const bool initialized = [] {
      if (dimension_list.size()) return true;

      dimension_list.push_back("XCD");
      dimension_list.push_back("AID");
      dimension_list.push_back("SE");
      dimension_list.push_back("SA");
      dimension_list.push_back("WGP");
      dimension_list.push_back("INSTANCE");

      for (size_t i = 0; i < dimension_list.size(); i++) dimension_table[dimension_list[i]] = i;
      return true;
    }();
    (void)initialized;
  }
};
|
||||
|
||||
// Key made of an agent handle and a block identifier. Two keys are equal when
// both fields are equal.
class EventKey {
 public:
  uint64_t agent;
  uint64_t block;

  bool operator==(const EventKey& rhs) const {
    if (agent != rhs.agent) return false;
    return block == rhs.block;
  }
  bool operator!=(const EventKey& rhs) const { return !operator==(rhs); }
};

// Hash for EventKey: the block value rotated right by 8 bits, OR-ed with the
// agent value.
template <>
struct std::hash<EventKey> {
  uint64_t operator()(const EventKey& ev) const {
    const uint64_t rotated_block = (ev.block >> 8) | (ev.block << 56);
    return rotated_block | ev.agent;
  }
};
|
||||
|
||||
class EventAttribDimension {
|
||||
public:
|
||||
static constexpr size_t event_id_bit = 24;
|
||||
|
||||
template <typename AgentType>
|
||||
EventAttribDimension(AgentType agent, hsa_ven_amd_aqlprofile_block_name_t block_name)
|
||||
: key({agent.handle, (uint64_t)block_name}) {
|
||||
EventDimension::init();
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
this->block_info = pm4_factory->GetBlockInfo(block_name);
|
||||
|
||||
bIsGFX12 = pm4_factory->IsGFX12();
|
||||
bIsGFX11 = pm4_factory->IsGFX11();
|
||||
bIsGFX9 = pm4_factory->IsGFX9();
|
||||
|
||||
num_xccs = pm4_factory->GetXccNumber();
|
||||
if (num_xccs > 1 && HasAttr(CounterBlockUmcAttr)) { // For MI300 AID only
|
||||
num_xccs = 1;
|
||||
num_aid = 4;
|
||||
}
|
||||
shader_engine = HasAttr(CounterBlockSeAttr);
|
||||
shader_array = HasAttr(CounterBlockSaAttr);
|
||||
|
||||
if (bIsGFX9)
|
||||
compute_unit = HasAttr(CounterBlockTcAttr) && shader_engine;
|
||||
else if (bIsGFX11 || bIsGFX12)
|
||||
workgroup_processor = HasAttr(CounterBlockSqAttr);
|
||||
|
||||
se_num = pm4_factory->GetShaderEnginesNumber();
|
||||
sarrays = pm4_factory->GetShaderArraysNumber() * se_num;
|
||||
|
||||
cu_num = (pm4_factory->GetComputeUnitNumber() + sarrays - 1) / sarrays;
|
||||
wgp_num = (pm4_factory->GetComputeUnitNumber() / 2 + sarrays - 1) / sarrays;
|
||||
|
||||
if (HasAttr(CounterBlockUmcAttr))
|
||||
block_instance_count = block_info->instance_count / num_aid;
|
||||
else if (compute_unit)
|
||||
block_instance_count = std::min<size_t>(block_info->instance_count, cu_num + 1);
|
||||
else
|
||||
block_instance_count = block_info->instance_count;
|
||||
|
||||
if (num_xccs > 1) dimensions.push_back({"XCD", num_xccs});
|
||||
if (num_aid > 1) dimensions.push_back({"AID", num_aid});
|
||||
|
||||
if (workgroup_processor)
|
||||
dimensions.push_back({"WGP", wgp_num});
|
||||
else
|
||||
dimensions.push_back({"INSTANCE", block_instance_count});
|
||||
|
||||
if (shader_engine)
|
||||
dimensions.push_back(
|
||||
{"SE", pm4_factory->GetShaderEnginesNumber() / (num_xccs > 0 ? num_xccs : 1)});
|
||||
if (shader_array) dimensions.push_back({"SA", pm4_factory->GetShaderArraysNumber()});
|
||||
}
|
||||
|
||||
size_t get_num_xccs() const { return num_xccs; };
|
||||
size_t get_total_elements() const {
|
||||
size_t acc = 1;
|
||||
for (auto& d : dimensions) acc *= d.extent;
|
||||
return acc;
|
||||
}
|
||||
uint64_t get_num() const { return dimensions.size(); };
|
||||
EventDimension get_dim(uint64_t index) const { return dimensions.at(index); };
|
||||
|
||||
hsa_status_t get_coordinates(uint8_t* coordinates, int64_t cumulative_id) const {
|
||||
const int end = static_cast<int>(get_num()) - 1;
|
||||
for (int i = end; i >= 0; i--) {
|
||||
coordinates[i] = static_cast<uint8_t>(cumulative_id % dimensions.at(i).extent);
|
||||
cumulative_id /= dimensions.at(i).extent;
|
||||
}
|
||||
if (cumulative_id != 0) return HSA_STATUS_ERROR_INVALID_INDEX;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
size_t get_num_instances() const { return block_instance_count; }
|
||||
|
||||
private:
|
||||
bool HasAttr(CounterBlockAttr attr) const { return (block_info->attr & attr) != 0; }
|
||||
|
||||
EventKey key;
|
||||
const GpuBlockInfo* block_info = nullptr;
|
||||
hsa_ven_amd_aqlprofile_event_t event{};
|
||||
|
||||
bool bIsGFX12;
|
||||
bool bIsGFX11;
|
||||
bool bIsGFX9;
|
||||
|
||||
bool shader_engine = false;
|
||||
bool shader_array = false;
|
||||
bool compute_unit = false;
|
||||
bool workgroup_processor = false;
|
||||
|
||||
size_t num_xccs = 1;
|
||||
size_t num_aid = 1;
|
||||
size_t se_num = 1;
|
||||
size_t sarrays = 1;
|
||||
size_t cu_num = 1;
|
||||
size_t wgp_num = 1;
|
||||
size_t block_instance_count = 1;
|
||||
|
||||
std::vector<EventDimension> dimensions;
|
||||
|
||||
public:
|
||||
template <typename AgentType>
|
||||
static const EventAttribDimension& get(AgentType agent,
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name) {
|
||||
thread_local std::unordered_map<EventKey, std::shared_ptr<EventAttribDimension>> event_map{};
|
||||
thread_local std::shared_ptr<EventAttribDimension> event_cache{nullptr};
|
||||
|
||||
EventKey key{agent.handle, (uint64_t)block_name};
|
||||
|
||||
if (!event_cache || event_cache->key != key) {
|
||||
auto it = event_map.find(key);
|
||||
if (auto it = event_map.find(key); it != event_map.end())
|
||||
event_cache = it->second;
|
||||
else
|
||||
event_cache =
|
||||
event_map.emplace(key, std::make_shared<EventAttribDimension>(agent, block_name))
|
||||
.first->second;
|
||||
}
|
||||
|
||||
return *event_cache;
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,447 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/counter_dimensions.hpp"
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
#include "memorymanager.hpp"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
#define ERR_CHECK(cond, err, msg) \
|
||||
{ \
|
||||
if (cond) { \
|
||||
ERR_LOGGING << msg; \
|
||||
return err; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define HSA_TRY_WRAP try {
|
||||
#define HSA_CATCH_WRAP \
|
||||
} \
|
||||
catch (std::exception & e) { \
|
||||
return HSA_STATUS_ERROR; \
|
||||
}
|
||||
|
||||
std::vector<std::string> EventDimension::dimension_list;
|
||||
std::unordered_map<std::string, size_t> EventDimension::dimension_table;
|
||||
|
||||
namespace aql_profile_v2 {
|
||||
// Command buffer partitioning manager
|
||||
// Supports Pre/Post commands partitioning
|
||||
// and prefix control partition
|
||||
|
||||
using aql_profile::event_exception;
|
||||
using aql_profile::event_t;
|
||||
using ::aql_profile::Pm4Factory;
|
||||
|
||||
// Chooses the counter id to use for an SQ event from its accumulation flag:
// the factory's low- or high-resolution accumulation id when the flag requests
// one, otherwise the event's own id.
uint32_t HandleSQFlagsBlock(Pm4Factory* pm4_factory, const aqlprofile_pmc_event_t& event) {
  auto chosen_id = event.event_id;
  const auto accum_mode = event.flags.sq_flags.accum;

  if (accum_mode == AQLPROFILE_ACCUMULATION_HI_RES) {
    chosen_id = pm4_factory->GetAccumHiID();
  } else if (accum_mode == AQLPROFILE_ACCUMULATION_LO_RES) {
    chosen_id = pm4_factory->GetAccumLowID();
  }

  return chosen_id;
}
|
||||
|
||||
// Builds the counter descriptor for `event` and assigns it the next free
// counter register of its block instance.
//
// `index_map` tracks, per (block id, block index), how many registers have
// been handed out so far; it is updated on success.
//
// Events for which pm4_builder::SPISkip() returns true are marked internal
// (`event.bInternal`) and do not consume a register.
//
// Throws:
//  - aql_profile::aql_profile_exc_msg when the block has no free counter
//    register left (a std::exception, so the API boundary can log the text);
//  - HSA_STATUS_ERROR_INVALID_ARGUMENT (as hsa_status_t) when raw flags are
//    set on an event of a block other than SQ.
counter_des_t GetCounter(Pm4Factory* pm4_factory, EventRequest& event,
                         std::map<block_des_t, uint32_t, lt_block_des>& index_map) {
  const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(event.block_name);
  const block_des_t block_des = {block_info->id, event.block_index};
  // Inserts a zero entry the first time this block instance is seen.
  const auto ret = index_map.insert({block_des, 0});
  auto reg_index = ret.first->second;
  auto visible_id = event.event_id;

  if (pm4_builder::SPISkip(block_info->attr, visible_id)) {
    event.bInternal = true;
    return {visible_id, reg_index, block_des, block_info};
  }

  if (reg_index >= block_info->counter_count)
    throw aql_profile::aql_profile_exc_msg(
        "Event is out of block counter registers number limit");

  if (event.flags.raw) {
    if (event.block_name == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ) {
      visible_id = HandleSQFlagsBlock(pm4_factory, event);
    } else {
      throw HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  }

  // Claim the register only after every check has passed.
  ret.first->second++;
  return {visible_id, reg_index, block_des, block_info};
}
|
||||
|
||||
// Converts the requested events into a counters_vector, one descriptor per
// event in request order. On GFX10, when none of the collected counters
// carries the GRBM attribute, a descriptor for a zero-initialised GRBM event
// is appended.
pm4_builder::counters_vector CountersVec(std::vector<EventRequest>& events,
                                         Pm4Factory* pm4_factory) {
  pm4_builder::counters_vector counters;
  std::map<block_des_t, uint32_t, lt_block_des> next_register;

  for (auto it = events.begin(); it != events.end(); ++it) {
    counters.push_back(GetCounter(pm4_factory, *it, next_register));
  }

  const bool needs_grbm =
      pm4_factory->IsGFX10() && (counters.get_attr() & CounterBlockGRBMAttr) == 0;
  if (needs_grbm) {
    EventRequest grbm_event{0};
    grbm_event.block_name = HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM;
    counters.push_back(GetCounter(pm4_factory, grbm_event, next_register));
  }

  return counters;
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
hsa_status_t _internal_aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_pmc_data_callback_t callback,
|
||||
void* userdata) {
|
||||
auto counter_memorymgr = MemoryManager::GetManager(handle.handle);
|
||||
CounterMemoryManager* memorymgr = dynamic_cast<CounterMemoryManager*>(counter_memorymgr.get());
|
||||
if (!memorymgr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(memorymgr->AgentHandle());
|
||||
const uint32_t xcc_num = pm4_factory->GetXccNumber();
|
||||
|
||||
uint64_t* samples = reinterpret_cast<uint64_t*>(memorymgr->GetOutputBuf());
|
||||
uint64_t* buffer_end_location = samples + memorymgr->GetOutputBufSize() / sizeof(uint64_t);
|
||||
auto& events = memorymgr->GetEvents();
|
||||
|
||||
size_t umc_sample_id = 0;
|
||||
if (xcc_num > 1)
|
||||
for (auto& event : events) {
|
||||
if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
|
||||
|
||||
if (!(pm4_factory->GetBlockInfo(event.block_name)->attr & CounterBlockUmcAttr)) continue;
|
||||
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: sample index(%u) id(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n",
|
||||
sample_index, sample_id, p->block_name, p->block_index, p->counter_id, *samples);
|
||||
#endif
|
||||
|
||||
hsa_status_t status = callback(event, event.block_index, *samples, userdata);
|
||||
samples++;
|
||||
umc_sample_id++;
|
||||
|
||||
if (status == HSA_STATUS_INFO_BREAK) return HSA_STATUS_SUCCESS;
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
}
|
||||
|
||||
size_t xcc_sample_count = 0;
|
||||
for (uint32_t xcc_index = 0; xcc_index < xcc_num; xcc_index++)
|
||||
for (auto& event : events) {
|
||||
if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
|
||||
|
||||
if (pm4_factory->GetBlockInfo(event.block_name)->attr & CounterBlockUmcAttr) continue;
|
||||
|
||||
// non-MI300A-AID counter event.
|
||||
uint32_t block_samples_count = pm4_factory->GetNumEvents(event.block_name);
|
||||
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
|
||||
#if DEBUG_TRACE == 2
|
||||
printf("DATA: xcc(%u) blk(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n", xcc_index,
|
||||
blk, event.block_name, event.block_index, event.event_id, *samples);
|
||||
#endif
|
||||
xcc_sample_count += xcc_index == 0;
|
||||
size_t xcc_sample_id = xcc_sample_count * xcc_index +
|
||||
static_cast<size_t>(event.block_index) * block_samples_count + blk;
|
||||
|
||||
if (!event.bInternal) {
|
||||
hsa_status_t status = callback(event, xcc_sample_id, *samples, userdata);
|
||||
if (status == HSA_STATUS_INFO_BREAK)
|
||||
return HSA_STATUS_SUCCESS;
|
||||
else if (status != HSA_STATUS_SUCCESS)
|
||||
return status;
|
||||
}
|
||||
|
||||
samples++;
|
||||
}
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Creates the read/start/stop AQL packets for a PMC profile.
//
// A CounterMemoryManager is created for `profile.agent`, registered, and given
// a copy of the requested events. It allocates an output buffer (via
// `alloc_cb`) sized from GetBytesNeeded() of every event plus 8 spare bytes,
// and one command buffer holding the three aligned command streams in the
// order read, start, stop. The generated commands are copied with `memcpy_cb`
// and each packet in `packets` is populated to reference its stream.
// `handle->handle` receives the manager's handle.
//
// Returns HSA_STATUS_ERROR when the read commands produce no data,
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the output buffer is smaller than the
// data the read commands produce, HSA_STATUS_SUCCESS otherwise. Exceptions
// from the helpers it calls propagate to the caller.
hsa_status_t _internal_aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  auto memorymgr =
      std::make_shared<CounterMemoryManager>(profile.agent, alloc_cb, dealloc_cb, userdata);
  MemoryManager::RegisterManager(memorymgr);
  memorymgr->CopyEvents(profile.events, profile.event_count);

  pm4_builder::CmdBuffer read_cmd;
  pm4_builder::CmdBuffer start_cmd;
  pm4_builder::CmdBuffer stop_cmd;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile.agent);
  const pm4_builder::counters_vector countersVec = CountersVec(memorymgr->GetEvents(), pm4_factory);

  pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();

  // Size and create the output buffer
  size_t output_bytes = 8;  // Extra space for GRBM block on gfx10
  for (auto& event : memorymgr->GetEvents())
    output_bytes += pm4_factory->GetBytesNeeded(event.block_name);
  memorymgr->CreateOutputBuf(output_bytes);

  // Generate read commands
  size_t data_size = pmc_builder->Read(&read_cmd, countersVec, memorymgr->GetOutputBuf());
  // Generate start commands
  pmc_builder->Start(&start_cmd, countersVec);
  // Generate stop commands
  pmc_builder->Stop(&stop_cmd, countersVec);

  // data_size is the amount of output produced by the read commands.
  ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC Builder Read(): data size set to zero");
  if (memorymgr->GetOutputBufSize() < data_size) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Copy generated commands; every stream starts on an aligned offset
  size_t start_size = aql_profile::CommandBufferMgr::Align(start_cmd.Size());
  size_t stop_size = aql_profile::CommandBufferMgr::Align(stop_cmd.Size());
  size_t read_size = aql_profile::CommandBufferMgr::Align(read_cmd.Size());
  memorymgr->CreateCmdBuf(start_size + stop_size + read_size);

  handle->handle = memorymgr->GetHandler();
  pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
  uint8_t* cmdbuf = reinterpret_cast<uint8_t*>(memorymgr->GetCmdBuf());

  memcpy_cb(cmdbuf, read_cmd.Data(), read_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, read_cmd.Size(), cmd_writer, &packets->read_packet);
  cmdbuf += read_size;
  memcpy_cb(cmdbuf, start_cmd.Data(), start_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, start_cmd.Size(), cmd_writer, &packets->start_packet);
  cmdbuf += start_size;
  memcpy_cb(cmdbuf, stop_cmd.Data(), stop_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, stop_cmd.Size(), cmd_writer, &packets->stop_packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace aql_profile_v2
|
||||
|
||||
extern "C" {
|
||||
|
||||
/// Public C entry point for PMC packet creation.
///
/// Forwards every argument to
/// aql_profile_v2::_internal_aqlprofile_pmc_create_packets and converts any
/// exception into a status code: a thrown hsa_status_t is logged and returned
/// as-is, a std::exception is logged and mapped to HSA_STATUS_ERROR, anything
/// else is mapped to HSA_STATUS_ERROR without logging.
PUBLIC_API hsa_status_t aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_pmc_create_packets(
        handle, packets, profile, alloc_cb, dealloc_cb, memcpy_cb, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    return thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
/// Releases the memory manager associated with `handle`.
///
/// The function returns void, so failures cannot be reported to the caller;
/// exceptions are contained here. A std::exception is logged through
/// ERR_LOGGING (as the sibling entry points do) instead of being dropped
/// silently.
PUBLIC_API void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle) {
  try {
    MemoryManager::DeleteManager(handle.handle);
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
  } catch (...) {
    // Nothing to report with: deliberately best-effort.
  }
}
|
||||
|
||||
/// Public C entry point for iterating collected PMC data.
///
/// Delegates to aql_profile_v2::_internal_aqlprofile_pmc_iterate_data and
/// translates exceptions to status codes: a thrown hsa_status_t is logged and
/// returned, a std::exception is logged and becomes HSA_STATUS_ERROR, any
/// other exception becomes HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
                                                    aqlprofile_pmc_data_callback_t callback,
                                                    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_pmc_iterate_data(handle, callback, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    return thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
/// Invokes `callback` once per entry of EventDimension::dimension_table,
/// passing the entry's id, its name as a C string, and `user_data`.
///
/// Iteration stops at the first callback result other than HSA_STATUS_SUCCESS,
/// and that result is returned. A thrown hsa_status_t is logged and returned;
/// any other exception yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_ids(aqlprofile_eventname_callback_t callback,
                                                     void* user_data) {
  try {
    EventDimension::init();

    for (auto& [dim_name, dim_id] : EventDimension::dimension_table) {
      auto cb_status = callback(dim_id, dim_name.c_str(), user_data);
      if (cb_status != HSA_STATUS_SUCCESS) return cb_status;
    }

    return HSA_STATUS_SUCCESS;
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    return thrown_status;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
/// Reports the coordinates of one counter sample, one dimension at a time.
///
/// Looks up the dimension attributes for `event.block_name` on `agent`,
/// decodes `counter_id` into per-dimension coordinates, then calls `callback`
/// once per dimension with (index, dimension id, extent, coordinate, name,
/// userdata).
///
/// @return HSA_STATUS_SUCCESS after all dimensions were reported;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when `callback` is null;
///         HSA_STATUS_ERROR when the block has no dimensions, has too many
///         dimensions for the local coordinate buffer, or an unknown exception
///         is caught; a thrown hsa_status_t is logged and returned as-is.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_coord(aqlprofile_agent_handle_t agent,
                                                       aqlprofile_pmc_event_t event,
                                                       uint64_t counter_id,
                                                       aqlprofile_coordinate_callback_t callback,
                                                       void* userdata) {
  if (callback == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  try {
    const EventAttribDimension& attrib = EventAttribDimension::get(agent, event.block_name);

    const size_t num_dims = attrib.get_num();
    if (!num_dims) return HSA_STATUS_ERROR;

    std::array<uint8_t, 32> coord;
    // Runtime guard instead of assert(): the check must also hold in NDEBUG
    // builds, because get_coordinates() writes through a raw pointer into this
    // fixed-size buffer. Same bound as the former assert.
    if (num_dims >= coord.size()) return HSA_STATUS_ERROR;
    attrib.get_coordinates(coord.data(), counter_id);

    for (size_t i = 0; i < num_dims; i++) {
      EventDimension dim = attrib.get_dim(i);
      // NOTE(review): the callback result is not inspected here -- confirm
      // whether a failure status should stop the iteration.
      callback(i, dim.id, dim.extent, coord.at(i), dim.name.data(), userdata);
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// Registers an agent described by the original (V0) agent-info layout.
///
/// Thin convenience wrapper: forwards to aqlprofile_register_agent_info with
/// AQLPROFILE_AGENT_VERSION_V0 and returns its status unchanged.
PUBLIC_API hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
                                                  const aqlprofile_agent_info_t* agent_info) {
  const hsa_status_t registration_status =
      aqlprofile_register_agent_info(agent_id, agent_info, AQLPROFILE_AGENT_VERSION_V0);
  return registration_status;
}
|
||||
|
||||
/// Registers an agent from a versioned agent-info structure.
///
/// @param agent_id   Out: receives the handle returned by aql_profile::RegisterAgent.
/// @param agent_info Pointer to aqlprofile_agent_info_t (V0) or
///                   aqlprofile_agent_info_v1_t (V1), selected by `version`.
/// @param version    Layout of `agent_info`.
/// @return HSA_STATUS_SUCCESS when the agent was registered;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when `agent_id` or `agent_info`
///         is null or `version` is not V0/V1 (including values outside the
///         enumeration, which previously fell through and reported success
///         without writing `*agent_id`); a thrown hsa_status_t is logged and
///         returned; any other exception yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_register_agent_info(aqlprofile_agent_handle_t* agent_id,
                                                       const void* agent_info,
                                                       aqlprofile_agent_version_t version) {
  try {
    if (agent_id == nullptr || agent_info == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    switch (version) {
      case AQLPROFILE_AGENT_VERSION_V0: {
        // Widen the V0 layout to V1; fields V0 does not carry are zeroed.
        const auto* info = static_cast<const aqlprofile_agent_info_t*>(agent_info);
        aqlprofile_agent_info_v1_t info_v1 = {
            .agent_gfxip = info->agent_gfxip,
            .xcc_num = info->xcc_num,
            .se_num = info->se_num,
            .cu_num = info->cu_num,
            .shader_arrays_per_se = info->shader_arrays_per_se,
            .domain = 0,
            .location_id = 0,
        };
        *agent_id = aql_profile::RegisterAgent(&info_v1);
        return HSA_STATUS_SUCCESS;
      }
      case AQLPROFILE_AGENT_VERSION_V1: {
        *agent_id =
            aql_profile::RegisterAgent(static_cast<const aqlprofile_agent_info_v1_t*>(agent_info));
        return HSA_STATUS_SUCCESS;
      }
      case AQLPROFILE_AGENT_VERSION_NONE:
      case AQLPROFILE_AGENT_VERSION_LAST:
        break;
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }

  // Reached for NONE/LAST and for any value outside the enumeration.
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
/// Reports whether `event` maps to a known counter block on `agent`.
///
/// @param agent  Agent whose PM4 factory is consulted.
/// @param event  Event to validate.
/// @param result Out: true when the factory returns block info for the event,
///               false otherwise (also on every error path where it is non-null).
/// @return HSA_STATUS_SUCCESS when the lookup ran;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when `result` or `event` is null;
///         a thrown hsa_status_t is logged and returned; any other exception
///         yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
                                                      const aqlprofile_pmc_event_t* event,
                                                      bool* result) {
  if (result == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  *result = false;
  if (event == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
    if (pm4_factory->GetBlockInfo(event) != nullptr) *result = true;
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// Queries PMC-related information for the agent in `profile`.
///
/// Supported attributes:
///  - AQLPROFILE_INFO_BLOCK_ID: `value` points to a
///    hsa_ven_amd_aqlprofile_id_query_t whose `name` is looked up; on success
///    its `id` and `instance_count` are filled in.
///  - AQLPROFILE_INFO_BLOCK_COUNTERS: `value` points to a uint32_t that
///    receives the counter count of the block of `profile->events[0]`.
///
/// @return HSA_STATUS_SUCCESS on success; HSA_STATUS_ERROR when `profile` is
///         null, the block is unknown, or it has no dimensions;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when `value` is null, the
///         attribute is unsupported, or BLOCK_COUNTERS is requested without
///         any event; a thrown hsa_status_t is logged and returned; any other
///         exception yields HSA_STATUS_ERROR.
PUBLIC_API hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
                                                aqlprofile_pmc_info_type_t attribute, void* value) {
  if (!profile) return HSA_STATUS_ERROR;
  // Every supported attribute reads from or writes through `value`.
  if (value == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile->agent);

    switch (attribute) {
      case AQLPROFILE_INFO_BLOCK_ID: {
        auto* query = static_cast<hsa_ven_amd_aqlprofile_id_query_t*>(value);
        const uint32_t block = pm4_factory->FindBlock(query->name);
        const GpuBlockInfo* info = pm4_factory->GetBlockInfo(block);
        if (!info) return HSA_STATUS_ERROR;

        const auto& attrib = EventAttribDimension::get(
            profile->agent, static_cast<hsa_ven_amd_aqlprofile_block_name_t>(block));
        if (!attrib.get_num()) return HSA_STATUS_ERROR;

        query->id = block;
        query->instance_count = attrib.get_num_instances();
      } break;
      case AQLPROFILE_INFO_BLOCK_COUNTERS: {
        // The first event selects the block; there must be one to look at.
        if (profile->events == nullptr || profile->event_count == 0)
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
        // GetBlockInfo() may return null for an event the agent does not know.
        const auto* info = pm4_factory->GetBlockInfo(&profile->events[0]);
        if (info == nullptr) return HSA_STATUS_ERROR;
        *static_cast<uint32_t*>(value) = info->counter_count;
      } break;
      default:
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,107 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx10_def.h"
|
||||
#include "pm4/gfx10_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx10 factory class
|
||||
class Gfx10Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx10Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx10Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX10() const override { return true; }
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 1; };
|
||||
|
||||
protected:
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
// void Gfx10Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
void Gfx10Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx10CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx10CmdBuilder, gfx10_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
// GFX10 block table
|
||||
// GFX10 block table, one slot per block name id, in id order.
// A nullptr slot means the block has no entry on GFX10; the trailing comment
// names what the slot stands for.
const GpuBlockInfo* Gfx10Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
    &CpcCounterBlockInfo,
    &CpfCounterBlockInfo,
    &GdsCounterBlockInfo,
    &GrbmCounterBlockInfo,
    nullptr,  // GrbmSeCounterBlockInfo
    &SpiCounterBlockInfo,
    &SqCounterBlockInfo,
    nullptr,  // SqCsCounterBlockInfo
    nullptr,  // GFX8 SRBM
    &SxCounterBlockInfo,
    &TaCounterBlockInfo,
    nullptr,  // TcaCounterBlockInfo
    nullptr,  // TccCounterBlockInfo
    nullptr,  // TcpCounterBlockInfo
    nullptr,  // TdCounterBlockInfo
    // MC blocks
    nullptr,  // MC_ARB
    nullptr,  // MC_HUB
    nullptr,  // MC_MCBVM
    nullptr,  // MC_SEQ
    nullptr,  // McVmL2CounterBlockInfo
    nullptr,  // MC_XBAR
    nullptr,  // AtcCounterBlockInfo
    nullptr,  // AtcL2CounterBlockInfo
    &GceaCounterBlockInfo,
    nullptr,  // RpbCounterBlockInfo
    // System blocks
    nullptr,  // SdmaCounterBlockInfo
    // new navi blocks
    &Gl1aCounterBlockInfo,
    &Gl1cCounterBlockInfo,
    &Gl2aCounterBlockInfo,
    &Gl2cCounterBlockInfo,
    &GcrCounterBlockInfo,
    &GusCounterBlockInfo,
};
|
||||
|
||||
// Pm4Factory create methods
|
||||
// Allocates a Gfx10Factory for `agent_info` and hands it back as a Pm4Factory.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Gfx10Create(const AgentInfo* agent_info) {
  auto* factory = new Gfx10Factory(agent_info);
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Gfx10Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,107 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx11_def.h"
|
||||
#include "pm4/gfx11_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx11 factory class
|
||||
class Gfx11Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx11Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx11Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX11() const override { return true; }
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 1; };
|
||||
|
||||
protected:
|
||||
// void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info);
|
||||
// void ConstructBuilders(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
// Gfx builders init
|
||||
// void Gfx11Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
void Gfx11Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx11CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx11CmdBuilder, gfx11_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
// GFX11 block table
|
||||
// GFX11 block table, one slot per block name id, in id order.
// A nullptr slot means the block has no entry on GFX11; the trailing comment
// names what the slot stands for.
const GpuBlockInfo* Gfx11Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
    &CpcCounterBlockInfo,
    &CpfCounterBlockInfo,
    &GdsCounterBlockInfo,
    &GrbmCounterBlockInfo,
    nullptr,  // GrbmSeCounterBlockInfo
    &SpiCounterBlockInfo,
    &SqCounterBlockInfo,
    nullptr,  // SqCsCounterBlockInfo
    nullptr,  // GFX8 SRBM
    &SxCounterBlockInfo,
    &TaCounterBlockInfo,
    nullptr,  // TcaCounterBlockInfo
    nullptr,  // TccCounterBlockInfo
    &TcpCounterBlockInfo,
    nullptr,  // TdCounterBlockInfo
    // MC blocks
    nullptr,  // MC_ARB
    nullptr,  // MC_HUB
    nullptr,  // MC_MCBVM
    nullptr,  // MC_SEQ
    nullptr,  // McVmL2CounterBlockInfo
    nullptr,  // MC_XBAR
    nullptr,  // AtcCounterBlockInfo
    nullptr,  // AtcL2CounterBlockInfo
    &GceaCounterBlockInfo,
    nullptr,  // RpbCounterBlockInfo
    // System blocks
    nullptr,  // SdmaCounterBlockInfo
    // new navi blocks
    &Gl1aCounterBlockInfo,
    &Gl1cCounterBlockInfo,
    &Gl2aCounterBlockInfo,
    &Gl2cCounterBlockInfo,
    &GcrCounterBlockInfo,
    &GusCounterBlockInfo,
};
|
||||
|
||||
// Pm4Factory create methods
|
||||
// Allocates a Gfx11Factory for `agent_info` and hands it back as a Pm4Factory.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Gfx11Create(const AgentInfo* agent_info) {
  auto* factory = new Gfx11Factory(agent_info);
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Gfx11Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,116 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/pm4_factory.h"
|
||||
#include "def/gfx12_def.h"
|
||||
#include "pm4/gfx12_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx12 factory class
|
||||
class Gfx12Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx12Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx12Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
bool IsGFX12() const override { return true; }
|
||||
|
||||
protected:
|
||||
void ConstructBuilders(const AgentInfo* agent_info);
|
||||
void ConstructTable(const AgentInfo* agent_info);
|
||||
void Init(const AgentInfo* agent_info) {
|
||||
agent_info_ = agent_info;
|
||||
ConstructBuilders(agent_info);
|
||||
ConstructTable(agent_info);
|
||||
}
|
||||
const GpuBlockInfo* block_table_[LastCounterBlockId + 1]{};
|
||||
};
|
||||
|
||||
void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx12CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx12CmdBuilder, gfx12_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
}
|
||||
|
||||
void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
|
||||
// Global blocks
|
||||
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPC)] = &CpcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPF)] = &CpfCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(CPG)] = &CpgCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCEA)] = &GceaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GCR)] = &GcrCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2A)] = &Gl2aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL2C)] = &Gl2cCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBM)] = &GrbmCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(RLC)] = &RlcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SDMA_PM)] = &SdmaPmCounterBlockInfo;
|
||||
// SE blocks
|
||||
block_table_[__BLOCK_ID(GCEA_SE)] = &GceaSeCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GRBMH)] = &GrbmhCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SPI)] = &SpiCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(SQ)] = &SqcCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GC_UTCL1)] = &GcUtcl1CounterBlockInfo;
|
||||
// SA blocks
|
||||
block_table_[__BLOCK_ID(GL1A)] = &Gl1aCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(GL1C)] = &Gl1cCounterBlockInfo;
|
||||
// WGP blocks
|
||||
block_table_[__BLOCK_ID(TA)] = &TaCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TCP)] = &TcpCounterBlockInfo;
|
||||
block_table_[__BLOCK_ID(TD)] = &TdCounterBlockInfo;
|
||||
}
|
||||
|
||||
// Pm4Factory create methods
|
||||
// Allocates a Gfx12Factory for `agent_info` and hands it back as a Pm4Factory.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Gfx12Create(const AgentInfo* agent_info) {
  auto* factory = new Gfx12Factory(agent_info);
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Gfx12Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,81 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx908_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
const GpuBlockInfo* Mi100Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi100Factory::Mi100Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (i == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[i] = block_info;
|
||||
|
||||
// overwrite block info for any update from gfx9 to mi100
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 303;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 87;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 295;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 58;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 83;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = gfx9_cntx_prim::SDMA_COUNTER_BLOCK_NUM_INSTANCES;
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 6;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a Mi100Factory for `agent_info` and hands it back as a Pm4Factory.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi100Create(const AgentInfo* agent_info) {
  auto* factory = new Mi100Factory(agent_info);
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Mi100Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,93 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx90a_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Mi200 factory class
|
||||
class Mi200Factory : public Gfx9Factory {
|
||||
public:
|
||||
explicit Mi200Factory(const AgentInfo* agent_info);
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 185; };
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
const GpuBlockInfo* Mi200Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {};
|
||||
|
||||
Mi200Factory::Mi200Factory(const AgentInfo* agent_info)
|
||||
: Gfx9Factory(block_table_, sizeof(block_table_), agent_info) {
|
||||
for (unsigned i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[i];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (i == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[i] = block_info;
|
||||
// overwrite block info for any update from gfx9 to mi100
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 303;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 87;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 295;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 58;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 83;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = 5;
|
||||
// Print(block_info);
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 9;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a Mi200Factory for `agent_info` and hands it back as a Pm4Factory.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi200Create(const AgentInfo* agent_info) {
  auto* factory = new Mi200Factory(agent_info);
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Mi200Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,108 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx940_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
class Mi300Factory : public Mi100Factory {
|
||||
public:
|
||||
explicit Mi300Factory(const AgentInfo* agent_info) : Mi100Factory(agent_info) {
|
||||
for (unsigned blockname_id = 0; blockname_id < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER;
|
||||
++blockname_id) {
|
||||
const GpuBlockInfo* base_table_ptr = Gfx9Factory::block_table_[blockname_id];
|
||||
if (base_table_ptr == NULL) continue;
|
||||
GpuBlockInfo* block_info = nullptr;
|
||||
if (blockname_id == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB)
|
||||
block_info = new GpuBlockInfo(RpbCounterBlockInfo);
|
||||
else if (blockname_id == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC)
|
||||
block_info = new GpuBlockInfo(AtcCounterBlockInfo);
|
||||
else
|
||||
block_info = new GpuBlockInfo(*base_table_ptr);
|
||||
block_table_[blockname_id] = block_info;
|
||||
// overwrite block info for any update from gfx9 to mi300
|
||||
switch (block_info->id) {
|
||||
case SqCounterBlockId:
|
||||
block_info->event_id_max = 373;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->event_id_max = 84;
|
||||
break;
|
||||
case TccCounterBlockId:
|
||||
block_info->instance_count = 16;
|
||||
block_info->event_id_max = 199;
|
||||
break;
|
||||
case TcaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 34;
|
||||
break;
|
||||
case GceaCounterBlockId:
|
||||
block_info->instance_count = 32;
|
||||
block_info->event_id_max = 82;
|
||||
break;
|
||||
case SdmaCounterBlockId:
|
||||
block_info->instance_count = 4 * pm4_builder::MAX_AID;
|
||||
break;
|
||||
case UmcCounterBlockId:
|
||||
block_info->counter_count = 11;
|
||||
block_info->instance_count = 32 * pm4_builder::MAX_AID;
|
||||
break;
|
||||
case RpbCounterBlockId:
|
||||
block_info->instance_count = 4;
|
||||
break;
|
||||
case AtcCounterBlockId:
|
||||
block_info->instance_count = 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 184; };
|
||||
};
|
||||
|
||||
// Allocates an Mi300Factory for `agent_info` and returns it as a Pm4Factory*.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi300Create(const AgentInfo* agent_info) {
  Mi300Factory* const factory = new Mi300Factory(agent_info);
  // NOTE(review): a plain `new` normally reports failure by throwing
  // std::bad_alloc, so this branch looks unreachable - confirm before relying on it.
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Mi300Factory allocation failed");
}
|
||||
|
||||
class Mi350Factory : public Mi300Factory {
|
||||
public:
|
||||
// MI350 is a copy of Mi300
|
||||
explicit Mi350Factory(const AgentInfo* agent_info) : Mi300Factory(agent_info) {}
|
||||
|
||||
virtual int GetAccumLowID() const override { return 1; };
|
||||
virtual int GetAccumHiID() const override { return 200; };
|
||||
};
|
||||
|
||||
// Allocates an Mi350Factory for `agent_info` and returns it as a Pm4Factory*.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Mi350Create(const AgentInfo* agent_info) {
  Mi350Factory* const factory = new Mi350Factory(agent_info);
  // NOTE(review): a plain `new` normally reports failure by throwing
  // std::bad_alloc, so this branch looks unreachable - confirm before relying on it.
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Mi350Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,100 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/gfx9_factory.h"
|
||||
#include "def/gfx9_def.h"
|
||||
#include "pm4/gfx9_cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx factory init
|
||||
void Gfx9Factory::Init(const AgentInfo* agent_info) {
|
||||
Pm4Factory::cmd_builder_ = new pm4_builder::Gfx9CmdBuilder(nullptr);
|
||||
if (Pm4Factory::cmd_builder_ == NULL) throw aql_profile_exc_msg("CmdBuilder allocation failed");
|
||||
|
||||
// Mark and set the mode
|
||||
if (Pm4Factory::IsConcurrent()) {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim, true>(
|
||||
agent_info);
|
||||
} else {
|
||||
Pm4Factory::pmc_builder_ =
|
||||
new pm4_builder::GpuPmcBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim, false>(
|
||||
agent_info);
|
||||
}
|
||||
if (Pm4Factory::pmc_builder_ == NULL) throw aql_profile_exc_msg("PmcBuilder allocation failed");
|
||||
|
||||
Pm4Factory::spm_builder_ =
|
||||
new pm4_builder::GpuSpmBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::spm_builder_ == NULL) throw aql_profile_exc_msg("SpmBuilder allocation failed");
|
||||
|
||||
Pm4Factory::sqtt_builder_ =
|
||||
new pm4_builder::GpuSqttBuilder<pm4_builder::Gfx9CmdBuilder, gfx9_cntx_prim>(agent_info);
|
||||
if (Pm4Factory::sqtt_builder_ == NULL) throw aql_profile_exc_msg("SqttBuilder allocation failed");
|
||||
|
||||
agent_info_ = agent_info;
|
||||
}
|
||||
|
||||
void Gfx9Factory::Print(const GpuBlockInfo* block_info) {
|
||||
std::cout << "Block name: " << block_info->name << std::endl;
|
||||
std::cout << "\tInstances: " << block_info->instance_count << std::endl;
|
||||
std::cout << "\tMax Events: " << block_info->event_id_max << std::endl;
|
||||
std::cout << "\tCounters: " << block_info->counter_count << std::endl;
|
||||
auto counters = block_info->instance_count * block_info->counter_count;
|
||||
for (int i = 0; i < counters; ++i) {
|
||||
auto reg_info = block_info->counter_reg_info[i];
|
||||
std::cout << "\t " << i << ": select_addr = 0x" << std::hex << reg_info.select_addr.offset
|
||||
<< "(" << reg_info.select_addr.offset * 4 << ")"
|
||||
<< ", control_addr = 0x" << reg_info.control_addr.offset << "("
|
||||
<< reg_info.control_addr.offset * 4 << ")"
|
||||
<< ", counter_addr_lo = 0x" << reg_info.register_addr_lo.offset << "("
|
||||
<< reg_info.register_addr_lo.offset * 4 << ")"
|
||||
<< ", counter_addr_hi = 0x" << reg_info.register_addr_hi.offset << "("
|
||||
<< reg_info.register_addr_hi.offset * 4 << ")" << std::dec << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// GFX9 block table
|
||||
// One slot per block; a null slot means no GFX9 descriptor is registered for
// that position. The trailing number is the slot index in declaration order.
const GpuBlockInfo* Gfx9Factory::block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
    &CpcCounterBlockInfo,     // 0
    &CpfCounterBlockInfo,     // 1
    &GdsCounterBlockInfo,     // 2
    &GrbmCounterBlockInfo,    // 3
    &GrbmSeCounterBlockInfo,  // 4
    &SpiCounterBlockInfo,     // 5
    &SqCounterBlockInfo,      // 6
    &SqCsCounterBlockInfo,    // 7
    nullptr,                  // 8  GFX? SRBM
    &SxCounterBlockInfo,      // 9
    &TaCounterBlockInfo,      // 10
    &TcaCounterBlockInfo,     // 11
    &TccCounterBlockInfo,     // 12
    &TcpCounterBlockInfo,     // 13
    &TdCounterBlockInfo,      // 14
    // MC blocks
    nullptr,                  // 15 MC_ARB
    nullptr,                  // 16 MC_HUB
    nullptr,                  // 17 MC_MCBVM
    nullptr,                  // 18 MC_SEQ
    &McVmL2CounterBlockInfo,  // 19
    nullptr,                  // 20 MC_XBAR
    &AtcCounterBlockInfo,     // 21
    &AtcL2CounterBlockInfo,   // 22
    &GceaCounterBlockInfo,    // 23
    &RpbCounterBlockInfo,     // 24
    // System blocks
    nullptr,                  // 25 &SdmaCounterBlockInfo
    nullptr,                  // 26 GL1A
    nullptr,                  // 27 GL1C
    nullptr,                  // 28 GL2A
    nullptr,                  // 29 GL2C
    nullptr,                  // 30 GCR
    nullptr,                  // 31 GUS
    nullptr                   // 32 &UmcCounterBlockInfo
};
|
||||
|
||||
// Pm4Factory create methods
|
||||
// Allocates a Gfx9Factory for `agent_info` and returns it as a Pm4Factory*.
// Throws aql_profile_exc_msg if the allocation yields a null pointer.
Pm4Factory* Pm4Factory::Gfx9Create(const AgentInfo* agent_info) {
  Gfx9Factory* const factory = new Gfx9Factory(agent_info);
  // NOTE(review): a plain `new` normally reports failure by throwing
  // std::bad_alloc, so this branch looks unreachable - confirm before relying on it.
  if (factory != nullptr) return factory;
  throw aql_profile_exc_msg("Gfx9Factory allocation failed");
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,61 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX9_FACTORY_H_
|
||||
#define _GFX9_FACTORY_H_
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Gfx9 factory class
|
||||
class Gfx9Factory : public Pm4Factory {
|
||||
public:
|
||||
explicit Gfx9Factory(const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(block_table_, sizeof(block_table_))) {
|
||||
Init(agent_info);
|
||||
}
|
||||
Gfx9Factory(const GpuBlockInfo** table, const uint32_t& size, const AgentInfo* agent_info)
|
||||
: Pm4Factory(BlockInfoMap(table, size)) {
|
||||
Init(agent_info);
|
||||
}
|
||||
|
||||
bool IsGFX9() const override { return true; }
|
||||
|
||||
protected:
|
||||
void Init(const AgentInfo* agent_info);
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
|
||||
static void Print(const GpuBlockInfo* block_info);
|
||||
};
|
||||
|
||||
// Mi100 factory class
|
||||
class Mi100Factory : public Gfx9Factory {
|
||||
public:
|
||||
explicit Mi100Factory(const AgentInfo* agent_info);
|
||||
|
||||
protected:
|
||||
static const GpuBlockInfo* block_table_[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _GFX9_FACTORY_H_
|
||||
@@ -0,0 +1,7 @@
|
||||
# Public headers that are installed with the package.
set(AQLPROFILE_HEADER_FILES aql_profile_v2.h)

# Install the headers into <includedir>/aqlprofile-sdk.
install(FILES ${AQLPROFILE_HEADER_FILES}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/aqlprofile-sdk)
|
||||
@@ -0,0 +1,434 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} aqlprofile_handle_t;
|
||||
|
||||
/**
 * @brief Memory placement hints; one of these values is carried in the
 * memory_hint field of aqlprofile_buffer_desc_flags_t.
 */
typedef enum {
  AQLPROFILE_MEMORY_HINT_NONE = 0,
  AQLPROFILE_MEMORY_HINT_HOST = 1,
  AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED = 2,
  AQLPROFILE_MEMORY_HINT_DEVICE_COHERENT = 3,
  AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT = 4,
  AQLPROFILE_MEMORY_HINT_LAST /* one past the last valid hint */
} aqlprofile_memory_hint_t;
|
||||
|
||||
/**
 * @brief Version selector for the agent info structure passed to
 * aqlprofile_register_agent_info().
 */
typedef enum {
  AQLPROFILE_AGENT_VERSION_NONE = 0,
  AQLPROFILE_AGENT_VERSION_V0 = 1,
  AQLPROFILE_AGENT_VERSION_V1 = 2,
  AQLPROFILE_AGENT_VERSION_LAST /* one past the last valid version */
} aqlprofile_agent_version_t;
|
||||
|
||||
/**
|
||||
* @brief Flags to describe which agents can access given buffer.
|
||||
*/
|
||||
typedef union {
|
||||
uint32_t raw;
|
||||
struct {
|
||||
uint32_t device_access : 1;
|
||||
uint32_t host_access : 1;
|
||||
uint32_t memory_hint : 6; // One of aqlprofile_memory_hint_t
|
||||
uint32_t _reserved : 24;
|
||||
};
|
||||
} aqlprofile_buffer_desc_flags_t;
|
||||
|
||||
/**
|
||||
* @brief Callback to request a memory buffer, which will be tied to a profile.
|
||||
* The user is responsible for clearing up memory after the profile is no longer needed.
|
||||
* @param[out] ptr The pointer containing memory.
|
||||
* @param[in] size Minimum requested buffer size.
|
||||
* @param[in] flags Access flags, requesting which agents need to read/write to the buffer.
|
||||
* @param[in] userdata Data to be passed back to user.
|
||||
* @retval HSA_STATUS_SUCCESS if successful
|
||||
* @retval HSA_STATUS_ERROR if memory could not be allocated
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_memory_alloc_callback_t)(void** ptr, uint64_t size,
|
||||
aqlprofile_buffer_desc_flags_t flags,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Callback to dealloc memory requested via aqlprofile_memory_alloc_callback_t
|
||||
* @param[in] ptr The pointer containing memory.
|
||||
* @param[in] userdata Data to be passed back to user.
|
||||
* @note The callback type returns void, so no status is reported back.
|
||||
*/
|
||||
typedef void (*aqlprofile_memory_dealloc_callback_t)(void* ptr, void* userdata);
|
||||
|
||||
/**
 * @brief Accumulation modes; one of these values is carried in the accum field
 * of aqlprofile_pmc_event_flags_t.
 */
typedef enum {
  /** Do not accumulate event */
  AQLPROFILE_ACCUMULATION_NONE = 0,
  /** The event should be integrated over quad-cycles */
  AQLPROFILE_ACCUMULATION_LO_RES,
  /** The event should be integrated every cycle */
  AQLPROFILE_ACCUMULATION_HI_RES,
  /** One past the last valid accumulation type */
  AQLPROFILE_ACCUMULATION_LAST,
} aqlprofile_accumulation_type_t;
|
||||
|
||||
/**
|
||||
* @brief Special flags indicating additional properties to a counter. E.g. Accumulation metrics
|
||||
*/
|
||||
typedef union {
|
||||
uint32_t raw;
|
||||
struct {
|
||||
uint32_t accum : 3; /**< One of aqlprofile_accumulation_type_t */
|
||||
uint32_t _reserved : 29;
|
||||
} sq_flags;
|
||||
} aqlprofile_pmc_event_flags_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing all necessary information of an event (counter).
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t block_index; /**< Block channel. */
|
||||
uint32_t event_id; /**< Event ID as fined by XML */
|
||||
aqlprofile_pmc_event_flags_t flags; /**< Special event flags e.g. accumulation */
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name; /**< Block name as defined by block indexes */
|
||||
} aqlprofile_pmc_event_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing information about the agent. User code sets these values
|
||||
* to describe the agent to profile. Information can be obtained either from HSA
|
||||
* (if loaded) or the KFD topology.
|
||||
*/
|
||||
typedef struct {
|
||||
const char* agent_gfxip; /**< Agent GFXIP (HSA_AGENT_INFO_NAME or KFD.product_name) */
|
||||
uint32_t xcc_num; /**< XCC's on the agent (HSA_AMD_AGENT_INFO_NUM_XCC or KFD.num_xcc) */
|
||||
uint32_t se_num; /**< SE's on the agent (HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES or
|
||||
KFD.num_shader_banks) */
|
||||
uint32_t cu_num; /**< CU's on the agent (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT or KFD.cu_count) */
|
||||
uint32_t shader_arrays_per_se; /**< Shader arrays per SE of agent
|
||||
(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE or
|
||||
KFD.simd_arrays_per_engine)*/
|
||||
} aqlprofile_agent_info_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing information about the agent. User code sets these values
|
||||
* to describe the agent to profile. Information can be obtained either from HSA
|
||||
* (if loaded) or the KFD topology.
|
||||
*/
|
||||
typedef struct {
|
||||
const char* agent_gfxip; /**< Agent GFXIP (HSA_AGENT_INFO_NAME or KFD.product_name) */
|
||||
uint32_t xcc_num; /**< XCC's on the agent (HSA_AMD_AGENT_INFO_NUM_XCC or KFD.num_xcc) */
|
||||
uint32_t se_num; /**< SE's on the agent (HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES or
|
||||
KFD.num_shader_banks) */
|
||||
uint32_t cu_num; /**< CU's on the agent (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT or KFD.cu_count) */
|
||||
uint32_t shader_arrays_per_se; /**< Shader arrays per SE of agent
|
||||
(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE or
|
||||
KFD.simd_arrays_per_engine)*/
|
||||
uint32_t domain; /**< PCI domain of the GPU agent (HSA_AMD_AGENT_INFO_DOMAIN or KFD.domain) */
|
||||
uint32_t location_id; /**< BDF (Bus/Device/function number) of the GPU agent
|
||||
(HSA_AMD_AGENT_INFO_BDFID or KFD.location_id)*/
|
||||
} aqlprofile_agent_info_v1_t;
|
||||
|
||||
/**
|
||||
* @brief Struct containing a handle to a registered agent
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} aqlprofile_agent_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Registers an agent to be used with AQL profile.
|
||||
* @param[out] agent_id Handle to newly registered agent
|
||||
* @param[in] agent_info Info to register a new agent with AQL Profiler
|
||||
* @retval HSA_STATUS_SUCCESS registration ok
|
||||
* @retval HSA_STATUS_ERROR registration failed
|
||||
*/
|
||||
hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
|
||||
const aqlprofile_agent_info_t* agent_info);
|
||||
|
||||
/**
|
||||
* @brief Registers an agent to be used with AQL profile.
|
||||
* @param[out] agent_id Handle to newly registered agent
|
||||
* @param[in] agent_info Info to register a new agent with AQL Profiler
|
||||
* @param[in] version Version of the agent info structure
|
||||
* @retval HSA_STATUS_SUCCESS registration ok
|
||||
* @retval HSA_STATUS_ERROR registration failed
|
||||
*/
|
||||
hsa_status_t aqlprofile_register_agent_info(aqlprofile_agent_handle_t* agent_id,
|
||||
const void* agent_info,
|
||||
aqlprofile_agent_version_t version);
|
||||
/**
|
||||
* @brief AQLprofile struct containing information for perfmon events
|
||||
*/
|
||||
typedef struct {
|
||||
aqlprofile_agent_handle_t agent;
|
||||
const aqlprofile_pmc_event_t* events;
|
||||
uint32_t event_count;
|
||||
} aqlprofile_pmc_profile_t;
|
||||
|
||||
/**
 * @brief Profile attributes that can be queried with aqlprofile_get_pmc_info().
 * Value 3 is not assigned in this enum.
 */
typedef enum {
  /** get_info returns uint32_t value */
  AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0,
  /** get_info returns uint32_t value */
  AQLPROFILE_INFO_PMC_DATA_SIZE = 1,
  /** get_info returns PMC uint64_t value in info_data object */
  AQLPROFILE_INFO_PMC_DATA = 2,
  /** get_info returns number of block counters */
  AQLPROFILE_INFO_BLOCK_COUNTERS = 4,
  /** get_info returns block id, instances by name string using _id_query_t */
  AQLPROFILE_INFO_BLOCK_ID = 5,
  /** get_info returns size/pointer for counters enable command buffer */
  AQLPROFILE_INFO_ENABLE_CMD = 6,
  /** get_info returns size/pointer for counters disable command buffer */
  AQLPROFILE_INFO_DISABLE_CMD = 7,
} aqlprofile_pmc_info_type_t;
|
||||
|
||||
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
|
||||
aqlprofile_pmc_info_type_t attribute, void* value);
|
||||
|
||||
// Profile parameter object
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
||||
union {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t counter_id : 28;
|
||||
uint32_t simd_mask : 4;
|
||||
};
|
||||
};
|
||||
} aqlprofile_att_parameter_t;
|
||||
|
||||
/**
|
||||
* @brief AQLprofile struct containing information for Advanced Thread Trace
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_agent_t agent;
|
||||
const aqlprofile_att_parameter_t* parameters;
|
||||
uint32_t parameter_count;
|
||||
} aqlprofile_att_profile_t;
|
||||
|
||||
/**
|
||||
* @brief Data callback for perfmon events. Each event will call this once per coordinate
|
||||
* @param[in] event The event information passed in from aqlprofile_pmc_profile_t
|
||||
* @param[in] counter_id Internal ID of the counter
|
||||
* @param[in] counter_value The event value, as incremented from start() to stop()
|
||||
* @param[in] userdata Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS to continue iteration
|
||||
* @retval HSA_STATUS_ERROR to stop callback iteration
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_pmc_data_callback_t)(aqlprofile_pmc_event_t event,
|
||||
uint64_t counter_id, uint64_t counter_value,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Data callback for thread trace. This will be called at least once per shader engine
|
||||
* @param[in] shader Shader Engine ID
|
||||
* @param[in] buffer Pointer containing the data
|
||||
* @param[in] size Amount of bytes used by thread trace
|
||||
* @param[in] callback_data Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS to continue iteration
|
||||
* @retval HSA_STATUS_ERROR to stop callback iteration
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_att_data_callback_t)(uint32_t shader, void* buffer, uint64_t size,
|
||||
void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Memory copy fn for aqlprofile to copy data.
|
||||
* @param[in] dst Destination pointer to copy data to.
|
||||
* @param[in] src Source pointer where data is to be copied from.
|
||||
* @param[in] size Amount of bytes to be copied.
|
||||
* @param[in] userdata Data returned to user
|
||||
* @retval HSA_STATUS_SUCCESS on success
|
||||
* @retval HSA_STATUS_ERROR on failure
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_memory_copy_t)(void* dst, const void* src, size_t size,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Validates the event for the agent.
|
||||
* @param[in] agent The agent to validate the event for.
|
||||
* @param[in] event The event to validate.
|
||||
* @param[out] result True if the event is valid for the agent, false otherwise.
|
||||
* @retval HSA_STATUS_SUCCESS if the event was validated.
|
||||
* @retval HSA_STATUS_ERROR if the event was not validated.
|
||||
*/
|
||||
hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
|
||||
const aqlprofile_pmc_event_t* event, bool* result);
|
||||
|
||||
/**
|
||||
* @brief Iterate_data() will parse the event data and call @callback with the resulting event data
|
||||
* @param[in] handle The handle returned from aqlprofile_pmc_create_packets()
|
||||
* @param[in] callback CB where the resulting event values are going to be returned
|
||||
* @param[in] userdata Data sent back to user
|
||||
* @retval HSA_STATUS_SUCCESS all operations exited successfully
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_pmc_data_callback_t callback, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Struct to be returned by aqlprofile_pmc_create_packets
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ext_amd_aql_pm4_packet_t start_packet; /**< Reset counters and start incrementing */
|
||||
hsa_ext_amd_aql_pm4_packet_t stop_packet; /**< Pause counters from incrementing */
|
||||
hsa_ext_amd_aql_pm4_packet_t read_packet; /**< Retrieve results from device */
|
||||
} aqlprofile_pmc_aql_packets_t;
|
||||
|
||||
/**
|
||||
* @brief Function to create AQL packets to be inserted into the queue.
|
||||
* @param[out] handle To be passed to iterate_data()
|
||||
* @param[out] packets Pointer to where the start, stop and read packets will be written to
|
||||
* @param[in] profile Agent and events information
|
||||
* @param[in] alloc_cb Memory allocation, which may request cpu or gpu memory for internal use
|
||||
* @param[in] dealloc_cb Function to free memory allocated by alloc_cb
* @param[in] memcpy_cb Memory copy function (see aqlprofile_memory_copy_t)
|
||||
* @param[in] userdata Data passed back to user via memory alloc callback
|
||||
*/
|
||||
hsa_status_t aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_pmc_aql_packets_t* packets,
|
||||
aqlprofile_pmc_profile_t profile,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
aqlprofile_memory_copy_t memcpy_cb, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Function to delete AQL packets after creation by aqlprofile_pmc_create_packets
|
||||
* @param[in] handle Returned by aqlprofile_pmc_create_packets()
|
||||
*/
|
||||
void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Iterates over thread trace data and returns the data to the user
|
||||
* @param[in] handle The handle returned from aqlprofile_att_create_packets()
|
||||
* @param[in] callback CB where the resulting data is going to be returned
|
||||
* @param[in] userdata Data sent back to user
|
||||
* @retval HSA_STATUS_SUCCESS all operations exited successfully
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_att_data_callback_t callback, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Struct containing AQLpackets to start and stop thread trace
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ext_amd_aql_pm4_packet_t start_packet; /**< Packet to start thread trace */
|
||||
hsa_ext_amd_aql_pm4_packet_t stop_packet; /**< Packet to stop thread trace and flush data */
|
||||
} aqlprofile_att_control_aql_packets_t;
|
||||
|
||||
/**
|
||||
* @brief Fn to create start and stop thread trace packets
|
||||
* @param[out] handle To be passed to iterate_data()
|
||||
* @param[out] packets Packets returned by this function to start and stop thread trace
|
||||
* @param[in] profile Agent information and extra parameters for thread trace
|
||||
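* @param[in] alloc_cb Memory allocation fn which may request cpu or gpu memory
* @param[in] dealloc_cb Fn to free memory allocated by alloc_cb
* @param[in] memcpy_cb Memory copy fn (see aqlprofile_memory_copy_t)
* @param[in] userdata Data passed back to user via the callbacks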
|
||||
* @retval HSA_STATUS_SUCCESS if all packets created successfully
|
||||
* @retval HSA_STATUS_ERROR otherwise
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_control_aql_packets_t* packets,
|
||||
aqlprofile_att_profile_t profile,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
aqlprofile_memory_copy_t memcpy_cb, void* userdata);
|
||||
|
||||
void aqlprofile_att_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
||||
* @param [in] id Integer identifying the dimension.
|
||||
* @param [in] name Name of the dimension
|
||||
* @param [in] data User data supplied to @ref aqlprofile_iterate_event_ids
|
||||
* @retval HSA_STATUS_SUCCESS Continues iteration
|
||||
* @retval OTHERS Any other HSA return values stops iteration, passing back this value through
|
||||
* @ref aqlprofile_iterate_event_ids
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_eventname_callback_t)(int id, const char* name, void* data);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all possible event coordinate IDs and their names.
|
||||
* @param [in] callback Callback to use for iteration of dimensions
|
||||
* @param [in] user_data Data to supply to callback @ref aqlprofile_eventname_callback_t
|
||||
* @retval HSA_STATUS_SUCCESS if successful
|
||||
* @retval HSA_STATUS_ERROR if error on iteration
|
||||
* @retval OTHERS If @ref aqlprofile_eventname_callback_t returns non-HSA_STATUS_SUCCESS,
|
||||
* that value is returned.
|
||||
*/
|
||||
hsa_status_t aqlprofile_iterate_event_ids(aqlprofile_eventname_callback_t callback,
|
||||
void* user_data);
|
||||
|
||||
/**
|
||||
* @brief Callback invoked for each event coordinate of a given agent_t and event_t.
|
||||
* @param position A counting sequence indicating callback number.
|
||||
* @param id Coordinate ID as in _iterate_event_ids.
|
||||
* @param extent Coordinate extent indicating maximum allowed instances.
|
||||
* @param coordinate The coordinate, in the range [0,extent-1].
|
||||
* @param name Coordinate name as in _iterate_event_ids.
|
||||
* @param userdata Userdata returned from _iterate_event_coord function.
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_coordinate_callback_t)(int position, int id, int extent,
|
||||
int coordinate, const char* name,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param[in] agent HSA agent.
|
||||
* @param[in] event The event ID and block ID to iterate for.
|
||||
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
|
||||
* @param[in] callback Callback function to return the coordinates.
|
||||
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
|
||||
*/
|
||||
hsa_status_t aqlprofile_iterate_event_coord(aqlprofile_agent_handle_t agent,
|
||||
aqlprofile_pmc_event_t event, uint64_t sample_id,
|
||||
aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata);
|
||||
|
||||
typedef struct {
|
||||
uint64_t id;
|
||||
uint64_t addr;
|
||||
uint64_t size;
|
||||
hsa_agent_t agent;
|
||||
uint32_t isUnload : 1;
|
||||
uint32_t fromStart : 1;
|
||||
} aqlprofile_att_codeobj_data_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an AQL packet for marking code objects
|
||||
* @param[out] packet Returned packet
|
||||
* @param[out] handle The handle created for these packets
|
||||
* @param[in] data Code object information
|
||||
* @param[in] alloc_cb Callback to return both CPU and GPU accessible memory on demand
|
||||
* @param[in] dealloc_cb Callback to free data allocated by alloc_cb()
|
||||
* @param[in] userdata Userdata to be passed back to memory callbacks
|
||||
*/
|
||||
hsa_status_t aqlprofile_att_codeobj_marker(hsa_ext_amd_aql_pm4_packet_t* packet,
|
||||
aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_codeobj_data_t data,
|
||||
aqlprofile_memory_alloc_callback_t alloc_cb,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc_cb,
|
||||
void* userdata);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,51 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_IP_DISCOVERY_H_
|
||||
#define SRC_CORE_IP_DISCOVERY_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
|
||||
#include "util/reg_offsets.h"
|
||||
|
||||
using base_addr_segments_t = std::array<uint32_t, HWIP_MAX_SEGMENT>;
|
||||
|
||||
// Represents a single entry in the discovery table, containing information about a specific IP
|
||||
// block.
|
||||
struct discovery_table_entry_t {
|
||||
int die{0}; // Die index
|
||||
base_addr_segments_t segments{}; // Base address segments
|
||||
int major{0}; // Major version of the IP
|
||||
int minor{0}; // Minor version of the IP
|
||||
int revision{0}; // Revision number of the IP
|
||||
int instance{0}; // Instance ID of the IP
|
||||
std::string ipname{}; // Name of the IP block
|
||||
};
|
||||
|
||||
using discovery_table_t = std::vector<discovery_table_entry_t>;
|
||||
// Builds the discovery table for the device identified by PCIe `domain` and `bdf`.
// NOTE(review): the visible caller wraps this call in try/catch for std::exception, so it
// presumably throws on failure -- confirm against the implementation.
discovery_table_t parse_ip_discovery(uint32_t domain, uint32_t bdf);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,98 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <shared_mutex>
|
||||
#include <array>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "ip_offset_table_init.h"
|
||||
|
||||
// Pair of pcie domain, bdf
|
||||
using domain_bdf_t = std::pair<uint32_t, uint32_t>;
|
||||
|
||||
// Hash function for domain_bdf_t
|
||||
template <>
|
||||
struct std::hash<domain_bdf_t> {
|
||||
std::size_t operator()(const domain_bdf_t& key) const {
|
||||
return std::hash<uint32_t>()(key.first) ^ (std::hash<uint32_t>()(key.second) << 1);
|
||||
}
|
||||
};
|
||||
|
||||
// Map from (Domain, BDF) to reg_base_offset_table*
|
||||
using reg_base_offset_table_cache = std::unordered_map<domain_bdf_t, const reg_base_offset_table*>;
|
||||
|
||||
class locked_ip_offset_table_cache {
|
||||
public:
|
||||
const reg_base_offset_table* get(const AgentInfo* agent_info) {
|
||||
{
|
||||
std::shared_lock lock{mutex};
|
||||
auto it = cache.find(std::make_pair(agent_info->domain, agent_info->bdf_id));
|
||||
if (it != cache.end()) return it->second;
|
||||
}
|
||||
{
|
||||
std::string_view gfxip(agent_info->gfxip);
|
||||
std::unique_lock lock{mutex};
|
||||
const reg_base_offset_table* table = nullptr;
|
||||
|
||||
if (auto gfxip_prefix = gfxip.substr(0, 4); gfxip_prefix == "gfx9")
|
||||
table = vega20_reg_base_init();
|
||||
else {
|
||||
if (auto gfxip_prefix = gfxip.substr(0, 5);
|
||||
gfxip_prefix == "gfx10" || gfxip_prefix == "gfx11" || gfxip_prefix == "gfx12") {
|
||||
table = navi_ip_offset_table_discovery_sysfs(agent_info->domain, agent_info->bdf_id);
|
||||
if (!table) table = sienna_cichlid_reg_base_init();
|
||||
}
|
||||
}
|
||||
|
||||
if (table) cache.emplace(std::make_pair(agent_info->domain, agent_info->bdf_id), table);
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
||||
static locked_ip_offset_table_cache& get_instance() {
|
||||
// Note: never cleanup, keep in memory to prevent issue with global destructor
|
||||
static auto* cache = new locked_ip_offset_table_cache{};
|
||||
return *cache;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_mutex mutex;
|
||||
reg_base_offset_table_cache cache;
|
||||
};
|
||||
|
||||
// acquire the IP offset table for the device using the domain and bdf_id
|
||||
const reg_base_offset_table* acquire_ip_offset_table(const AgentInfo* agent_info) {
|
||||
auto ip_offset_table = locked_ip_offset_table_cache::get_instance().get(agent_info);
|
||||
if (ip_offset_table == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"Failed to acquire the IP offset table for the device. Possible reasons include:\n"
|
||||
" 1. Incorrect or incomplete ROCm setup. Please verify your installation.\n"
|
||||
" 2. The device is not supported.\n"
|
||||
" 3. An internal error or bug.\n");
|
||||
}
|
||||
return ip_offset_table;
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
#define SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
|
||||
// static IP offset table init functions
|
||||
// Returns the static register base offset table for Vega20.
// NOTE(review): definition not visible here; description inferred from the name -- confirm.
const reg_base_offset_table* vega20_reg_base_init();
|
||||
// Returns the static Sienna Cichlid register base offset table (GC and ATHUB blocks only).
// The table is built once, on the first call, and the same pointer is returned afterwards.
const reg_base_offset_table* sienna_cichlid_reg_base_init();
|
||||
|
||||
// dynamic IP offset table functions
|
||||
// Builds a register base offset table from the IP discovery data of the device identified by
// PCIe `domain` and `bdf`. Returns nullptr when discovery fails. Each successful call
// allocates a new table that is never freed, so callers are expected to cache the result.
const reg_base_offset_table* navi_ip_offset_table_discovery_sysfs(uint32_t domain, uint32_t bdf);
|
||||
|
||||
#endif // SRC_CORE_IP_OFFSET_TABLE_INIT_H_
|
||||
@@ -0,0 +1,177 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_LOGGER_H_
|
||||
#define SRC_CORE_LOGGER_H_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Process-wide singleton logger.
//
// Values streamed with operator<< are appended to /tmp/aql_profile_log.txt when the
// HSA_VEN_AMD_AQLPROFILE_LOG environment variable is set. After the begm manipulator the
// streamed text is additionally accumulated in a per-thread "last message" string that can
// be read back with LastMessage(). The endl manipulator ends the current record.
//
// All state is guarded by the static recursive mutex_. The lock is taken per call, so a
// record written with several chained << operators is not atomic with respect to other
// threads.
class Logger {
 public:
  typedef std::recursive_mutex mutex_t;

  // Streams one value. The first value after endl starts a new timestamped record (Log);
  // subsequent values are appended to it (Put).
  template <typename T>
  Logger& operator<<(const T& m) {
    std::ostringstream oss;
    oss << m;
    // streaming_ is shared between threads: guard the read-modify-write with the same
    // mutex the other members use. The mutex is recursive, so Log()/Put() may lock again.
    std::lock_guard<mutex_t> lck(mutex_);
    if (!streaming_)
      Log(oss.str());
    else
      Put(oss.str());
    streaming_ = true;
    return *this;
  }

  // Manipulators (begm, endl) are plain functions that are invoked when streamed.
  typedef void (*manip_t)();
  Logger& operator<<(manip_t f) {
    f();
    return *this;
  }

  // Starts accumulating the streamed text into the calling thread's last message.
  static void begm() {
    Logger& logger = Instance();
    // messaging_ is read under mutex_ in Put()/ResetStreaming(); write it under the lock too.
    std::lock_guard<mutex_t> lck(mutex_);
    logger.messaging_ = true;
  }

  // Ends the current record.
  static void endl() { Instance().ResetStreaming(); }

  // Returns the message accumulated for the calling thread (empty if there is none).
  static const std::string& LastMessage() {
    Logger& logger = Instance();
    std::lock_guard<mutex_t> lck(mutex_);
    return logger.message_[GetTid()];
  }

  // Returns the singleton, creating it on first use.
  static Logger& Instance() {
    std::lock_guard<mutex_t> lck(mutex_);
    if (instance_ == nullptr) instance_ = new Logger();
    return *instance_;
  }

  // Destroys the singleton; a later Instance() call creates a new one.
  static void Destroy() {
    std::lock_guard<mutex_t> lck(mutex_);
    delete instance_;  // deleting a null pointer is a no-op
    instance_ = nullptr;
  }

 private:
  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

  Logger() : file_(nullptr), dirty_(false), streaming_(false), messaging_(false) {
    // The environment variable only enables file logging; its value is not used and the
    // log path is fixed.
    const char* path = getenv("HSA_VEN_AMD_AQLPROFILE_LOG");
    if (path != nullptr) {
      file_ = fopen("/tmp/aql_profile_log.txt", "a");
    }
    ResetStreaming();
  }

  ~Logger() {
    if (file_ != nullptr) {
      // Terminate the last record with a newline if anything was written.
      if (dirty_) Put("\n");
      fclose(file_);
    }
  }

  // Ends the current record; if it was a begm record, clears this thread's last message.
  void ResetStreaming() {
    std::lock_guard<mutex_t> lck(mutex_);
    if (messaging_) {
      message_[GetTid()] = "";
    }
    messaging_ = false;
    streaming_ = false;
  }

  // Appends m to this thread's last message (when messaging) and to the log file (when open).
  void Put(const std::string& m) {
    std::lock_guard<mutex_t> lck(mutex_);
    if (messaging_) {
      message_[GetTid()] += m;
    }
    if (file_ != nullptr) {
      dirty_ = true;
      // flock() serializes writers that share the log file across processes.
      flock(fileno(file_), LOCK_EX);
      fprintf(file_, "%s", m.c_str());
      fflush(file_);
      flock(fileno(file_), LOCK_UN);
    }
  }

  // Starts a new record: a "<timestamp pid tid> " header on a fresh line followed by m.
  void Log(const std::string& m) {
    const time_t rawtime = time(nullptr);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
    std::ostringstream oss;
    oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    Put(oss.str());
  }

  FILE* file_;      // log file, or nullptr when file logging is disabled
  bool dirty_;      // true once anything has been written to file_
  bool streaming_;  // true while a record is in progress (between the first << and endl)
  bool messaging_;  // true after begm until the next endl

  static mutex_t mutex_;
  static Logger* instance_;
  std::map<uint32_t, std::string> message_;  // thread id -> last message
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Logging entry points. Each macro expands to a Logger stream expression that first ends the
// previous record (endl) and then writes a severity prefix with the calling function's name.
// The variants ending in begm also record the streamed text as the thread's last message;
// ERR2_LOGGING does not.
#define ERR_LOGGING                                                                          \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl << "Error: " << __FUNCTION__ \
                                   << "(): " << aql_profile::Logger::begm)

#define ERR2_LOGGING \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl << "Error: " << __FUNCTION__ << "(): ")

#define INFO_LOGGING                                                                        \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl << "Info: " << __FUNCTION__ \
                                   << "(): " << aql_profile::Logger::begm)

#define WARN_LOGGING                                                                           \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl << "Warning: " << __FUNCTION__ \
                                   << "(): " << aql_profile::Logger::begm)

// Debug records carry the source location and exist only in DEBUG builds.
#ifdef DEBUG
#define DBG_LOGGING                                                                              \
  (aql_profile::Logger::Instance() << aql_profile::Logger::endl << "Debug: in " << __FUNCTION__ \
                                   << " at " << __FILE__ << " line " << __LINE__                 \
                                   << aql_profile::Logger::begm)
#endif
|
||||
|
||||
#endif // SRC_CORE_LOGGER_H_
|
||||
@@ -0,0 +1,61 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "memorymanager.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
std::atomic<size_t> MemoryManager::HANDLE_COUNTER{1};
|
||||
std::unordered_map<size_t, std::shared_ptr<MemoryManager>> MemoryManager::managers;
|
||||
std::mutex MemoryManager::managers_map_mutex;
|
||||
|
||||
void CounterMemoryManager::CopyEvents(const aqlprofile_pmc_event_t* _events, size_t count) {
|
||||
events.reserve(count + 4);
|
||||
int num_flag_metrics = 0;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
events.push_back(EventRequest{_events[i], false});
|
||||
num_flag_metrics += _events[i].flags.raw != 0;
|
||||
}
|
||||
|
||||
if (!num_flag_metrics) return;
|
||||
|
||||
std::sort(events.begin(), events.end());
|
||||
|
||||
std::vector<EventRequest> acc_requests;
|
||||
for (auto it = events.begin(); it != events.end(); it++) {
|
||||
if (!it->flags.raw) continue;
|
||||
|
||||
if (it != events.begin()) {
|
||||
auto prev = std::prev(it);
|
||||
if (it->IsSameNoFlags(*prev) && (!prev->flags.raw || prev->bInternal)) continue;
|
||||
}
|
||||
|
||||
EventRequest req = *it;
|
||||
req.bInternal = true;
|
||||
req.flags.raw = 0;
|
||||
acc_requests.push_back(req);
|
||||
}
|
||||
|
||||
if (!acc_requests.size()) return;
|
||||
|
||||
events.insert(events.end(), acc_requests.begin(), acc_requests.end());
|
||||
std::sort(events.begin(), events.end());
|
||||
}
|
||||
@@ -0,0 +1,258 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include "include/aql_profile_v2.h"
|
||||
#include <stdexcept>
|
||||
#include "pm4/trace_config.h"
|
||||
|
||||
struct EventRequest : public aqlprofile_pmc_event_t {
|
||||
bool bInternal;
|
||||
|
||||
auto GetOrder() const -> auto{
|
||||
uint64_t idx = bInternal ? 0 : 1;
|
||||
idx |= uint64_t(flags.raw) << 1;
|
||||
idx |= uint64_t(event_id) << 33;
|
||||
|
||||
uint64_t blk = block_index;
|
||||
blk |= uint64_t(block_name) << 32;
|
||||
|
||||
return std::pair<uint64_t, uint64_t>{blk, idx};
|
||||
}
|
||||
|
||||
bool operator<(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
if (idx1.first == idx2.first)
|
||||
return idx1.second < idx2.second;
|
||||
else
|
||||
return idx1.first < idx2.first;
|
||||
}
|
||||
|
||||
bool operator==(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
return idx1.second == idx2.second && idx1.first == idx2.first;
|
||||
}
|
||||
|
||||
bool IsSameNoFlags(const EventRequest& other) const {
|
||||
auto idx1 = this->GetOrder();
|
||||
auto idx2 = other.GetOrder();
|
||||
return idx1.first == idx2.first && event_id == other.event_id;
|
||||
}
|
||||
};
|
||||
|
||||
class MemoryManager {
|
||||
public:
|
||||
MemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: agent(agent),
|
||||
alloc_cb(alloc),
|
||||
dealloc_cb(dealloc),
|
||||
userdata(data),
|
||||
handle(HANDLE_COUNTER.fetch_add(1)) {}
|
||||
|
||||
MemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: agent_handle(agent),
|
||||
alloc_cb(alloc),
|
||||
dealloc_cb(dealloc),
|
||||
userdata(data),
|
||||
handle(HANDLE_COUNTER.fetch_add(1)) {}
|
||||
|
||||
virtual ~MemoryManager() {}
|
||||
|
||||
void CheckStatus(hsa_status_t status) const {
|
||||
if (status != HSA_STATUS_SUCCESS) throw status;
|
||||
}
|
||||
|
||||
void* GetCmdBuf() const { return cmdbuf.get(); }
|
||||
void* GetOutputBuf() const { return outputbuf.get(); }
|
||||
|
||||
size_t GetOutputBufSize() const { return outputbuf_size; }
|
||||
|
||||
size_t GetHandler() const { return handle; }
|
||||
hsa_agent_t GetAgent() const { return agent; }
|
||||
aqlprofile_agent_handle_t AgentHandle() const { return agent_handle; }
|
||||
|
||||
void CreateCmdBuf(size_t size) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = true;
|
||||
flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT;
|
||||
cmdbuf = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
virtual void CreateOutputBuf(size_t size) = 0;
|
||||
|
||||
static void RegisterManager(const std::shared_ptr<MemoryManager>& shared) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
managers[shared->handle] = shared;
|
||||
}
|
||||
|
||||
static void DeleteManager(size_t handle) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
managers.erase(handle);
|
||||
}
|
||||
|
||||
static std::shared_ptr<MemoryManager> GetManager(size_t handle) {
|
||||
std::lock_guard<std::mutex> lk(managers_map_mutex);
|
||||
try {
|
||||
return managers.at(handle);
|
||||
} catch (std::exception& e) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
struct MemoryDeleter {
|
||||
aqlprofile_memory_dealloc_callback_t free_fn;
|
||||
void* userdata;
|
||||
void operator()(void* ptr) const {
|
||||
if (ptr && free_fn) free_fn(ptr, userdata);
|
||||
};
|
||||
};
|
||||
|
||||
std::unique_ptr<void, MemoryDeleter> AllocMemory(size_t size,
|
||||
aqlprofile_buffer_desc_flags_t flags) const {
|
||||
void* ptr;
|
||||
CheckStatus(alloc_cb(&ptr, size, flags, userdata));
|
||||
return std::unique_ptr<void, MemoryDeleter>{ptr, MemoryDeleter{dealloc_cb, userdata}};
|
||||
}
|
||||
|
||||
aqlprofile_agent_handle_t agent_handle = {.handle = 0};
|
||||
hsa_agent_t agent = {.handle = 0};
|
||||
std::unique_ptr<void, MemoryDeleter> cmdbuf = nullptr;
|
||||
std::unique_ptr<void, MemoryDeleter> outputbuf = nullptr;
|
||||
size_t outputbuf_size = 0;
|
||||
|
||||
void* const userdata;
|
||||
aqlprofile_memory_alloc_callback_t const alloc_cb;
|
||||
aqlprofile_memory_dealloc_callback_t const dealloc_cb;
|
||||
size_t handle;
|
||||
|
||||
static std::atomic<size_t> HANDLE_COUNTER;
|
||||
static std::unordered_map<size_t, std::shared_ptr<MemoryManager>> managers;
|
||||
static std::mutex managers_map_mutex;
|
||||
};
|
||||
|
||||
class CounterMemoryManager : public MemoryManager {
|
||||
public:
|
||||
CounterMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
CounterMemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
void CreateOutputBuf(size_t size) override {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED;
|
||||
outputbuf = AllocMemory(size, flags);
|
||||
outputbuf_size = size;
|
||||
}
|
||||
|
||||
std::vector<EventRequest>& GetEvents() { return events; }
|
||||
void CopyEvents(const aqlprofile_pmc_event_t* events, size_t count);
|
||||
|
||||
protected:
|
||||
std::vector<EventRequest> events;
|
||||
};
|
||||
|
||||
class TraceMemoryManager : public MemoryManager {
|
||||
public:
|
||||
TraceMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc,
|
||||
aqlprofile_memory_copy_t _copy_fn, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data), copy_fn(_copy_fn) {}
|
||||
|
||||
TraceMemoryManager(aqlprofile_agent_handle_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {}
|
||||
|
||||
void CreateOutputBuf(size_t size) override {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_DEVICE_NONCOHERENT;
|
||||
outputbuf = AllocMemory(size, flags);
|
||||
outputbuf_size = size;
|
||||
}
|
||||
|
||||
void CreateTraceControlBuf(size_t size) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
flags.memory_hint = AQLPROFILE_MEMORY_HINT_HOST;
|
||||
trace_control_buf = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
const std::vector<hsa_ven_amd_aqlprofile_parameter_t>& GetATTParams() const { return att_params; }
|
||||
void CopyATTParams(hsa_ven_amd_aqlprofile_parameter_t* params, size_t count) {
|
||||
for (size_t i = 0; i < count; i++) this->att_params.push_back(params[i]);
|
||||
for (auto& param : att_params) {
|
||||
if (param.parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET)
|
||||
target_cu = param.value;
|
||||
else if (param.parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION)
|
||||
simd_mask = param.value;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
Type* GetTraceControlBuf() const {
|
||||
return reinterpret_cast<Type*>(trace_control_buf.get());
|
||||
}
|
||||
|
||||
void CopyMemory(void* dst, const void* src, size_t size) {
|
||||
this->copy_fn(dst, src, size, this->userdata);
|
||||
}
|
||||
|
||||
int GetSimdMask() const { return simd_mask; }
|
||||
|
||||
pm4_builder::TraceConfig config{};
|
||||
|
||||
protected:
|
||||
int target_cu = -1;
|
||||
int simd_mask = 0xF;
|
||||
aqlprofile_memory_copy_t copy_fn;
|
||||
std::vector<hsa_ven_amd_aqlprofile_parameter_t> att_params;
|
||||
std::unique_ptr<void, MemoryDeleter> trace_control_buf = nullptr;
|
||||
};
|
||||
|
||||
class CodeobjMemoryManager : public MemoryManager {
|
||||
public:
|
||||
CodeobjMemoryManager(hsa_agent_t agent, aqlprofile_memory_alloc_callback_t alloc,
|
||||
aqlprofile_memory_dealloc_callback_t dealloc, size_t size, void* data)
|
||||
: MemoryManager(agent, alloc, dealloc, data) {
|
||||
aqlprofile_buffer_desc_flags_t flags{};
|
||||
flags.host_access = flags.device_access = true;
|
||||
this->cmd_buffer = AllocMemory(size, flags);
|
||||
}
|
||||
|
||||
void CreateOutputBuf(size_t size) override{};
|
||||
std::unique_ptr<void, MemoryDeleter> cmd_buffer;
|
||||
};
|
||||
@@ -0,0 +1,103 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <shared_mutex>
|
||||
#include "ip_discovery.h"
|
||||
|
||||
#define __maybe_unused __attribute__((__unused__))
|
||||
|
||||
#include "linux/registers/sienna_cichlid_ip_offset.h"
|
||||
#include "util/reg_offsets.h"
|
||||
|
||||
#define LOG_VERBOSE 0
|
||||
|
||||
namespace {

// Prints `msg` on std::cerr when LOG_VERBOSE is non-zero; otherwise the message is dropped.
void LogErrors(std::string msg) {
#if LOG_VERBOSE
  std::cerr << msg << std::endl;
#else
  (void)msg;  // verbose logging is compiled out
#endif /* LOG_VERBOSE */
}

}  // namespace
|
||||
|
||||
// Returns the static Sienna Cichlid register base offset table.
// The table is built on the first call (function-local static) and is never freed; later
// calls return the same pointer.
const reg_base_offset_table* sienna_cichlid_reg_base_init() {
  // The destination table must be at least as large as the static source data copied below.
  static_assert(HWIP_MAX_INSTANCE >= MAX_INSTANCE,
                "HWIP_MAX_INSTANCE must be greater than or equal to MAX_INSTANCE");
  static_assert(HWIP_MAX_SEGMENT >= MAX_SEGMENT,
                "HWIP_MAX_SEGMENT must be greater than or equal to MAX_SEGMENT");

  static const auto* sienna_cichlid_reg_table = []() {
    auto* reg_table = new reg_base_offset_table();

    // helper lambda to initialize blocks: copies every instance's segments of `base`
    auto init_hwip = [&](amd_hw_ip_block_type hwip, const auto& base) {
      for (uint32_t i = 0; i < MAX_INSTANCE; ++i) {
        std::copy(std::begin(base.instance[i].segment), std::end(base.instance[i].segment),
                  std::begin(reg_table->reg_offset[hwip][i]));
      }
    };

    // HW has more IP blocks, only initialize the blocks needed
    init_hwip(GC_HWIP, GC_BASE);
    init_hwip(ATHUB_HWIP, ATHUB_BASE);
    return reg_table;
  }();

  return sienna_cichlid_reg_table;
}
|
||||
|
||||
// Builds a register base offset table from the IP discovery data of the device identified by
// PCIe `domain` and `bdf`. Only the "gc" and "athub" blocks are filled in.
// Returns nullptr when discovery throws; otherwise returns a newly allocated table that is
// never freed.
const reg_base_offset_table* navi_ip_offset_table_discovery_sysfs(uint32_t domain, uint32_t bdf) {
  // Read the drm device properties, which includes all the IP base offsets for a GPU card on the
  // system.
  discovery_table_t table;
  try {
    table = parse_ip_discovery(domain, bdf);
  } catch (const std::exception& e) {
    LogErrors("Error in IP discovery for domain=" + std::to_string(domain) +
              " bdf=" + std::to_string(bdf) + ": \n" + e.what());
    return nullptr;
  }

  // Note: never cleanup, keep in memory to prevent issue with global destructor
  struct reg_base_offset_table* reg_table = new reg_base_offset_table();

  // helper lambda to initialize blocks
  auto init_hwip = [&](amd_hw_ip_block_type hwip, const auto& entry) {
    auto& instances = reg_table->reg_offset[hwip];
    // entry.instance comes from the parsed discovery data; skip values that would index
    // outside the table instead of writing out of bounds.
    if (entry.instance < 0 ||
        static_cast<std::size_t>(entry.instance) >= std::size(instances)) {
      LogErrors("Ignoring IP discovery entry '" + entry.ipname +
                "' with out-of-range instance " + std::to_string(entry.instance));
      return;
    }
    std::copy(std::begin(entry.segments), std::end(entry.segments),
              std::begin(instances[entry.instance]));
  };

  for (const auto& entry : table) {
    if (entry.ipname == "gc") {
      init_hwip(GC_HWIP, entry);
    } else if (entry.ipname == "athub") {
      init_hwip(ATHUB_HWIP, entry);
    }
  }

  return reg_table;
}
|
||||
@@ -0,0 +1,269 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <unordered_map>
|
||||
#include <regex>
|
||||
#include <iomanip>
|
||||
#include <cassert>
|
||||
|
||||
#include "ip_discovery.h"
|
||||
|
||||
#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
|
||||
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
|
||||
#define PCI_FUNC(devfn) ((devfn)&0x07)
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * @brief Reads a single integer from a sysfs file.
 *
 * Opens the file and extracts one integer using the stream's default (decimal)
 * formatting. Anything following the first value is ignored.
 *
 * @param path The path to the sysfs file containing the numeric value.
 *
 * @return An `std::optional<int>` containing the parsed integer if successful, or
 *         `std::nullopt` if the file cannot be opened or the extraction fails.
 */
std::optional<int> read_sysfs_single_int(const fs::path& path) {
  std::ifstream input(path);
  if (!input.is_open()) {
    return std::nullopt;  // Failed to open file
  }

  int parsed;
  if (!(input >> parsed)) {
    return std::nullopt;  // Failed to parse data
  }

  // The stream is closed by its destructor.
  return parsed;
}
|
||||
|
||||
/**
|
||||
* @brief Reads base address segments from a sysfs file.
|
||||
*
|
||||
* This helper function reads a file containing hexadecimal values representing
|
||||
* base address segments and parses them into a `base_addr_segments_t` structure.
|
||||
*
|
||||
* @param fname The path to the sysfs file containing base address segments.
|
||||
*
|
||||
* @return An `std::optional<base_addr_segments_t>` containing the parsed base address segments
|
||||
* if successful, or `std::nullopt` if the file does not exist, cannot be opened,
|
||||
* or contains invalid data.
|
||||
*/
|
||||
std::optional<base_addr_segments_t> read_sysfs_base_addr_segments(const fs::path& path) {
|
||||
std::ifstream file(path);
|
||||
if (!file.is_open()) return std::nullopt; // Failed to open file
|
||||
|
||||
base_addr_segments_t segments{0};
|
||||
std::string databuf;
|
||||
size_t x = 0;
|
||||
while (std::getline(file, databuf) && x < segments.size()) {
|
||||
std::stringstream ss(databuf);
|
||||
ss >> std::hex >> segments[x++];
|
||||
if (ss.fail()) return std::nullopt; // Failed to parse data
|
||||
}
|
||||
|
||||
return segments;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parses IP instances for a given die and IP name from the sysfs directory structure.
|
||||
*
|
||||
* This function reads attributes such as base address segments, version information,
|
||||
* and instance number for each IP instance and stores them in the discovery table.
|
||||
*
|
||||
* @param die_num The die number associated with the IP instances.
|
||||
* @param diepath The sysfs path to the die directory.
|
||||
* @param ipname The name of the IP to be parsed.
|
||||
*
|
||||
* @return The discovery table where parsed IP instance data will be stored.
|
||||
*/
|
||||
discovery_table_t parse_ip_instances(int die_num, const fs::path& diepath,
|
||||
const std::string& ipname) {
|
||||
// /sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}
|
||||
const fs::path dir_path = fs::path(diepath) / ipname;
|
||||
if (!fs::exists(dir_path) || !fs::is_directory(dir_path)) {
|
||||
throw std::runtime_error("sysfs path does not exist or is not a directory: " +
|
||||
dir_path.string());
|
||||
}
|
||||
|
||||
discovery_table_t instances{};
|
||||
|
||||
// sub-folders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}"
|
||||
for (const auto& dir_entry : fs::directory_iterator(dir_path)) {
|
||||
if (!std::isdigit(dir_entry.path().filename().string()[0])) continue;
|
||||
|
||||
discovery_table_entry_t table_entry{};
|
||||
table_entry.die = die_num;
|
||||
|
||||
// "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die{die_num}/{ipname}/{instance_num}"
|
||||
fs::path instance_path = dir_path / dir_entry.path().filename();
|
||||
|
||||
// base_addr list
|
||||
if (auto segments = read_sysfs_base_addr_segments(instance_path / "base_addr"))
|
||||
table_entry.segments = *segments;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP base_addr segments for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// major
|
||||
if (auto major = read_sysfs_single_int(instance_path / "major"))
|
||||
table_entry.major = *major;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP major version for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// minor
|
||||
if (auto minor = read_sysfs_single_int(instance_path / "minor"))
|
||||
table_entry.minor = *minor;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP minor version for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// revision
|
||||
if (auto revision = read_sysfs_single_int(instance_path / "revision"))
|
||||
table_entry.revision = *revision;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP revision for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// instance
|
||||
if (auto instance = read_sysfs_single_int(instance_path / "num_instance"))
|
||||
table_entry.instance = *instance;
|
||||
else
|
||||
throw std::runtime_error("Failed to read IP instance for ipname=" + ipname +
|
||||
" die=" + std::to_string(die_num));
|
||||
|
||||
// convert name to lowercase
|
||||
table_entry.ipname = ipname;
|
||||
std::transform(table_entry.ipname.begin(), table_entry.ipname.end(), table_entry.ipname.begin(),
|
||||
[](unsigned char c) { return std::tolower(c); });
|
||||
|
||||
instances.emplace_back(table_entry);
|
||||
}
|
||||
|
||||
return instances;
|
||||
}
|
||||
|
||||
/**
 * @brief Generates a PCI domain BDF (Bus:Device.Function) string.
 *
 * Formats the given PCI domain and BDF value as "Domain:Bus:Device.Function",
 * e.g. "0000:47:00.0".
 *
 * @param domain The PCI domain number (32-bit unsigned integer).
 * @param bdf The PCI Bus/Device/Function (BDF) value; bits [15:8] are the bus and
 *            bits [7:3] the device. Other bits are not used.
 *
 * @return The formatted string.
 *
 * @details
 * - The domain is written in hexadecimal, zero-padded to at least 4 digits.
 * - The bus and the device are each written as 2-digit hexadecimal values.
 * - The function is currently always written as 0.
 */
std::string get_domain_bdf_str(uint32_t domain, uint32_t bdf) {
  const unsigned bus_number = (bdf >> 8) & 0xff;
  const unsigned devfn = bdf & 0xFF;
  const unsigned device_number = (devfn >> 3) & 0x1f;
  // Future ToDo: derive the function from the low three bits of devfn to support
  // multiple functions. For now, it's always zero.
  const unsigned function_number = 0;

  std::ostringstream out;
  out << std::hex << std::setfill('0');
  out << std::setw(4) << domain << ":";
  out << std::setw(2) << bus_number << ":";
  out << std::setw(2) << device_number << ".";
  out << function_number;
  return out.str();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* @brief Parses IP discovery information for a given PCI domain and BDF (Bus:Device.Function).
|
||||
*
|
||||
* This function discovers IP instances for all dies associated with a given PCI device.
|
||||
* It reads the sysfs directory structure to extract information about IP instances
|
||||
* and populates the provided discovery table.
|
||||
*
|
||||
* @param domain The PCI domain number (32-bit unsigned integer).
|
||||
* @param bdf The PCI Bus/Device/Function (BDF) value (32-bit unsigned integer).
|
||||
* @return table The discovery table where parsed IP instance data will be stored.
|
||||
*
|
||||
* @throws std::runtime_error If the sysfs directory does not exist, is not a directory,
|
||||
* or if no IP instances are found.
|
||||
*
|
||||
* @details
|
||||
* - Constructs the sysfs path for the PCI device using the domain and BDF values.
|
||||
* - Iterates over the dies in the `/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die`
|
||||
* directory.
|
||||
* - For each die, iterates over the IP directories and calls `parse_ip_instances` to parse
|
||||
* individual IP instance data.
|
||||
* - If no IP instances are found, throws an exception.
|
||||
*/
|
||||
discovery_table_t parse_ip_discovery(uint32_t domain, uint32_t bdf) {
|
||||
// /sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die
|
||||
const fs::path die_path =
|
||||
fs::path("/sys/bus/pci/devices") / get_domain_bdf_str(domain, bdf) / "ip_discovery/die";
|
||||
|
||||
if (!fs::exists(die_path) || !fs::is_directory(die_path)) {
|
||||
throw std::runtime_error("sysfs path does not exist or is not a directory: " +
|
||||
die_path.string());
|
||||
}
|
||||
|
||||
discovery_table_t table{};
|
||||
|
||||
// iterate over every die
|
||||
// subfolders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die"
|
||||
for (const auto& die_entry : fs::directory_iterator(die_path)) {
|
||||
if (!die_entry.is_directory()) continue;
|
||||
|
||||
// "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die/{die_num}"
|
||||
const fs::path die_entry_path = die_entry.path();
|
||||
int die_num = std::stoi(die_entry_path.filename());
|
||||
|
||||
// subfolders in "/sys/bus/pci/devices/{domain_bdf_str}/ip_discovery/die/{die_num}"
|
||||
for (const auto& ip_entry : fs::directory_iterator(die_entry_path)) {
|
||||
if (!ip_entry.is_directory()) continue;
|
||||
const std::string filename = ip_entry.path().filename();
|
||||
if (std::isalpha(filename[0])) {
|
||||
const auto instances = parse_ip_instances(die_num, die_entry.path(), filename);
|
||||
table.insert(table.end(), instances.begin(), instances.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (table.empty()) {
|
||||
throw std::runtime_error("No IP instances found");
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "pm4_factory.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace aql_profile {
|
||||
namespace {
|
||||
struct locked_agent_cache {
|
||||
std::shared_mutex mutex;
|
||||
std::unordered_map<uint64_t, AgentInfo> cache;
|
||||
|
||||
void add(uint64_t& agent_id, const AgentInfo& agent_info) {
|
||||
auto lock = std::unique_lock{mutex};
|
||||
agent_id = cache.size();
|
||||
cache[agent_id] = agent_info;
|
||||
}
|
||||
|
||||
const AgentInfo* get(uint64_t agent_id) {
|
||||
auto lock = std::shared_lock{mutex};
|
||||
auto it = cache.find(agent_id);
|
||||
if (it == cache.end()) return nullptr;
|
||||
return &it->second;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the single agent cache. It is allocated on first use and never deleted.
locked_agent_cache& get_cache() {
  static locked_agent_cache* const instance = new locked_agent_cache{};
  return *instance;
}
|
||||
} // namespace
|
||||
|
||||
// Converts the public agent description into an internal AgentInfo, stores it in the
// agent cache and returns the handle assigned by the cache.
// agent_info and agent_info->agent_gfxip are dereferenced without a null check.
aqlprofile_agent_handle_t RegisterAgent(const aqlprofile_agent_info_v1_t* agent_info) {
  AgentInfo record;
  record.cu_num = agent_info->cu_num;
  record.se_num = agent_info->se_num;
  record.xcc_num = agent_info->xcc_num;
  record.shader_arrays_per_se = agent_info->shader_arrays_per_se;
  record.domain = agent_info->domain;
  record.bdf_id = agent_info->location_id;

  // Copy the gfxip name, truncating it if needed. The destination is zeroed first and
  // at most sizeof - 1 bytes are copied, so the result is always NUL-terminated.
  const auto capacity = sizeof(record.gfxip);
  auto copy_len = strlen(agent_info->agent_gfxip);
  if (copy_len >= capacity) copy_len = capacity - 1;
  memset(record.gfxip, 0, capacity);
  memcpy(record.gfxip, agent_info->agent_gfxip, copy_len);

  aqlprofile_agent_handle_t assigned;
  get_cache().add(assigned.handle, record);
  return assigned;
}
|
||||
|
||||
// Looks up the agent description registered under the given handle.
// Returns nullptr when the handle is not present in the cache.
const AgentInfo* GetAgentInfo(aqlprofile_agent_handle_t agent_id) {
  locked_agent_cache& registry = get_cache();
  return registry.get(agent_id.handle);
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,417 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef SRC_CORE_PM4_FACTORY_H_
|
||||
#define SRC_CORE_PM4_FACTORY_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/aql_profile_exception.h"
|
||||
#include "def/gpu_block_info.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/pmc_builder.h"
|
||||
#include "pm4/spm_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Plain description of an agent.
// NOTE(review): field meanings below are inferred from the names - confirm against users.
struct pm4_agent_info {
  // GfxIP name string (presumably of the form "gfx90a")
  std::string agent_gfxip;
  // presumably the number of compute units
  uint32_t cu_num;
  // presumably the number of shader engines
  uint32_t se_num;
  // presumably the number of shader arrays per shader engine
  uint32_t shader_arrays_per_se;
  // presumably the number of XCCs
  uint32_t xcc_num;
};
|
||||
|
||||
const AgentInfo* GetAgentInfo(aqlprofile_agent_handle_t agent_id);
|
||||
|
||||
aqlprofile_agent_handle_t RegisterAgent(const aqlprofile_agent_info_v1_t* agent_info);
|
||||
|
||||
// GPU enumeration.
// The numeric values are spelled out; they match the implicit numbering of the
// enumerators in declaration order.
enum gpu_id_t {
  INVAL_GPU_ID = 0,  // invalid GPU id
  GFX9_GPU_ID = 1,   // generic Gfx9 id
  MI100_GPU_ID = 2,  // Mi100 GPU id
  MI200_GPU_ID = 3,  // Mi200 GPU id
  MI300_GPU_ID = 4,  // Mi300 GPU id
  MI350_GPU_ID = 5,  // Mi350 GPU id
  GFX10_GPU_ID = 6,  // generic Gfx10 id
  GFX11_GPU_ID = 7,  // generic Gfx11 id
  GFX12_GPU_ID = 8,  // generic Gfx12 id
};
|
||||
|
||||
// Block info map class
|
||||
class BlockInfoMap {
|
||||
public:
|
||||
BlockInfoMap(const GpuBlockInfo** table, const uint32_t& size)
|
||||
: block_table_(table), block_count_(size / sizeof(uintptr_t)) {}
|
||||
BlockInfoMap(const BlockInfoMap& map)
|
||||
: block_table_(map.block_table_), block_count_(map.block_count_) {}
|
||||
|
||||
// Get block info for a given block id
|
||||
const GpuBlockInfo* Get(const uint32_t& block_id) const {
|
||||
return (block_id < block_count_) ? block_table_[block_id] : NULL;
|
||||
}
|
||||
|
||||
// Find block by name
|
||||
// Return block id or UINT32_MAX if not found
|
||||
uint32_t Find(const char* name) const {
|
||||
uint32_t index = 0;
|
||||
while (index < block_count_) {
|
||||
const GpuBlockInfo* entry = block_table_[index];
|
||||
if (entry) {
|
||||
if (strcmp(name, entry->name) == 0) break;
|
||||
}
|
||||
++index;
|
||||
}
|
||||
return (index == block_count_) ? UINT32_MAX : index;
|
||||
}
|
||||
|
||||
private:
|
||||
// Block info table
|
||||
const GpuBlockInfo** const block_table_;
|
||||
// Number of elements in the block info table
|
||||
const uint32_t block_count_;
|
||||
};
|
||||
|
||||
// Factory of PM4 builders
|
||||
class Pm4Factory {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static Pm4Factory* Create(aqlprofile_agent_handle_t agent_info, bool concurrent = false);
|
||||
static Pm4Factory* Create(const AgentInfo* agent_info, gpu_id_t gpu_id, bool concurrent);
|
||||
// Create factory for a given agent
|
||||
static Pm4Factory* Create(const hsa_agent_t agent, const bool concurrent = false);
|
||||
// Create factory for a given profile
|
||||
static Pm4Factory* Create(const profile_t* profile) {
|
||||
// First check and save the mode
|
||||
return Create(profile->agent, CheckConcurrent(profile));
|
||||
}
|
||||
// Destroy factory
|
||||
static void Destroy();
|
||||
|
||||
// Return gpu id
|
||||
gpu_id_t GetGpuId() const { return gpu_id_; }
|
||||
// Is pmc to be profiled concurrently?
|
||||
bool IsConcurrent() const { return concurrent_mode_; }
|
||||
// Is getting SPM data using driver public API?
|
||||
bool SpmKfdMode() const { return spm_kfd_mode_; }
|
||||
|
||||
// Return PM4 command builder
|
||||
pm4_builder::CmdBuilder* GetCmdBuilder() const { return cmd_builder_; }
|
||||
// Return PMC PM4 packets builder
|
||||
pm4_builder::PmcBuilder* GetPmcBuilder() const { return pmc_builder_; }
|
||||
// Return SPM PM4 packets builder
|
||||
pm4_builder::SpmBuilder* GetSpmBuilder() const { return spm_builder_; }
|
||||
// Return SQTT PM4 packets builder
|
||||
pm4_builder::SqttBuilder* GetSqttBuilder() const { return sqtt_builder_; }
|
||||
|
||||
// Return Shader Engines number
|
||||
uint32_t GetShaderEnginesNumber() const { return agent_info_->se_num; }
|
||||
uint32_t GetShaderArraysNumber() const { return agent_info_->shader_arrays_per_se; }
|
||||
uint32_t GetComputeUnitNumber() const { return agent_info_->cu_num; }
|
||||
// Return SQTT buffer alignment
|
||||
uint32_t GetSQTTBufferAlignment() const { return 0x1000; }
|
||||
const char* GetGFX() const { return agent_info_->name; }
|
||||
virtual bool IsGFX9() const { return false; }
|
||||
virtual bool IsGFX10() const { return false; }
|
||||
virtual bool IsGFX11() const { return false; }
|
||||
virtual bool IsGFX12() const { return false; }
|
||||
// Return number of XCC on the GPU
|
||||
uint32_t GetXccNumber() const { return agent_info_->xcc_num; }
|
||||
|
||||
const GpuBlockInfo* GetBlockInfo(const aqlprofile_pmc_event_t* event) const {
|
||||
const GpuBlockInfo* info = block_map_.Get(event->block_name);
|
||||
if (info == NULL) throw std::runtime_error("Bad Block");
|
||||
// Checking that the block index is in proper range
|
||||
if (event->block_index >= info->instance_count) throw std::runtime_error("Bad Index");
|
||||
// Checking that the counter event index is in proper range
|
||||
#if 0
|
||||
if (event->counter_id > info->event_id_max)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
#endif
|
||||
return info;
|
||||
}
|
||||
|
||||
// Return block info foor a given event
|
||||
const GpuBlockInfo* GetBlockInfo(const event_t* event) const {
|
||||
const GpuBlockInfo* info = block_map_.Get(event->block_name);
|
||||
if (info == NULL) throw event_exception(std::string("Bad block, "), *event);
|
||||
// Checking that the block index is in proper range
|
||||
if (event->block_index >= info->instance_count)
|
||||
throw event_exception(std::string("Bad block index, "), *event);
|
||||
// Checking that the counter event index is in proper range
|
||||
#if 0
|
||||
if (event->counter_id > info->event_id_max)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
#endif
|
||||
return info;
|
||||
}
|
||||
|
||||
// Return block info for a given block id
|
||||
const GpuBlockInfo* GetBlockInfo(const uint32_t& block_id) const {
|
||||
return block_map_.Get(block_id);
|
||||
}
|
||||
|
||||
virtual size_t GetNumEvents(uint32_t block_name) const {
|
||||
size_t se_number = GetShaderEnginesNumber() / GetXccNumber();
|
||||
size_t block_samples_count = 1;
|
||||
auto* block_info = GetBlockInfo(block_name);
|
||||
|
||||
if (block_info->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (block_info->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
if (block_info->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= GetNumWGPs();
|
||||
if ((block_info->attr & CounterBlockSqAttr) && IsGFX11()) // TODO: Move to CounterBlockWgpAttr
|
||||
block_samples_count *= GetNumWGPs();
|
||||
return block_samples_count;
|
||||
}
|
||||
|
||||
virtual size_t GetBytesNeeded(uint32_t block_name) const {
|
||||
return GetNumEvents(block_name) * GetXccNumber() * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
// Return block id for a given block name string
|
||||
uint32_t FindBlock(const char* name) const { return block_map_.Find(name); }
|
||||
|
||||
/// Workaround for GFX11. PMC Builder overrides this.
|
||||
virtual int GetNumWGPs() const {
|
||||
if (pmc_builder_) return pmc_builder_->GetNumWGPs();
|
||||
return 1;
|
||||
};
|
||||
|
||||
virtual int GetAccumLowID() const { throw HSA_STATUS_ERROR_INVALID_ARGUMENT; };
|
||||
virtual int GetAccumHiID() const { throw HSA_STATUS_ERROR_INVALID_ARGUMENT; };
|
||||
|
||||
protected:
|
||||
explicit Pm4Factory(const BlockInfoMap& map)
|
||||
: cmd_builder_(NULL),
|
||||
pmc_builder_(NULL),
|
||||
spm_builder_(NULL),
|
||||
sqtt_builder_(NULL),
|
||||
agent_info_(NULL),
|
||||
concurrent_mode_(concurrent_create_mode_),
|
||||
block_map_(map) {}
|
||||
|
||||
virtual ~Pm4Factory() {
|
||||
delete cmd_builder_;
|
||||
delete pmc_builder_;
|
||||
delete spm_builder_;
|
||||
delete sqtt_builder_;
|
||||
}
|
||||
|
||||
// PM4 command builder
|
||||
pm4_builder::CmdBuilder* cmd_builder_;
|
||||
// PMC PM4 packets builder
|
||||
pm4_builder::PmcBuilder* pmc_builder_;
|
||||
// SPM PM4 packets builder
|
||||
pm4_builder::SpmBuilder* spm_builder_;
|
||||
// SQTT PM4 packets builder
|
||||
pm4_builder::SqttBuilder* sqtt_builder_;
|
||||
// agent info
|
||||
const AgentInfo* agent_info_;
|
||||
gpu_id_t gpu_id_;
|
||||
// Concurrent mode
|
||||
static bool concurrent_create_mode_;
|
||||
static bool spm_kfd_mode_;
|
||||
bool concurrent_mode_;
|
||||
|
||||
private:
|
||||
// PM4 factory instance map type
|
||||
struct instances_fncomp_t {
|
||||
bool operator()(const hsa_agent_t& a, const hsa_agent_t& b) const {
|
||||
return a.handle < b.handle;
|
||||
}
|
||||
};
|
||||
typedef std::map<hsa_agent_t, Pm4Factory*, instances_fncomp_t> instances_t;
|
||||
|
||||
// Create GFX9 generic factory
|
||||
static Pm4Factory* Gfx9Create(const AgentInfo* agent_info);
|
||||
// Create GFX10 generic factory
|
||||
static Pm4Factory* Gfx10Create(const AgentInfo* agent_info);
|
||||
// Create GFX11 generic factory
|
||||
static Pm4Factory* Gfx11Create(const AgentInfo* agent_info);
|
||||
// Create GFX12 generic factory
|
||||
static Pm4Factory* Gfx12Create(const AgentInfo* agent_info);
|
||||
// Create MI100 factory
|
||||
static Pm4Factory* Mi100Create(const AgentInfo* agent_info);
|
||||
// Create MI200 factory
|
||||
static Pm4Factory* Mi200Create(const AgentInfo* agent_info);
|
||||
// Create MI300 factory
|
||||
static Pm4Factory* Mi300Create(const AgentInfo* agent_info);
|
||||
// Create MI350 factory
|
||||
static Pm4Factory* Mi350Create(const AgentInfo* agent_info);
|
||||
// Return GPU id for a given agent
|
||||
static gpu_id_t GetGpuId(std::string_view);
|
||||
|
||||
static bool CheckConcurrent(const profile_t* profile);
|
||||
|
||||
// Mutex for inter thread synchronization for the instances create/destroy
|
||||
static mutex_t mutex_;
|
||||
// Factory instances container
|
||||
static instances_t* instances_;
|
||||
// Block info container
|
||||
const BlockInfoMap block_map_;
|
||||
};
|
||||
|
||||
// Create (or return the already created) PM4 factory for the agent described by agent_info.
//
// The factory is looked up in the static instance map by agent_info->dev_id; when there
// is none, one is created according to gpu_id. The static concurrent/SPM-KFD mode flags
// are updated on every call. This overload does not take mutex_ itself.
//
// Throws aql_profile_exc_msg if agent_info is null or the factory could not be created,
// and aql_profile_exc_val<gpu_id_t> for an unsupported gpu_id. When creation fails the
// placeholder map entry is removed again, so a later call starts from a clean state
// instead of finding a stale null entry.
inline Pm4Factory* Pm4Factory::Create(const AgentInfo* agent_info, gpu_id_t gpu_id,
                                      bool concurrent) {
  if (agent_info == NULL) throw aql_profile_exc_msg("Pm4Factory::Create() null agent info");

  // Check if we have the instance already created
  if (instances_ == NULL) instances_ = new instances_t;
  const auto ret = instances_->insert({agent_info->dev_id, NULL});
  instances_t::iterator it = ret.first;

  // Must be set before a factory is constructed: the constructor reads it.
  concurrent_create_mode_ = concurrent;
  static bool spm_kfd = getenv("ROCP_SPM_KFD_MODE") != NULL;
  spm_kfd_mode_ = spm_kfd;

  // Create a factory implementation for the GPU id
  if (ret.second) {
    try {
      switch (gpu_id) {
        // Create Gfx9 generic factory
        case GFX9_GPU_ID:
          it->second = Gfx9Create(agent_info);
          break;
        // Create Gfx10 generic factory
        case GFX10_GPU_ID:
          it->second = Gfx10Create(agent_info);
          break;
        // Create Gfx11 generic factory
        case GFX11_GPU_ID:
          it->second = Gfx11Create(agent_info);
          break;
        // Create Gfx12 generic factory
        case GFX12_GPU_ID:
          it->second = Gfx12Create(agent_info);
          break;
        // Create MI100 factory
        case MI100_GPU_ID:
          it->second = Mi100Create(agent_info);
          break;
        // Create MI200 factory
        case MI200_GPU_ID:
          it->second = Mi200Create(agent_info);
          break;
        // Create MI300 factory
        case MI300_GPU_ID:
          it->second = Mi300Create(agent_info);
          break;
        // Create MI350 factory
        case MI350_GPU_ID:
          it->second = Mi350Create(agent_info);
          break;
        default:
          throw aql_profile_exc_val<gpu_id_t>("GPU id error", gpu_id);
      }
    } catch (...) {
      // Do not leave the null placeholder behind.
      instances_->erase(it);
      throw;
    }
  }

  if (it->second == NULL) {
    instances_->erase(it);
    throw aql_profile_exc_msg("Pm4Factory::Create() failed");
  }
  it->second->gpu_id_ = gpu_id;
  return it->second;
}
|
||||
|
||||
// Create PM4 factory
|
||||
inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent, bool concurrent) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
// Get GPU id for a given agent
|
||||
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
std::vector<char> agent_name{};
|
||||
agent_name.resize(64);
|
||||
uint32_t device_id = 0;
|
||||
|
||||
// Getting GfxIP name
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name.data());
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
// Getting DeviceId
|
||||
hsa_agent_info_t attribute = static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID);
|
||||
status = hsa_agent_get_info(agent, attribute, &device_id);
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
throw aql_profile_exc_msg("Pm4Factory::Create() bad agent");
|
||||
}
|
||||
|
||||
const gpu_id_t gpu_id = GetGpuId(agent_name.data());
|
||||
return Pm4Factory::Create(agent_info, gpu_id, concurrent);
|
||||
}
|
||||
|
||||
// Create PM4 factory for an agent registered through RegisterAgent().
// Takes mutex_ before forwarding to the AgentInfo overload, because that overload
// reads and modifies the static instance map without locking. This matches the
// hsa_agent_t overload, which holds the same mutex around the same call.
// Throws aql_profile_exc_val<uint64_t> if the handle is not registered.
inline Pm4Factory* Pm4Factory::Create(aqlprofile_agent_handle_t agent_info, bool concurrent) {
  const auto* info = GetAgentInfo(agent_info);
  if (info == NULL) throw aql_profile_exc_val<uint64_t>("Bad agent handle", agent_info.handle);
  const gpu_id_t gpu_id = GetGpuId(info->gfxip);

  std::lock_guard<mutex_t> lck(mutex_);
  return Pm4Factory::Create(info, gpu_id, concurrent);
}
|
||||
|
||||
// Destroy PM4 factory
|
||||
inline void Pm4Factory::Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (instances_ != NULL) {
|
||||
for (auto& item : *instances_) delete item.second;
|
||||
delete instances_;
|
||||
instances_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the setting of pmc profiling mode
|
||||
inline bool Pm4Factory::CheckConcurrent(const profile_t* profile) {
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
if (p->parameter_name == HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return GPU id for a given agent
|
||||
inline gpu_id_t Pm4Factory::GetGpuId(std::string_view gfx_ip) {
|
||||
std::vector<std::pair<std::string, gpu_id_t>> gfxip_map = {
|
||||
{"gfx908", MI100_GPU_ID}, {"gfx90a", MI200_GPU_ID}, {"gfx900", GFX9_GPU_ID},
|
||||
{"gfx902", GFX9_GPU_ID}, {"gfx906", GFX9_GPU_ID}, {"gfx94", MI300_GPU_ID},
|
||||
{"gfx95", MI350_GPU_ID}, {"gfx10", GFX10_GPU_ID}, {"gfx11", GFX11_GPU_ID},
|
||||
{"gfx12", GFX12_GPU_ID},
|
||||
};
|
||||
|
||||
for (const auto& [name, id] : gfxip_map) {
|
||||
if (gfx_ip.rfind(name, 0) == 0) {
|
||||
return id;
|
||||
}
|
||||
}
|
||||
|
||||
return INVAL_GPU_ID;
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // SRC_CORE_PM4_FACTORY_H_
|
||||
@@ -0,0 +1,71 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "core/amd_aql_pm4_ib_packet.h"
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "pm4/cmd_builder.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Populate the PM4-IB fields of an AQL packet from an indirect-buffer command.
//
// ib_packet  : the IB command; exactly four dwords are read from it.
// aql_packet : destination packet, reinterpreted as amd_aql_pm4_ib_packet_t.
//
// The packet header and completion signal are not set here.
void PopulateAql(const uint32_t* ib_packet, packet_t* aql_packet) {
  amd_aql_pm4_ib_packet_t* aql_pm4_ib = reinterpret_cast<amd_aql_pm4_ib_packet_t*>(aql_packet);
  aql_pm4_ib->pm4_ib_format = AMD_AQL_PM4_IB_FORMAT;
  // Size of the IB command is four dwords.
  for (unsigned i = 0; i < 4; ++i) {
    aql_pm4_ib->pm4_ib_command[i] = ib_packet[i];
  }
  aql_pm4_ib->dw_count_remain = AMD_AQL_PM4_IB_DW_COUNT_REMAIN;
  for (unsigned i = 0; i < AMD_AQL_PM4_IB_RESERVED_COUNT; ++i) {
    aql_pm4_ib->reserved[i] = 0;
  }

#if defined(DEBUG_TRACE)
  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(aql_packet);
  const uint32_t dword_count = sizeof(*aql_packet) / sizeof(uint32_t);
  std::ostringstream oss;
  oss << "AQL 'IB' size(" << dword_count << ")";
  // Print the label that was actually computed rather than a hard-coded size.
  std::clog << std::setw(40) << std::left << oss.str() << ":";
  // std::left is sticky: switch back to right adjustment so the '0' fill pads on the left.
  std::clog << std::right;
  for (unsigned idx = 0; idx < dword_count; idx++) {
    std::clog << " " << std::hex << std::setw(8) << std::setfill('0') << dwords[idx];
  }
  // Restore fill and numeric base so later std::clog output is unaffected.
  std::clog << std::setfill(' ') << std::dec << std::endl;
#endif
}
|
||||
|
||||
void PopulateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_builder::CmdBuilder* cmd_writer,
|
||||
packet_t* aql_packet) {
|
||||
pm4_builder::CmdBuffer ib_buffer;
|
||||
cmd_writer->BuildIndirectBufferCmd(&ib_buffer, cmd_buffer, (size_t)cmd_size);
|
||||
PopulateAql((const uint32_t*)ib_buffer.Data(), aql_packet);
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
@@ -0,0 +1,239 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <dirent.h>
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include <pthread.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
|
||||
// Evaluate `call` once. If it yields a non-zero code, store that code in errno,
// report it with perror() using the call text as prefix, and abort the process.
#define PTHREAD_CALL(call)                    \
  do {                                        \
    const int pthread_call_status_ = (call);  \
    if (pthread_call_status_ != 0) {          \
      errno = pthread_call_status_;           \
      perror(#call);                          \
      abort();                                \
    }                                         \
  } while (0)
|
||||
|
||||
namespace spm_kfd_namespace {
|
||||
|
||||
// Find the KFD topology node number of the gpu_ind-th GPU.
//
// Walks the all-digit entries of /sys/class/kfd/kfd/topology/nodes and reads each
// entry's gpu_id file. Entries whose gpu_id is non-zero are counted, and the
// entry at zero-based position gpu_ind among them is returned as an integer.
// Entries are visited in readdir() order.
// NOTE(review): readdir() order is not guaranteed to be numeric - confirm this
// matches the ordering behind dev_index.
//
// Prints an error and aborts the process when no such node is found.
int get_gpu_node_id(uint32_t gpu_ind) {
  const std::string nodes_path = "/sys/class/kfd/kfd/topology/nodes";
  int gpu_node = -1;
  uint32_t gpu_count = 0;  // nodes with a non-zero gpu_id seen so far

  DIR* nodes_dir = opendir(nodes_path.c_str());
  if (nodes_dir != NULL) {
    struct dirent* entry = NULL;
    while ((entry = readdir(nodes_dir)) != NULL) {
      const std::string node_name = entry->d_name;

      // Only all-digit directory names are node entries (skips "." and "..").
      if (node_name.find_first_not_of("0123456789") != std::string::npos) continue;

      std::ifstream infile(nodes_path + "/" + node_name + "/gpu_id");
      // Initialized so that an unreadable file counts as "no GPU" instead of
      // leaving the value indeterminate.
      int id = 0;
      if (!(infile >> id) || id == 0) continue;

      if (gpu_count == gpu_ind) {
        gpu_node = atoi(node_name.c_str());
        break;
      }
      // Count every GPU node that is passed over; without this only gpu_ind == 0
      // could ever match.
      ++gpu_count;
    }
    closedir(nodes_dir);
  }

  if (gpu_node == -1) {
    printf("get_gpu_node_id`error: GPU[%u] not found\n", gpu_ind);
    fflush(stdout);
    abort();
  }

  return gpu_node;
}
|
||||
|
||||
// Agent overload: looks up the agent's dev_index through HsaRsrcFactory and
// forwards it to the index-based get_gpu_node_id().
int get_gpu_node_id(hsa_agent_t agent) {
  auto&& rsrc_factory = HsaRsrcFactory::Instance();
  const uint32_t device_index = rsrc_factory.GetAgentInfo(agent)->dev_index;
  return get_gpu_node_id(device_index);
}
|
||||
|
||||
struct state_t {
|
||||
bool thread_stop;
|
||||
int node_id;
|
||||
uint32_t buf_size;
|
||||
uint32_t timeout;
|
||||
uint32_t data_size;
|
||||
void* kfd_buf;
|
||||
void* prod_buf;
|
||||
void* cons_buf;
|
||||
bool data_loss;
|
||||
bool ready;
|
||||
pthread_mutex_t work_mutex;
|
||||
pthread_cond_t work_cond;
|
||||
hsa_agent_t agent;
|
||||
};
|
||||
|
||||
// Producer thread body.
//
// On each iteration the three buffers in *state are rotated under work_mutex
// (cons_buf takes the old kfd_buf, prod_buf the old cons_buf, kfd_buf the old
// prod_buf), ready is set and work_cond is signalled. The loop ends once
// thread_stop is observed after an iteration.
//
// The hsa_amd_spm_set_dest_buffer calls are disabled (kept below as comments), so
// every status check currently tests a constant HSA_STATUS_SUCCESS.
void producer_fun(state_t* state) {
  // Print the message, flush stdout and terminate the process.
  const auto fail = [](const char* message) {
    printf("%s", message);
    fflush(stdout);
    abort();
  };

  uint32_t timeout = 0;
  hsa_status_t status = HSA_STATUS_SUCCESS;
  // hsa_amd_spm_set_dest_buffer(state->agent, state->buf_size, &timeout, &(state->data_size),
  //                             state->kfd_buf, &(state->data_loss));
  if (status != HSA_STATUS_SUCCESS) fail("hsa SPM Set DestBuffer init error\n");

  for (;;) {
    timeout = state->timeout;
    status = HSA_STATUS_SUCCESS;
    // hsa_amd_spm_set_dest_buffer(state->agent, state->buf_size, &timeout, &(state->data_size),
    //                             state->prod_buf, &(state->data_loss));
    if (status != HSA_STATUS_SUCCESS) fail("hsa SPM Set DestBuffer error\n");

    PTHREAD_CALL(pthread_mutex_lock(&(state->work_mutex)));
    void* const previous_prod = state->prod_buf;
    state->prod_buf = state->cons_buf;
    state->cons_buf = state->kfd_buf;
    state->kfd_buf = previous_prod;
    state->ready = true;
    PTHREAD_CALL(pthread_cond_signal(&(state->work_cond)));
    PTHREAD_CALL(pthread_mutex_unlock(&(state->work_mutex)));

    if (state->thread_stop) break;
  }

  status = HSA_STATUS_SUCCESS;
  // hsa_amd_spm_set_dest_buffer(state->agent, 0, &timeout, &(state->data_size), NULL,
  //                             &(state->data_loss));
  if (status != HSA_STATUS_SUCCESS) fail("hsa SPM Set DestBuffer stop error\n");
}
|
||||
|
||||
// Consumer thread body.
//
// Holds work_mutex for the whole loop except while blocked in pthread_cond_wait.
// Each time the producer sets ready, the consumer clears it and passes cons_buf /
// data_size to callback as HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA with sample id 0.
//
// callback returning HSA_STATUS_INFO_BREAK sets thread_stop and ends the loop;
// any other non-success status aborts the process.
void consumer_fun(state_t* state, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
  const uint32_t sample_id = 0;
  PTHREAD_CALL(pthread_mutex_lock(&(state->work_mutex)));
  do {
    // Loop on the predicate: condition waits may wake up spuriously.
    while (state->ready == false) {
      PTHREAD_CALL(pthread_cond_wait(&(state->work_cond), &(state->work_mutex)));
    }
    state->ready = false;

    // Value-initialized so the callback never sees indeterminate fields.
    hsa_ven_amd_aqlprofile_info_data_t sample_info{};
    sample_info.sample_id = sample_id;
    sample_info.trace_data.ptr = state->cons_buf;
    sample_info.trace_data.size = state->data_size;

    const hsa_status_t status =
        callback(HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA, &sample_info, data);
    if (status == HSA_STATUS_INFO_BREAK) {
      state->thread_stop = true;
      break;
    }
    if (status != HSA_STATUS_SUCCESS) {
      printf("SPM consumer callback failed\n");
      // abort() does not flush stdio buffers; flush as the other error paths do.
      fflush(stdout);
      abort();
    }
  } while (1);
  PTHREAD_CALL(pthread_mutex_unlock(&(state->work_mutex)));
}
|
||||
|
||||
void mananger_fun(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
state_t obj{};
|
||||
const int gpu_node_id = get_gpu_node_id(profile->agent);
|
||||
char* buf_ptr = (char*)(profile->output_buffer.ptr);
|
||||
// SPM data buffer size 256 byte aligned
|
||||
const uint32_t buf_size = (profile->output_buffer.size / 3) & ~(uint32_t(256) - 1);
|
||||
|
||||
obj.timeout = 1000000; // 1sec
|
||||
obj.node_id = gpu_node_id;
|
||||
obj.buf_size = buf_size;
|
||||
obj.kfd_buf = buf_ptr;
|
||||
obj.prod_buf = buf_ptr + buf_size;
|
||||
obj.cons_buf = buf_ptr + 2 * buf_size;
|
||||
obj.agent = profile->agent;
|
||||
|
||||
PTHREAD_CALL(pthread_mutex_init(&(obj.work_mutex), NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&(obj.work_cond), NULL));
|
||||
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS; // hsa_amd_spm_acquire(profile->agent);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
printf("hsa SPM Acquire error\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
|
||||
// spm threads
|
||||
std::thread producer(producer_fun, &obj);
|
||||
std::thread consumer(consumer_fun, &obj, callback, data);
|
||||
|
||||
producer.join();
|
||||
consumer.join();
|
||||
|
||||
status = HSA_STATUS_SUCCESS; // hsa_amd_spm_release(profile->agent);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
printf("hsa SPM Release error\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
typedef std::mutex spm_mutex_t;
|
||||
spm_mutex_t spm_mutex;
|
||||
|
||||
// Getting SPM data using driver API
|
||||
hsa_status_t spm_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
std::lock_guard<spm_mutex_t> lck(spm_mutex);
|
||||
static std::thread* t = NULL;
|
||||
|
||||
if (t == NULL) {
|
||||
// spm manager thread
|
||||
t = new std::thread(mananger_fun, profile, callback, data);
|
||||
} else {
|
||||
t->join();
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace spm_kfd_namespace
|
||||
@@ -0,0 +1,420 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/aql_profile.hpp"
|
||||
#include "core/include/aql_profile_v2.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include "core/logger.h"
|
||||
#include "core/pm4_factory.h"
|
||||
#include "pm4/cmd_builder.h"
|
||||
#include "pm4/sqtt_builder.h"
|
||||
|
||||
#include "core/commandbuffermgr.hpp"
|
||||
#include "memorymanager.hpp"
|
||||
|
||||
// Extra bytes added to the trace control buffer size when it is created.
#define THREAD_TRACE_PREFIX_SIZE 256
// Trace output buffer size used when no ATT_BUFFER_SIZE parameter is supplied (192 MiB).
#define DEFAULT_TRACE_BUFFER_SIZE (192 << 20)
|
||||
|
||||
// 64-bit thread-trace header packet, accessible as bit fields or as one raw value.
union att_header_packet_t {
  struct {
    uint64_t legacy_version : 13;
    uint64_t gfx9_version2 : 3;
    uint64_t DSIMDM : 4;  // filled from the SIMD argument of getHeaderPacket()
    uint64_t DCU : 5;     // filled from the CU argument of getHeaderPacket()
    uint64_t DSA : 1;
    uint64_t SEID : 6;  // filled from the SE argument of getHeaderPacket()
    uint64_t reserved2 : 32;
  };
  uint64_t raw;  // all fields viewed as a single 64-bit word
};
|
||||
|
||||
// Channel numbers passed to SqttBuilder::InsertMarker() for code-object markers,
// plus a separate wait-for-header value.
enum att_marker_state {
  ATT_MARKER_HEADER_CHANNEL = 0,
  ATT_MARKER_SIZE_LO_CHANNEL = 1,
  ATT_MARKER_ADDR_LO_CHANNEL = 2,
  ATT_MARKER_ADDR_HI_CHANNEL = 3,
  ATT_MARKER_SIZE_HI_CHANNEL = 4,
  ATT_MARKER_ID_LO_CHANNEL = 5,
  ATT_MARKER_ID_HI_CHANNEL = 6,
  ATT_MARKER_WAIT_FOR_HEADER = 32
};
|
||||
|
||||
// 32-bit payload of the header marker, accessible as bit fields or as one raw value.
union aqlprofile_att_header_marker_t {
  struct {
    // 0 if code object is being loaded, 1 for unload
    uint32_t isUnload : 1;
    // Has this code object been loaded before thread trace started?
    uint32_t bFromStart : 1;
    // Legacy code object ID, if it fits in 30 bits.
    uint32_t legacy_id : 30;
  };
  uint32_t raw;  // all fields viewed as a single 32-bit word
};
|
||||
|
||||
// Build a thread-trace header packet for the given shader engine, compute unit
// and SIMD value. All bits not set below are zero.
inline att_header_packet_t getHeaderPacket(int SE, int CU, int SIMD) {
  att_header_packet_t packet;
  packet.raw = 0;

  // The thread trace viewer only sees gfx9 for 0x11
  packet.legacy_version = 0x11;
  packet.gfx9_version2 = 4;

  packet.DSIMDM = SIMD;
  packet.DCU = CU;
  packet.DSA = 0;
  packet.SEID = SE;
  return packet;
}
|
||||
|
||||
namespace aql_profile_v2 {
|
||||
|
||||
// Deliver the collected thread-trace data of every traced shader engine to callback.
//
// handle   : identifies the TraceMemoryManager created by the create-packets call.
// callback : invoked once per shader engine whose target CU is >= 0, with the SE
//            index, a CPU-side copy of the sample, its size and userdata.
// userdata : passed through to callback unchanged.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when handle does not refer to a
// TraceMemoryManager, HSA_STATUS_ERROR_EXCEPTION when an SE reports a memory error,
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when an SE reports a full buffer or an
// out-of-bounds size (unless an earlier error was already recorded), and
// HSA_STATUS_SUCCESS otherwise. Data is still delivered when an error status is
// returned. The callback's return value is not inspected.
hsa_status_t _internal_aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
                                                   aqlprofile_att_data_callback_t callback,
                                                   void* userdata) {
  hsa_status_t status = HSA_STATUS_SUCCESS;

  auto shared_memorymgr = MemoryManager::GetManager(handle.handle);
  TraceMemoryManager* memorymgr = dynamic_cast<TraceMemoryManager*>(shared_memorymgr.get());
  if (!memorymgr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(memorymgr->GetAgent());
  pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
  const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
  auto* control_ptr = memorymgr->GetTraceControlBuf<pm4_builder::TraceControl>();
  // The GPU id does not change while iterating; look it up once.
  const auto gpu_id = pm4_factory->GetGpuId();

  // Check the per-SE status words for memory errors and full buffers
  for (size_t se = 0; se < se_number_total; se++) {
    if (control_ptr[se].status & sqttbuilder->GetUTCErrorMask()) {
      ERR_LOGGING << "SQTT memory error received, SE(" << se << ")";
      status = HSA_STATUS_ERROR_EXCEPTION;
    } else if (control_ptr[se].status & sqttbuilder->GetBufferFullMask()) {
      ERR2_LOGGING << "SQTT data buffer full, SE(" << se << ")";
      if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  std::vector<size_t> sample_sizes(se_number_total, 0);
  size_t max_sample_size = 0;

  // The samples sizes are returned in the control buffer
  // NOTE(review): sizes are computed (and bounds-checked) for every SE, including
  // those the copy loop below skips because their target CU is negative - confirm
  // whether such SEs should be excluded here as well.
  for (uint64_t se_index = 0; se_index < se_number_total; se_index++) {
    const uint64_t sample_capacity = memorymgr->config.GetCapacity(se_index);
    void* sample_ptr = reinterpret_cast<void*>(memorymgr->config.GetSEBaseAddr(se_index));

    // WPTR specifies the index in thread trace buffer where next token will be
    // written by hardware. The index is incremented by size of 32 bytes.
    const size_t wptr_mask = sqttbuilder->GetWritePtrMask();
    size_t sample_size = (control_ptr[se_index].wptr & wptr_mask) * sqttbuilder->GetWritePtrBlk();

    // GFX11 hardware bug workaround
    if (gpu_id == aql_profile::GFX11_GPU_ID) {
      sample_size = sample_size - reinterpret_cast<uint64_t>(sample_ptr);
      sample_size &= (1ull << 29) - 1;
    }

    if (sample_size >= sample_capacity) {
      ERR_LOGGING << "SQTT data out of bounds, sample_id(" << se_index << ") size(" << sample_size
                  << "/" << sample_capacity << ")";
      // Clamp so that the copy below never reads past the SE's buffer.
      sample_size = sample_capacity;
      if (status == HSA_STATUS_SUCCESS) status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    sample_sizes.at(se_index) = sample_size;
    max_sample_size = std::max(sample_size, max_sample_size);
  }

  // CPU staging buffer reused for every SE: large enough for the biggest sample
  // plus the optional header packet.
  std::vector<size_t> cpu_sample(max_sample_size / sizeof(size_t) + sizeof(att_header_packet_t), 0);

  // Copy each traced SE's sample into the staging buffer and hand it to the callback
  for (uint64_t se_index = 0; se_index < se_number_total; se_index++) {
    const int target_cu = memorymgr->config.GetTargetCU(se_index);
    if (target_cu < 0) continue;

    void* sample_ptr = reinterpret_cast<void*>(memorymgr->config.GetSEBaseAddr(se_index));
    const size_t sample_size = sample_sizes.at(se_index);
    size_t sample_size_plus_header = sample_size;

    char* sample_data_ptr = reinterpret_cast<char*>(cpu_sample.data());
    // GPUs with an id below GFX10 get a header packet placed in front of the data.
    if (gpu_id < aql_profile::GFX10_GPU_ID) {
      auto* header = reinterpret_cast<att_header_packet_t*>(cpu_sample.data());
      *header = getHeaderPacket(se_index, target_cu, memorymgr->GetSimdMask());
      sample_data_ptr += sizeof(att_header_packet_t);
      sample_size_plus_header = sample_size + sizeof(att_header_packet_t);
    }

    memorymgr->CopyMemory(static_cast<void*>(sample_data_ptr), sample_ptr, sample_size);
    callback(se_index, static_cast<void*>(cpu_sample.data()), sample_size_plus_header, userdata);
  }

  return status;
}
|
||||
|
||||
// Create the start/stop AQL packets that control thread tracing for profile.agent.
//
// handle     : receives the handler of the TraceMemoryManager that owns all buffers.
// packets    : receives the populated start_packet and stop_packet.
// profile    : agent plus an optional parameter array configuring the trace.
// alloc_cb / dealloc_cb / copy_fn / userdata : memory callbacks handed to the
//              memory manager; copy_fn is also used to upload the command streams.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown parameter name or for
// more than 8 perf counters, HSA_STATUS_SUCCESS otherwise. Out-of-range CU target,
// mask and token mask values throw aql_profile_exc_val<uint32_t>.
hsa_status_t _internal_aqlprofile_att_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_att_control_aql_packets_t* packets,
    aqlprofile_att_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t copy_fn,
    void* userdata) {
  pm4_builder::CmdBuffer start_cmd;
  pm4_builder::CmdBuffer stop_cmd;

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile.agent);

  auto memorymgr =
      std::make_shared<TraceMemoryManager>(profile.agent, alloc_cb, dealloc_cb, copy_fn, userdata);

  auto& trace_config = memorymgr->config;

  // Defaults; individual parameters below may override them.
  trace_config.vmIdMask = 0;
  trace_config.simd_sel = 0xF;
  trace_config.perfMASK = ~0u;
  trace_config.se_mask = 0x11111111;

  const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
  size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;

  if (profile.parameters) {
    const auto* const params_end = profile.parameters + profile.parameter_count;
    for (const auto* p = profile.parameters; p < params_end; p++) {
      switch (p->parameter_name) {
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK:
          // Keep only the bits of shader engines that exist.
          trace_config.se_mask = p->value & ((1ull << se_number_total) - 1);
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
          if (p->value > 15)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
          trace_config.targetCu = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
          trace_config.vmIdMask = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
          if ((p->value & 0x50) != 0)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
          trace_config.deprecated_mask = p->value;
          trace_config.targetCu = p->value & 0xF;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
          // NOTE(review): the check rejects bits [31:24] while the message names
          // [31:25] - confirm which one is intended.
          if ((p->value & 0xFF000000) != 0)
            throw aql_profile::aql_profile_exc_val<uint32_t>(
                "ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
                p->value);
          trace_config.deprecated_tokenMask = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
          trace_config.deprecated_tokenMask2 = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
          trace_config.sampleRate = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
          trace_config.concurrent = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
          trace_config.simd_sel = p->value & 0xF;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE:
          trace_config.occupancy_mode = p->value ? 1 : 0;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
          buffer_size = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
          trace_config.perfMASK = p->value;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL:
          trace_config.perfCTRL = ((p->value & 0x1F) << 8) | 0xFFFF007F;
          break;
        case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME:
          // At most 8 perf counters are accepted.
          if (trace_config.perfcounters.size() >= 8) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
          trace_config.perfcounters.push_back({p->counter_id, p->simd_mask});
          break;
        default:
          ERR_LOGGING << "Bad trace parameter name (" << p->parameter_name << ")";
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    }
  }

  // One TraceControl record per shader engine.
  const size_t control_size = sizeof(pm4_builder::TraceControl) * se_number_total;

  memorymgr->CreateTraceControlBuf(control_size + THREAD_TRACE_PREFIX_SIZE);
  memorymgr->CreateOutputBuf(buffer_size);
  MemoryManager::RegisterManager(memorymgr);

  auto* control_ptr = memorymgr->GetTraceControlBuf<pm4_builder::TraceControl>();

  trace_config.control_buffer_ptr = control_ptr;
  trace_config.control_buffer_size = control_size;
  trace_config.data_buffer_ptr = memorymgr->GetOutputBuf();
  trace_config.data_buffer_size = memorymgr->GetOutputBufSize();

  pm4_builder::SqttBuilder* sqtt_builder = pm4_factory->GetSqttBuilder();

  // Generate start commands
  sqtt_builder->Begin(&start_cmd, &trace_config);
  // Generate stop commands
  sqtt_builder->End(&stop_cmd, &trace_config);

  // Copy generated commands: both streams share one command buffer, each in its
  // own aligned slot.
  const size_t start_size = aql_profile::CommandBufferMgr::Align(start_cmd.Size());
  const size_t stop_size = aql_profile::CommandBufferMgr::Align(stop_cmd.Size());
  memorymgr->CreateCmdBuf(start_size + stop_size);

  handle->handle = memorymgr->GetHandler();
  pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
  uint8_t* cmdbuf = reinterpret_cast<uint8_t*>(memorymgr->GetCmdBuf());

  copy_fn(cmdbuf, start_cmd.Data(), start_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, start_cmd.Size(), cmd_writer, &packets->start_packet);
  cmdbuf += start_size;
  copy_fn(cmdbuf, stop_cmd.Data(), stop_cmd.Size(), userdata);
  aql_profile::PopulateAql(cmdbuf, stop_cmd.Size(), cmd_writer, &packets->stop_packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
// Build the PM4 marker commands describing a code object load/unload and wrap
// them into the given AQL packet.
//
// packet   : AQL packet to populate.
// handle   : receives the handler of the CodeobjMemoryManager owning the command buffer.
// data     : code object description (agent, addr, size, id, isUnload, fromStart).
// alloc_cb / dealloc_cb / userdata : memory callbacks for the command buffer.
//
// For loads, address and size are emitted as lo/hi 32-bit markers. An id that does
// not fit in 30 bits is emitted as separate lo/hi markers; otherwise it is carried
// in the header marker, which is always emitted last.
//
// Pm4Factory instances are cached per agent handle; the cache is guarded by a
// shared mutex that stays share-locked for the remainder of the call.
hsa_status_t _internal_aqlprofile_att_codeobj_marker(
    hsa_ext_amd_aql_pm4_packet_t* packet, aqlprofile_handle_t* handle,
    aqlprofile_att_codeobj_data_t data, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, void* userdata) {
  // Heap-allocated and never freed, so they remain usable for the whole process lifetime.
  static auto* mut = new std::shared_mutex{};
  static auto* factory_cache = new std::map<uint64_t, aql_profile::Pm4Factory*>{};

  auto _slk = std::shared_lock{*mut};

  if (factory_cache->find(data.agent.handle) == factory_cache->end()) {
    _slk.unlock();
    {
      auto _unique = std::unique_lock{*mut};
      // Re-check under the exclusive lock: another thread may have inserted the
      // entry while no lock was held. Without this, Create() would run again and
      // emplace() would silently drop the pointer it returned.
      if (factory_cache->find(data.agent.handle) == factory_cache->end()) {
        factory_cache->emplace(data.agent.handle, aql_profile::Pm4Factory::Create(data.agent));
      }
    }
    _slk.lock();
  }

  aql_profile::Pm4Factory* pm4_factory = factory_cache->at(data.agent.handle);
  pm4_builder::SqttBuilder* sqttbuilder = pm4_factory->GetSqttBuilder();
  pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
  pm4_builder::CmdBuffer commands;

  if (!data.isUnload) {
    sqttbuilder->InsertMarker(&commands, uint32_t(data.addr), ATT_MARKER_ADDR_LO_CHANNEL);
    sqttbuilder->InsertMarker(&commands, data.addr >> 32, ATT_MARKER_ADDR_HI_CHANNEL);
    sqttbuilder->InsertMarker(&commands, uint32_t(data.size), ATT_MARKER_SIZE_LO_CHANNEL);
    sqttbuilder->InsertMarker(&commands, data.size >> 32, ATT_MARKER_SIZE_HI_CHANNEL);
  }

  aqlprofile_att_header_marker_t header{};
  header.bFromStart = data.fromStart;
  header.isUnload = data.isUnload;

  if (data.id >= (1 << 30)) {
    // Too large for the 30-bit legacy_id field: send the id on its own channels.
    sqttbuilder->InsertMarker(&commands, uint32_t(data.id), ATT_MARKER_ID_LO_CHANNEL);
    sqttbuilder->InsertMarker(&commands, data.id >> 32, ATT_MARKER_ID_HI_CHANNEL);
  } else {
    header.legacy_id = data.id;
  }

  sqttbuilder->InsertMarker(&commands, header.raw, ATT_MARKER_HEADER_CHANNEL);

  auto memorymgr = std::make_shared<CodeobjMemoryManager>(data.agent, alloc_cb, dealloc_cb,
                                                          commands.Size(), userdata);
  MemoryManager::RegisterManager(memorymgr);
  handle->handle = memorymgr->GetHandler();
  void* cmdbuffer = memorymgr->cmd_buffer.get();

  memcpy(cmdbuffer, commands.Data(), commands.Size());
  aql_profile::PopulateAql(cmdbuffer, commands.Size(), cmd_writer, packet);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace aql_profile_v2
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Method to populate the provided AQL packet with ATT Markers
|
||||
// C entry point: forwards to the internal implementation and converts exceptions
// into status codes. A thrown hsa_status_t is logged and returned as is; any other
// exception yields HSA_STATUS_ERROR (std::exception messages are logged).
PUBLIC_API hsa_status_t aqlprofile_att_codeobj_marker(
    hsa_ext_amd_aql_pm4_packet_t* packet, aqlprofile_handle_t* handle,
    aqlprofile_att_codeobj_data_t data, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_att_codeobj_marker(packet, handle, data, alloc_cb,
                                                                   dealloc_cb, userdata);
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  // Every path above returns; no trailing return is needed.
}
|
||||
|
||||
// C entry point for iterating collected thread-trace data. Delegates to the
// internal implementation; a thrown hsa_status_t is logged and returned, every
// other exception results in HSA_STATUS_ERROR (std::exception messages are logged).
PUBLIC_API hsa_status_t aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
                                                    aqlprofile_att_data_callback_t callback,
                                                    void* userdata) {
  hsa_status_t result = HSA_STATUS_ERROR;
  try {
    result = aql_profile_v2::_internal_aqlprofile_att_iterate_data(handle, callback, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    result = thrown_status;
  } catch (std::exception& ex) {
    ERR_LOGGING << ex.what();
    result = HSA_STATUS_ERROR;
  } catch (...) {
    result = HSA_STATUS_ERROR;
  }
  return result;
}
|
||||
|
||||
// C entry point for creating the thread-trace start/stop packets. Forwards to the
// internal implementation and converts exceptions into status codes: a thrown
// hsa_status_t is logged and returned as is; any other exception yields
// HSA_STATUS_ERROR (std::exception messages are logged).
PUBLIC_API hsa_status_t aqlprofile_att_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_att_control_aql_packets_t* packets,
    aqlprofile_att_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t copy_fn,
    void* userdata) {
  try {
    return aql_profile_v2::_internal_aqlprofile_att_create_packets(
        handle, packets, profile, alloc_cb, dealloc_cb, copy_fn, userdata);
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
}
|
||||
|
||||
// C entry point that releases the memory manager registered under handle.
// Never propagates exceptions; failures described by a std::exception are logged
// in the same way as the other entry points do.
PUBLIC_API void aqlprofile_att_delete_packets(aqlprofile_handle_t handle) {
  try {
    MemoryManager::DeleteManager(handle.handle);
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
  } catch (...) {
    // Nothing to report and no status to return; deletion is best effort.
  }
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,73 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include "linux/registers/vega20_ip_offset.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "util/soc15_common.h"
|
||||
|
||||
// Return the register base offset table for vega20.
//
// The table is built once, on the first call, by copying the per-instance segment
// arrays of each IP block's base definition into reg_offset[hwip][instance]. The
// table is heap-allocated and never freed, and the same pointer is returned on
// every call.
const reg_base_offset_table* vega20_reg_base_init() {
  static_assert(HWIP_MAX_INSTANCE >= MAX_INSTANCE,
                "HWIP_MAX_INSTANCE must be greater than or equal to MAX_INSTANCE");
  static_assert(HWIP_MAX_SEGMENT >= MAX_SEGMENT,
                "HWIP_MAX_SEGMENT must be greater than or equal to MAX_SEGMENT");

  static const auto* vega20_reg_table = []() {
    auto* reg_table = new reg_base_offset_table();

    // helper lambda to initialize blocks: copies every segment of every instance
    auto init_hwip = [&](amd_hw_ip_block_type hwip, const auto& base) {
      for (uint32_t i = 0; i < MAX_INSTANCE; i++) {
        std::copy(std::begin(base.instance[i].segment), std::end(base.instance[i].segment),
                  std::begin(reg_table->reg_offset[hwip][i]));
      }
    };

    // Initialize all HWIP blocks
    init_hwip(GC_HWIP, GC_BASE);
    init_hwip(HDP_HWIP, HDP_BASE);
    init_hwip(MMHUB_HWIP, MMHUB_BASE);
    init_hwip(ATHUB_HWIP, ATHUB_BASE);
    init_hwip(NBIO_HWIP, NBIO_BASE);
    init_hwip(MP0_HWIP, MP0_BASE);
    init_hwip(MP1_HWIP, MP1_BASE);
    init_hwip(UVD_HWIP, UVD_BASE);
    init_hwip(VCE_HWIP, VCE_BASE);
    init_hwip(DF_HWIP, DF_BASE);
    init_hwip(DCE_HWIP, DCE_BASE);
    init_hwip(OSSSYS_HWIP, OSSSYS_BASE);
    init_hwip(SDMA0_HWIP, SDMA0_BASE);
    init_hwip(SDMA1_HWIP, SDMA1_BASE);
    init_hwip(SMUIO_HWIP, SMUIO_BASE);
    // NOTE(review): NBIF is filled from NBIO_BASE - presumably intentional; confirm.
    init_hwip(NBIF_HWIP, NBIO_BASE);
    init_hwip(THM_HWIP, THM_BASE);
    init_hwip(CLK_HWIP, CLK_BASE);
    init_hwip(UMC_HWIP, UMC_BASE);
    init_hwip(RSMU_HWIP, RSMU_BASE);

    return reg_table;
  }();

  return vega20_reg_table;
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX10_DEF_H_
|
||||
#define _GFX10_DEF_H_
|
||||
|
||||
#include "linux/navi10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_10_3_0_offset.h"
|
||||
#include "linux/registers/gc/gc_10_3_0_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx10/gfx10_block_info.h"
|
||||
#include "gfxip/gfx10/gfx10_primitives.h"
|
||||
#include "gfxip/gfx10/gfx10_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx10;
|
||||
#endif // _GFX10_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX11_DEF_H_
|
||||
#define _GFX11_DEF_H_
|
||||
|
||||
#include "linux/soc21_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_11_0_0_offset.h"
|
||||
#include "linux/registers/gc/gc_11_0_0_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx11/gfx11_block_info.h"
|
||||
#include "gfxip/gfx11/gfx11_primitives.h"
|
||||
#include "gfxip/gfx11/gfx11_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx11;
|
||||
#endif // _GFX11_DEF_H_
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX12_DEF_H_
|
||||
#define _GFX12_DEF_H_
|
||||
|
||||
#include "linux/soc24_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/registers/gc/gc_12_0_0_offset.h"
|
||||
#include "linux/registers/gc/gc_12_0_0_sh_mask.h"
|
||||
#include "linux/packets/nvd.h"
|
||||
#include "gfxip/gfx12/gfx12_block_info.h"
|
||||
using namespace gfxip::gfx12;
|
||||
using namespace gfxip::gfx12::gfx1201;
|
||||
#include "gfxip/gfx12/gfx12_primitives.h"
|
||||
#include "gfxip/gfx12/gfx12_block_table.h"
|
||||
|
||||
#endif // _GFX12_DEF_H_
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX908_DEF_H_
|
||||
#define _GFX908_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX908_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX90A_DEF_H_
|
||||
#define _GFX90A_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX90A_DEF_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef _GFX940_DEF_H_
|
||||
#define _GFX940_DEF_H_
|
||||
|
||||
#include "linux/vega10_enum.h"
|
||||
#include "util/soc15_common.h"
|
||||
#include "util/reg_offsets.h"
|
||||
#include "linux/packets/soc15d.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_offset.h"
|
||||
#include "linux/registers/gc/gc_9_2_1_sh_mask.h"
|
||||
#include "linux/registers/athub/athub_1_0_offset.h"
|
||||
#include "linux/registers/athub/athub_1_0_sh_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_primitives.h"
|
||||
#include "gfxip/gfx9/gfx9_block_table.h"
|
||||
|
||||
using namespace gfxip::gfx9;
|
||||
|
||||
#endif // _GFX940_DEF_H_
|
||||
Alguns ficheiros não foram mostrados porque foram modificados demasiados ficheiros neste diff Mostrar mais
Criar uma nova questão referindo esta
Bloquear um utilizador