Add 'projects/amdsmi/' from commit 'b4b3539631460b986dddc86a2303cef11cd38816'

git-subtree-dir: projects/amdsmi
git-subtree-mainline: 0633d8d8ce
git-subtree-split: b4b3539631
This commit is contained in:
Ameya Keshava Mallya
2025-11-17 22:28:37 +00:00
melakukan ac9e029c3e
302 mengubah file dengan 140753 tambahan dan 0 penghapusan
@@ -0,0 +1,42 @@
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
variables:
- group: common
- template: /.azuredevops/variables-global.yml@pipelines_repo
trigger:
batch: true
branches:
include:
- amd-staging
- amd-mainline
paths:
exclude:
- .github
- docs
- '.*.y*ml'
- '*.md'
- LICENSE
pr:
autoCancel: true
branches:
include:
- amd-staging
- amd-mainline
paths:
exclude:
- .github
- docs
- '.*.y*ml'
- '*.md'
- LICENSE
drafts: false
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/amdsmi.yml@pipelines_repo
+4
Melihat File
@@ -0,0 +1,4 @@
---
Language: Cpp
BasedOnStyle: Google
ColumnLimit: 100
+33
Melihat File
@@ -0,0 +1,33 @@
Checks:
bugprone*,
clang-analyzer*,
google*,
misc*,
modernize*,
-abseil*,
-bugprone-easily-swappable-parameters,
-bugprone-reserved-identifier,
-clang-analyzer-security.insecureAPI.strcpy,
-clang-diagnostic-sign-conversion,
-clang-diagnostic-unused-parameter,
-cppcoreguidelines*,
-cppcoreguidelines-pro*,
-google-readability*,
-google-runtime-int,
-misc-const-correctness,
-misc-include-cleaner,
-misc-non-copyable-objects,
-misc-unused-parameters,
-misc-use-anonymous-namespace,
-misc-use-internal-linkage,
-modernize-avoid-c-arrays,
-modernize-macro-to-enum,
-modernize-redundant-void-arg,
-modernize-use-auto,
-modernize-use-nodiscard,
-modernize-use-noexcept,
-modernize-use-nullptr,
-modernize-use-trailing-return-type,
-modernize-use-using,
-performance*,
-readability*,
+42
Melihat File
@@ -0,0 +1,42 @@
CompileFlags:
Remove: -W*
Add: [-Wall, -Wno-c++20-designator, -pedantic, -Wno-sign-conversion]
Compiler: clang++
# list here: https://clang.llvm.org/extra/clang-tidy/checks/list.html
Diagnostics:
UnusedIncludes: Strict
# rules below are copied into .clang-tidy using ./.update-clang-tidy.sh
# please keep the rules sorted alphabetically
ClangTidy:
Add: [
bugprone*,
clang-analyzer*,
google*,
misc*,
modernize*,
]
Remove: [
abseil*,
bugprone-easily-swappable-parameters,
bugprone-reserved-identifier,
cppcoreguidelines*,
cppcoreguidelines-pro*,
google-readability*,
google-runtime-int,
misc-const-correctness,
misc-include-cleaner,
misc-non-copyable-objects,
misc-unused-parameters,
misc-use-anonymous-namespace,
modernize-avoid-c-arrays,
modernize-redundant-void-arg,
modernize-use-auto,
modernize-use-nodiscard,
modernize-use-noexcept,
modernize-use-nullptr,
modernize-use-trailing-return-type,
modernize-use-using,
performance*,
readability*,
]
+253
Melihat File
@@ -0,0 +1,253 @@
# ----------------------------------
# Options affecting listfile parsing
# ----------------------------------
with section("parse"):
# Specify structure for custom cmake functions
additional_commands = {
'parse_version': {
'kwargs': {
'VERSION_STRING': '*'
}
},
'get_version_from_tag': {
'kwargs': {
'DEFAULT_VERSION_STRING': '*',
'VERSION_PREFIX': '*',
'GIT': '*'
}
}
}
# Override configurations per-command where available
override_spec = {}
# Specify variable tags.
vartags = []
# Specify property tags.
proptags = []
# -----------------------------
# Options affecting formatting.
# -----------------------------
with section("format"):
# Disable formatting entirely, making cmake-format a no-op
#disable = True
# How wide to allow formatted cmake files
line_width = 120
# How many spaces to tab for indent
tab_size = 4
# If true, lines are indented using tab characters (utf-8 0x09) instead of
# <tab_size> space characters (utf-8 0x20). In cases where the layout would
# require a fractional tab character, the behavior of the fractional
# indentation is governed by <fractional_tab_policy>
use_tabchars = False
# If <use_tabchars> is True, then the value of this variable indicates how
# fractional indentions are handled during whitespace replacement. If set to
# 'use-space', fractional indentation is left as spaces (utf-8 0x20). If set
# to `round-up` fractional indentation is replaced with a single tab character
# (utf-8 0x09) effectively shifting the column to the next tabstop
fractional_tab_policy = 'use-space'
# If an argument group contains more than this many sub-groups (parg or kwarg
# groups) then force it to a vertical layout.
max_subgroups_hwrap = 3
# If a positional argument group contains more than this many arguments, then
# force it to a vertical layout.
max_pargs_hwrap = 6
# If a cmdline positional group consumes more than this many lines without
# nesting, then invalidate the layout (and nest)
max_rows_cmdline = 2
# If true, separate flow control names from their parentheses with a space
separate_ctrl_name_with_space = False
# If true, separate function names from parentheses with a space
separate_fn_name_with_space = False
# If a statement is wrapped to more than one line, then dangle the closing
# parenthesis on its own line.
dangle_parens = False
# If the trailing parenthesis must be 'dangled' on its own line, then align it
# to this reference: `prefix`: the start of the statement, `prefix-indent`:
# the start of the statement, plus one indentation level, `child`: align to
# the column of the arguments
dangle_align = 'prefix'
# If the statement spelling length (including space and parenthesis) is
# smaller than this amount, then force reject nested layouts.
min_prefix_chars = 4
# If the statement spelling length (including space and parenthesis) is larger
# than the tab width by more than this amount, then force reject un-nested
# layouts.
max_prefix_chars = 10
# If a candidate layout is wrapped horizontally but it exceeds this many
# lines, then reject the layout.
max_lines_hwrap = 2
# What style line endings to use in the output.
line_ending = 'unix'
# Format command names consistently as 'lower' or 'upper' case
command_case = 'canonical'
# Format keywords consistently as 'lower' or 'upper' case
keyword_case = 'unchanged'
# A list of command names which should always be wrapped
always_wrap = ['install']
# If true, the argument lists which are known to be sortable will be sorted
# lexicographically
enable_sort = True
# If true, the parsers may infer whether or not an argument list is sortable
# (without annotation).
autosort = False
# By default, if cmake-format cannot successfully fit everything into the
# desired linewidth it will apply the last, most aggressive attempt that it
# made. If this flag is True, however, cmake-format will print an error, exit
# with non-zero status code, and write-out nothing
require_valid_layout = False
# A dictionary mapping layout nodes to a list of wrap decisions. See the
# documentation for more information.
layout_passes = {}
# ------------------------------------------------
# Options affecting comment reflow and formatting.
# ------------------------------------------------
with section("markup"):
# What character to use for bulleted lists
bullet_char = '*'
# What character to use as punctuation after numerals in an enumerated list
enum_char = '.'
# If comment markup is enabled, don't reflow the first comment block in each
# listfile. Use this to preserve formatting of your copyright/license
# statements.
first_comment_is_literal = False
# If comment markup is enabled, don't reflow any comment block which matches
# this (regex) pattern. Default is `None` (disabled).
literal_comment_pattern = None
# Regular expression to match preformat fences in comments default=
# ``r'^\s*([`~]{3}[`~]*)(.*)$'``
fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$'
# Regular expression to match rulers in comments default=
# ``r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'``
ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$'
# If a comment line starts with this pattern then it is explicitly a
# trailing comment for the preceding argument. Default is '#<'
explicit_trailing_pattern = '#<'
# If a comment line starts with at least this many consecutive hash
# characters, then don't lstrip() them off. This allows for lazy hash rulers
# where the first hash char is not separated by space
hashruler_min_length = 10
# If true, then insert a space between the first hash char and remaining hash
# chars in a hash ruler, and normalize its length to fill the column
canonicalize_hashrulers = True
# enable comment markup parsing and reflow
enable_markup = False
# ----------------------------
# Options affecting the linter
# ----------------------------
with section("lint"):
# a list of lint codes to disable
disabled_codes = ['C0307', 'C0301', 'C0305']
# regular expression pattern describing valid function names
function_pattern = '[0-9a-z_]+'
# regular expression pattern describing valid macro names
macro_pattern = '[0-9A-Z_]+'
# regular expression pattern describing valid names for variables with global
# (cache) scope
global_var_pattern = '[A-Z][0-9A-Z_]+'
# regular expression pattern describing valid names for variables with global
# scope (but internal semantic)
internal_var_pattern = '_[A-Z][0-9A-Z_]+'
# regular expression pattern describing valid names for variables with local
# scope
local_var_pattern = '[a-z][a-z0-9_]+'
# regular expression pattern describing valid names for private directory
# variables
private_var_pattern = '_[0-9a-z_]+'
# regular expression pattern describing valid names for public directory
# variables
public_var_pattern = '[A-Z][0-9A-Z_]+'
# regular expression pattern describing valid names for function/macro
# arguments and loop variables.
argument_var_pattern = '[a-z][a-z0-9_]+'
# regular expression pattern describing valid names for keywords used in
# functions or macros
keyword_pattern = '[A-Z][0-9A-Z_]+'
# In the heuristic for C0201, how many conditionals to match within a loop
# before considering the loop a parser.
max_conditionals_custom_parser = 2
# Require at least this many newlines between statements
min_statement_spacing = 1
# Require no more than this many newlines between statements
max_statement_spacing = 2
max_returns = 6
max_branches = 12
max_arguments = 5
max_localvars = 15
max_statements = 50
# -------------------------------
# Options affecting file encoding
# -------------------------------
with section("encode"):
# If true, emit the unicode byte-order mark (BOM) at the start of the file
emit_byteorder_mark = False
# Specify the encoding of the input file. Defaults to utf-8
input_encoding = 'utf-8'
# Specify the encoding of the output file. Defaults to utf-8. Note that cmake
# only claims to support utf-8 so be careful when using anything else
output_encoding = 'utf-8'
# -------------------------------------
# Miscellaneous configurations options.
# -------------------------------------
with section("misc"):
# A dictionary containing any per-command configuration overrides. Currently
# only `command_case` is supported.
per_command = {}
+16
Melihat File
@@ -0,0 +1,16 @@
# EditorConfig standardizes spacing in all editors: https://EditorConfig.org
# Please get a plugin for your editor to match the formatting
# top-most EditorConfig file
root = true
[*.py]
indent_style = space
# Matches multiple files with brace expansion notation
# Set default charset
[*.{c,cc,cpp,h,hh,hpp}]
charset = utf-8
indent_style = space
indent_size = 2
max_line_length = 100
+6
Melihat File
@@ -0,0 +1,6 @@
* @maisarif_amdeng @shuzhliu_amdeng @dgalants_amdeng @charpoag_amdeng @daolivei_amdeng @marifamd @bill-shuzhou-liu @dmitrii-galantsev @charis-poag-amd @oliveiradan @gabrpham_amdeng
docs/* @ROCm/rocm-documentation
*.md @ROCm/rocm-documentation
*.rst @ROCm/rocm-documentation
+84
Melihat File
@@ -0,0 +1,84 @@
# Contributing to AMD SMI #
We welcome contributions to AMD SMI.
Please follow these details to help ensure your contributions will be successfully accepted.
## Issue Discussion ##
Please use the GitHub Issues tab to notify us of issues.
* Use your best judgement for issue creation. If your issue is already listed, upvote the issue and
comment or post to provide additional details, such as how you reproduced this issue.
* If you're not sure if your issue is the same, err on the side of caution and file your issue.
You can add a comment to include the issue number (and link) for the similar issue. If we evaluate
your issue as being the same as the existing issue, we'll close the duplicate.
* If your issue doesn't exist, use the issue template to file a new issue.
* When filing an issue, be sure to provide as much information as possible, including script output so
we can collect information about your configuration. This helps reduce the time required to
reproduce your issue.
* Check your issue regularly, as we may require additional information to successfully reproduce the
issue.
* You may also open an issue to ask questions to the maintainers about whether a proposed change
meets the acceptance criteria, or to discuss an idea pertaining to the library.
## Acceptance Criteria ##
The goal of the AMD SMI project is to provide a simple CLI interface and a library
for interacting with AMD GPUs.
## Coding Style ##
Please refer to `.clang-format`. We suggest using the `pre-commit` tool.
The style mostly follows Google C++ formatting with a 100-character line limit.
## Pull Request Guidelines ##
When you create a pull request, you should target the default branch. Our
current default branch is the **amd-staging** branch, which serves as our
integration branch.
### Deliverables ###
For each new file in the repository,
please include the licensing header:
/*
* =============================================================================
* Copyright (c) 2019-2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/
### Process ###
* Reviewers are listed in the CODEOWNERS file
* Code format guidelines
AMD SMI uses the clang-format tool for formatting code in source files.
The formatting style is captured in .clang-format which is located at
the root of AMD SMI. There are two ways to apply the formatting:
1. Using pre-commit and docker - `pre-commit run`
1. Using only clang-format - `clang-format -i <path-to-the-source-file>`
## References ##
1. [pre-commit](https://github.com/pre-commit/pre-commit)
1. [clang-format](https://clang.llvm.org/docs/ClangFormat.html)
+18
Melihat File
@@ -0,0 +1,18 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/docs/sphinx" # Location of package manifests
open-pull-requests-limit: 10
schedule:
interval: "weekly"
target-branch: "amd-staging"
labels:
- "documentation"
- "dependencies"
reviewers:
- "petepark_amdeng"
+5
Melihat File
@@ -0,0 +1,5 @@
disabled: false
scmId: gh-emu-rocm
branchesToScan:
- amd-staging
- amd-mainline
@@ -0,0 +1,314 @@
name: ABI Compliance Check
on:
pull_request:
branches:
- amd-staging
- release/rocm-rel-*
paths:
- 'include/amd_smi/amdsmi.h'
push:
branches:
- amd-staging
- release/rocm-rel-*
paths:
- 'include/amd_smi/amdsmi.h'
workflow_dispatch:
permissions:
contents: read
pull-requests: write
jobs:
major_abi_check:
name: Major ABI Compliance Check
runs-on: AMD-ROCm-Internal-dev1
steps:
- name: Setup Environment
run: |
sudo rm -rf $GITHUB_WORKSPACE/* || true
sudo rm -rf $GITHUB_WORKSPACE/.[!.]* || true
sudo apt-get update -qq
sudo apt-get install -y -qq perl build-essential git universal-ctags
git clone https://github.com/lvc/abi-compliance-checker.git
cd abi-compliance-checker
sudo make install
abi-compliance-checker --version
- name: Checkout current code (new version)
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha || github.sha }}
- name: Fetch base branch for PR
if: github.event_name == 'pull_request'
run: |
echo "Fetching base branch: ${{ github.base_ref }}"
git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
git branch -a
- name: Prepare amdsmi.h files for comparison
id: prepare_files
run: |
echo "Preparing amdsmi.h files..."
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/major_abi_status.txt
if [ -f include/amd_smi/amdsmi.h ]; then
cp include/amd_smi/amdsmi.h amdsmi_new.h
echo "Copied current amdsmi.h to amdsmi_new.h"
else
echo "::error::New amdsmi.h (include/amd_smi/amdsmi.h) not found in current checkout."
touch amdsmi_new.h
exit 0
fi
OLD_VERSION_REF=""
V1_NAME_SUFFIX=""
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
OLD_VERSION_REF="${{ github.base_ref }}"
V1_NAME_SUFFIX="base_${{ github.base_ref }}"
echo "Event is Pull Request. Old version source is base branch: ${OLD_VERSION_REF}"
elif [[ "${{ github.event_name }}" == "push" ]]; then
if [[ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]]; then
OLD_VERSION_REF="${{ github.event.before }}"
V1_NAME_SUFFIX="before_$(echo ${{ github.event.before }} | cut -c1-7)"
echo "Event is Push. Old version source is commit before push: ${OLD_VERSION_REF}"
else
echo "Push event is for a new branch or forced push. Cannot determine 'old' version."
touch amdsmi_old.h
echo "Created dummy amdsmi_old.h. Assuming no ABI breakage as no baseline."
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
echo "skip_check=true" >> $GITHUB_OUTPUT
exit 0
fi
else
echo "::warning::Unsupported event type: ${{ github.event_name }}. Cannot determine old version."
touch amdsmi_old.h
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
echo "skip_check=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "Fetching amdsmi.h from ref: $OLD_VERSION_REF (as amdsmi_old.h)"
git show $OLD_VERSION_REF:include/amd_smi/amdsmi.h > amdsmi_old.h 2>/dev/null
if [ $? -ne 0 ] || [ ! -s amdsmi_old.h ]; then
echo "::warning::Failed to fetch 'include/amd_smi/amdsmi.h' from ref '$OLD_VERSION_REF' or file is empty/missing."
echo "Proceeding with an empty amdsmi_old.h. This may result in all symbols reported as 'added'."
echo -n "" > amdsmi_old.h
if [ ! -s amdsmi_new.h ]; then
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
fi
else
echo "Successfully fetched amdsmi.h from $OLD_VERSION_REF to amdsmi_old.h"
fi
echo "v1_name_suffix=${V1_NAME_SUFFIX}" >> $GITHUB_OUTPUT
echo "skip_check=false" >> $GITHUB_OUTPUT
- name: Run Major ABI Compliance Check
if: steps.prepare_files.outputs.skip_check == 'false'
run: |
V1_NAME_SUFFIX_CLEAN=$(echo "${{ steps.prepare_files.outputs.v1_name_suffix }}" | tr '/' '-')
V2_NAME_CLEAN=$(echo "${{ github.ref_name || github.head_ref }}" | tr '/' '-')
echo "Comparing $V1_NAME_SUFFIX_CLEAN (old) with $V2_NAME_CLEAN (new) for Major ABI Check"
abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 "$V1_NAME_SUFFIX_CLEAN" -v2 "$V2_NAME_CLEAN" -report-path major-abi-report.html && echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
continue-on-error: true
- name: Display ABI Check Logs (Major)
if: always() && steps.prepare_files.outputs.skip_check == 'false'
run: |
echo "Displaying Major ABI compliance check logs (if any)"
find logs -type f -name "*.txt" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No .txt logs found in logs/ directory."
- name: Label PR on Major ABI Breakage
if: always() && github.event_name == 'pull_request'
run: |
source $GITHUB_WORKSPACE/major_abi_status.txt
if [ "$abi_exit_code" -ne 0 ]; then
echo "Major ABI check failed, adding 'MAJOR ABI BREAKAGE' label to PR #${{ github.event.pull_request.number }}"
gh pr edit ${{ github.event.pull_request.number }} --add-label "MAJOR ABI BREAKAGE"
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload Major ABI Report
if: always()
uses: actions/upload-artifact@v4
with:
name: major-abi-report
path: major-abi-report.html
if-no-files-found: ignore
- name: Report Major ABI Check Results
if: always()
run: |
echo "Checking Major ABI check exit code..."
source $GITHUB_WORKSPACE/major_abi_status.txt
echo "Major ABI check exit code: $abi_exit_code"
if [ "$abi_exit_code" -ne 0 ]; then
echo "::warning::⚠️ MAJOR ABI BREAKAGE FOUND ⚠️ CHECK \"Run Major ABI Compliance Check\" LOGS OR THE major-abi-report ARTIFACT FOR DETAILS."
else
echo "✅ Major ABI check succeeded."
fi
minor_abi_check:
name: Minor ABI Compliance Check
runs-on: AMD-ROCm-Internal-dev1
steps:
- name: Setup Environment
run: |
sudo rm -rf $GITHUB_WORKSPACE/* || true
sudo rm -rf $GITHUB_WORKSPACE/.[!.]* || true
sudo apt-get update -qq
sudo apt-get install -y -qq perl build-essential git universal-ctags
git clone https://github.com/lvc/abi-compliance-checker.git
cd abi-compliance-checker
sudo make install
abi-compliance-checker --version
- name: Checkout current code (new version)
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha || github.sha }}
- name: Fetch base branch for PR
if: github.event_name == 'pull_request'
run: |
echo "Fetching base branch: ${{ github.base_ref }}"
git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
git branch -a
- name: Prepare amdsmi.h files for comparison
id: prepare_files_minor
run: |
echo "Preparing amdsmi.h files for Minor check..."
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/minor_abi_status.txt
if [ -f include/amd_smi/amdsmi.h ]; then
cp include/amd_smi/amdsmi.h amdsmi_new.h
echo "Copied current amdsmi.h to amdsmi_new.h for Minor check"
else
echo "::error::New amdsmi.h (include/amd_smi/amdsmi.h) not found in current checkout for Minor check."
touch amdsmi_new.h
exit 0
fi
OLD_VERSION_REF_MINOR=""
V1_NAME_SUFFIX_MINOR=""
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
OLD_VERSION_REF_MINOR="${{ github.base_ref }}"
V1_NAME_SUFFIX_MINOR="base_${{ github.base_ref }}"
elif [[ "${{ github.event_name }}" == "push" ]]; then
if [[ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]]; then
OLD_VERSION_REF_MINOR="${{ github.event.before }}"
V1_NAME_SUFFIX_MINOR="before_$(echo ${{ github.event.before }} | cut -c1-7)"
else
echo "Push event is for a new branch (Minor check). Assuming no ABI changes as no baseline."
touch amdsmi_old.h
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
echo "skip_check_minor=true" >> $GITHUB_OUTPUT
exit 0
fi
else
echo "::warning::Unsupported event type for Minor ABI check: ${{ github.event_name }}."
touch amdsmi_old.h
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
echo "skip_check_minor=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "Fetching amdsmi.h from ref: $OLD_VERSION_REF_MINOR (as amdsmi_old.h) for Minor check"
git show $OLD_VERSION_REF_MINOR:include/amd_smi/amdsmi.h > amdsmi_old.h 2>/dev/null
if [ $? -ne 0 ] || [ ! -s amdsmi_old.h ]; then
echo "::warning::Failed to fetch 'include/amd_smi/amdsmi.h' from ref '$OLD_VERSION_REF_MINOR' or file is empty/missing for Minor check."
echo "Proceeding with an empty amdsmi_old.h for Minor check."
echo -n "" > amdsmi_old.h
if [ ! -s amdsmi_new.h ]; then
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
fi
else
echo "Successfully fetched amdsmi.h from $OLD_VERSION_REF_MINOR to amdsmi_old.h for Minor check"
fi
echo "v1_name_suffix_minor=${V1_NAME_SUFFIX_MINOR}" >> $GITHUB_OUTPUT
echo "skip_check_minor=false" >> $GITHUB_OUTPUT
- name: Run Minor ABI Compliance Check (Strict)
if: steps.prepare_files_minor.outputs.skip_check_minor == 'false'
run: |
V1_NAME_SUFFIX_CLEAN=$(echo "${{ steps.prepare_files_minor.outputs.v1_name_suffix_minor }}" | tr '/' '-')
V2_NAME_CLEAN=$(echo "${{ github.ref_name || github.head_ref }}" | tr '/' '-')
COMPARE_MSG="$V1_NAME_SUFFIX_CLEAN vs $V2_NAME_CLEAN"
echo "Comparing $COMPARE_MSG for Minor ABI Check (Strict)"
abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 "$V1_NAME_SUFFIX_CLEAN" -v2 "$V2_NAME_CLEAN" -report-path minor-abi-report.html -strict || {
ACC_EXIT_CODE=$?
echo "abi-compliance-checker -strict failed with exit code $ACC_EXIT_CODE."
echo "abi_exit_code=$ACC_EXIT_CODE" > $GITHUB_WORKSPACE/minor_abi_status.txt
}
current_abi_status=$(cat $GITHUB_WORKSPACE/minor_abi_status.txt)
current_exit_code=${current_abi_status#*=}
if [ "$current_exit_code" -eq 0 ] && [ -f minor-abi-report.html ]; then
echo "ACC strict check passed. Parsing HTML report for any changes..."
CHANGED=0
if grep -q "Added Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found added symbols"; fi
if grep -q "Removed Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found removed symbols"; fi
if grep -q "Problems with.*Data Types.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with data types"; fi
if grep -q "Problems with.*Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with symbols"; fi
if grep -q "Problems with.*Constants.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with constants"; fi
if [ "$CHANGED" -eq 1 ]; then
echo "::error::STRICT ABI CHECK FAILED: Found changes in ABI report comparing $COMPARE_MSG"
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/minor_abi_status.txt
else
echo "No strict ABI changes found in HTML report."
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
fi
elif [ ! -f minor-abi-report.html ] && [ "$current_exit_code" -eq 0 ]; then
echo "::warning::Minor ABI report (minor-abi-report.html) not found, but ACC reported success. Assuming no changes."
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
elif [ "$current_exit_code" -ne 0 ]; then
echo "ACC strict check already indicated failure (exit code $current_exit_code). HTML parsing for further changes skipped or confirmed failure."
fi
continue-on-error: true
- name: Display ABI Check Logs (Minor)
if: always() && steps.prepare_files_minor.outputs.skip_check_minor == 'false'
run: |
echo "Displaying Minor ABI compliance check logs (if any)"
find logs -type f -name "*.txt" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No .txt logs found in logs/ directory."
- name: Label PR on Minor ABI Breakage
if: always() && github.event_name == 'pull_request'
run: |
source $GITHUB_WORKSPACE/minor_abi_status.txt
if [ "$abi_exit_code" -ne 0 ]; then
echo "Minor ABI check failed, adding 'MINOR ABI BREAKAGE' label to PR #${{ github.event.pull_request.number }}"
gh pr edit ${{ github.event.pull_request.number }} --add-label "MINOR ABI BREAKAGE"
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload Minor ABI Report
if: always()
uses: actions/upload-artifact@v4
with:
name: minor-abi-report
path: minor-abi-report.html
if-no-files-found: ignore
- name: Report Minor ABI Check Results
if: always()
run: |
echo "Checking Minor ABI check exit code..."
source $GITHUB_WORKSPACE/minor_abi_status.txt
echo "Minor ABI check exit code: $abi_exit_code"
if [ "$abi_exit_code" -ne 0 ]; then
echo "::warning::⚠️ MINOR ABI CHANGES FOUND (STRICT CHECK) ⚠️ CHECK \"Run Minor ABI Compliance Check (Strict)\" LOGS OR THE minor-abi-report ARTIFACT FOR DETAILS."
else
echo "✅ Minor ABI check (Strict) succeeded or found no changes."
fi
+836
Melihat File
@@ -0,0 +1,836 @@
# AMDSMI CI: builds, installs, and tests AMDSMI inside per-distro containers.
name: AMDSMI CI
# Runs for pull requests into, and pushes to, the staging/mainline/release branches, or on manual dispatch.
on:
pull_request:
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
push:
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
workflow_dispatch:
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# Environment shared by every job in this workflow.
env:
# Keep apt/debconf from prompting during package operations.
DEBIAN_FRONTEND: noninteractive
DEBCONF_NONINTERACTIVE_SEEN: true
BUILD_TYPE: Release
ROCM_DIR: /opt/rocm
jobs:
# Debian-family job: build the .deb package and check that it installs and uninstalls.
debian-buildinstall:
name: Build
runs-on:
- self-hosted
- ${{ vars.RUNNER_TYPE }}
# A failing matrix job does not fail the workflow run.
continue-on-error: true
strategy:
max-parallel: 10
matrix:
os: [Ubuntu20, Ubuntu22, Debian10]
container:
# The image name is read from the repository variable "<os>_DOCKER_IMAGE".
image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
# Privileged container with /dev/kfd and /dev/dri passed through and the host's /lib/modules mounted.
options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules
steps:
- uses: actions/checkout@v4
# Debian10 only: point apt at archive.debian.org before any package operation.
- name: Update repositories for Debian10
if: matrix.os == 'Debian10'
run: |
set -e
echo 'Updating repositories for Debian10 (archived)'
# Replace sources.list wholesale with the two archive entries below.
cat > /etc/apt/sources.list << EOF
deb http://archive.debian.org/debian buster main
deb http://archive.debian.org/debian-security buster/updates main
EOF
# Turn off apt's Check-Valid-Until validation for these sources.
echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
apt update
- name: Build AMDSMI
  # bash is requested explicitly so the script runs with `-o pipefail`.
  # Without it, `cmd | tee log` reports tee's exit status and a failed
  # cmake/make is treated as a successful build.
  shell: bash
  run: |
    set -eo pipefail
    echo 'Building on ${{ matrix.os }}'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=3
    # Each attempt starts from an empty build tree.
    for i in $(seq 1 $RETRIES); do
      echo "Build attempt $i for ${{ matrix.os }}..."
      rm -rf "$BUILD_FOLDER"
      mkdir -p "$BUILD_FOLDER"
      cd "$BUILD_FOLDER"
      # Capture build output to parse warnings
      if cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
         make -j "$(nproc)" 2>&1 | tee make.log && \
         make package 2>&1 | tee package.log; then
        # Parse and report warnings as GitHub annotations
        echo "::group::Build Warnings"
        # grep exits 1 when no warning is present; that must not fail the step.
        { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
          echo "::warning::$line"
        done
        echo "::endgroup::"
        echo "Build successful on attempt $i"
        break
      else
        echo "Build failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES build attempts failed. Exiting."
          exit 1
        fi
        # Back off a little longer after each failed attempt.
        sleep $((2 * i))
      fi
    done
    echo "Build completed on ${{ matrix.os }}"
- name: Install AMDSMI
  run: |
    set -e
    cd "$GITHUB_WORKSPACE/build"
    # Debian10 already ran `apt update` in its repository-rewrite step.
    if [ "${{ matrix.os }}" != "Debian10" ]; then
      apt update
    fi
    RETRIES=3
    for i in $(seq 1 $RETRIES); do
      echo "Installation attempt $i for ${{ matrix.os }}..."
      if apt install -y ./amd-smi-lib*99999-local_amd64.deb; then
        echo "Installation successful on attempt $i"
        # -f keeps the step from failing if the link is already present.
        ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
        # Verify Installation
        echo 'Verifying installation:'
        amd-smi version
        python3 -m pip list | grep amd
        python3 -m pip list | grep pip
        python3 -m pip list | grep setuptools
        echo 'Completed installation on ${{ matrix.os }}'
        break
      else
        echo "Installation failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES installation attempts failed. Exiting."
          exit 1
        fi
        sleep $((2 * i))
      fi
    done
    # This step installs; the previous message wrongly said "Build completed".
    echo "Installation completed on ${{ matrix.os }}"
- name: Uninstall
  if: always()
  run: |
    set -e
    echo 'Uninstalling on ${{ matrix.os }}'
    # Package removal is best-effort; a failure here is ignored.
    if ! apt remove -y amd-smi-lib; then
      :
    fi
    rm -f /usr/local/bin/amd-smi
    # Drop the shared data directory if it is still present after removal.
    share_dir=/opt/rocm/share/amd_smi
    if test -d "$share_dir"; then
      echo '/opt/rocm/share/amd_smi exists. Removing.'
      rm -rf "$share_dir"
    fi
    echo 'Uninstall done on ${{ matrix.os }}'
# Debian-family job: rebuild, install, and run the command, gtest, Python, and example tests.
debian-test:
name: Tests
# Starts only after the Debian build/install job has finished.
needs: debian-buildinstall
runs-on:
- self-hosted
- ${{ vars.RUNNER_TYPE }}
# A failing matrix job does not fail the workflow run.
continue-on-error: true
strategy:
max-parallel: 10
matrix:
os: [Ubuntu20, Ubuntu22, Debian10]
container:
# The image name is read from the repository variable "<os>_DOCKER_IMAGE".
image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
# Privileged container with /dev/kfd and /dev/dri passed through and the host's /lib/modules mounted.
options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules
steps:
- uses: actions/checkout@v4
# Debian10 only: point apt at archive.debian.org before any package operation.
- name: Update repositories for Debian10
if: matrix.os == 'Debian10'
run: |
set -e
echo 'Updating repositories for Debian10 (archived)'
# Replace sources.list wholesale with the two archive entries below.
cat > /etc/apt/sources.list << EOF
deb http://archive.debian.org/debian buster main
deb http://archive.debian.org/debian-security buster/updates main
EOF
# Turn off apt's Check-Valid-Until validation for these sources.
echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
apt update
- name: Build and Install for Test
  run: |
    set -e
    echo 'Building for test on ${{ matrix.os }}'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=3
    # Phase 1: configure, compile, and package from a clean tree, up to RETRIES times.
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Build attempt $attempt for ${{ matrix.os }} test..."
      rm -rf $BUILD_FOLDER
      mkdir -p $BUILD_FOLDER
      cd $BUILD_FOLDER
      if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
         make -j $(nproc) && \
         make package; then
        echo "Build successful on attempt $attempt"
        break
      fi
      echo "Build failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES build attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
    # Phase 2: install the freshly built package, with the same retry policy.
    echo 'Installing for test on ${{ matrix.os }}'
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Installation attempt $attempt for test on ${{ matrix.os }}..."
      if apt install -y $BUILD_FOLDER/amd-smi-lib*99999-local_amd64.deb; then
        echo "Installation successful on attempt $attempt"
        ln -s /opt/rocm/bin/amd-smi /usr/local/bin
        echo 'Install done for test on ${{ matrix.os }}'
        break
      fi
      echo "Installation failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES installation attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
- name: AMDSMI Command Tests
  shell: bash
  run: |
    set -e
    echo "Running AMDSMI commands on ${{ matrix.os }}"
    results_dir=/tmp/test-results-${{ matrix.os }}
    mkdir -p "$results_dir"
    # Every CLI subcommand below is run once with debug logging enabled.
    commands=(
      "amd-smi version"
      "amd-smi list"
      "amd-smi static"
      "amd-smi firmware"
      "amd-smi ucode"
      "amd-smi bad-pages"
      "amd-smi metric"
      "amd-smi process"
      "amd-smi topology"
      "amd-smi monitor"
      "amd-smi dmon"
      "amd-smi xgmi"
      "amd-smi partition"
    )
    for cmd in "${commands[@]}"; do
      debug_cmd="$cmd --loglevel debug"
      # Log file is named after the command with spaces turned into underscores.
      log_file="$results_dir/${cmd// /_}.log"
      echo "Running: $debug_cmd"
      if eval "$debug_cmd" > "$log_file" 2>&1; then
        echo "$debug_cmd passed."
      else
        # Show the captured output and stop at the first failing command.
        echo "Command '$debug_cmd' failed."
        cat "$log_file"
        exit 1
      fi
    done
    echo "AMDSMI commands done on ${{ matrix.os }}"
# Upload the per-command logs as an artifact, even when a command failed.
# NOTE(review): this step runs before "Run AMDSMI, Python, and Example Tests", so logs that later
# step writes into the same directory are not part of this artifact - confirm that is intended.
- name: Upload AMDSMI Command Test Results
if: always()
uses: actions/upload-artifact@v4
with:
name: amdsmi-command-tests-${{ matrix.os }}
path: /tmp/test-results-${{ matrix.os }}
- name: Run AMDSMI, Python, and Example Tests
  shell: bash
  run: |
    set -e
    echo 'Running other tests on ${{ matrix.os }}'
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
    GTEST_SUMMARY_RE="\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]"
    # AMDSMI Tests
    echo 'Running AMDSMI tests'
    cd /opt/rocm/share/amd_smi/tests
    # Sourcing the exclude file provides BLACKLIST_ALL_ASICS for the gtest filter.
    source amdsmitst.exclude
    AMDSMI_RETRIES=3
    for attempt in $(seq 1 $AMDSMI_RETRIES); do
      echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
      if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
        echo "AMDSMI tests passed on attempt $attempt"
        echo "=============== TEST OUTPUT ==============="
        # The summary is informational: `shell: bash` runs with -e and pipefail,
        # so a grep that matches nothing must not fail a passing run.
        grep -E "$GTEST_SUMMARY_RE" "$RESULTS_DIR/amdsmi_tests.log" || true
        echo "=============================================="
        echo "AMDSMI tests done"
        break
      else
        # In the else branch $? still holds the exit status of the test binary.
        TEST_EXIT_CODE=$?
        echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
        if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
          echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
          echo "=============== TEST OUTPUT ==============="
          # `|| true` keeps an empty summary (e.g. a crash before gtest printed
          # anything) from replacing the real exit code with grep's status.
          grep -E "$GTEST_SUMMARY_RE" "$RESULTS_DIR/amdsmi_tests.log" || true
          echo "=============================================="
          echo "AMDSMI tests failed"
          exit $TEST_EXIT_CODE
        else
          echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
          sleep $((2 * attempt))
        fi
      fi
    done
    # Python Tests
    echo 'Running Python tests'
    cd /opt/rocm/share/amd_smi/tests/python_unittest
    echo "Running integration tests..."
    if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
      echo "Integration tests failed!"
      echo "=============== INTEGRATION TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/integration_test_output.txt"
      echo "======================================================="
      exit 1
    else
      echo "Integration tests passed"
    fi
    echo "Running unit tests..."
    if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
      echo "Unit tests failed!"
      echo "=============== UNIT TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/unit_test_output.txt"
      echo "================================================"
      exit 1
    else
      echo "Unit tests passed"
    fi
    echo "Python tests done"
    # Example Tests
    echo 'Running Example tests'
    cd "$GITHUB_WORKSPACE/example"
    rm -rf build
    cmake -B build -DENABLE_ESMI_LIB=OFF
    make -C build -j "$(nproc)"
    cd build
    # The examples are best-effort: a failure is logged but does not fail the step.
    ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
    ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
    echo "Example tests done"
# The five steps below print the logs captured by the test step; each runs
# regardless of earlier failures and tolerates a missing log file.
- name: AMDSMI Test Results
  if: always()
  run: |
    echo "Displaying AMDSMI test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log; then
      echo "No AMDSMI test results found for ${{ matrix.os }}"
    fi
- name: Integration Test Results
  if: always()
  run: |
    echo "Displaying Integration test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/integration_test_output.txt; then
      echo "No integration test results found for ${{ matrix.os }}"
    fi
- name: Unit Test Results
  if: always()
  run: |
    echo "Displaying Unit Test Results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/unit_test_output.txt; then
      echo "No unit test results found for ${{ matrix.os }}"
    fi
- name: Example DRM Test Results
  if: always()
  run: |
    echo "Displaying Example DRM test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log; then
      echo "No DRM example test results found for ${{ matrix.os }}"
    fi
- name: Example NoDRM Test Results
  if: always()
  run: |
    echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log; then
      echo "No NoDRM example test results found for ${{ matrix.os }}"
    fi
# RPM-family job: build the .rpm package and check that it installs and uninstalls.
rpm-buildinstall:
name: Build
runs-on:
- self-hosted
- ${{ vars.RUNNER_TYPE }}
# A failing matrix job does not fail the workflow run.
continue-on-error: true
strategy:
max-parallel: 10
matrix:
os:
- SLES
- RHEL8
- RHEL9
- RHEL10
- AzureLinux3
- AlmaLinux8
container:
# The image name is read from the repository variable "<os>_DOCKER_IMAGE".
image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
# Privileged container with /dev/kfd and /dev/dri passed through and the host's /lib/modules mounted.
options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules
steps:
- uses: actions/checkout@v4
- name: Set PkgMgr
  run: |
    set -e
    # Map the matrix OS to its package manager and export it to later steps
    # through GITHUB_ENV. An OS not listed here exports nothing.
    pkg_mgr=""
    case "${{ matrix.os }}" in
      SLES) pkg_mgr=zypper ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3) pkg_mgr=dnf ;;
    esac
    if [ -n "$pkg_mgr" ]; then
      echo "PACKAGE_MANAGER=$pkg_mgr" >> $GITHUB_ENV
    fi
# AzureLinux3 only: install the more_itertools Python package with pip before building.
# NOTE(review): presumably the image lacks this module - confirm against the image definition.
- name: Add more_itertools
if: matrix.os == 'AzureLinux3'
run: |
set -e
echo 'Installing more_itertools on ${{ matrix.os }}'
python3 -m pip install more_itertools
- name: Build AMDSMI(RHEL10 & AlmaLinux8)
  if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
  run: |
    set -e
    echo 'Building on ${{ matrix.os }} with retries and QA_RPATHS'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=5
    # QA_RPATHS is a bitmask: 0x0010 (empty RPATHs) and 0x0002 (invalid RPATHs) are ignored.
    export QA_RPATHS=$((0x0010 | 0x0002))
    # Configure, compile, and package from a clean tree, up to RETRIES times.
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Build attempt $attempt for ${{ matrix.os }} ..."
      rm -rf $BUILD_FOLDER
      mkdir -p $BUILD_FOLDER
      cd $BUILD_FOLDER
      if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
         make -j $(nproc) && \
         make package; then
        echo "Build successful on attempt $attempt"
        break
      fi
      echo "Build failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES build attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
    echo "Build completed on ${{ matrix.os }}"
- name: Build AMDSMI
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  # bash is requested explicitly so the script runs with `-o pipefail`.
  # Without it, `cmd | tee log` reports tee's exit status and a failed
  # cmake/make is treated as a successful build.
  shell: bash
  run: |
    set -eo pipefail
    echo 'Building on ${{ matrix.os }}'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=3
    # Each attempt starts from an empty build tree.
    for i in $(seq 1 $RETRIES); do
      echo "Build attempt $i for ${{ matrix.os }}..."
      rm -rf "$BUILD_FOLDER"
      mkdir -p "$BUILD_FOLDER"
      cd "$BUILD_FOLDER"
      # Capture build output to parse warnings
      if cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
         make -j "$(nproc)" 2>&1 | tee make.log && \
         make package 2>&1 | tee package.log; then
        # Parse and report warnings as GitHub annotations
        echo "::group::Build Warnings"
        # grep exits 1 when no warning is present; that must not fail the step.
        { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
          echo "::warning::$line"
        done
        echo "::endgroup::"
        echo "Build successful on attempt $i"
        break
      else
        echo "Build failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES build attempts failed. Exiting."
          exit 1
        fi
        # Back off a little longer after each failed attempt.
        sleep $((2 * i))
      fi
    done
    echo "Build completed on ${{ matrix.os }}"
- name: Install AMDSMI(RHEL10 & AlmaLinux8)
  if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
  run: |
    set -e
    cd "$GITHUB_WORKSPACE/build"
    dnf install python3-setuptools python3-wheel -y
    RETRIES=3
    for i in $(seq 1 $RETRIES); do
      # Log the actual matrix OS: this step also runs for AlmaLinux8.
      echo "${{ matrix.os }}: Installation attempt $i..."
      # Install only the local package: all repos are disabled and the run is capped at 10 minutes.
      if timeout 10m dnf install -y --skip-broken --disablerepo=* ./amd-smi-lib-*99999-local*.rpm; then
        echo "Installation successful on attempt $i"
        # -f keeps the step from failing if the link is already present.
        ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
        echo 'Verifying installation:'
        amd-smi version
        python3 -m pip list | grep amd
        python3 -m pip list | grep pip
        python3 -m pip list | grep setuptools
        echo 'Completed installation on ${{ matrix.os }}'
        break
      else
        echo "Installation failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES installation attempts failed. Exiting."
          exit 1
        fi
        sleep $((2 * i))
      fi
    done
- name: Install AMDSMI
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  run: |
    set -e
    cd "$GITHUB_WORKSPACE/build"
    # The case word is quoted so an empty PACKAGE_MANAGER is not a shell syntax
    # error, and the default branch fails loudly instead of silently skipping
    # the install.
    case "${{ env.PACKAGE_MANAGER }}" in
      zypper)
        timeout 10m zypper --no-refresh --no-gpg-checks install -y ./amd-smi-lib-*99999-local*.rpm
        ;;
      dnf)
        dnf install python3-setuptools python3-wheel -y
        RETRIES=3
        for i in $(seq 1 $RETRIES); do
          echo "Attempt $i: Installing AMDSMI package..."
          if timeout 10m dnf install -y --skip-broken --disablerepo=* ./amd-smi-lib-*99999-local*.rpm; then
            echo "AMDSMI package installed successfully."
            break
          else
            echo "Installation failed on attempt $i. Retrying..."
            if [ "$i" -eq "$RETRIES" ]; then
              echo "All $RETRIES attempts failed. Exiting."
              exit 1
            fi
            sleep 10
          fi
        done
        ;;
      *)
        echo "Unsupported or unset PACKAGE_MANAGER: '${{ env.PACKAGE_MANAGER }}'"
        exit 1
        ;;
    esac
    # -f keeps the step from failing if the link is already present.
    ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
    # Verify Installation
    echo 'Verifying installation:'
    amd-smi version
    python3 -m pip list | grep amd
    python3 -m pip list | grep pip
    python3 -m pip list | grep setuptools
    echo 'Completed installation on ${{ matrix.os }}'
- name: Uninstall
  if: always()
  run: |
    set -e
    echo 'Uninstalling on ${{ matrix.os }}'
    # Pick the package tool for this OS; removal itself is best-effort.
    remover=""
    case ${{ matrix.os }} in
      SLES) remover=zypper ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3) remover=dnf ;;
    esac
    if [ -n "$remover" ]; then
      "$remover" remove -y amd-smi-lib || true
    fi
    rm -f /usr/local/bin/amd-smi
    # Drop the shared data directory if it is still present after removal.
    share_dir=/opt/rocm/share/amd_smi
    if test -d "$share_dir"; then
      echo '/opt/rocm/share/amd_smi exists. Removing.'
      rm -rf "$share_dir"
    fi
    echo 'Uninstall done on ${{ matrix.os }}'
# RPM-family job: rebuild, install, and run the command, gtest, Python, and example tests.
rpm-test:
name: Tests
# NOTE(review): the "comment about driver reloads" referenced below is not present in this workflow - confirm where it lives.
needs: [rpm-buildinstall, debian-test] # debian-test is needed to complete before rpm-test starts (see comment about driver reloads)
runs-on:
- self-hosted
- ${{ vars.RUNNER_TYPE }}
# A failing matrix job does not fail the workflow run.
continue-on-error: true
strategy:
max-parallel: 10
matrix:
os:
- SLES
- RHEL8
- RHEL9
- RHEL10
- AzureLinux3
- AlmaLinux8
container:
# The image name is read from the repository variable "<os>_DOCKER_IMAGE".
image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
# Privileged container with /dev/kfd and /dev/dri passed through and the host's /lib/modules mounted.
options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules
steps:
- uses: actions/checkout@v4
- name: Set PkgMgr
  run: |
    set -e
    # Map the matrix OS to its package manager and export it to later steps
    # through GITHUB_ENV. An OS not listed here exports nothing.
    pkg_mgr=""
    case "${{ matrix.os }}" in
      SLES) pkg_mgr=zypper ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3) pkg_mgr=dnf ;;
    esac
    if [ -n "$pkg_mgr" ]; then
      echo "PACKAGE_MANAGER=$pkg_mgr" >> $GITHUB_ENV
    fi
# AzureLinux3 only: install the more_itertools Python package with pip before building.
# NOTE(review): presumably the image lacks this module - confirm against the image definition.
- name: Add more_itertools
if: matrix.os == 'AzureLinux3'
run: |
set -e
echo 'Installing more_itertools on ${{ matrix.os }}'
python3 -m pip install more_itertools
# RHEL10/AlmaLinux8 only: build the package with relaxed RPATH checks, then install it for the test steps.
- name: Build and Install for Tests (RHEL10 & AlmaLinux8)
if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
run: |
set -e
echo 'Building for test on RHEL10/AlmaLinux8 with retries and QA_RPATHS'
BUILD_FOLDER=$GITHUB_WORKSPACE/build
# RETRIES bounds both the build loop and the install loop below.
RETRIES=5
# Set QA_RPATHS to ignore empty (0x0010) and invalid (0x0002) RPATHs
export QA_RPATHS=$((0x0010 | 0x0002))
# Build phase: every attempt starts from an empty build tree.
for i in $(seq 1 $RETRIES); do
echo "Build attempt $i for RHEL10/AlmaLinux8 test..."
rm -rf $BUILD_FOLDER
mkdir -p $BUILD_FOLDER
cd $BUILD_FOLDER
if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
make -j $(nproc) && \
make package; then
echo "Build successful on attempt $i"
break
else
echo "Build failed on attempt $i"
if [ $i -eq $RETRIES ]; then
echo "All $RETRIES build attempts failed. Exiting."
exit 1
fi
# Wait 2*i seconds before the next attempt.
sleep $((2 * i))
fi
done
echo 'Installing for test on RHEL10/AlmaLinux8'
dnf install python3-setuptools python3-wheel -y
# Install phase: only the locally built rpm is installed (all repos disabled, 10 minute cap per attempt).
for i in $(seq 1 $RETRIES); do
echo "RHEL10/AlmaLinux8: Installation attempt $i for test..."
if timeout 10m dnf install -y --skip-broken --disablerepo=* $BUILD_FOLDER/amd-smi-lib-*99999-local*.rpm; then
echo "Installation successful on attempt $i"
# Link the installed CLI into /usr/local/bin.
ln -s /opt/rocm/bin/amd-smi /usr/local/bin
echo 'Install done for test on RHEL10/AlmaLinux8'
break
else
echo "Installation failed on attempt $i"
if [ $i -eq $RETRIES ]; then
echo "All $RETRIES installation attempts failed. Exiting."
exit 1
fi
sleep $((2 * i))
fi
done
- name: Build and Install for Tests
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  run: |
    set -e
    echo 'Building for test on ${{ matrix.os }}'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    # Single build attempt from an empty tree; set -e aborts on the first failure.
    rm -rf "$BUILD_FOLDER"
    mkdir -p "$BUILD_FOLDER"
    cd "$BUILD_FOLDER"
    cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON
    make -j "$(nproc)"
    make package
    echo 'Installing for test on ${{ matrix.os }}'
    # The case word is quoted so an empty PACKAGE_MANAGER is not a shell syntax
    # error, and the default branch fails loudly instead of silently skipping
    # the install.
    case "${{ env.PACKAGE_MANAGER }}" in
      zypper)
        timeout 10m zypper --no-refresh --no-gpg-checks install -y $BUILD_FOLDER/amd-smi-lib-*99999-local*.rpm
        ;;
      dnf)
        dnf install python3-setuptools python3-wheel -y
        RETRIES=3
        for i in $(seq 1 $RETRIES); do
          echo "Attempt $i: Installing..."
          if timeout 10m dnf install -y --skip-broken --disablerepo=* $BUILD_FOLDER/amd-smi-lib-*99999-local*.rpm; then
            echo "Install successful."
            break
          else
            echo "Attempt $i failed. Retrying..."
            if [ "$i" -eq "$RETRIES" ]; then
              echo "All attempts failed."
              exit 1
            fi
            sleep 10
          fi
        done
        ;;
      *)
        echo "Unsupported or unset PACKAGE_MANAGER: '${{ env.PACKAGE_MANAGER }}'"
        exit 1
        ;;
    esac
    # -f keeps the step from failing if the link is already present.
    ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
    echo 'Install done for test on ${{ matrix.os }}'
- name: AMDSMI Command Tests
  shell: bash
  run: |
    set -e
    echo "Running AMDSMI commands on ${{ matrix.os }}"
    results_dir=/tmp/test-results-${{ matrix.os }}
    mkdir -p "$results_dir"
    # Every CLI subcommand below is run once with debug logging enabled.
    commands=(
      "amd-smi version"
      "amd-smi list"
      "amd-smi static"
      "amd-smi firmware"
      "amd-smi ucode"
      "amd-smi bad-pages"
      "amd-smi metric"
      "amd-smi process"
      "amd-smi topology"
      "amd-smi monitor"
      "amd-smi dmon"
      "amd-smi xgmi"
      "amd-smi partition"
    )
    for cmd in "${commands[@]}"; do
      debug_cmd="$cmd --loglevel debug"
      # Log file is named after the command with spaces turned into underscores.
      log_file="$results_dir/${cmd// /_}.log"
      echo "Running: $debug_cmd"
      if eval "$debug_cmd" > "$log_file" 2>&1; then
        echo "$debug_cmd passed."
      else
        # Show the captured output and stop at the first failing command.
        echo "Command '$debug_cmd' failed."
        cat "$log_file"
        exit 1
      fi
    done
    echo "AMDSMI commands done on ${{ matrix.os }}"
# Upload the per-command logs as an artifact, even when a command failed.
# NOTE(review): this step runs before "Run AMDSMI, Python, and Example Tests", so logs that later
# step writes into the same directory are not part of this artifact - confirm that is intended.
- name: Upload AMDSMI Command Test Results
if: always()
uses: actions/upload-artifact@v4
with:
name: amdsmi-command-tests-${{ matrix.os }}
path: /tmp/test-results-${{ matrix.os }}
- name: Run AMDSMI, Python, and Example Tests
  shell: bash
  run: |
    set -e
    echo 'Running other tests on ${{ matrix.os }}'
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
    GTEST_SUMMARY_RE="\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]"
    # AMDSMI Tests
    echo 'Running AMDSMI tests'
    cd /opt/rocm/share/amd_smi/tests
    # Sourcing the exclude file provides BLACKLIST_ALL_ASICS for the gtest filter.
    source amdsmitst.exclude
    AMDSMI_RETRIES=3
    for attempt in $(seq 1 $AMDSMI_RETRIES); do
      echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
      if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
        echo "AMDSMI tests passed on attempt $attempt"
        echo "=============== TEST OUTPUT ==============="
        # The summary is informational: `shell: bash` runs with -e and pipefail,
        # so a grep that matches nothing must not fail a passing run.
        grep -E "$GTEST_SUMMARY_RE" "$RESULTS_DIR/amdsmi_tests.log" || true
        echo "=============================================="
        echo "AMDSMI tests done"
        break
      else
        # In the else branch $? still holds the exit status of the test binary.
        TEST_EXIT_CODE=$?
        echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
        if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
          echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
          echo "=============== TEST OUTPUT ==============="
          # `|| true` keeps an empty summary (e.g. a crash before gtest printed
          # anything) from replacing the real exit code with grep's status.
          grep -E "$GTEST_SUMMARY_RE" "$RESULTS_DIR/amdsmi_tests.log" || true
          echo "=============================================="
          echo "AMDSMI tests failed"
          exit $TEST_EXIT_CODE
        else
          echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
          sleep $((2 * attempt))
        fi
      fi
    done
    # Python Tests
    echo 'Running Python tests'
    cd /opt/rocm/share/amd_smi/tests/python_unittest
    echo "Running integration tests..."
    if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
      echo "Integration tests failed!"
      echo "=============== INTEGRATION TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/integration_test_output.txt"
      echo "======================================================="
      exit 1
    else
      echo "Integration tests passed"
    fi
    echo "Running unit tests..."
    if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
      echo "Unit tests failed!"
      echo "=============== UNIT TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/unit_test_output.txt"
      echo "================================================"
      exit 1
    else
      echo "Unit tests passed"
    fi
    echo "Python tests done"
    # Example Tests
    echo 'Running Example tests'
    cd "$GITHUB_WORKSPACE/example"
    rm -rf build
    cmake -B build -DENABLE_ESMI_LIB=OFF
    make -C build -j "$(nproc)"
    cd build
    # The examples are best-effort: a failure is logged but does not fail the step.
    ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
    ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
    echo "Example tests done"
# The five steps below print the logs captured by the test step; each runs
# regardless of earlier failures and tolerates a missing log file.
- name: AMDSMI Test Results
  if: always()
  run: |
    echo "Displaying AMDSMI test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amdsmi_tests.log; then
      echo "No AMDSMI test results found for ${{ matrix.os }}"
    fi
- name: Integration Test Results
  if: always()
  run: |
    echo "Displaying Integration test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/integration_test_output.txt; then
      echo "No integration test results found for ${{ matrix.os }}"
    fi
- name: Unit Test Results
  if: always()
  run: |
    echo "Displaying Unit Test Results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/unit_test_output.txt; then
      echo "No unit test results found for ${{ matrix.os }}"
    fi
- name: Example DRM Test Results
  if: always()
  run: |
    echo "Displaying Example DRM test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log; then
      echo "No DRM example test results found for ${{ matrix.os }}"
    fi
- name: Example NoDRM Test Results
  if: always()
  run: |
    echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
    if ! cat /tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log; then
      echo "No NoDRM example test results found for ${{ matrix.os }}"
    fi
+319
Melihat File
@@ -0,0 +1,319 @@
# Labels pull requests from their branch names and from the outcome of the "ABI Compliance Check" workflow.
name: Auto Label PRs
on:
# Direct PR activity drives the branch-based labels and the amdsmi.h re-check.
pull_request:
types: [opened, synchronize, reopened, closed]
# Completion of the ABI workflow drives the ABI breakage labels.
workflow_run:
workflows: ["ABI Compliance Check"]
types: [completed]
jobs:
apply-labels:
runs-on: AMD-ROCm-Internal-dev1
# Write access to pull requests for labels and comments; read access to Actions data and contents.
permissions:
pull-requests: write
actions: read
contents: read
steps:
- name: Add/Remove labels based on branch names and ABI results
uses: actions/github-script@v6
with:
# The inline script below is JavaScript run by actions/github-script.
script: |
const pr = context.payload.pull_request;
let prNumber, headSha, baseBranch, headBranch;
// Handle different event types
if (context.eventName === 'pull_request') {
prNumber = pr.number;
headSha = pr.head.sha;
baseBranch = pr.base.ref;
headBranch = pr.head.ref;
} else if (context.eventName === 'workflow_run') {
// Find the associated PR for workflow_run events
const workflowRun = context.payload.workflow_run;
console.log(`Workflow run completed: ${workflowRun.name} with conclusion: ${workflowRun.conclusion}`);
if (workflowRun.event !== 'pull_request') {
console.log('Workflow run was not triggered by a pull request, skipping');
return;
}
const prs = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'open',
head: `${context.repo.owner}:${workflowRun.head_branch}`
});
const associatedPr = prs.data.find(p => p.head.sha === workflowRun.head_sha);
if (!associatedPr) {
console.log('No associated PR found for this workflow run');
return;
}
prNumber = associatedPr.number;
headSha = associatedPr.head.sha;
baseBranch = associatedPr.base.ref;
headBranch = associatedPr.head.ref;
} else {
console.log('Unsupported event type');
return;
}
let labelsApplied = false;
// Debug information
console.log(`Processing PR #${prNumber}: Head: ${headBranch}, Base: ${baseBranch}`);
// Get current PR data to check existing labels
const { data: currentPr } = await github.rest.pulls.get({
  owner: context.repo.owner,
  repo: context.repo.repo,
  pull_number: prNumber
});
const existingLabels = currentPr.labels.map(label => label.name);

// Adds `label` to the PR unless it is already present. API errors are logged
// and swallowed. `announceExisting` controls whether an already-present label
// is logged.
const addLabelIfMissing = async (label, announceExisting) => {
  try {
    if (existingLabels.includes(label)) {
      if (announceExisting) {
        console.log(`Label "${label}" already exists on PR #${prNumber}`);
      }
      return;
    }
    await github.rest.issues.addLabels({
      owner: context.repo.owner,
      repo: context.repo.repo,
      issue_number: prNumber,
      labels: [label]
    });
    console.log(`Added label "${label}" to PR #${prNumber}`);
    labelsApplied = true;
  } catch (error) {
    console.error(`Error adding label "${label}": ${error.message}`);
  }
};

// Branch-based labels are only evaluated for pull_request events.
if (context.eventName === 'pull_request') {
  // Condition 1: PR targeting amd-mainline
  if (baseBranch === 'amd-mainline') {
    await addLabelIfMissing('Merge amd-mainline', false);
  }
  // Condition 2: Cherry-pick based on head branch name or release target
  if (/cherry.*pick/i.test(headBranch) || baseBranch.startsWith('release/')) {
    await addLabelIfMissing('cherry-pick', true);
  }
}
// ABI BREAKAGE LOGIC: Check on both workflow_run AND pull_request events
let shouldCheckABI = false;
let hasMajorAbiBreakage = false;
let hasMinorAbiBreakage = false;
if (context.eventName === 'workflow_run') {
// Handle workflow_run events (existing logic)
const workflowRun = context.payload.workflow_run;
if (workflowRun.name === 'ABI Compliance Check') {
shouldCheckABI = true;
console.log(`ABI Compliance Check completed with conclusion: ${workflowRun.conclusion}`);
try {
const { data: jobs } = await github.rest.actions.listJobsForWorkflowRun({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: workflowRun.id
});
// Check job conclusions for ABI breakage
for (const job of jobs.jobs) {
console.log(`Job: ${job.name}, Conclusion: ${job.conclusion}`);
if (job.name.includes('Major ABI') && job.conclusion === 'failure') {
hasMajorAbiBreakage = true;
console.log('Major ABI breakage detected from job failure');
}
if (job.name.includes('Minor ABI') && job.conclusion === 'failure') {
hasMinorAbiBreakage = true;
console.log('Minor ABI breakage detected from job failure');
}
}
// If workflow succeeded, no ABI breakage
if (workflowRun.conclusion === 'success') {
console.log('ABI Compliance Check succeeded - no ABI breakage');
hasMajorAbiBreakage = false;
hasMinorAbiBreakage = false;
}
} catch (error) {
console.log(`Could not fetch job details: ${error.message}`);
return;
}
}
} else if (context.eventName === 'pull_request') {
// NEW: Check if amdsmi.h has been reverted on PR events
const hasAbiLabels = existingLabels.includes('MAJOR ABI BREAKAGE') || existingLabels.includes('MINOR ABI BREAKAGE');
if (hasAbiLabels) {
console.log('PR has ABI labels, checking if amdsmi.h changes were reverted...');
shouldCheckABI = true;
try {
// Get the diff for amdsmi.h between base and head
const { data: comparison } = await github.rest.repos.compareCommits({
owner: context.repo.owner,
repo: context.repo.repo,
base: currentPr.base.sha,
head: currentPr.head.sha
});
// Check if amdsmi.h has any changes
const amdsmiFile = comparison.files?.find(file => file.filename === 'include/amd_smi/amdsmi.h');
if (!amdsmiFile) {
console.log('No changes to amdsmi.h found in this PR - removing ABI labels');
hasMajorAbiBreakage = false;
hasMinorAbiBreakage = false;
} else if (amdsmiFile.changes === 0) {
console.log('amdsmi.h file exists but has no changes - removing ABI labels');
hasMajorAbiBreakage = false;
hasMinorAbiBreakage = false;
} else {
console.log(`amdsmi.h has ${amdsmiFile.changes} changes - keeping existing ABI labels`);
// Keep existing labels since we can't determine ABI status without running the check
hasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
hasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
}
} catch (error) {
console.log(`Error checking file changes: ${error.message}`);
// If we can't check, preserve existing labels
hasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
hasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
}
}
}
// Reconcile the ABI labels with the computed state, then tell the PR about
// any transition. Skipped entirely when no ABI state was determined.
if (shouldCheckABI) {
  const wasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
  const wasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
  const desiredLabels = [
    ['MAJOR ABI BREAKAGE', hasMajorAbiBreakage],
    ['MINOR ABI BREAKAGE', hasMinorAbiBreakage]
  ];
  for (const [labelName, shouldHaveLabel] of desiredLabels) {
    const hasLabel = existingLabels.includes(labelName);
    // Nothing to do when the label is already in the desired state.
    if (shouldHaveLabel === hasLabel) {
      continue;
    }
    if (shouldHaveLabel) {
      try {
        await github.rest.issues.addLabels({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: prNumber,
          labels: [labelName]
        });
        console.log(`✅ Added label "${labelName}" to PR #${prNumber}`);
        labelsApplied = true;
      } catch (error) {
        console.error(`❌ Error adding label "${labelName}": ${error.message}`);
      }
    } else {
      try {
        await github.rest.issues.removeLabel({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: prNumber,
          name: labelName
        });
        console.log(`🗑️ Removed label "${labelName}" from PR #${prNumber}`);
        labelsApplied = true;
      } catch (error) {
        console.error(`❌ Error removing label "${labelName}": ${error.message}`);
      }
    }
  }

  // Collect the comments to post, in order, then post them one after another.
  const majorResolved = !hasMajorAbiBreakage && wasMajorAbiBreakage;
  const minorResolved = !hasMinorAbiBreakage && wasMinorAbiBreakage;
  const notices = [];
  if (context.eventName === 'workflow_run') {
    // Results of an actual ABI check: report both new breakages and resolutions.
    if (hasMajorAbiBreakage && !wasMajorAbiBreakage) {
      notices.push('⚠️ **MAJOR ABI BREAKAGE detected** in the latest ABI compliance check. Please review the ABI compliance report and fix any breaking changes.');
    }
    if (hasMinorAbiBreakage && !wasMinorAbiBreakage) {
      notices.push('⚠️ **MINOR ABI BREAKAGE detected** in the latest ABI compliance check. Please review the ABI compliance report for details.');
    }
    if (majorResolved) {
      notices.push('✅ **MAJOR ABI BREAKAGE resolved** - ABI compliance check is now passing!');
    }
    if (minorResolved) {
      notices.push('✅ **MINOR ABI BREAKAGE resolved** - ABI compliance check is now passing!');
    }
  } else if (context.eventName === 'pull_request') {
    // On PR events only label removal due to a reverted header is reported.
    if (majorResolved) {
      notices.push('✅ **MAJOR ABI BREAKAGE resolved** - `amdsmi.h` changes have been reverted.');
    }
    if (minorResolved) {
      notices.push('✅ **MINOR ABI BREAKAGE resolved** - `amdsmi.h` changes have been reverted.');
    }
  }
  for (const body of notices) {
    await github.rest.issues.createComment({
      owner: context.repo.owner,
      repo: context.repo.repo,
      issue_number: prNumber,
      body
    });
  }
}
if (!labelsApplied && context.eventName === 'pull_request') {
  console.log(`PR #${prNumber} did not match criteria for automatic labeling by this workflow.`);
}
+99
Melihat File
@@ -0,0 +1,99 @@
# caution: most of this file was written using Claude 3.7 Sonnet
name: CMake Format Check
on:
push:
branches: [ amd-staging ]
paths:
- '**/*.cmake'
- '**/CMakeLists.txt'
- '**/*.cmake.in'
pull_request:
branches: [ amd-staging ]
paths:
- '**/*.cmake'
- '**/CMakeLists.txt'
- '**/*.cmake.in'
workflow_dispatch: # Allows manual triggering
defaults:
run:
shell: bash
jobs:
check-cmake-format:
name: Check CMake files formatting
runs-on: self-hosted
container: catthehacker/ubuntu:act-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0 # Full history for better diff context
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
cache: 'pip'
- name: Install cmake-format
run: |
python -m pip install --upgrade pip
pip install cmake-format==0.6.13
- name: Check CMake formatting
  id: check-format
  run: |
    # Collect the candidate files NUL-delimited into an array so that paths
    # containing whitespace are not split into several bogus entries, and so
    # that an empty result is counted as 0 files.
    echo "::group::Finding CMake files"
    files=()
    while IFS= read -r -d '' file; do
      files+=("$file")
    done < <(find . -type f \( -name "CMakeLists.txt" -o -name "*.cmake" -o -name "*.cmake.in" \) \
      -not -path "*/esmi_ib_library/*" \
      -not -path "*/\.*" \
      -not -path "*/build/*" \
      -print0)
    echo "Found ${#files[@]} CMake files to check"
    echo "::endgroup::"

    # Files that cmake-format reports as not correctly formatted
    failed_files=()

    # Check every file; keep going after a failure so all offenders are reported
    for file in "${files[@]}"; do
      echo "Checking $file..."
      if ! cmake-format --check "$file"; then
        failed_files+=("$file")
        echo "::error file=$file::File needs formatting"
      fi
    done

    # Generate report and exit with error if any files failed
    if [ ${#failed_files[@]} -ne 0 ]; then
      echo "Failed files: ${failed_files[*]}"
      # Exported space-separated for later steps of this job
      echo "FAILED_FILES=${failed_files[*]}" >> "$GITHUB_ENV"
      exit 1
    else
      echo "All CMake files are formatted correctly!"
    fi
- name: Generate diff for failed files
if: failure() && env.FAILED_FILES != ''
run: |
echo "## CMake Format Check Failed" >> $GITHUB_STEP_SUMMARY
echo "The following files need formatting:" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
for file in ${FAILED_FILES}; do
echo "### $file" >> $GITHUB_STEP_SUMMARY
done
cat << 'EOF' >> $GITHUB_STEP_SUMMARY
### How to fix
Run this command locally to fix formatting issues:
```bash
# Install cmake-format
pip install cmake-format==0.6.13
# Format files
cmake-format -i <file>
```
EOF
+92
Melihat File
@@ -0,0 +1,92 @@
name: "CodeQL Advanced"
on:
pull_request:
branches:
- amd-staging
push:
branches:
- amd-staging
schedule:
- cron: '34 18 * * 5'
jobs:
analyze:
name: Analyze (${{ matrix.language }})
# Runner size impacts CodeQL analysis time. To learn more, please see:
# - https://gh.io/recommended-hardware-resources-for-running-codeql
# - https://gh.io/supported-runners-and-hardware-resources
# - https://gh.io/using-larger-runners (GitHub.com only)
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
runs-on: ${{ 'ubuntu-latest' }}
permissions:
# required for all workflows
security-events: write
# required to fetch internal or private CodeQL packs
packages: read
# only required for workflows in private repositories
actions: read
contents: read
strategy:
fail-fast: false
matrix:
include:
- language: c-cpp
build-mode: manual
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12.6'
- name: Install CMake
run: python3 -m pip install cmake
- name: Install Virtualenv
run: python3 -m pip install virtualenv
- name: Install g++
  run: |
    # Refresh the package index first; installing against a stale index can
    # fail when the mirrors have dropped the package versions it lists.
    sudo apt-get update
    sudo apt-get install -y g++
- name: Install libdrm
run: sudo apt-get install -y libdrm-dev
- name: Install DOxygen
run: sudo apt-get install -y doxygen
- name: Install LaTeX
run: sudo apt-get install -y texlive
- name: Clean old ROCm directories
run: |
sudo rm -rf /opt/rocm
sudo rm -rf /opt/rocm-*
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
queries: security-extended
- name: Create build directory
run: mkdir -p build
- name: Build AMD SMI Library
run: |
cd build
cmake ..
make -j $(nproc)
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"
+83
Melihat File
@@ -0,0 +1,83 @@
name: Generate Documentation
on:
pull_request:
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
push:
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
workflow_dispatch:
permissions:
contents: read
env:
DEBIAN_FRONTEND: noninteractive
DEBCONF_NONINTERACTIVE_SEEN: true
BUILD_TYPE: Release
jobs:
generate-docs:
name: Generate Documentation
runs-on: AMD-ROCm-Internal-dev1
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Get branch name for artifact naming
  id: get_branch_info
  # The ref names are handed to the script through environment variables
  # rather than being expanded inline: a pull request's head branch name is
  # chosen by the contributor, and inline expression expansion would paste it
  # verbatim into the shell script (command injection on the runner).
  env:
    EVENT_NAME: ${{ github.event_name }}
    HEAD_REF: ${{ github.head_ref }}
    REF_NAME: ${{ github.ref_name }}
  run: |
    BRANCH_NAME=""
    if [[ "$EVENT_NAME" == "pull_request" ]]; then
      BRANCH_NAME="$HEAD_REF"
    else
      BRANCH_NAME="$REF_NAME"
    fi

    # Turn '/' into '-', drop every character outside [a-zA-Z0-9._-], then
    # trim leading and trailing dashes.
    SANITIZED_NAME=$(echo "$BRANCH_NAME" | sed -e 's|/|-|g' -e 's|[^a-zA-Z0-9._-]||g' -e 's|^-*||' -e 's|-*$||')

    # Fall back to a timestamp-based name when nothing survives sanitizing
    if [[ -z "$SANITIZED_NAME" ]]; then
      SANITIZED_NAME="docs-$(date +%s)"
    fi

    echo "sanitized_name=${SANITIZED_NAME}" >> "$GITHUB_OUTPUT"
- name: Set up Python
  # v5 matches the setup-python version used by the other workflows here
  uses: actions/setup-python@v5
  with:
    python-version: '3.12'
- name: Install System Dependencies
run: |
sudo apt-get update
sudo apt-get install -y doxygen graphviz
- name: Set Up Python Environment
run: |
python3 -m pip install --upgrade pip
python3 -m pip install -r docs/sphinx/requirements.txt
- name: Build Documentation
run: |
if [ ! -e "docs/.git" ]; then
if [ -d ".git" ]; then
ln -s ../.git docs/.git
fi
fi
cd docs
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
- name: Upload Documentation
uses: actions/upload-artifact@v4
with:
name: documentation-${{ steps.get_branch_info.outputs.sanitized_name }}
path: docs/_build/html/
- name: Generate Job Summary
run: |
cat >> $GITHUB_STEP_SUMMARY << 'EOF'
# 📚 Documentation Generated Successfully!
## 🚀 Quick Start
1. **📥 Download** the artifact `documentation-${{ steps.get_branch_info.outputs.sanitized_name }}`
2. **📂 Extract** the ZIP file
3. **🖱️ Double-click** `index.html`
4. **✅ Done!** Documentation opens with full formatting in your browser
EOF
@@ -0,0 +1,83 @@
name: GitHub to Gerrit Mirror
run-name: "Mirror to Gerrit: ${{ github.event.ref || inputs.branch }} ${{ github.event.after }}"
on:
workflow_dispatch:
inputs:
branch:
description: 'Branch to mirror (amd-staging or amd-mainline)'
required: true
default: 'amd-staging'
type: choice
options:
- amd-staging
- amd-mainline
pull_request:
branches:
- amd-staging
- amd-mainline
types: [closed]
env:
GERRIT_SERVER: "gerrit-git.amd.com"
GERRIT_PROJECT: "SYS-MGMT/ec/amd-smi"
GERRIT_USER: "z1_runner"
GERRIT_PORT: "29418"
jobs:
Setup:
if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
runs-on: banff-sc-cx43-29
steps:
- name: Fix workspace permissions
run: |
sudo chown -R $(id -u):$(id -g) ${{ github.workspace }}
sudo chmod -R u+rwX ${{ github.workspace }}
- name: Check out repository code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Create SSH key
  # The key is passed through the environment so it is not pasted into the
  # generated script text.
  env:
    SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
  run: |
    mkdir -p ~/.ssh
    chmod 700 ~/.ssh
    touch ~/.ssh/known_hosts
    # Create the key file with restrictive permissions before writing to it
    touch ~/.ssh/id_rsa
    chmod 600 ~/.ssh/id_rsa
    # Always end the key with a newline; OpenSSH can reject a private key
    # file whose last line is not newline-terminated.
    printf "%s\n" "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa
    ssh-keyscan -p ${{ env.GERRIT_PORT }} ${{ env.GERRIT_SERVER }} >> ~/.ssh/known_hosts
- name: Debug SSH setup
run: |
ls -la ~/.ssh
ssh -p ${{ env.GERRIT_PORT }} ${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }} || true
- name: Set Gerrit remote
run: |
cd ${{ github.workspace }}
if git remote | grep -q "gerrit"
then
git remote set-url gerrit ssh://${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }}:${{ env.GERRIT_PORT }}/${{ env.GERRIT_PROJECT }}
else
git remote add gerrit ssh://${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }}:${{ env.GERRIT_PORT }}/${{ env.GERRIT_PROJECT }}
fi
- name: Set committer identity for Gerrit
run: |
git config user.name "z1_runner"
git config user.email "z1_runner@amd.com"
- name: Fetch selected branch
run: |
BRANCH="${{ github.event.pull_request.base.ref || inputs.branch }}"
git fetch origin ${BRANCH}:refs/remotes/origin/${BRANCH}
git checkout ${BRANCH}
- name: Mirror selected branch to Gerrit
run: |
BRANCH="${{ github.event.pull_request.base.ref || inputs.branch }}"
git push gerrit refs/heads/${BRANCH}:refs/heads/${BRANCH}
+15
Melihat File
@@ -0,0 +1,15 @@
name: Rocm Validation Suite KWS
on:
push:
branches: [amd-staging, amd-mainline]
pull_request:
types: [opened, synchronize, reopened]
workflow_dispatch:
jobs:
kws:
if: ${{ github.event_name == 'pull_request' }}
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
secrets: inherit
with:
pr_number: ${{github.event.pull_request.number}}
base_branch: ${{github.base_ref}}
+25
Melihat File
@@ -0,0 +1,25 @@
name: ROCm CI Caller
on:
pull_request:
branches: [amd-staging, release/rocm-rel-*, amd-mainline]
types: [opened, reopened, synchronize]
push:
branches: [amd-mainline]
workflow_dispatch:
issue_comment:
types: [created]
jobs:
call-workflow:
  # Runs for every non-comment trigger. For comment triggers it runs only when
  # the comment is on a pull request and starts with '!verify' (that prefix
  # also covers '!verify release' and '!verify retest').
  if: github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '!verify'))
  uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
  secrets: inherit
  with:
    # NOTE(review): in an issue_comment payload, issue.pull_request carries
    # only URL fields per the GitHub webhook docs, so the head.sha lookup
    # below falls through to github.sha for comment triggers. Presumably the
    # called workflow resolves the real head from input_pr_num - confirm.
    input_sha: ${{github.event_name == 'pull_request' && github.event.pull_request.head.sha || (github.event_name == 'push' && github.sha) || (github.event_name == 'issue_comment' && github.event.issue.pull_request.head.sha) || github.sha}}
    input_pr_num: ${{github.event_name == 'pull_request' && github.event.pull_request.number || (github.event_name == 'issue_comment' && github.event.issue.number) || 0}}
    input_pr_url: ${{github.event_name == 'pull_request' && github.event.pull_request.html_url || (github.event_name == 'issue_comment' && github.event.issue.pull_request.html_url) || ''}}
    # For comment triggers the PR title is the title of the issue object
    input_pr_title: ${{github.event_name == 'pull_request' && github.event.pull_request.title || (github.event_name == 'issue_comment' && github.event.issue.title) || ''}}
    repository_name: ${{ github.repository }}
    # NOTE(review): same payload limitation as input_sha - for comment
    # triggers this falls through to github.ref.
    base_ref: ${{github.event_name == 'pull_request' && github.event.pull_request.base.ref || (github.event_name == 'issue_comment' && github.event.issue.pull_request.base.ref) || github.ref}}
    trigger_event_type: ${{ github.event_name }}
    comment_text: ${{ github.event_name == 'issue_comment' && github.event.comment.body || '' }}
+44
Melihat File
@@ -0,0 +1,44 @@
# NOTE! Please use 'git ls-files -i --exclude-standard'
# command after changing this file, to see if there are
# any tracked files which get ignored after the change.
# VisualStudioCode
.vscode/
# build directories generated by cmake
build/
cmake/build/
.cache/
# build artifacts
oam/include/oam/oamConfig.h
python_smi_tools/rsmiBindings.py
include/amd_smi/amd_smi64Config.h
rocm_smi/include/rocm_smi/rocm_smi64Config.h
docs/*.pdf
goamdsmi_shim/include/goamdsmi_shimConfig.h
goamdsmi_shim/include/goamdsmi_shim64Config.h
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*.egg-*
# documentation artifacts
_toc.yml
_build/
_doxygen/
docBin/
# Simulated SYSFS - for early development or debug
device/
# misc
esmi_ib_library/
# do NOT ignore these files
!.clang-format
!.clang-tidy
!.clangd
!.cmake-format
!.pre-commit-config.yaml
@@ -0,0 +1,34 @@
# - How to use:
# python3 -m pip install pre-commit
#   pre-commit install --install-hooks
# Upon a new commit - the hooks should automagically run
#
# - How to skip:
# git commit --no-verify
# or
# SKIP=clang-format-docker git commit
# SKIP=cpplint-docker git commit
fail_fast: false
repos:
# For portability I decided to use Docker containers
- repo: https://github.com/dmitrii-galantsev/pre-commit-docker-cpplint
rev: 0.0.3
hooks:
- id: clang-format-docker
- id: cpplint-docker
- repo: https://github.com/cheshirekow/cmake-format-precommit
rev: v0.6.13
hooks:
- id: cmake-format
# Below is a local way of running formatters and linters
# NOTE: clang-tidy is not used in the above tests
# - repo: https://github.com/pocc/pre-commit-hooks
# rev: v1.3.5
# hooks:
# - id: clang-format
# args: [--no-diff, -i]
# - id: clang-tidy
# args: [-p=build, --quiet]
# - id: cpplint
# args: [--verbose=5]
+18
Melihat File
@@ -0,0 +1,18 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
build:
os: ubuntu-24.04
tools:
python: "3.12"
sphinx:
configuration: docs/conf.py
formats: [htmlzip, pdf]
python:
install:
- requirements: docs/sphinx/requirements.txt
File diff ditekan karena terlalu besar Load Diff
+511
Melihat File
@@ -0,0 +1,511 @@
# SPDX-License-Identifier: MIT
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.20)
set(AMD_SMI "amd_smi")
set(AMD_SMI_LIBS_TARGET "${AMD_SMI}_lib")
set(CPACK_PACKAGE_NAME amd-smi-lib CACHE STRING "")
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.")
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/" CACHE INTERNAL "Default module path.")
## Include common cmake modules
include(utils)
include(help_package)
find_package(PkgConfig)
generic_add_rocm()
# provide git to utilities
find_program(GIT NAMES git)
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "amdsmi_pkg_ver")
get_version_from_file("include/amd_smi/amdsmi.h" "MAJOR")
get_version_from_file("include/amd_smi/amdsmi.h" "MINOR")
get_version_from_file("include/amd_smi/amdsmi.h" "RELEASE")
set(DEFAULT_VERSION "${MAJOR}.${MINOR}.${RELEASE}")
get_package_version_number(${DEFAULT_VERSION} ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
message("Package version: ${PKG_VERSION_STR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_PATCH "${CPACK_PACKAGE_VERSION_PATCH}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_BUILD "0")
set(${AMD_SMI_LIBS_TARGET}_VERSION_HASH "${PKG_VERSION_HASH}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_STRING
"${${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR}.${${AMD_SMI_LIBS_TARGET}_VERSION_MINOR}.${${AMD_SMI_LIBS_TARGET}_VERSION_PATCH}+${${AMD_SMI_LIBS_TARGET}_VERSION_HASH}"
)
set(DEFAULT_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
update_version_in_file("include/amd_smi/amdsmi.h" ${DEFAULT_VERSION} "#define AMDSMI_LIB_VERSION_" " *" " ")
update_version_in_file("rust-interface/src/amdsmi_wrapper.rs" ${DEFAULT_VERSION} "AMDSMI_LIB_VERSION_" " *: *u32 *= *"
": u32 = ")
# Make proper version for appending
# Default Value is 99999
set(ROCM_VERSION_FOR_PACKAGE "99999")
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
endif()
#Prepare final version for the CPACK use
set(CPACK_PACKAGE_VERSION
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}"
)
# The following default version values should be updated as appropriate for
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden
# by git tags (through "git describe") when they are present.
set(PKG_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
set(PKG_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
set(PKG_VERSION_PATCH "${CPACK_PACKAGE_VERSION_PATCH}")
set(PKG_VERSION_NUM_COMMIT 0)
project(${AMD_SMI_LIBS_TARGET} DESCRIPTION "AMD System Management libraries"
HOMEPAGE_URL "https://github.com/ROCm/amdsmi")
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Link with stdc++fs for filesystem support (only for GCC < 9.0)
set(FILESYSTEM_LIB "")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
set(FILESYSTEM_LIB stdc++fs)
message(STATUS "GCC ${CMAKE_CXX_COMPILER_VERSION} detected, linking with stdc++fs for filesystem support")
endif()
endif()
include(GNUInstallDirs)
option(BUILD_TESTS "Build test suite" OFF)
option(ENABLE_ASAN_PACKAGING "" OFF)
option(ENABLE_ESMI_LIB "Build ESMI Library" ON)
option(BUILD_EXAMPLES "Build examples" OFF)
# If amdsmi is built as a static library, it should support being embedded in other programs. The setting below essentially enables the -fPIC flag.
set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL "Enable position independent code for all targets")
mark_as_advanced(CMAKE_POSITION_INDEPENDENT_CODE)
include(CMakeDependentOption)
# these options don't work without BUILD_SHARED_LIBS
cmake_dependent_option(BUILD_WRAPPER "Rebuild AMDSMI-wrapper" OFF "BUILD_SHARED_LIBS" OFF)
cmake_dependent_option(BUILD_CLI "Build AMDSMI-CLI and install" ON "BUILD_SHARED_LIBS" OFF)
cmake_dependent_option(BUILD_RUST_WRAPPER "Build rust wrapper and install" OFF "BUILD_SHARED_LIBS" OFF)
cmake_dependent_option(ENABLE_LDCONFIG "Set library links and caches using ldconfig." ON "BUILD_SHARED_LIBS" OFF)
# Set share path here because project name != amd_smi
set(SHARE_INSTALL_PREFIX "${CMAKE_INSTALL_DATAROOTDIR}/${AMD_SMI}" CACHE STRING "Tests and Example install directory")
# Packaging directives
set(CPACK_PACKAGE_CONTACT "AMD-SMILib Support <amd-smi.support@amd.com>" CACHE STRING "")
generic_package()
# Dependencies
find_package(Threads REQUIRED)
pkg_check_modules(DRM REQUIRED IMPORTED_TARGET libdrm)
pkg_check_modules(DRM_AMDGPU REQUIRED IMPORTED_TARGET libdrm_amdgpu)
# Configuration
# get_imported_soname(<target> <out_var>)
#
# Looks up the single library listed in <target>'s INTERFACE_LINK_LIBRARIES,
# runs `objdump -p` on it and stores the SONAME entry it reports in <out_var>
# in the caller's scope.
#
# Aborts the configure step (FATAL_ERROR) when the target lists no library or
# more than one, when objdump fails, or when its output has no SONAME entry.
function(get_imported_soname target out_var)
    get_target_property(link_libs ${target} INTERFACE_LINK_LIBRARIES)

    # get_target_property yields "<var>-NOTFOUND" when the property is unset;
    # report that directly instead of handing the placeholder to objdump.
    if(NOT link_libs)
        message(FATAL_ERROR "Could not find SONAME for target ${target} libs: ${link_libs}")
    endif()

    # Exactly one library is expected, otherwise the SONAME would be ambiguous
    list(LENGTH link_libs link_lib_count)
    if(link_lib_count GREATER 1)
        message(FATAL_ERROR "Target ${target} has multiple link libraries: ${link_libs}")
    endif()
    list(GET link_libs 0 link_lib)

    execute_process(
        COMMAND objdump -p "${link_lib}"
        OUTPUT_VARIABLE OBJDUMP_OUTPUT
        ERROR_VARIABLE OBJDUMP_ERROR
        RESULT_VARIABLE OBJDUMP_RESULT
    )
    if(NOT OBJDUMP_RESULT EQUAL 0)
        message(FATAL_ERROR "objdump failed for ${link_lib}: ${OBJDUMP_ERROR}")
    endif()

    # The dynamic section printed by `objdump -p` has a "SONAME <name>" line
    string(REGEX MATCH "SONAME +([^ \n]+)" SONAME_MATCH "${OBJDUMP_OUTPUT}")
    if(NOT SONAME_MATCH)
        message(FATAL_ERROR "Could not find SONAME in objdump output for ${link_lib}")
    endif()
    set(result "${CMAKE_MATCH_1}")
    message(STATUS "SONAME of ${link_lib}: ${result}")

    if(NOT result)
        message(FATAL_ERROR "Could not find SONAME for target ${target} libs: ${link_libs}")
    endif()
    set("${out_var}" "${result}" PARENT_SCOPE)
endfunction()
get_imported_soname(PkgConfig::DRM_AMDGPU LIBDRM_AMDGPU_SONAME)
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/config/amd_smi_config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/config/amd_smi_config.h"
@ONLY
)
## Compiler flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti")
if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
endif()
# Security options
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wcast-align")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat=2 -fno-common -Wstrict-overflow")
# Intentionally leave out -Wsign-promo. It causes spurious warnings.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Woverloaded-virtual -Wreorder")
set(ROCM_SRC_DIR "${PROJECT_SOURCE_DIR}/rocm_smi/src")
set(ROCM_INC_DIR "${PROJECT_SOURCE_DIR}/rocm_smi/include/rocm_smi")
set(SHR_MUTEX_DIR "${PROJECT_SOURCE_DIR}/third_party/shared_mutex")
# Fetch (or refresh) the esmi_ib_library sources into the source tree and wire
# them into the build when ENABLE_ESMI_LIB is ON.
if(ENABLE_ESMI_LIB)
    # Supported esmi library version tag
    set(current_esmi_tag "esmi_pkg_ver-4.2")
    if(NOT EXISTS ${PROJECT_SOURCE_DIR}/esmi_ib_library/src)
        # TODO: use ExternalProject_Add instead or a submodule
        message(STATUS "Adding esmi_ib_library...")
        execute_process(
            COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
                    ${PROJECT_SOURCE_DIR}/esmi_ib_library
            RESULT_VARIABLE clone_result)
        # Stop right away with a clear message; otherwise a failed clone only
        # shows up later as missing e_smi source files.
        if(NOT clone_result EQUAL 0)
            message(FATAL_ERROR "Failed to clone esmi_ib_library (${current_esmi_tag}): ${clone_result}")
        endif()
    else()
        message(STATUS "esmi_ib_library already installed, checking version...")
        # Grab latest commit and get the tag
        execute_process(
            COMMAND git rev-list --tags --max-count=1
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
            OUTPUT_VARIABLE latest_commit
            OUTPUT_STRIP_TRAILING_WHITESPACE)
        execute_process(
            COMMAND git describe --tags ${latest_commit} --match "*pkg*"
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
            OUTPUT_VARIABLE latest_esmi_tag
            OUTPUT_STRIP_TRAILING_WHITESPACE)

        # Update to latest tags if not matched
        if(NOT latest_esmi_tag STREQUAL current_esmi_tag)
            message(STATUS "Updating esmi_ib_library...")
            # Clone into a temporary directory first so the existing checkout
            # is only replaced once the new one is known to be complete.
            execute_process(
                COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
                        ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp RESULT_VARIABLE clone_result)
            if(clone_result EQUAL 0)
                file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library)
                file(RENAME ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp ${PROJECT_SOURCE_DIR}/esmi_ib_library)
                message(STATUS "Successfully cloned updated esmi_ib_library")
            else()
                file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp)
                message(FATAL_ERROR "Failed to clone updated esmi_ib_library")
            endif()
        else()
            message(STATUS "esmi_ib_library is the latest version: ${current_esmi_tag}...")
        endif()
    endif()

    # Make sure to update the amd_hsmp.h file with the corresponding esmi version
    file(COPY "${PROJECT_SOURCE_DIR}/include/amd_smi/impl/amd_hsmp.h"
         DESTINATION "${PROJECT_SOURCE_DIR}/esmi_ib_library/include/asm")
    add_definitions("-DENABLE_ESMI_LIB=1")
    set(ESMI_INC_DIR "${PROJECT_SOURCE_DIR}/esmi_ib_library/include")
    set(ESMI_SRC_DIR "${PROJECT_SOURCE_DIR}/esmi_ib_library/src")
    # esmi has a lot of write-strings warnings - silence them
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings")
endif()
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_BINARY_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/third_party/shared_mutex
${CMAKE_CURRENT_SOURCE_DIR}/include/amd_smi
${ESMI_INC_DIR}
)
set(CMN_SRC_LIST
"${ROCM_SRC_DIR}/rocm_smi_device.cc"
"${ROCM_SRC_DIR}/rocm_smi_main.cc"
"${ROCM_SRC_DIR}/rocm_smi_monitor.cc"
"${ROCM_SRC_DIR}/rocm_smi_power_mon.cc"
"${ROCM_SRC_DIR}/rocm_smi_utils.cc"
"${ROCM_SRC_DIR}/rocm_smi_counters.cc"
"${ROCM_SRC_DIR}/rocm_smi_kfd.cc"
"${ROCM_SRC_DIR}/rocm_smi_io_link.cc"
"${ROCM_SRC_DIR}/rocm_smi_gpu_metrics.cc"
"${ROCM_SRC_DIR}/rocm_smi_dyn_gpu_metrics.cc"
"${ROCM_SRC_DIR}/rocm_smi.cc"
"${ROCM_SRC_DIR}/rocm_smi_logger.cc"
"${SHR_MUTEX_DIR}/shared_mutex.cc"
"${ROCM_SRC_DIR}/rocm_smi_binary_parser.cc"
"${ROCM_SRC_DIR}/rocm_smi_board_temp.cc"
"${ROCM_SRC_DIR}/rocm_smi_npm.cc")
if(ENABLE_ESMI_LIB)
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi.c)
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_monitor.c)
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_plat.c)
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_utils.c)
endif()
set(CMN_INC_LIST
"${ROCM_INC_DIR}/rocm_smi_device.h"
"${ROCM_INC_DIR}/rocm_smi_main.h"
"${ROCM_INC_DIR}/rocm_smi_monitor.h"
"${ROCM_INC_DIR}/rocm_smi_power_mon.h"
"${ROCM_INC_DIR}/rocm_smi_utils.h"
"${ROCM_INC_DIR}/rocm_smi_common.h"
"${ROCM_INC_DIR}/rocm_smi_exception.h"
"${ROCM_INC_DIR}/rocm_smi_counters.h"
"${ROCM_INC_DIR}/rocm_smi_kfd.h"
"${ROCM_INC_DIR}/rocm_smi_io_link.h"
"${ROCM_INC_DIR}/rocm_smi_gpu_metrics.h"
"${ROCM_INC_DIR}/rocm_smi_dyn_gpu_metrics.h"
"${ROCM_INC_DIR}/rocm_smi.h"
"${ROCM_INC_DIR}/rocm_smi_logger.h"
"${SHR_MUTEX_DIR}/shared_mutex.h"
"${ROCM_INC_DIR}/rocm_smi_binary_parser.h"
"${ROCM_INC_DIR}/rocm_smi_board_temp.h"
"${ROCM_INC_DIR}/rocm_smi_npm.h")
add_subdirectory("rocm_smi")
add_subdirectory("src")
if(BUILD_TESTS)
set(TESTS_COMPONENT "tests")
#add_subdirectory("tests/rocm_smi_test")
add_subdirectory("tests/amd_smi_test")
add_subdirectory("tests/python_unittest")
endif()
# python interface, CLI, and py-test depend on shared libraries
if(BUILD_SHARED_LIBS)
add_subdirectory("py-interface")
if(BUILD_CLI)
add_subdirectory("amdsmi_cli")
endif()
if(BUILD_RUST_WRAPPER)
add_subdirectory("rust-interface")
endif()
endif()
if(BUILD_EXAMPLES)
add_subdirectory("example")
endif()
include(CMakePackageConfigHelpers)
configure_package_config_file(
amd_smi-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
PATH_VARS CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR CMAKE_INSTALL_BINDIR)
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config-version.cmake
VERSION "${CPACK_PACKAGE_VERSION}" COMPATIBILITY SameMajorVersion)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
COMPONENT dev)
# Create cmake target
# Add all targets to the build-tree export set
export(TARGETS ${AMD_SMI} FILE "${PROJECT_BINARY_DIR}/amd_smi_target.cmake")
# Export the package for use from the build-tree
# (this registers the build-tree with a global CMake-registry)
export(PACKAGE ${AMD_SMI})
install(
EXPORT amd_smiTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
COMPONENT dev)
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
if(ENABLE_ASAN_PACKAGING)
# install license file in share/doc/amd_smi-asan folder
install(
FILES ${CPACK_RESOURCE_FILE_LICENSE}
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}-asan
RENAME LICENSE.txt
COMPONENT asan)
endif()
# docs are installed into different share directory from tests and examples
install(
FILES ${CPACK_RESOURCE_FILE_LICENSE}
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
RENAME LICENSE.txt
COMPONENT dev)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.md
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
COMPONENT dev)
install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/example
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${AMD_SMI}
COMPONENT dev
FILES_MATCHING
PATTERN "*.h"
PATTERN "*.cc"
PATTERN "*.txt"
PATTERN "build*" EXCLUDE
PATTERN ".cache*" EXCLUDE)
# Make for goamdsmi_shim library
add_subdirectory(goamdsmi_shim)
#Debian package specific variables
set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "python3-argcomplete, libdrm-dev, libdrm-amdgpu-dev")
set(CPACK_DEBIAN_ASAN_PACKAGE_RECOMMENDS ${CPACK_DEBIAN_PACKAGE_RECOMMENDS})
set(CPACK_DEBIAN_DEV_PACKAGE_RECOMMENDS ${CPACK_DEBIAN_PACKAGE_RECOMMENDS})
set(CPACK_DEBIAN_PACKAGE_DEPENDS "sudo, libc6, python3 (>= 3.6.8), python3-pip, python3-setuptools, python3-wheel")
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS})
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS})
# $CURRENT_YEAR is used by copyright.in
string(TIMESTAMP CURRENT_YEAR "%Y")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/copyright.in DEBIAN/copyright @ONLY)
## Process the Debian install/remove scripts to update the CPACK variables
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in
DEBIAN/postinst
@ONLY
FILE_PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in
DEBIAN/prerm
@ONLY
FILE_PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE)
list(APPEND CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/postinst"
"${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/prerm")
# Configure pre-rm for tests only
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm.in
DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm
@ONLY
FILE_PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE)
# Assign control scripts to the AMDSMI Lib & Tests packages
set(CPACK_DEBIAN_DEV_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/prerm")
set(CPACK_DEBIAN_TESTS_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm")
# install copyright file into share/doc/amd-smi-lib/copyright
# required for debian package compliance
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/copyright"
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
COMPONENT dev)
# RPM package specific variables
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
"${CPACK_PACKAGING_INSTALL_PREFIX} ${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}")
#Set rpm distro
if(CPACK_RPM_PACKAGE_RELEASE)
set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
endif()
# NOTE: RPM SUGGESTS DO NOT WORK! https://bugzilla.redhat.com/show_bug.cgi?id=1811358
set(CPACK_RPM_PACKAGE_SUGGESTS "python3-argcomplete, libdrm-dev, libdrm-amdgpu-dev")
set(CPACK_RPM_DEV_PACKAGE_SUGGESTS ${CPACK_RPM_PACKAGE_SUGGESTS})
set(CPACK_RPM_ASAN_PACKAGE_SUGGESTS ${CPACK_RPM_PACKAGE_SUGGESTS})
# python version gated by rhel8 :(
set(CPACK_RPM_PACKAGE_REQUIRES "sudo, python3 >= 3.6.8, python3-pip, python3-wheel, python3-setuptools")
set(CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES})
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES})
# don't terminate if bytecompile of python files fails
set(CPACK_RPM_SPEC_MORE_DEFINE "%define _python_bytecompile_errors_terminate_build 0")
# CPack converts #!/usr/bin/env python3 to /usr/libexec/platform-python in RHEL8.
# ---- CPack packaging setup: component-based DEB and RPM packages ----
# prevent the BRP(buildroot policy) script from checking and modifying interpreter directives
string(APPEND CPACK_RPM_SPEC_MORE_DEFINE "\n%undefine __brp_mangle_shebangs")
# Add rocm-core dependency if -DROCM_DEP_ROCMCORE=ON is passed
# (asan component -> rocm-core-asan; dev component and default package -> rocm-core)
if(ROCM_DEP_ROCMCORE)
string(APPEND CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ", rocm-core-asan")
string(APPEND CPACK_RPM_ASAN_PACKAGE_REQUIRES ", rocm-core-asan")
string(APPEND CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ", rocm-core")
string(APPEND CPACK_RPM_DEV_PACKAGE_REQUIRES ", rocm-core")
string(APPEND CPACK_DEBIAN_PACKAGE_DEPENDS ", rocm-core")
string(APPEND CPACK_RPM_PACKAGE_REQUIRES ", rocm-core")
endif()
## Enable Component Mode and set component specific flags
# The "dev" component keeps the plain package name; "tests" and "asan" get a suffix.
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${CPACK_PACKAGE_NAME}")
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-tests")
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-asan")
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_DEV_PACKAGE_NAME "${CPACK_PACKAGE_NAME}")
set(CPACK_RPM_TESTS_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-tests")
set(CPACK_RPM_ASAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-asan")
# Select which components get packaged: either the asan component alone,
# or the dev and tests components together.
if(ENABLE_ASAN_PACKAGING)
# ASAN Package requires only asan component with libraries and license file
set(CPACK_COMPONENTS_ALL asan)
else()
set(CPACK_COMPONENTS_ALL dev tests)
endif()
# The line below doesn't currently work; it may be this issue:
# https://bugzilla.redhat.com/show_bug.cgi?id=1811358
# set(CPACK_RPM_PACKAGE_SUGGESTS "sudo, libdrm-dev")
## Process the Rpm install/remove scripts to update the CPACK variables
# @ONLY restricts substitution to @VAR@ references, so shell-style ${...}
# expressions inside the scriptlet templates are left untouched.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/preun.in" RPM/preun @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY)
# Point CPack at the generated scriptlets in the build tree.
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post")
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/preun")
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun")
#Set the names now using CPACK utility
# (DEB-DEFAULT / RPM-DEFAULT let CPack derive the package file names itself)
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
include(CPack)
# NOTE(review): generic_package_post() is not defined in this section; presumably
# a project helper that must run after include(CPack) - confirm against its definition.
generic_package_post()
+3
Melihat File
@@ -0,0 +1,3 @@
set noparent
linelength=100
filter=-build/include_subdir,-legal/copyright,-runtime/printf,-build/c++11,-runtime/int,-build/header_guard
+127
Melihat File
@@ -0,0 +1,127 @@
#!/bin/bash
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Other prerm actions
rm_ldconfig() {
    # Drop the loader configuration entry for the AMD SMI library and refresh
    # the loader cache. "@ENABLE_LDCONFIG@" is substituted with ON/OFF when the
    # package is built; anything other than ON makes this a no-op.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
rm_leftovers() {
# Delete files generated after installation that the package manager does not
# track: Python bytecode caches and the build / egg-info directories.
# remove pyc files generated by python
rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi/__pycache__"
# remove build and egg files
rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi.egg-info"
rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/build"
# remove leftover doc files
# NOTE(review): the '*' is inside double quotes, so no filename expansion takes
# place: the test looks for an entry literally named 'amd_smi*' and the rm below
# is effectively never reached. Before unquoting the glob in this script, confirm
# which package owns the doc/amd_smi* directories - this prerm prints
# "Removing AMDSMI Lib Tests Packages...", and those directories may belong to
# the main package rather than the tests package.
if test -e "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/../doc/amd_smi*"; then
rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/../doc/amd_smi*"
fi
}
rm_logFolder() {
    # Remove the AMD SMI library log directory and everything in it.
    local amdsmi_log_dir=/var/log/amd_smi_lib
    rm -rf "$amdsmi_log_dir"
}
rm_rocm_tests_dir() {
    # Remove the installed tests directory, reporting only when it existed.
    local tests_dir="@CPACK_PACKAGING_INSTALL_PREFIX@/share/amd_smi/tests/"

    [ -d "$tests_dir" ] || return 0

    rm -rf "$tests_dir"
    echo "Removed ROCm tests directory."
}
return_logrotateToOrigConfig() {
    # Undo the logrotate changes made at install time:
    #   - delete the AMD SMI logrotate configuration
    #   - move an hourly logrotate cron job back to cron.daily
    #   - restore the backed-up systemd logrotate timer and re-enable it
    local amdsmi_rotate_conf=/etc/logrotate.d/amd_smi.conf

    [ -f "$amdsmi_rotate_conf" ] && rm -rf "$amdsmi_rotate_conf"

    [ -f /etc/cron.hourly/logrotate ] &&
        mv /etc/cron.hourly/logrotate /etc/cron.daily/logrotate

    # Nothing more to do unless a timer backup exists.
    [ -f /lib/systemd/system/logrotate.timer.backup ] || return 0

    cp /lib/systemd/system/logrotate.timer.backup /lib/systemd/system/logrotate.timer
    rm -rf /lib/systemd/system/logrotate.timer.backup
    systemctl reenable --now logrotate.timer
}
rm_python_lib() {
    # Uninstall the amdsmi package through the system python3's pip and then
    # report whether pip still lists it. Skipped (with a warning) when python3
    # cannot be queried or reports a minor version below 6.
    local py_minor
    if ! py_minor=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # check if python version is supported
    if [ "$py_minor" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$py_minor. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Uninstall only when pip currently lists something containing "amdsmi".
    local pip_listing
    pip_listing=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$pip_listing" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 \
                python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # List again to verify the outcome.
    pip_listing=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$pip_listing" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
# Dispatch on the action passed as the first argument to this script.
case "$1" in
    remove|upgrade)
        # remove old gpuv-smi symlink
        rm -f @CPACK_PACKAGING_INSTALL_PREFIX@/bin/gpuv-smi &> /dev/null
        echo "Removing AMDSMI Lib Tests Packages..."
        rm_ldconfig
        echo "ldconfig removed"
        rm_leftovers
        echo "leftovers removed"
        ;;
    purge)
        # nothing to do
        ;;
    *)
        exit 0
        ;;
esac
+26
Melihat File
@@ -0,0 +1,26 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: amdsmi
Source: https://github.com/ROCm/amdsmi.git
Files: *
Copyright: @CURRENT_YEAR@ Advanced Micro Devices, Inc.
License: MIT
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+210
Melihat File
@@ -0,0 +1,210 @@
#!/bin/bash
do_configureLogrotate() {
# Set up hourly, size-based rotation for the AMD SMI library log:
#   1. create the log directory and log file, readable and writable by all users
#   2. write /etc/logrotate.d/amd_smi.conf unless it already exists
#   3. switch the system's logrotate trigger to hourly, either by rewriting the
#      systemd logrotate.timer unit or by moving the cron.daily job to cron.hourly
# Returns early with a warning when the logrotate command is not available.
local IS_SYSTEMD=0
local packageName="amd-smi-lib"
local logPath=/var/log/amd_smi_lib
local logFile="${logPath}/AMD-SMI-lib.log"
local logrotateConfFile=/etc/logrotate.d/amd_smi.conf
mkdir -p "${logPath}"
touch "${logFile}"
chmod -R a+rw "${logPath}"
chmod a+rw "${logFile}"
# The log directory/file above are created even if logrotate turns out to be missing.
command -v logrotate &>/dev/null
if [ $? -ne 0 ]; then
echo "[WARNING] Detected logrotate is not installed."\
"$packageName logs (when turned on) will not rotate properly."
return
fi
# An existing configuration file is left untouched.
if [ ! -f $logrotateConfFile ]; then
touch "${logrotateConfFile}"
chmod 644 "${logrotateConfFile}" # root r/w, all others read
# AMD SMI logging rotation, rotates files using root user/group
# Hourly logrotation check
# Only rotates if size grew larger than 1MB
# Max of 4 rotation files, oldest will be removed
# Rotated files use date extension of ex. AMD-SMI-lib.log.2023-05-09_16:51:42
cat << EOF > "${logrotateConfFile}"
${logFile} {
su root root
hourly
missingok
notifempty
rotate 4
size 1M
copytruncate
dateext
dateformat .%%Y-%%m-%%d_%H:%%M:%%S
}
EOF
# Fix for % S argument not found (now we escape with %%)
# issue was RPM build thought we were using macros
# https://gitlab.kitware.com/cmake/cmake/-/issues/22965
# https://rpm-software-management.github.io/rpm/manual/spec.html
# The substitution collapses every '%%' written above to a single '%'
# ('%H' is already written with a single '%').
sed -i s/%%/%/g "${logrotateConfFile}"
# workaround: remove extra 'OURCE' text
# from amd_smi.conf. Unsure if CMAKE,
# bash, or here document
# issue (only seen on RHEL 8.7)
sed -i s/OURCE//g "${logrotateConfFile}"
fi
# check if logrotate uses system timers, Ubuntu/modern OS's do
# Several older OS's like RHEL 8.7, do not. Instead defaults
# to use daily cron jobs - see https://stackoverflow.com/a/69465677
if [ -d /run/systemd/system ]; then
systemctl list-timers | grep -iq logrotate
if [ $? -eq 0 ]; then
IS_SYSTEMD=1
fi
fi
if [ "$IS_SYSTEMD" -eq 1 ]; then
# Configure systemd timers - the typical setup for modern Linux logrotation setups
if [ -f /lib/systemd/system/logrotate.timer ]; then
# Keep a one-time backup of the original unit; an existing backup is never overwritten.
if [ ! -f /lib/systemd/system/logrotate.timer.backup ]; then
cp /lib/systemd/system/logrotate.timer /lib/systemd/system/logrotate.timer.backup
fi
# Replace the timer unit with an hourly schedule.
cat << EOF > /lib/systemd/system/logrotate.timer
[Unit]
Description=Hourly rotation of log files
Documentation=man:logrotate(8) man:logrotate.conf(5)
[Timer]
OnCalendar=
OnCalendar=hourly
AccuracySec=1m
Persistent=true
[Install]
WantedBy=timers.target
EOF
systemctl reenable --now logrotate.timer
else
echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
"$packageName logs (when turned on) will not rotate properly."
fi
else
# $IS_SYSTEMD -eq 0
if [ -f /etc/cron.daily/logrotate ]; then
# move logrotate daily to hourly
if [ -d /etc/cron.hourly ]; then
mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
fi
fi
fi
}
do_ldconfig() {
    # Register the library install directory with the dynamic loader.
    # "@ENABLE_LDCONFIG@" is substituted with ON/OFF when the package is built;
    # anything other than ON makes this a no-op.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
do_install_amdsmi_python_lib() {
    # Install the amdsmi Python package shipped with this package into the
    # system python3 and try to enable shell argument completion.
    #
    # Steps:
    #   1. return with a warning if python3 cannot be queried or its minor
    #      version is below 6
    #   2. uninstall any amdsmi package that pip already lists
    #   3. return silently for static builds (no Python library is shipped)
    #   4. install from the packaged sources, choosing setup.py or pip
    #      depending on the installed setuptools version
    #   5. activate argcomplete when an activation command is available
    #
    # PIP_ROOT_USER_ACTION and PIP_BREAK_SYSTEM_PACKAGES are exported for the
    # duration of steps 2-4 and set back to their previous values afterwards
    # (the early returns in steps 1 and 3 leave before that restore).

    # get python version
    local python3_minor_version
    python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)')
    if [ $? -ne 0 ]; then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not "\
            "supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    local PREVIOUS_PIP_ROOT_USER_ACTION="$PIP_ROOT_USER_ACTION"
    export PIP_ROOT_USER_ACTION=ignore
    # python3.11 requires --break-system-packages
    local PREVIOUS_PIP_BREAK_SYSTEM_PACKAGES="$PIP_BREAK_SYSTEM_PACKAGES"
    export PIP_BREAK_SYSTEM_PACKAGES=1

    # Remove old python library
    local amdsmi_pip_list_output
    amdsmi_pip_list_output=$(python3 -m pip list --format=columns --disable-pip-version-check)
    # check pip list output for amdsmi
    if [[ $amdsmi_pip_list_output == *"amdsmi"* ]]; then
        echo "Detected old AMD-SMI python library (amdsmi)..."
        python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
        echo "Removed old AMD-SMI python library (amdsmi)..."
    fi

    # static builds don't include python lib
    if [ "@BUILD_SHARED_LIBS@" != "ON" ]; then
        return
    fi

    check_and_install_amdsmi() {
        # Pick the installation method from the setuptools version:
        #   sorts at or before 28.5 -> not installed (warning only)
        #   sorts before 41.0.1     -> python3 setup.py install
        #   otherwise               -> pip install of the source directory
        local setuptools_version
        setuptools_version=$(python3 -c 'import setuptools; print(setuptools.__version__)')
        if [ $? -ne 0 ]; then
            echo "[WARNING] Could not determine setuptools version. "\
                "AMD-SMI python library will not be installed."
            return
        fi

        # install python library at @CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi
        local python_lib_path=@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@
        local amdsmi_python_lib_path="$python_lib_path"
        local amdsmi_setup_py_path="$python_lib_path/setup.py"

        # Decide installation method based on setuptools version
        if [[ "$(printf '%s\n' "$setuptools_version" "28.5" | sort -V | head -n1)" == "$setuptools_version" ]]; then
            echo "[WARNING] Setuptools version is less than 28.5. AMD-SMI will not be installed."
        elif [[ "$(printf '%s\n' "$setuptools_version" "41.0.1" | sort -V | head -n1)" != "41.0.1" ]]; then
            echo "Using setup.py for installation due to setuptools version $setuptools_version"
            # setup.py resolves its package sources relative to the current
            # directory, so run it from the source directory. The subshell
            # keeps the caller's working directory unchanged, and setup.py is
            # skipped entirely if the directory cannot be entered.
            ( cd "$amdsmi_python_lib_path" && python3 "$amdsmi_setup_py_path" install )
        else
            echo "Using pyproject.toml for installation due to setuptools version $setuptools_version"
            python3 -m pip install "$amdsmi_python_lib_path" --quiet --disable-pip-version-check --no-build-isolation --no-index
        fi
    }

    # Call the function
    check_and_install_amdsmi

    export PIP_ROOT_USER_ACTION="$PREVIOUS_PIP_ROOT_USER_ACTION"
    export PIP_BREAK_SYSTEM_PACKAGES="$PREVIOUS_PIP_BREAK_SYSTEM_PACKAGES"

    # only try to activate argcomplete if such command exists
    # python3-argcomplete is recommended but optional, we handle its absence gracefully
    if command -v activate-global-python-argcomplete &>/dev/null; then
        activate-global-python-argcomplete 2>/dev/null || {
            echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete"
        }
    else
        # try older argcomplete3 version
        if command -v activate-global-python-argcomplete3 &>/dev/null; then
            activate-global-python-argcomplete3 2>/dev/null || {
                echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete3"
            }
        else
            echo "[WARNING] Could not find argcomplete activation command. "\
                "Argument completion will not work. Install python3-argcomplete package to enable it."
        fi
    fi
}
# Dispatch on the action passed as the first argument to this script.
case "$1" in
    configure)
        do_install_amdsmi_python_lib
        do_ldconfig
        # a failing logrotate setup ends the script with status 0
        do_configureLogrotate || exit 0
        ;;
    abort-upgrade|abort-remove|abort-deconfigure)
        echo "$1"
        ;;
    *)
        exit 0
        ;;
esac
+136
Melihat File
@@ -0,0 +1,136 @@
#!/bin/bash
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Printed on every invocation of this script, before "$1" is inspected.
echo "Removing AMDSMI LIB Packages..."
# Other prerm actions
rm_ldconfig() {
    # Drop the loader configuration entry for the AMD SMI library and refresh
    # the loader cache. "@ENABLE_LDCONFIG@" is substituted with ON/OFF when the
    # package is built; anything other than ON makes this a no-op.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
rm_leftovers() {
    # Delete files generated after installation that the package manager does
    # not track: Python bytecode caches, the build / egg-info directories, and
    # leftover documentation directories.

    # remove pyc files generated by python
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi/__pycache__"

    # remove build and egg files
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi.egg-info"
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/build"

    # remove leftover doc files
    # The '*' must stay OUTSIDE the quotes so the shell expands it; the quoted
    # part still protects a prefix containing spaces. No existence test is
    # needed: when nothing matches, 'rm -f' ignores the unexpanded pattern.
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/../doc/"amd_smi*
}
rm_logFolder() {
    # Remove the AMD SMI library log directory and everything in it.
    local amdsmi_log_dir=/var/log/amd_smi_lib
    rm -rf "$amdsmi_log_dir"
}
rm_rocm_tests_dir() {
    # Remove the installed tests directory, reporting only when it existed.
    local tests_dir="@CPACK_PACKAGING_INSTALL_PREFIX@/share/amd_smi/tests/"

    [ -d "$tests_dir" ] || return 0

    rm -rf "$tests_dir"
    echo "Removed ROCm tests directory."
}
return_logrotateToOrigConfig() {
    # Undo the logrotate changes made at install time:
    #   - delete the AMD SMI logrotate configuration
    #   - move an hourly logrotate cron job back to cron.daily
    #   - restore the backed-up systemd logrotate timer and re-enable it
    local amdsmi_rotate_conf=/etc/logrotate.d/amd_smi.conf

    [ -f "$amdsmi_rotate_conf" ] && rm -rf "$amdsmi_rotate_conf"

    [ -f /etc/cron.hourly/logrotate ] &&
        mv /etc/cron.hourly/logrotate /etc/cron.daily/logrotate

    # Nothing more to do unless a timer backup exists.
    [ -f /lib/systemd/system/logrotate.timer.backup ] || return 0

    cp /lib/systemd/system/logrotate.timer.backup /lib/systemd/system/logrotate.timer
    rm -rf /lib/systemd/system/logrotate.timer.backup
    systemctl reenable --now logrotate.timer
}
rm_python_lib() {
    # Uninstall the amdsmi package through the system python3's pip and then
    # report whether pip still lists it. Skipped (with a warning) when python3
    # cannot be queried or reports a minor version below 6.
    local py_minor
    if ! py_minor=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # check if python version is supported
    if [ "$py_minor" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$py_minor. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Uninstall only when pip currently lists something containing "amdsmi".
    local pip_listing
    pip_listing=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$pip_listing" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 \
                python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # List again to verify the outcome.
    pip_listing=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$pip_listing" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
# Dispatch on the action passed as the first argument to this script.
case "$1" in
    remove|upgrade)
        # remove old gpuv-smi symlink
        rm -f @CPACK_PACKAGING_INSTALL_PREFIX@/bin/gpuv-smi &> /dev/null
        echo "Removing AMDSMI Lib Packages..."

        # Each cleanup step is followed by its progress message.
        rm_python_lib
        echo "python library removed"

        rm_ldconfig
        echo "ldconfig removed"

        rm_leftovers
        echo "leftovers removed"

        rm_logFolder
        echo "log folder removed"

        rm_rocm_tests_dir
        echo "rocm tests directory removed"

        return_logrotateToOrigConfig
        echo "logrotate configuration restored"
        ;;
    purge)
        # nothing to do
        ;;
    *)
        exit 0
        ;;
esac
@@ -0,0 +1 @@
/opt/rocm/lib
+39
Melihat File
@@ -0,0 +1,39 @@
# Builds AMD SMI from the amd-mainline branch on top of a ROCm Ubuntu 22.04
# image, installs the resulting .deb package, and starts bash as the entrypoint.
# Use rocm/dev-ubuntu-22.04 as the base image
FROM rocm/dev-ubuntu-22.04
# Set environment variables for build directories and package patterns
# DEB_BUILD is the shell glob used by the install step below.
# DEB_BUILD_TEST is defined here but not referenced by any instruction in this file.
# NOTE(review): any file name matching DEB_BUILD_TEST also matches DEB_BUILD,
# so the install step picks up a tests package too if one was built - confirm
# that this is intended.
ENV BUILD_FOLDER=/home/amdsmi/build
ENV DEB_BUILD="amd-smi-lib*99999-local_amd64.deb"
ENV DEB_BUILD_TEST="amd-smi-lib-tests*99999-local_amd64.deb"
# Set the working directory to /home
WORKDIR /home
# Install necessary system packages
RUN apt update && apt-get install -y git build-essential rpm pkg-config g++ python3 python3-pip python3-wheel python3-setuptools
# Upgrade pip and install cmake and virtualenv using pip
RUN python3 -m pip install --upgrade pip setuptools && \
python3 -m pip install cmake virtualenv
# Clone the AMD SMI repository from GitHub
# (clones into /home/amdsmi, the parent of BUILD_FOLDER)
RUN git clone -b amd-mainline https://github.com/ROCm/amdsmi.git
# Navigate to the amdsmi directory
WORKDIR /home/amdsmi
# Build and Install AMDSMI
# Starts from an empty build directory, builds with tests and the ESMI library
# enabled, packages the build, installs the generated .deb, and links the
# amd-smi CLI into /usr/local/bin.
RUN rm -rf ${BUILD_FOLDER} && \
mkdir -p ${BUILD_FOLDER} && \
cd ${BUILD_FOLDER} && \
cmake .. -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
make -j $(nproc) VERBOSE=1 && \
make package && \
sudo apt install -y --allow-downgrades ${BUILD_FOLDER}/${DEB_BUILD} && \
sudo ln -s /opt/rocm/bin/amd-smi /usr/local/bin
# Verify the installation of Python packages related to AMD SMI
# (grep exits non-zero when nothing matches, which fails the image build)
RUN python3 -m pip list | grep -E "amd|pip|setuptools"
# Set the entrypoint to bash for interactive use
ENTRYPOINT ["/bin/bash"]
+19
Melihat File
@@ -0,0 +1,19 @@
Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+221
Melihat File
@@ -0,0 +1,221 @@
# AMD System Management Interface (AMD SMI) library
The AMD System Management Interface (AMD SMI) library offers a unified tool for managing and monitoring GPUs,
particularly in high-performance computing environments. It provides a user-space interface that allows applications to
control GPU operations, monitor performance, and retrieve information about the system's drivers and GPUs.
For information on available features, installation steps, API reference material, and helpful tips, refer to the online
documentation at [rocm.docs.amd.com/projects/amdsmi](https://rocm.docs.amd.com/projects/amdsmi/en/latest/)
>[!NOTE]
>This project is a successor to [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib)
>and [esmi_ib_library](https://github.com/amd/esmi_ib_library).
>This project applies to Linux bare metal and Linux virtual machine (guest) environments. To use AMD SMI for virtualization, refer to [AMD-SMI Virtualization](https://github.com/amd/MxGPU-Virtualization/tree/mainline/smi-lib).
## Supported platforms
The AMD SMI library supports Linux bare metal and Linux virtual machine guest
for AMD GPUs and AMD EPYC™ CPUs via
[esmi_ib_library](https://github.com/amd/esmi_ib_library).
The AMD SMI library can run on AMD ROCm supported platforms. Refer to
[System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
for more information.
## Installation
* [Install the AMD SMI library and CLI tool](https://rocm.docs.amd.com/projects/amdsmi/en/latest/install/install.html)
## Requirements
The following are required to install and use the AMD SMI library through its language interfaces and CLI.
* `amdgpu` driver must be loaded for [`amdsmi_init()`](./docs/how-to/amdsmi-cpp-lib#hello-amd-smi) to work. Refer to the [Instinct documentation](https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/install/detailed-install/prerequisites.html) for installation instructions.
* Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64
```
### Python interface and CLI tool prerequisites
* Python 3.6.8+ (64-bit)
### Note: No module named more_itertools warning on Azure Linux 3
During the driver installation process on Azure Linux 3, you might encounter the `ModuleNotFoundError: No module named 'more_itertools'` warning. This warning is a result of the reintroduction of `python3-wheel` and `python3-setuptools` dependencies in the CMake of AMD SMI, which requires `more_itertools` to build these Python libraries. This issue will be fixed in a future ROCm release. As a workaround, use the following command before installation:
```
sudo python3 -m pip install more_itertools
```
### Go API prerequisites
* Go version 1.20 or greater
## AMD SMI basic usage
### C++ library
For developers focused on performance monitoring, system diagnostics, or resource management, the AMD SMI C++ library
offers a powerful and versatile tool to unlock the full capabilities of AMD hardware.
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-cpp-lib.html) and the
detailed [C++ API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-cpp-api.html) in the
ROCm documentation portal.
### Python library
The AMD SMI Python interface provides an easy-to-use
[API](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-lib.html) for interacting with AMD
hardware. It simplifies tasks like monitoring and controlling GPU operations, allowing for rapid development.
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-py-lib.html) and the
detailed [Python API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-api.html) in the
ROCm documentation portal.
### Go library
The AMD SMI Go interface provides a simple
[API](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-go-lib.html)
for AMD hardware management. It streamlines hardware monitoring and control
while leveraging Golang's features.
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-go-lib.html) and the
[Go API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-go-api.html) in the
ROCm documentation portal.
### CLI tool
A versatile command line tool for managing and monitoring AMD hardware. You can use `amd-smi` for:
- Device information: Quickly retrieve detailed information about AMD GPUs
- Performance monitoring: Real-time monitoring of GPU utilization, memory, temperature, and power consumption
- Process information: Identify which processes are using GPUs
- Configuration management: Adjust GPU settings like clock speeds and power limits
- Error reporting: Monitor and report GPU errors for proactive maintenance
Check out
[Getting to Know Your GPU: A Deep Dive into AMD SMI -- ROCm Blogs](https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html)
for a rundown.
### Docker container configuration
To ensure proper functionality of AMD SMI within a Docker container, the
following configuration options must be included. These settings are
particularly important for managing memory partitions, as partitioning depends
on loading and unloading kernel drivers.
- `--cap-add=SYS_MODULE`
- `-v /lib/modules:/lib/modules`
See [Using AMD SMI in a Docker
container](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/setup-docker-container.html)
for more information.
## Building AMD SMI
This section describes the prerequisites and steps to build AMD SMI from source.
### Required software
To build the AMD SMI library, the following components are required. Note that the software versions specified were used
during development; earlier versions are not guaranteed to work.
* CMake (v3.20.0 or later) -- `python3 -m pip install cmake`
* g++ (v5.4.0 or later)
* libdrm-dev (for Ubuntu and Debian)
* libdrm-devel (for RPM-based distributions)
In order to build the AMD SMI Python package, the following components are required:
* Python (3.6.8 or later)
* virtualenv -- `python3 -m pip install virtualenv`
### Build steps
1. Clone the AMD SMI repository to your local Linux machine.
```shell
git clone https://github.com/ROCm/amdsmi.git
```
2. The default installation location for the library and headers is `/opt/rocm`. Before installation, any old ROCm
directories should be deleted:
* `/opt/rocm`
* `/opt/rocm-<version_number>`
3. Build the library by following the typical CMake build sequence (run as root user or use `sudo` before `make install`
command); for instance:
```bash
mkdir -p build
cd build
cmake ..
make -j $(nproc)
make install
```
The built library is located in the `build/` directory. To build the `rpm` and `deb` packages use the following
command:
```bash
make package
```
### Rebuild the Python wrapper
The Python wrapper for the AMD SMI library is found in the [auto-generated file](#py_lib_fs)
`py-interface/amdsmi_wrapper.py`. It is essential to regenerate this wrapper whenever there are changes to the C++ API.
It is not regenerated automatically.
To regenerate the wrapper, use the following command.
```shell
./update_wrapper.sh
```
After this command, the file in `py-interface/amdsmi_wrapper.py` will be updated
on compile.
>[!NOTE]
>You need Docker installed on your system to regenerate the Python wrapper.
### Build the tests
To verify the build and capabilities of AMD SMI on your system, as well as to see practical examples of its usage, you
can build and run the available [tests in the repository](https://github.com/ROCm/amdsmi/tree/amd-staging/tests). Follow
these steps to build the tests:
```bash
mkdir -p build
cd build
cmake -DBUILD_TESTS=ON ..
make -j $(nproc)
```
#### Run the tests
Once the tests are [built](#build-the-tests), you can run them by executing the `amdsmitst` program. The executable can
be found at `build/tests/amd_smi_test/`.
### Build the docs
To build the documentation, follow the instructions at
[Building documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html).
## DISCLAIMER
The information contained herein is for informational purposes only, and is subject to change without notice. In
addition, any stated support is planned and is also subject to change. While every precaution has been taken in the
preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is
under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no
representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes
no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular
purposes, with respect to the operation or use of AMD hardware, software or other products described herein.
© 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+205
Melihat File
@@ -0,0 +1,205 @@
#!/bin/bash
do_configureLogrotate() {
# Set up hourly, size-based rotation for the AMD SMI library log:
#   1. create the log directory and log file, readable and writable by all users
#   2. write /etc/logrotate.d/amd_smi.conf unless it already exists
#   3. switch the system's logrotate trigger to hourly, either by rewriting the
#      systemd logrotate.timer unit or by moving the cron.daily job to cron.hourly
# Returns early with a warning when the logrotate command is not available.
local IS_SYSTEMD=0
local packageName="amd-smi-lib"
local logPath=/var/log/amd_smi_lib
local logFile="${logPath}/AMD-SMI-lib.log"
local logrotateConfFile=/etc/logrotate.d/amd_smi.conf
mkdir -p "${logPath}"
touch "${logFile}"
chmod -R a+rw "${logPath}"
chmod a+rw "${logFile}"
# The log directory/file above are created even if logrotate turns out to be missing.
if ! command -v logrotate &>/dev/null; then
echo "[WARNING] Detected logrotate is not installed."\
"$packageName logs (when turned on) will not rotate properly."
return
fi
# An existing configuration file is left untouched.
if [ ! -f $logrotateConfFile ]; then
touch "${logrotateConfFile}"
chmod 644 "${logrotateConfFile}" # root r/w, all others read
# AMD SMI logging rotation, rotates files using root user/group
# Hourly logrotation check
# Only rotates if size grew larger than 1MB
# Max of 4 rotation files, oldest will be removed
# Rotated files use date extension of ex. AMD-SMI-lib.log.2023-05-09_16:51:42
cat << EOF > "${logrotateConfFile}"
${logFile} {
su root root
hourly
missingok
notifempty
rotate 4
size 1M
copytruncate
dateext
dateformat .%%Y-%%m-%%d_%H:%%M:%%S
}
EOF
# Fix for % S argument not found (now we escape with %%)
# issue was RPM build thought we were using macros
# https://gitlab.kitware.com/cmake/cmake/-/issues/22965
# https://rpm-software-management.github.io/rpm/manual/spec.html
# The substitution collapses every '%%' written above to a single '%'
# ('%H' is already written with a single '%').
sed -i s/%%/%/g "${logrotateConfFile}"
# workaround: remove extra 'OURCE' text
# from amd_smi.conf. Unsure if CMAKE,
# bash, or here document
# issue (only seen on RHEL 8.7)
sed -i s/OURCE//g "${logrotateConfFile}"
fi
# check if logrotate uses system timers, Ubuntu/modern OS's do
# Several older OS's like RHEL 8.7, do not. Instead defaults
# to use daily cron jobs - see https://stackoverflow.com/a/69465677
if [ -d /run/systemd/system ]; then
systemctl list-timers | grep -iq logrotate
if [ $? -eq 0 ]; then
IS_SYSTEMD=1
fi
fi
if [ "$IS_SYSTEMD" -eq 1 ]; then
# Configure systemd timers - the typical setup for modern Linux logrotation setups
if [ -f /lib/systemd/system/logrotate.timer ]; then
# Keep a one-time backup of the original unit; an existing backup is never overwritten.
if [ ! -f /lib/systemd/system/logrotate.timer.backup ]; then
cp /lib/systemd/system/logrotate.timer /lib/systemd/system/logrotate.timer.backup
fi
# Replace the timer unit with an hourly schedule.
cat << EOF > /lib/systemd/system/logrotate.timer
[Unit]
Description=Hourly rotation of log files
Documentation=man:logrotate(8) man:logrotate.conf(5)
[Timer]
OnCalendar=
OnCalendar=hourly
AccuracySec=1m
Persistent=true
[Install]
WantedBy=timers.target
EOF
systemctl reenable --now logrotate.timer
else
echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
"$packageName logs (when turned on) will not rotate properly."
fi
else
# $IS_SYSTEMD -eq 0
if [ -f /etc/cron.daily/logrotate ]; then
# move logrotate daily to hourly
if [ -d /etc/cron.hourly ]; then
mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
fi
fi
fi
}
do_ldconfig() {
    # Register the library directory under the RPM install prefix with the
    # dynamic loader. "@ENABLE_LDCONFIG@" is substituted with ON/OFF when the
    # package is built; anything other than ON makes this a no-op.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    echo $RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
do_install_amdsmi_python_lib() {
    # Install the AMD-SMI python library shipped under
    # $RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@ and try to activate argcomplete.
    # Problems are reported as warnings and end the function early.

    # get python version
    local python3_minor_version
    python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)')
    if [ $? -ne 0 ]; then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not "\
            "supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    # Remember the caller's pip settings, including whether they were set at all,
    # so they can be put back exactly as they were.
    local PIP_ROOT_USER_ACTION_WAS_SET="${PIP_ROOT_USER_ACTION+yes}"
    local PREVIOUS_PIP_ROOT_USER_ACTION="$PIP_ROOT_USER_ACTION"
    export PIP_ROOT_USER_ACTION=ignore
    # python3.11 requires --break-system-packages
    local PIP_BREAK_SYSTEM_PACKAGES_WAS_SET="${PIP_BREAK_SYSTEM_PACKAGES+yes}"
    local PREVIOUS_PIP_BREAK_SYSTEM_PACKAGES="$PIP_BREAK_SYSTEM_PACKAGES"
    export PIP_BREAK_SYSTEM_PACKAGES=1

    # Undo the two pip overrides above; a variable that was unset before is unset
    # again rather than exported as an empty string.
    restore_pip_environment() {
        if [ -n "$PIP_ROOT_USER_ACTION_WAS_SET" ]; then
            export PIP_ROOT_USER_ACTION="$PREVIOUS_PIP_ROOT_USER_ACTION"
        else
            unset PIP_ROOT_USER_ACTION
        fi
        if [ -n "$PIP_BREAK_SYSTEM_PACKAGES_WAS_SET" ]; then
            export PIP_BREAK_SYSTEM_PACKAGES="$PREVIOUS_PIP_BREAK_SYSTEM_PACKAGES"
        else
            unset PIP_BREAK_SYSTEM_PACKAGES
        fi
    }

    # Remove old python library
    local amdsmi_pip_list_output
    amdsmi_pip_list_output=$(python3 -m pip list --format=columns --disable-pip-version-check)
    # check pip list output for amdsmi
    if [[ $amdsmi_pip_list_output == *"amdsmi"* ]]; then
        echo "Detected old AMD-SMI python library (amdsmi)..."
        python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
        echo "Removed old AMD-SMI python library (amdsmi)..."
    fi

    # static builds don't include python lib
    if [ "@BUILD_SHARED_LIBS@" != "ON" ]; then
        # Do not leak the pip overrides into the rest of the scriptlet
        restore_pip_environment
        return
    fi

    check_and_install_amdsmi() {
        local setuptools_version
        setuptools_version=$(python3 -c 'import setuptools; print(setuptools.__version__)')
        if [ $? -ne 0 ]; then
            echo "[WARNING] Could not determine setuptools version. "\
                "AMD-SMI python library will not be installed."
            return
        fi

        # install python library at $RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@/amdsmi
        local amdsmi_python_lib_path="$RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@"

        # Decide installation method based on setuptools version.
        # `sort -V | head -n1` yields the smaller of the two version strings.
        # NOTE(review): the first comparison also rejects a version of exactly "28.5",
        # although the warning says "less than" - confirm the intended lower bound.
        if [[ "$(printf '%s\n' "$setuptools_version" "28.5" | sort -V | head -n1)" == "$setuptools_version" ]]; then
            echo "[WARNING] Setuptools version is less than 28.5. AMD-SMI will not be installed."
        elif [[ "$(printf '%s\n' "$setuptools_version" "41.0.1" | sort -V | head -n1)" != "41.0.1" ]]; then
            echo "Using setup.py for installation due to setuptools version $setuptools_version"
            # Run in a subshell: the caller's working directory is untouched, nothing
            # is echoed by `cd -`, and setup.py is not run if the directory is missing.
            (
                cd "$amdsmi_python_lib_path" && python3 setup.py install
            )
        else
            echo "Using pyproject.toml for installation due to setuptools version $setuptools_version"
            python3 -m pip install "$amdsmi_python_lib_path" --quiet --disable-pip-version-check --no-build-isolation --no-index
        fi
    }

    # Call the function
    check_and_install_amdsmi

    restore_pip_environment

    # only try to activate argcomplete if such command exists
    # python3-argcomplete is recommended but optional, we handle its absence gracefully
    if command -v activate-global-python-argcomplete &>/dev/null; then
        activate-global-python-argcomplete 2>/dev/null || {
            echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete"
        }
    else
        # try older argcomplete3 version
        if command -v activate-global-python-argcomplete3 &>/dev/null; then
            activate-global-python-argcomplete3 2>/dev/null || {
                echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete3"
            }
        else
            echo "[WARNING] Could not find argcomplete activation command. "\
                "Argument completion will not work. Install python3-argcomplete package to enable it."
        fi
    fi
}
# post install or upgrade, $1 is 1 or 2 -> do these actions
if [ "$1" -ge 1 ]; then
do_install_amdsmi_python_lib
do_ldconfig
# logrotate configuration is best-effort: if it fails, the scriptlet exits with status 0
do_configureLogrotate || exit 0
fi
+8
Melihat File
@@ -0,0 +1,8 @@
#!/bin/bash
# Drop the ld.so configuration written at install time.
# Applies to rpm remove ($1=0) or upgrade ($1=1) operations only.
if [ "$1" -le 1 ]; then
    # left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
    if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
        rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
        ldconfig
    fi
fi
+95
Melihat File
@@ -0,0 +1,95 @@
#!/bin/bash
rm_leftovers() {
    # Delete files created after installation (python caches, build output,
    # leftover docs) below the install prefix.
    local share_dir="$RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@"

    # remove pyc files generated by python
    rm -rf "$RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
    rm -rf "$share_dir/amdsmi/__pycache__"
    # remove build and egg files
    rm -rf "$share_dir/amdsmi.egg-info"
    rm -rf "$share_dir/build"
    # remove dist files (only applies to old setuptools versions like on RHEL8)
    rm -rf "$share_dir/dist"
    # remove leftover doc files
    # The wildcard must stay outside the quotes: a quoted '*' is taken literally,
    # so nothing would ever match. `rm -f` already ignores a pattern without matches.
    rm -rf "$share_dir/../doc/"amd_smi*
}
rm_logFolder() {
    # Remove the AMD-SMI library log directory together with its contents.
    local log_dir=/var/log/amd_smi_lib
    rm -rf "$log_dir"
}
rm_rocm_tests_dir(){
    # Remove the installed tests directory, if present, and report it.
    local tests_dir="$RPM_INSTALL_PREFIX0/share/amd_smi/tests/"

    # Nothing to do when the directory does not exist
    [ -d "$tests_dir" ] || return 0

    rm -rf "$tests_dir"
    echo "Removed ROCm tests directory."
}
return_logrotateToOrigConfig() {
    # Undo the logrotate changes: drop the amd_smi logrotate config, move the
    # hourly cron job back to daily, and restore the backed-up systemd timer.
    local amdsmi_conf=/etc/logrotate.d/amd_smi.conf
    local hourly_job=/etc/cron.hourly/logrotate
    local timer_unit=/lib/systemd/system/logrotate.timer
    local timer_backup=/lib/systemd/system/logrotate.timer.backup

    if [ -f "$amdsmi_conf" ]; then
        rm -rf "$amdsmi_conf"
    fi

    if [ -f "$hourly_job" ]; then
        mv "$hourly_job" /etc/cron.daily/logrotate
    fi

    # Only touch the timer when a backup of the original unit exists
    if [ -f "$timer_backup" ]; then
        cp "$timer_backup" "$timer_unit"
        rm -rf "$timer_backup"
        systemctl reenable --now logrotate.timer
    fi
}
rm_python_lib() {
    # Uninstall the amdsmi python package through pip and report whether it is gone.

    # get python version
    local python3_minor_version
    if ! python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Remove old python library when pip lists it
    local installed_packages
    installed_packages=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_packages" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # List again to verify the result of the uninstall
    installed_packages=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_packages" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
if [ "$1" -le 1 ]; then
    # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
    # remove old gpuv-smi symlink
    # (path quoted so a relocated prefix with whitespace is not word-split)
    rm -f "$RPM_INSTALL_PREFIX0/bin/gpuv-smi" &> /dev/null
    rm_python_lib
    rm_leftovers
    rm_logFolder
    rm_rocm_tests_dir
    return_logrotateToOrigConfig
fi
@@ -0,0 +1,27 @@
# - Config file for the amd_smi package
# It defines the following variables
# AMD_SMI_INCLUDE_DIRS - include directories for amd_smi
# AMD_SMI_LIBRARIES - libraries to link against
# The singular and lower-case spellings set below carry the same values.
# Compute paths
# @PACKAGE_INIT@ is replaced when this template is configured.
# NOTE(review): presumably expanded by configure_package_config_file(), which would
# supply set_and_check() and check_required_components() - confirm against the caller.
@PACKAGE_INIT@
get_filename_component(AMD_SMI_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
# Include directory, exported under three variable spellings
set_and_check(amd_smi_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
set_and_check(AMD_SMI_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
set_and_check(AMD_SMI_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
# Library directory, exported under three variable spellings
set_and_check(amd_smi_LIB_DIR "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
set_and_check(AMD_SMI_LIB_DIR "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
set_and_check(AMD_SMI_LIB_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
# Our library dependencies (contains definitions for IMPORTED targets)
# Skipped when the amd_smi target already exists or amd_smi_BINARY_DIR is set
if(NOT TARGET amd_smi AND NOT amd_smi_BINARY_DIR)
include("${AMD_SMI_CMAKE_DIR}/amd_smiTargets.cmake")
endif()
# These are IMPORTED targets created by amd_smiTargets.cmake
# TODO: Need to check if OAM libraries are needed here!
set(AMD_SMI_LIBRARIES amd_smi)
set(AMD_SMI_LIBRARY amd_smi)
check_required_components(amd_smi)
+128
Melihat File
@@ -0,0 +1,128 @@
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import logging
import re
class BDF():
    """PCI address (segment, bus, device, function) with pythonic comparison.

    Useful for validating a BDF string and converting it to a BDF object, so
    that BDF values can be compared, hashed, iterated and concatenated.

    Accepted inputs are another BDF object or a string whose fields are
    hexadecimal and separated by ':' or '.', for example "0000:01:00.0" or
    "01:00.0" (the segment defaults to 0 when only three fields are given).
    The repr form "BDF(...)" is accepted as well.

    Attributes:
        segment: PCI segment (domain), 0..65535.
        bus: PCI bus number, 0..255.
        device: PCI device number, 0..31.
        function: PCI function number, 0..7.
    """

    def __init__(self, bdf):
        """Init a BDF object from a BDF object or a BDF string.

        Raises:
            ValueError: a field of the string is not a hexadecimal number.
            BDF.BDFError: a field exceeds its valid range.
        """
        if isinstance(bdf, BDF):
            self.segment, self.bus, self.device, self.function = tuple(bdf)
        else:
            if bdf.startswith("BDF("):
                bdf = bdf.replace('BDF(', '').replace(')', '')
            try:
                bdf_components = [int(x, 16) for x in re.split('[:.]', bdf)]
            except ValueError:
                # int() signals a malformed field with ValueError; log it and
                # let the original exception propagate to the caller
                logging.error(f"Invalid string passed: {bdf}")
                raise
            self.segment = bdf_components[0] if len(bdf_components) == 4 else 0
            self.bus, self.device, self.function = bdf_components[-3:]

        if self.segment > 65535:
            raise self.BDFError("Segment can't be greater than 65535")
        if self.bus > 255:
            raise self.BDFError("Bus can't be greater than 255")
        if self.device > 31:
            raise self.BDFError("Device can't be greater than 31")
        if self.function > 7:
            raise self.BDFError("Function can't be greater than 7")

    class BDFError(Exception):
        """BDF Class Error"""

    def __eq__(self, passed_bdf):
        """Overrides the == operator and allows for BDF objects to be compared to BDF strings"""
        # Only accept strings and BDF objects
        if isinstance(passed_bdf, str):
            if passed_bdf == '':
                return False
            passed_bdf = BDF(passed_bdf)
        elif not isinstance(passed_bdf, BDF):
            return False

        return (self.segment == passed_bdf.segment and
                self.bus == passed_bdf.bus and
                self.device == passed_bdf.device and
                self.function == passed_bdf.function)

    def __ne__(self, passed_bdf):
        """Overrides the != operator and allows for BDF objects to be compared to BDF strings"""
        # Since == is overridden, != is simply its negation
        return not self == passed_bdf

    def __add__(self, passed_bdf):
        """Overrides the + operator and allows for string concatenation"""
        return str(self) + passed_bdf

    def __radd__(self, passed_bdf):
        """Overrides the + operator and allows for string concatenation"""
        return passed_bdf + str(self)

    def __str__(self):
        """Cast BDF object to a string of the form SSSS:BB:DD.F (upper-case hex)"""
        # The function is separated by '.', which is what the pattern in
        # __contains__ expects and what __init__ parses back
        return "{:04X}:{:02X}:{:02X}.{}".format(self.segment, self.bus, self.device, self.function)

    def __repr__(self):
        """How the BDF object is represented"""
        return f"BDF({self})"

    def __hash__(self):
        """Allow the BDF object to be hashable"""
        return hash(str(self))

    def __iter__(self):
        """Make the BDF object iterable over its 4 values"""
        yield from (self.segment, self.bus, self.device, self.function)

    def __contains__(self, passed_bdf):
        """Overrides the 'in' operator: true when passed_bdf denotes this BDF"""
        # Normalize the input first so that short forms such as "01:00.0" match
        passed_bdf = str(BDF(passed_bdf))
        bdf_regex = "(?:[0-6]?[0-9a-fA-F]{1,4}:)?[0-2]?[0-9a-fA-F]{1,2}:[0-9a-fA-F]{1,2}\\.[0-7]"
        for match in re.findall(bdf_regex, passed_bdf):
            if self == match:
                return True
        return False
@@ -0,0 +1,80 @@
# Banner shown at configure time for the CLI subdirectory
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" CMake AMDSMI CLI Install ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
# Set CLI Build Directory
set(PY_PACKAGE_DIR "amdsmi_cli")
# Install destination of the CLI package; a cache entry, so it can be overridden at configure time
set(PY_CLI_INSTALL_DIR "${CMAKE_INSTALL_LIBEXECDIR}" CACHE STRING "CLI tool installation directory")
# populate version string (@ONLY: only @VAR@ references are substituted)
configure_file(_version.py.in ${PY_PACKAGE_DIR}/_version.py @ONLY)
# Stage the CLI sources into the build tree; copy only if files are different
add_custom_command(
    OUTPUT ${PY_PACKAGE_DIR}/__init__.py
           ${PY_PACKAGE_DIR}/amdsmi_cli.py
           ${PY_PACKAGE_DIR}/amdsmi_commands.py
           ${PY_PACKAGE_DIR}/amdsmi_helpers.py
           ${PY_PACKAGE_DIR}/amdsmi_init.py
           ${PY_PACKAGE_DIR}/amdsmi_logger.py
           ${PY_PACKAGE_DIR}/amdsmi_parser.py
           ${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
           ${PY_PACKAGE_DIR}/BDF.py
           ${PY_PACKAGE_DIR}/README.md
           ${PY_PACKAGE_DIR}/Release_Notes.md
    DEPENDS amdsmi_cli
    # Use CMake's own directory creation instead of the shell-specific `mkdir -p`,
    # consistent with the `-E copy_if_different` commands below
    COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/__init__.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_cli.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_commands.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_helpers.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_init.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_logger.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_parser.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_cli_exceptions.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/BDF.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/README.md ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/Release_Notes.md ${PY_PACKAGE_DIR}/)
# The CLI requires the python amdsmi wrapper to be installed
# Built as part of ALL; depends on the python_package target and on every staged CLI file.
# _version.py is produced by configure_file() above, the other files by the copy command.
add_custom_target(
amdsmi_cli ALL
DEPENDS python_package
${PY_PACKAGE_DIR}/__init__.py
${PY_PACKAGE_DIR}/_version.py
${PY_PACKAGE_DIR}/amdsmi_cli.py
${PY_PACKAGE_DIR}/amdsmi_commands.py
${PY_PACKAGE_DIR}/amdsmi_helpers.py
${PY_PACKAGE_DIR}/amdsmi_init.py
${PY_PACKAGE_DIR}/amdsmi_logger.py
${PY_PACKAGE_DIR}/amdsmi_parser.py
${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
${PY_PACKAGE_DIR}/BDF.py
${PY_PACKAGE_DIR}/README.md
${PY_PACKAGE_DIR}/Release_Notes.md)
# Install the whole staged CLI directory below PY_CLI_INSTALL_DIR (component "dev")
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${PY_PACKAGE_DIR}
DESTINATION ${PY_CLI_INSTALL_DIR}
COMPONENT dev)
# Install amdsmi_cli.py once more via PROGRAMS so that it receives executable permissions
install(
PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${PY_PACKAGE_DIR}/amdsmi_cli.py
DESTINATION ${PY_CLI_INSTALL_DIR}/${PY_PACKAGE_DIR}
COMPONENT dev)
# Staging directory for the amd-smi symlink inside the build tree
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)

# symlink amdsmi_cli.py to amd-smi
# The link is created at the same build-tree path that is created above, declared
# in BYPRODUCTS and installed below (bin/amd-smi). Using CMAKE_INSTALL_BINDIR for
# the build-tree location would point at a directory that is never created
# whenever CMAKE_INSTALL_BINDIR differs from "bin".
add_custom_target(
    link_amdsmi_cli ALL
    DEPENDS amdsmi_cli
    BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/bin/amd-smi
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMAND ${CMAKE_COMMAND} -E create_symlink ../${PY_CLI_INSTALL_DIR}/${PY_PACKAGE_DIR}/amdsmi_cli.py
            ${CMAKE_CURRENT_BINARY_DIR}/bin/amd-smi)

# The link target is relative, so it resolves from the installed bin directory
install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/bin/amd-smi
    DESTINATION ${CMAKE_INSTALL_BINDIR}
    COMPONENT dev)
+28
Melihat File
@@ -0,0 +1,28 @@
# AMD SMI CLI tool
A command line tool for manipulating and monitoring the `amdgpu` kernel driver.
`amd-smi` is intended to replace and deprecate the existing
[`rocm-smi`](https://github.com/rocm/rocm_smi_lib) CLI tool.
When using the CLI tool, you should have at least one AMD GPU and the driver
installed.
>[!NOTE]
>The AMD SMI CLI tool is provided as example code to aid the development of
>telemetry tools. The Python or C++ library is recommended as a robust data
>source.
Find the documentation in the `docs/` directory.
- [Install AMD SMI](../docs/install/install.md)
- [About the tool and how to get started](../docs/how-to/amdsmi-cli-tool.md)
## Online documentation
Explore the latest documentation on the [ROCm documentation
portal](https://rocm.docs.amd.com/projects/en/latest/index.html).
- [Install AMD SMI](https://rocm.docs.amd.com/projects/amdsmi/en/latest/install/install.html)
- [CLI tool usage](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-cli-tool.html).
@@ -0,0 +1,52 @@
# Release Notes
## Documentation
Documentation for AMDSMI-CLI is available after installation at `/opt/<rocm_instance>/libexec/amdsmi_cli/README.md`.
## AMDSMI-CLI 23.3.1.0
### Known Issues
- not all ecc fields are currently supported
- RHEL 8 & SLES 15 may have extra install steps
## AMDSMI-CLI 23.0.1.1
### Known Issues
- not all ecc fields are currently supported
- RHEL 8 & SLES 15 have extra install steps
## AMDSMI-CLI 23.0.1.0
### Known Issues
- not all ecc fields are currently supported
- RHEL 8 & SLES 15 have extra install steps
## AMDSMI-CLI 23.0.0.4
### Added
- AMDSMI-CLI tool enabled for Linux Baremetal & Guest
- Added CSV & Watch modifier
- Added topology subcommand
### Known Issues
- not all ecc fields are currently supported
- RHEL 8 & SLES 15 have extra install steps
## AMDSMI-CLI 0.0.2
### Added
- AMDSMI-CLI tool enabled for Linux Baremetal & Guest
### Known Issues
- ecc & ras subcommands will report N/A even if RAS is enabled
- process vram_mem's unit is listed as percentage vs bytes
- csv modifier does not work
- topology information is not yet enabled
- watch modifier not fully enabled
- limited guest support
@@ -0,0 +1 @@
from _version import __version__
@@ -0,0 +1 @@
__version__ = "@amd_smi_lib_VERSION_STRING@"
+224
Melihat File
@@ -0,0 +1,224 @@
#!/usr/bin/env python3
# PYTHON_ARGCOMPLETE_OK
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import logging
import sys
import os
# argcomplete is optional: when it cannot be imported, the failure is only
# logged at debug level and the name `argcomplete` stays undefined.
try:
import argcomplete
except ImportError as e:
logging.debug(f"Unhandled import error: {e}")
logging.debug("argcomplete module not found. Autocomplete will not work.")
# from typing import TYPE_CHECKING
# # only used for type checking
# # pyright trips up and cannot find amdsmi scripts without it
# if TYPE_CHECKING:
# from amdsmi_commands import AMDSMICommands
# from amdsmi_parser import AMDSMIParser
# from amdsmi_logger import AMDSMILogger
# import amdsmi_cli_exceptions
# from amdsmi import amdsmi_interface
# from amdsmi import amdsmi_exception
# Set the environment variable for GPU metrics cache duration
# setdefault() keeps a value already present in the environment and returns the effective value
gpu_metrics_cache_ms = os.environ.setdefault("AMDSMI_GPU_METRICS_CACHE_MS", "100")
logging.debug("AMDSMI_GPU_METRICS_CACHE_MS = %sms", gpu_metrics_cache_ms)
# Set the environment variable for ASIC cache duration
asic_info_cache_ms = os.environ.setdefault("AMDSMI_ASIC_INFO_CACHE_MS", "10000") # 10 seconds
logging.debug("AMDSMI_ASIC_INFO_CACHE_MS = %sms", asic_info_cache_ms)
# Import the CLI modules from the default module search path first.
try:
from amdsmi_init import *
from amdsmi_helpers import AMDSMIHelpers
from amdsmi_commands import AMDSMICommands
from amdsmi_parser import AMDSMIParser
from amdsmi_logger import AMDSMILogger
import amdsmi_cli_exceptions
except ImportError:
# Fallback: add <directory of this file>/../libexec/amdsmi_cli to the module
# search path and retry the same imports.
current_path = os.path.dirname(os.path.abspath(__file__))
cli_files_path = f"{current_path}/../libexec/amdsmi_cli"
sys.path.append(cli_files_path)
try:
from amdsmi_init import *
from amdsmi_helpers import AMDSMIHelpers
from amdsmi_commands import AMDSMICommands
from amdsmi_parser import AMDSMIParser
from amdsmi_logger import AMDSMILogger
import amdsmi_cli_exceptions
except ImportError as e:
# Second failure is fatal: report where the files were expected and exit with status 1
print(f"Unhandled import error: {e}")
print(f"Unable to import amdsmi_cli files. Check {cli_files_path} if they are present.")
sys.exit(1)
def _print_error(e, destination):
if destination in ['stdout', 'json', 'csv']:
print(e)
else:
f = open(destination, "w", encoding="utf-8")
f.write(e)
f.close()
print("Error occurred. Result written to " + str(destination) + " file")
def configure_logging_and_execute(args, amd_smi_commands):
    """
    Set up the root logger from ``args.loglevel`` and run the selected subcommand.

    Args:
        args: Parsed command-line arguments; ``loglevel`` selects the log level
            and ``func`` is the subcommand callable, invoked as ``args.func(args)``.
        amd_smi_commands: Instance of AMDSMICommands; its logger supplies the
            destination and format used when an error is reported.
    """
    # Start clean: detach every handler currently attached to the root logger
    for stale_handler in list(logging.root.handlers):
        logging.root.removeHandler(stale_handler)

    # To enable debug logs in AMD SMI library:
    #   set RSMI_LOGGING = 1 for logging to files
    #   set RSMI_LOGGING = 2 for logging to stdout
    #   set RSMI_LOGGING = 3 for logging to stdout and files
    #   set RSMI_LOGGING = 0 to disable logging
    # Files will be located in /var/log/amd_smi_lib/AMD-SMI-lib.log*

    # log string with the following format:
    #   loglevel | YYYY-MM-DD HH:MM:SS.ms | filename:line | message
    level_by_name = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL
    }
    timestamp_field = '%(asctime)s.%(msecs)03d'
    record_format = '%(levelname)s | ' + timestamp_field + ' | %(filename)s:%(lineno)d | %(message)s'
    logging.basicConfig(format=record_format,
                        level=level_by_name[args.loglevel],
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Tracebacks are shown only at DEBUG level
    sys.tracebacklimit = 10 if args.loglevel == "DEBUG" else -1

    logging.debug(args)

    # Execute subcommands; CLI and library errors are reported, not re-raised
    try:
        args.func(args)
    except amdsmi_cli_exceptions.AmdSmiException as e:
        _print_error(str(e), amd_smi_commands.logger.destination)
    except amdsmi_exception.AmdSmiLibraryException as e:
        exc = amdsmi_cli_exceptions.AmdSmiLibraryErrorException(amd_smi_commands.logger.format, e.get_error_code())
        _print_error(str(exc), amd_smi_commands.logger.destination)
# Script entry point: build the command and parser objects, normalize argv,
# parse it, apply the output modifiers and run the selected subcommand.
if __name__ == "__main__":
# Disable traceback before possible init errors in AMDSMICommands and AMDSMIParser
# Tracebacks stay enabled when "DEBUG" appears anywhere (case-insensitively) in the stringified argv
copy_argv = str(sys.argv.copy()).upper()
if "DEBUG" in copy_argv:
sys.tracebacklimit = 10
else:
sys.tracebacklimit = -1
amd_smi_helpers = AMDSMIHelpers()
amd_smi_commands = AMDSMICommands(helpers=amd_smi_helpers)
# One handler per subcommand is passed to the parser
amd_smi_parser = AMDSMIParser(amd_smi_commands.version,
amd_smi_commands.list,
amd_smi_commands.static,
amd_smi_commands.firmware,
amd_smi_commands.bad_pages,
amd_smi_commands.metric,
amd_smi_commands.process,
amd_smi_commands.profile,
amd_smi_commands.event,
amd_smi_commands.topology,
amd_smi_commands.set_value,
amd_smi_commands.reset,
amd_smi_commands.monitor,
amd_smi_commands.xgmi,
amd_smi_commands.partition,
amd_smi_commands.ras,
amd_smi_commands.node,
amd_smi_commands.default,
sys_argv=sys.argv,
helpers=amd_smi_helpers)
# `argcomplete` is undefined when its optional import failed, hence NameError
try:
argcomplete.autocomplete(amd_smi_parser)
except NameError:
logging.debug("argcomplete module not found. Autocomplete will not work.")
# Store possible subcommands & aliases for later errors
valid_commands = amd_smi_parser.possible_commands
valid_commands += ['--help', '-h']
# Convert arguments to lowercase, but preserve case for folder path values
processed_argv = []
# Arguments that should preserve case
case_sensitive_args = ['--folder', '--file', '--gpu', '--cpu', '--core', '--profile', '--cper-file']
case_sensitive_prefixes = ['--folder=', '--file=', '--gpu=', '--cpu=', '--core=', '--profile=', '--cper-file=']
preserve_case_for_next = False
for i, arg in enumerate(sys.argv):
if preserve_case_for_next:
# Preserve case for the next argument value
processed_argv.append(arg)
preserve_case_for_next = False
elif arg in case_sensitive_args:
# Convert flag to lowercase but preserve next value
processed_argv.append(arg.lower())
preserve_case_for_next = True
elif any(arg.startswith(prefix) for prefix in case_sensitive_prefixes):
# Handle --arg=value format, preserve case for the value part
for prefix in case_sensitive_prefixes:
if arg.startswith(prefix):
flag = prefix.rstrip('=')
value = arg[len(prefix):]
processed_argv.append(flag.lower() + '=' + value)
break
elif arg.startswith('--') or not arg.startswith('-'):
# Convert other long options and positional arguments to lowercase
processed_argv.append(arg.lower())
else:
# Preserve case for short options
processed_argv.append(arg)
sys.argv = processed_argv
# Choose what to parse: no arguments -> the 'default' subcommand; a leading
# '--loglevel' while tracebacks are enabled -> 'default' plus the remaining
# arguments; a known subcommand -> argv as given; anything else is an error.
if len(sys.argv) == 1:
args = amd_smi_parser.parse_args(args=['default'])
elif sys.tracebacklimit == 10 and (sys.argv[1] == '--loglevel'):
args = amd_smi_parser.parse_args(args=['default', '--loglevel'] + sys.argv[2:])
elif sys.argv[1] in valid_commands:
args = amd_smi_parser.parse_args(args=None)
else:
# NOTE(review): the second argument is the logger destination - confirm it
# matches the parameter the exception expects.
raise amdsmi_cli_exceptions.AmdSmiInvalidSubcommandException(sys.argv[1],amd_smi_commands.logger.destination)
# Handle command modifiers before subcommand execution
# human readable is the default output format
# csv is checked after json, so csv wins when both are set
if hasattr(args, 'json') and args.json:
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.json.value
if hasattr(args, 'csv') and args.csv:
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.csv.value
if hasattr(args, 'file') and args.file:
amd_smi_commands.logger.destination = args.file
configure_logging_and_execute(args, amd_smi_commands)
@@ -0,0 +1,309 @@
#!/usr/bin/env python3
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import json
# Human-readable text for each AMD-SMI status code, keyed by the absolute
# value of the code (see _get_error_message).
AMDSMI_ERROR_MESSAGES = {
    0: "Success",
    1: "Invalid parameters",
    2: "Command not supported",
    3: "Command not yet implemented",
    4: "Failed load module",
    5: "Failed load symbol",
    6: "Drm error",
    7: "API call failed",
    8: "Timeout in API call",
    9: "Retry operation",
    10: "Permission Denied",
    11: "Interrupt occurred during execution",
    12: "I/O Error",
    13: "Address fault",
    14: "Error opening file",
    15: "Not enough memory",
    16: "Internal error",
    17: "Out of bounds",
    18: "Initialization error",
    19: "Internal reference counter exceeded",
    # Reserved for future error messages
    30: "Device busy",
    31: "Device Not found",
    32: "Device not initialized",
    33: "No more free slot",
    34: "Driver not loaded",
    # Reserved for future error messages
    40: "No data was found for given input",
    41: "Insufficient size for operation",
    42: "Unexpected size of data was read",
    43: "The data read or provided was unexpected",
    44: "System has different cpu than AMD",
    45: "Energy driver not found",
    46: "MSR driver not found",
    47: "HSMP driver not found",
    48: "HSMP not supported",
    49: "HSMP message/feature not supported",
    50: "HSMP message timed out",
    51: "No Energy and HSMP driver present",
    52: "File or directory not found",
    53: "Parsed argument is invalid",
    54: "AMDGPU restart error",
    55: "Setting is not available",
    0xFFFFFFFE: "AMD-SMI Library error did not map to a status code",
    0xFFFFFFFF: "Unknown error"
}
def _get_error_message(error_code):
    """Return the text for a status code, looked up by its absolute value.

    Codes without an entry in AMDSMI_ERROR_MESSAGES yield "Generic error".
    """
    return AMDSMI_ERROR_MESSAGES.get(abs(error_code), "Generic error")
class AmdSmiException(Exception):
    """Base class of the CLI errors.

    Subclasses fill in one message per output format; str() returns the one
    that matches ``output_format`` and stores it in ``message``.
    """

    def __init__(self):
        self.output_format = ''
        self.device_type = ''
        self.message = ''
        self.stdout_message = ''
        self.csv_message = ''
        self.json_message = {}

    def __str__(self):
        # Return message according to the current output format;
        # anything other than 'json' or 'csv' falls back to the plain text form
        selected_format = self.output_format
        if selected_format == 'json':
            rendered = json.dumps(self.json_message)
        elif selected_format == 'csv':
            rendered = self.csv_message
        else:
            rendered = self.stdout_message
        self.message = rendered
        return rendered
class AmdSmiInvalidCommandException(AmdSmiException):
    """Invalid command error (code -1); ``message`` replaces the default text when given."""

    def __init__(self, command, outputformat: str, message=None):
        super().__init__()
        self.value = -1
        self.command = command
        self.output_format = outputformat

        default_text = f"Command '{self.command}' is invalid. Run 'amd-smi -h' for more info."
        text = message if message else default_text

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiInvalidParameterException(AmdSmiException):
    """Invalid parameter error (code -2) for parameter ``arg`` of ``command``."""

    def __init__(self, command, arg, outputformat: str):
        super().__init__()
        self.value = -2
        self.command = command
        self.arg = arg
        self.output_format = outputformat

        text = f"Parameter '{self.arg}' is invalid. Run 'amd-smi {self.command} -h' for more info."

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiDeviceNotFoundException(AmdSmiException):
    """Device-not-found error (code -3); the first true flag of gpu/cpu/core names the device type."""

    def __init__(self, command, outputformat: str, gpu: bool, cpu: bool, core: bool):
        super().__init__()
        self.value = -3
        self.command = command
        self.output_format = outputformat

        # Handle different devices: gpu takes precedence over cpu, cpu over core
        self.device_type = ""
        for flag, label in ((gpu, "GPU"), (cpu, "CPU"), (core, "CPU CORE")):
            if flag:
                self.device_type = label
                break

        text = f"Can not find a device: {self.device_type} '{self.command}'"

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiInvalidFilePathException(AmdSmiException):
    """Invalid path error (code -4); ``message`` replaces the default text when given."""

    def __init__(self, command, outputformat: str, message=None):
        super().__init__()
        self.value = -4
        self.command = command
        self.output_format = outputformat

        default_text = f"Path '{self.command}' cannot be found."
        text = message if message else default_text

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiInvalidParameterValueException(AmdSmiException):
    """Invalid parameter value error (code -5) for value ``arg`` passed to ``command``."""

    def __init__(self, command, arg, outputformat: str):
        super().__init__()
        self.value = -5
        self.command = command
        self.arg = arg
        self.output_format = outputformat

        text = f"Value '{self.arg}' is not of valid type or format. Run 'amd-smi {self.command} -h' for more info."

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiMissingParameterValueException(AmdSmiException):
    """Missing parameter value error (code -6); ``command`` holds the parameter name."""

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -6
        self.command = command
        self.output_format = outputformat

        text = f"Parameter '{self.command}' requires a value. Run '--help' for more info."

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiCommandNotSupportedException(AmdSmiException):
    """Command-not-supported-on-this-system error (code -7)."""

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -7
        self.command = command
        self.output_format = outputformat

        text = f"Command '{self.command}' is not supported on the system. Run '--help' for more info."

        # Same text rendered once per supported output format
        self.json_message.update({"error": text, "code": self.value})
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiParameterNotSupportedException(AmdSmiException):
    """CLI error describing a parameter unsupported on this system (error code -8).

    Args:
        command: Name of the parameter; quoted in the message.
        outputformat: Output format name, stored on ``output_format``.
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -8
        self.command = command
        self.output_format = outputformat

        text = f"Parameter '{self.command}' is not supported on the system. Run '--help' for more info."

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = text
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiRequiredCommandException(AmdSmiException):
    """CLI error describing a command missing its target argument (error code -9).

    Args:
        command: Name of the command; used in the message and in the
            "Run 'amd-smi <command> -h'" hint.
        outputformat: Output format name, stored on ``output_format``.
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -9
        self.command = command
        self.output_format = outputformat

        text = f"Command '{self.command}' requires a target argument. Run 'amd-smi {self.command} -h' for more info."

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = text
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiInvalidSubcommandException(AmdSmiException):
    """CLI error describing an invalid AMD-SMI command (error code -10).

    Args:
        command: The rejected command text; quoted in the message.
        outputformat: Output format name, stored on ``output_format``.
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -10
        self.command = command
        self.output_format = outputformat

        text = f"AMD-SMI Command '{self.command}' is invalid. Must receive valid AMD-SMI Command first. Run 'amd-smi -h' for more info."

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = text
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiPermissionDeniedException(AmdSmiException):
    """CLI error describing a command that needs elevated privileges (error code -11).

    Args:
        command: Name of the command; quoted in the message.
        outputformat: Output format name, stored on ``output_format``.
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -11
        self.command = command
        self.output_format = outputformat

        text = f"AMD-SMI Command '{self.command}' requires elevation (sudo privileges required)"

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = text
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiUnknownErrorException(AmdSmiException):
    """CLI error for failures with no more specific category (error code -100).

    Args:
        command: Stored on ``command``; not included in the message text.
        outputformat: Output format name, stored on ``output_format``.
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -100
        self.command = command
        self.output_format = outputformat

        text = "An unknown error has occurred. Run 'help' for more info."

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = text
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{text}, {self.value}"
        self.stdout_message = f"{text} Error code: {self.value}"
class AmdSmiLibraryErrorException(AmdSmiException):
    """CLI error wrapping a status code returned by the AMD SMI library.

    The CLI error code is ``-1000 - abs(error_code)``; the raw library code is
    kept on ``smilibcode``.

    Args:
        outputformat: Output format name, stored on ``output_format``.
        error_code: Numeric status code from the library (sign is ignored).
    """

    def __init__(self, outputformat: str, error_code):
        super().__init__()
        self.value = -1000 - abs(error_code)
        self.smilibcode = error_code
        self.output_format = outputformat

        # A status code that has no entry in AMDSMI_ERROR_MESSAGES must not make
        # this constructor raise a lookup error, which would hide the library
        # failure being reported. Fall back to a generic description instead.
        try:
            description = AMDSMI_ERROR_MESSAGES[abs(self.smilibcode)]
        except (KeyError, IndexError):
            description = "Unknown library error"

        common_message = f"AMDSMI has returned error '{self.value}' - '{description}'"

        # json_message is expected to be provided by the base-class initializer.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,154 @@
#!/usr/bin/env python3
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
### Handle safe initialization for amdsmi
import atexit
import logging
import signal
import sys
import os
from pathlib import Path
# Make the bundled Python bindings importable relative to this file:
# <directory of this file>/../../share/amd_smi is appended to sys.path.
current_path = os.path.dirname(os.path.abspath(__file__))
python_lib_path = f"{current_path}/../../share/amd_smi"
sys.path.append(python_lib_path)
# The path is appended (searched last), so an amdsmi package found earlier on
# sys.path, e.g. one installed into Python, takes precedence over this one.
try:
from amdsmi import amdsmi_interface, amdsmi_exception
except ImportError as e:
# Without the bindings nothing else in this module can work: explain and exit with status 1.
print(f"Unhandled import error: {e}")
print("Failed to import the amdsmi Python library. Ensure it is installed in Python.")
print(f"Alternatively, verify that the library is in the path:\n{python_lib_path}")
sys.exit(1)
# Using basic python logging for user errors and development.
# With level=ERROR the logging.debug() calls in this module are suppressed.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.ERROR) # User level logging
# This traceback limit only affects this file; once the code hits the CLI portion it gets reset to the user's preference
sys.tracebacklimit = -1 # Disable traceback when raising errors
# On initial import set initialized variable
AMDSMI_INITIALIZED = False
# Default init flag; replaced by the value returned from amdsmi_cli_init() during import-time initialization.
AMDSMI_INIT_FLAG = amdsmi_interface.AmdSmiInitFlags.INIT_ALL_PROCESSORS
# 4098 == 0x1002; presumably AMD's PCI vendor ID (not used within this part of the file).
AMD_VENDOR_ID = 4098
def check_amdgpu_driver():
    """Report whether the amdgpu kernel module is live.

    Returns:
        True only when /sys/module/amdgpu/initstate exists and its contents,
        stripped of surrounding whitespace, equal "live"; False otherwise.
    """
    initstate = Path("/sys/module/amdgpu/initstate")
    if not initstate.exists():
        return False
    return initstate.read_text(encoding="ascii").strip() == "live"
def check_amd_hsmp_driver():
    """Report whether the HSMP device node is present.

    The check is the existence of /dev/hsmp (presumably created by the
    amd_hsmp or hsmp_acpi driver); no module list is inspected.

    Returns:
        True when /dev/hsmp exists, False otherwise.
    """
    return Path("/dev/hsmp").exists()
def _amdsmi_init_or_exit(init_flag, not_loaded_message):
    """Call amdsmi_init(init_flag); log and exit(-1) when the driver is not loaded.

    Any other library/parameter exception is re-raised unchanged.
    """
    try:
        amdsmi_interface.amdsmi_init(init_flag)
    except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
        # Not every caught exception type is known to carry ``err_code``; default to
        # None so an AttributeError can never replace the original failure.
        if getattr(e, "err_code", None) in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                                            amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
            logging.error(not_loaded_message)
            sys.exit(-1)
        raise


def amdsmi_cli_init():
    """ Initializes AMDSMI Library for the CLI

    Checks for the presence of the amdgpu, amd_hsmp or hsmp_acpi drivers and initializes the
    AMD SMI library based on the live drivers found:

    * both checks pass -> INIT_AMD_APUS
    * only amdgpu      -> INIT_AMD_GPUS
    * only hsmp        -> INIT_AMD_CPUS
    * neither          -> amdsmi_init is NOT called and INIT_ALL_PROCESSORS is returned

    Each driver check is evaluated exactly once.

    Return:
        init_flag: the flag used to initialize the AMD SMI library without error

    Raises:
        err: AmdSmiLibraryException / AmdSmiParameterException if initialization fails for a
             reason other than "not initialized" / "driver not loaded" (those exit with -1)
    """
    init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_ALL_PROCESSORS
    gpu_live = check_amdgpu_driver()
    hsmp_live = check_amd_hsmp_driver()

    if gpu_live and hsmp_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_APUS
        logging.debug("Both amdgpu , amd_hsmp or hsmp_acpi driver's initstate is live")
        _amdsmi_init_or_exit(init_flag,
                             "Drivers not loaded (amdgpu, amd_hsmp or hsmp_acpi drivers not found in modules)")
    elif gpu_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS
        logging.debug("amdgpu driver initstate is live")
        _amdsmi_init_or_exit(init_flag, "Driver not loaded (amdgpu not found in modules)")
        logging.debug("amdgpu driver initialized successfully, but amd_hsmp or hsmp_acpi initstate was not live")
    elif hsmp_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS
        logging.debug("amd_hsmp or hsmp_acpi driver initstate is live")
        _amdsmi_init_or_exit(init_flag, "Driver not loaded (amd_hsmp or hsmp_acpi not found in modules)")
        logging.debug("amd_hsmp or hsmp_acpi driver initialized successfully, but amdgpu initstate was not live")

    logging.debug(f"AMDSMI initialized with atleast one driver successfully | init flag: {init_flag}")
    return init_flag
def amdsmi_cli_shutdown():
    """Shut down the AMDSMI library instance.

    Calls amdsmi_interface.amdsmi_shut_down(). If that raises
    AmdSmiLibraryException, an error is logged and the same exception
    propagates to the caller.

    Raises:
        AmdSmiLibraryException: if the library could not shut down cleanly
    """
    try:
        amdsmi_interface.amdsmi_shut_down()
    except amdsmi_exception.AmdSmiLibraryException:
        logging.error("Unable to cleanly shut down amd-smi-lib")
        raise
def signal_handler(sig, frame):
    """Signal handler that terminates the process with exit status 0.

    ``sys.exit(0)`` raises ``SystemExit``, which lets the interpreter unwind
    normally. ``SystemExit`` derives from ``BaseException`` rather than
    ``Exception``, so the former ``except Exception`` fallback around this
    call could never run and has been removed.

    Args:
        sig: Number of the received signal (logged at debug level).
        frame: Current stack frame supplied by the signal machinery; unused.

    Raises:
        SystemExit: always, with code 0.
    """
    logging.debug("Handling signal: %s", sig)
    sys.exit(0)
# Import-time initialization, guarded by AMDSMI_INITIALIZED:
#   - initialize the library and remember the init flag that was used,
#   - route SIGINT / SIGTERM to signal_handler (which exits with status 0),
#   - register amdsmi_cli_shutdown to run at interpreter exit.
if not AMDSMI_INITIALIZED:
AMDSMI_INIT_FLAG = amdsmi_cli_init()
AMDSMI_INITIALIZED = True
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
atexit.register(amdsmi_cli_shutdown)
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,143 @@
# This module provides common functions used for building
# and packaging ROCm projects
option(CMAKE_VERBOSE_MAKEFILE "Enable verbose output" ON)
option(CMAKE_EXPORT_COMPILE_COMMANDS "Export compile commands for linters and autocompleters" ON)
function(generic_add_rocm)
    # Sets up ROCm-related defaults for the calling scope:
    #   * ROCM_DIR cache entry (default /opt/rocm)
    #   * LIB_RUNPATH in the caller, only when ENV{ROCM_RPATH} is defined and
    #     LIB_RUNPATH is not already defined
    #   * install / packaging prefix cache entries derived from ROCM_DIR
    #   * extra package and library search paths in the caller
    set(ROCM_DIR "/opt/rocm" CACHE STRING "ROCm directory.")

    if(NOT DEFINED LIB_RUNPATH)
        if(DEFINED ENV{ROCM_RPATH})
            set(LIB_RUNPATH "\$ORIGIN:\$ORIGIN/../lib:\$ORIGIN/../lib64" PARENT_SCOPE)
        endif()
    endif()

    # Cache entries without FORCE: values already present in the cache are kept.
    set(CMAKE_INSTALL_PREFIX ${ROCM_DIR} CACHE STRING "Default installation directory.")
    set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Default packaging prefix.")

    # add package search paths
    set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /usr/lib64 /usr/lib/x86_64-linux-gnu PARENT_SCOPE)
    set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} /usr/local PARENT_SCOPE)
endfunction()
function(generic_package)
    # Configures compiler/linker flags and CPack settings for the calling scope.
    #
    # Flag variables are accumulated in THIS function's scope and exported to the
    # caller once, at the end of each section. `set(VAR ... PARENT_SCOPE)` does not
    # change VAR in the current scope, so chaining several PARENT_SCOPE sets of the
    # form "${VAR} <more>" would make every later set discard the earlier additions.

    # Used by test and example CMakeLists
    set(SHARE_INSTALL_PREFIX "share/${CMAKE_PROJECT_NAME}" CACHE STRING "Tests and Example install directory")

    if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)
        message("Compiler version is " ${CMAKE_CXX_COMPILER_VERSION})
        message(FATAL_ERROR "Require at least gcc-5.4.0")
    endif()

    if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
    else()
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
    endif()

    # Add address sanitizer
    # derived from:
    # https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/blob/e176056061bf11fdd98b58dd57deb4ac5625844d/amdocl/CMakeLists.txt#L27
    if(ADDRESS_SANITIZER)
        set(ASAN_COMPILER_FLAGS "-fno-omit-frame-pointer -fsanitize=address")
        set(ASAN_LINKER_FLAGS "-fsanitize=address")

        if(BUILD_SHARED_LIBS)
            # Clang-specific flag for shared ASAN library
            if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
                set(ASAN_COMPILER_FLAGS "${ASAN_COMPILER_FLAGS} -shared-libsan")
                set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -shared-libsan")
            endif()
        else()
            set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -static-libsan")
        endif()

        # C flags are only touched in the sanitizer configuration.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ASAN_COMPILER_FLAGS}" PARENT_SCOPE)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ASAN_COMPILER_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_LINKER_FLAGS}")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ASAN_LINKER_FLAGS}")
    else()
        ## Security breach mitigation flags
        # NOTE(review): the glibc fortification macro is spelled _FORTIFY_SOURCE (leading
        # underscore); -DFORTIFY_SOURCE=2 presumably has no effect. Left unchanged because
        # enabling it can emit new warnings on some toolchains - confirm and fix separately.
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFORTIFY_SOURCE=2 -fstack-protector-all -Wcast-align")
        ## More security breach mitigation flags
        set(HARDENING_LDFLAGS "${HARDENING_LDFLAGS} -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${HARDENING_LDFLAGS}")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HARDENING_LDFLAGS}")

        include(CheckCXXCompilerFlag)
        check_cxx_compiler_flag("-Wtrampolines" CXX_SUPPORTS_WTRAMPOLINES)
        if(CXX_SUPPORTS_WTRAMPOLINES)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wtrampolines")
        endif()
    endif()

    # Clang does not set the build-id
    # similar to if(NOT CMAKE_COMPILER_IS_GNUCC)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--build-id=sha1")
    endif()

    # Export the accumulated flags to the caller exactly once.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" PARENT_SCOPE)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" PARENT_SCOPE)
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" PARENT_SCOPE)

    # configure packaging
    # cpack version is populated with CMAKE_PROJECT_VERSION implicitly
    set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME} CACHE STRING "")
    set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." CACHE STRING "")
    set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Default packaging prefix.")
    set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" CACHE STRING "")
    set(CPACK_RPM_PACKAGE_LICENSE "MIT" CACHE STRING "")
    set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
    set(CPACK_VERBATIM_VARIABLES ON CACHE BOOL "Escape strings passed to CPACK.")
    set(CPACK_DEB_COMPONENT_INSTALL ON PARENT_SCOPE)
    set(CPACK_RPM_COMPONENT_INSTALL ON PARENT_SCOPE)
    mark_as_advanced(CPACK_PACKAGE_NAME CPACK_PACKAGE_VENDOR CPACK_PACKAGE_CONTACT CPACK_RESOURCE_FILE_LICENSE
                     CPACK_RPM_PACKAGE_LICENSE CPACK_GENERATOR)

    # Debian package specific variables
    # The value is set locally first so the message below reports what is actually exported.
    if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
        set(CPACK_DEBIAN_PACKAGE_RELEASE "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}")
    else()
        set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
    endif()
    set(CPACK_DEBIAN_PACKAGE_RELEASE "${CPACK_DEBIAN_PACKAGE_RELEASE}" PARENT_SCOPE)
    message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
    set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" PARENT_SCOPE)

    # RPM package specific variables
    if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
        set(CPACK_RPM_PACKAGE_RELEASE "$ENV{CPACK_RPM_PACKAGE_RELEASE}")
    else()
        set(CPACK_RPM_PACKAGE_RELEASE "local")
    endif()
    set(CPACK_RPM_PACKAGE_RELEASE "${CPACK_RPM_PACKAGE_RELEASE}" PARENT_SCOPE)
    message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
    set(CPACK_RPM_FILE_NAME "RPM-DEFAULT" PARENT_SCOPE)
    set(CPACK_RPM_PACKAGE_AUTOREQ 0 PARENT_SCOPE)
    set(CPACK_RPM_PACKAGE_AUTOPROV 1 PARENT_SCOPE)

    list(
        APPEND
        CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
        "/lib"
        "/usr/sbin"
        "/lib/systemd"
        "/lib/systemd/system"
        "/usr"
        "/opt")
    # list(APPEND) only modifies the function-local copy; export it so the caller sees the list.
    set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION}" PARENT_SCOPE)

    # PACKAGE-tests need PACKAGE
    set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
    set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)

    # Treat runtime group as package base.
    # Without it the base package would be named '<package>-runtime',
    # resulting in <package>-runtime*.deb and <package>-runtime*.rpm
    set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
    set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
endfunction()
# this function goes after 'include(CPack)'
function(generic_package_post)
# Declares the CPack component groups and components used for packaging.
# Uses cpack_add_component*(), so it has to run after 'include(CPack)'.
# Components dev, unspecified and runtime all belong to group "runtime";
# component tests belongs to group "tests".
# PACKAGE package, no postfix
cpack_add_component_group("runtime")
cpack_add_component(dev GROUP runtime DESCRIPTION "Development components of the library")
cpack_add_component(unspecified GROUP runtime)
# not quite sure why this is the only way to populate cpack description
# (keep the order of these calls as-is for that reason)
cpack_add_component(runtime GROUP runtime DESCRIPTION "Runtime components of the library")
# PACKAGE-tests package, -tests postfix
cpack_add_component_group("tests")
cpack_add_component(tests GROUP tests DESCRIPTION "Test components of the library")
endfunction()
@@ -0,0 +1,200 @@
################################################################################
## Copyright (C) Advanced Micro Devices. All rights reserved.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy of
## this software and associated documentation files (the "Software"), to deal in
## the Software without restriction, including without limitation the rights to
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
## the Software, and to permit persons to whom the Software is furnished to do so,
## subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
## FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
## COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
## IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
## CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
################################################################################
## Parses the VERSION_STRING variable and places
## the first, second and third number values in
## the major, minor and patch variables.
function(parse_version VERSION_STRING)
    # Extracts the first, second and third runs of digits found in VERSION_STRING.
    #
    # Sets in the caller's scope:
    #   VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH - only for components that were found
    #                                                   (missing components are left untouched)
    #   VERSION_STRING - the found components joined with "." (empty if no digits were found)
    #
    # Arguments are quoted so an empty or list-valued input cannot break the
    # string() calls. The former VERSION_BUILD computation was removed: its
    # result was never used or exported.
    string(REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}")
    list(LENGTH VERSIONS VERSION_COUNT)

    # Explicitly start empty so the exported value never depends on a stale variable.
    set(TEMP_VERSION_STRING "")

    if(VERSION_COUNT GREATER 0)
        list(GET VERSIONS 0 MAJOR)
        set(VERSION_MAJOR "${MAJOR}" PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${MAJOR}")
    endif()

    if(VERSION_COUNT GREATER 1)
        list(GET VERSIONS 1 MINOR)
        set(VERSION_MINOR "${MINOR}" PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}")
    endif()

    if(VERSION_COUNT GREATER 2)
        list(GET VERSIONS 2 PATCH)
        set(VERSION_PATCH "${PATCH}" PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}")
    endif()

    set(VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE)
endfunction()
function(get_version_from_file REL_FILE_PATH ITEM)
    # Looks for "AMDSMI_LIB_VERSION_<ITEM> <digits>" in the file at
    # CMAKE_CURRENT_SOURCE_DIR/REL_FILE_PATH and sets the variable named by
    # ITEM in the caller's scope to the first run of digits in the match
    # result. The result is "0" when the file does not exist or nothing matches.
    set(SOURCE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${REL_FILE_PATH}")
    set(FOUND_VALUE "0")

    if(EXISTS "${SOURCE_FILE}")
        file(READ ${SOURCE_FILE} SOURCE_TEXT)
        string(REGEX MATCHALL "AMDSMI_LIB_VERSION_${ITEM} *[0-9]+" MATCHED_DEFINES "${SOURCE_TEXT}")
        if(NOT "${MATCHED_DEFINES}" STREQUAL "")
            string(REGEX MATCH "[0-9]+" FOUND_VALUE "${MATCHED_DEFINES}")
        endif()
    endif()

    set(${ITEM} "${FOUND_VALUE}" PARENT_SCOPE)
endfunction()
# Parses file for a pattern and replaces the value
# associated with that pattern with a specified value
# Replaces VERSION(MAJOR.MINOR.RELEASE) with updated values
function(update_version_in_file REL_FILE_PATH DEFAULT_VERSION PAT1 PAT2 PAT3)
    # Compares DEFAULT_VERSION with the MAJOR.MINOR.RELEASE version read from the file.
    #   * DEFAULT_VERSION greater: rewrite the three version entries in the file (when it
    #     exists) and report DEFAULT_VERSION through VERSION_STRING in the caller's scope.
    #   * otherwise: leave the file alone and report the file's version instead.
    # For each component X the text matching "<PAT1>X<PAT2> *[0-9]*" is replaced by
    # "<PAT1>X<PAT3><new value>".
    get_version_from_file(${REL_FILE_PATH} "MAJOR")
    get_version_from_file(${REL_FILE_PATH} "MINOR")
    get_version_from_file(${REL_FILE_PATH} "RELEASE")
    set(FILE_VERSION "${MAJOR}.${MINOR}.${RELEASE}")

    if(NOT DEFAULT_VERSION VERSION_GREATER FILE_VERSION)
        set(VERSION_STRING "${FILE_VERSION}" PARENT_SCOPE)
        return()
    endif()

    set(TARGET_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${REL_FILE_PATH}")
    if(EXISTS "${TARGET_FILE}")
        # Provides VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH in this scope.
        parse_version(${DEFAULT_VERSION})

        file(READ ${TARGET_FILE} UPDATED_TEXT)
        string(REGEX REPLACE "${PAT1}MAJOR${PAT2} *[0-9]*" "${PAT1}MAJOR${PAT3}${VERSION_MAJOR}" UPDATED_TEXT
                             "${UPDATED_TEXT}")
        string(REGEX REPLACE "${PAT1}MINOR${PAT2} *[0-9]*" "${PAT1}MINOR${PAT3}${VERSION_MINOR}" UPDATED_TEXT
                             "${UPDATED_TEXT}")
        string(REGEX REPLACE "${PAT1}RELEASE${PAT2} *[0-9]*" "${PAT1}RELEASE${PAT3}${VERSION_PATCH}" UPDATED_TEXT
                             "${UPDATED_TEXT}")
        file(WRITE ${TARGET_FILE} "${UPDATED_TEXT}")
    endif()

    set(VERSION_STRING "${DEFAULT_VERSION}" PARENT_SCOPE)
endfunction()
## Gets the current version of the repository
## using versioning tags and git describe.
## Passes back a packaging version string
## and a library version string.
function(get_version_from_tag DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
    # Picks the larger of DEFAULT_VERSION_STRING and the version parsed from the
    # first tag listed by `git tag --sort=-version:refname "<VERSION_PREFIX>*"`.
    # git is only consulted when GIT is true. Exports VERSION_STRING,
    # VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH to the caller's scope.
    parse_version(${DEFAULT_VERSION_STRING})
    set(DEFAULT_VERSION_MAJOR "${VERSION_MAJOR}")
    set(DEFAULT_VERSION_MINOR "${VERSION_MINOR}")
    set(DEFAULT_VERSION_PATCH "${VERSION_PATCH}")

    if(GIT)
        execute_process(
            COMMAND git tag --list --sort=-version:refname "${VERSION_PREFIX}*"
            COMMAND head -n 1
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            OUTPUT_VARIABLE GIT_TAG_STRING
            OUTPUT_STRIP_TRAILING_WHITESPACE RESULTS_VARIABLE RESULTS)
        if(GIT_TAG_STRING)
            # Overwrites VERSION_STRING / VERSION_* in this scope with the tag's values.
            parse_version(${GIT_TAG_STRING})
        endif()
    endif()

    # Fall back to the default unless the version parsed last is strictly greater.
    if(NOT VERSION_STRING VERSION_GREATER DEFAULT_VERSION_STRING)
        set(VERSION_STRING "${DEFAULT_VERSION_STRING}")
        set(VERSION_MAJOR "${DEFAULT_VERSION_MAJOR}")
        set(VERSION_MINOR "${DEFAULT_VERSION_MINOR}")
        set(VERSION_PATCH "${DEFAULT_VERSION_PATCH}")
    endif()

    set(VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE)
    set(VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE)
    set(VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE)
    set(VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE)
endfunction()
function(num_change_since_prev_pkg VERSION_PREFIX)
    # Runs `version_util.sh -c <VERSION_PREFIX>` (searched for in
    # CMAKE_CURRENT_SOURCE_DIR/cmake_modules) and exports its output as
    # NUM_COMMITS to the caller's scope. NUM_COMMITS is "unknown" when the
    # script cannot be found.
    find_program(get_commits NAMES version_util.sh PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)

    if(NOT get_commits)
        message("WARNING: Didn't find version_util.sh")
        set(NUM_COMMITS "unknown" PARENT_SCOPE)
        return()
    endif()

    execute_process(
        COMMAND ${get_commits} -c ${VERSION_PREFIX}
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
        OUTPUT_VARIABLE NUM_COMMITS
        OUTPUT_STRIP_TRAILING_WHITESPACE
        RESULT_VARIABLE RESULT)

    # The script output is exported regardless of its exit status.
    set(NUM_COMMITS "${NUM_COMMITS}" PARENT_SCOPE)

    if(${RESULT} EQUAL 0)
        message("${NUM_COMMITS} were found since previous release")
    else()
        message("Unable to determine number of commits since previous release")
    endif()
endfunction()
function(get_package_version_number DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
    # Builds the package version string
    #   <version>.<num commits>-<build id>-<git hash>
    # and exports PKG_VERSION_STR, PKG_VERSION_HASH and the
    # CPACK_PACKAGE_VERSION_{MAJOR,MINOR,PATCH} variables to the caller's scope.
    #   build id : ENV{ROCM_BUILD_ID} when defined, otherwise "local-build-0"
    #   git hash : short HEAD hash (with "-dirty" appended when `git diff --quiet`
    #              returns 1); "unknown" when GIT is false or rev-parse fails
    parse_version(${DEFAULT_VERSION_STRING})      # VERSION_STRING, VERSION_MAJOR/MINOR/PATCH
    num_change_since_prev_pkg(${VERSION_PREFIX})  # NUM_COMMITS

    if(DEFINED ENV{ROCM_BUILD_ID})
        set(VERSION_ID $ENV{ROCM_BUILD_ID})
    else()
        set(VERSION_ID "local-build-0")
    endif()

    set(HEAD_HASH "unknown")
    if(GIT)
        execute_process(
            COMMAND git rev-parse --short HEAD
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            OUTPUT_VARIABLE SHORT_REV
            OUTPUT_STRIP_TRAILING_WHITESPACE
            RESULT_VARIABLE RESULT)
        if(${RESULT} EQUAL 0)
            set(HEAD_HASH "${SHORT_REV}")
            # Check for dirty workspace.
            execute_process(COMMAND git diff --quiet WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            RESULT_VARIABLE RESULT)
            if(${RESULT} EQUAL 1)
                set(HEAD_HASH "${HEAD_HASH}-dirty")
            endif()
        endif()
    endif()

    set(PKG_VERSION_STR "${VERSION_STRING}.${NUM_COMMITS}-${VERSION_ID}-${HEAD_HASH}")

    set(PKG_VERSION_STR ${PKG_VERSION_STR} PARENT_SCOPE)
    set(PKG_VERSION_HASH ${HEAD_HASH} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_MAJOR ${VERSION_MAJOR} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_MINOR ${VERSION_MINOR} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_PATCH ${VERSION_PATCH} PARENT_SCOPE)
endfunction()
+40
Melihat File
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Prints the difference between the commit counts of the two most recent tags
# (by committer date) whose names match "<TAG_PREFIX>*".
#
# Usage: version_util.sh [-c] <TAG_PREFIX>

# Print an error message to stderr and abort with a non-zero status.
die() {
    echo "$*" >&2
    exit 1
}

# Handle commandline args
while [ "$1" != "" ]; do
    case $1 in
        -c )  # Commits since previous tag
            TARGET="count" ;;
        * )
            # First non-option argument is the tag prefix; stop parsing here.
            TARGET="count"
            break ;;
    esac
    shift 1
done

TAG_PREFIX=$1
reg_ex="${TAG_PREFIX}*"

commits_since_last_tag() {
    local tag_arr=() previous_tag current_tag prev_cmt_num curr_cmt_num

    # Quote the pattern so the shell does not glob-expand it against files in
    # the working directory before git sees it.
    mapfile -t tag_arr < <(git tag --sort=committerdate -l "${reg_ex}" | tail -2)

    previous_tag=${tag_arr[0]}
    current_tag=${tag_arr[1]}

    # Only ask git for counts when both tags exist; with fewer than two tags
    # the result is 0, as before, but without git usage errors on stderr.
    if [[ -n $previous_tag && -n $current_tag ]]; then
        prev_cmt_num=$(git rev-list --count "$previous_tag")
        curr_cmt_num=$(git rev-list --count "$current_tag")
    fi

    # Commits since previous tag:
    if [[ -z $prev_cmt_num || -z $curr_cmt_num ]]; then
        NUM_COMMITS=0
    else
        NUM_COMMITS=$(( curr_cmt_num - prev_cmt_num ))
    fi
    echo "$NUM_COMMITS"
}

case $TARGET in
    count) commits_since_last_tag ;;
    *) die "Invalid target $TARGET" ;;
esac

exit 0
+14
Melihat File
@@ -0,0 +1,14 @@
!.sphinx/
!.doxygen/
/_build/
/_doxygen/
/_images/
/_static/
/_templates/
/html/
/latex/
404.md
data/AMD-404.png
# file below is overwritten by sphinx script!
# (a leading "./" never matches in .gitignore; "/" anchors to this directory)
/esmi_lib_readme_link.md
@@ -0,0 +1,296 @@
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import re
import os
from pathlib import Path
from docutils import nodes
from docutils.parsers.rst import Directive, directives
from sphinx.application import Sphinx
from sphinx.util.typing import ExtensionMetadata
class GoApiRefDirective(Directive):
    """
    Sphinx/docutils directive that renders API reference text from a Go source file.

    Usage:
    .. go-api-ref:: path/to/gofile.go
       :section: gpu

    The single required argument is the Go file path, resolved relative to the
    directory of the document containing the directive. The optional
    ``section`` option restricts output to functions of that section.
    """

    required_arguments = 1  # the path to the Go file
    optional_arguments = 0
    has_content = False
    option_spec = {"section": directives.unchanged}  # optional section filter

    def run(self):
        """Parse the Go file and feed the generated reST lines back into the parser.

        Returns a single warning node when the file does not exist; otherwise
        returns an (empty) container node tagged with class ``go-api-reference``
        after inserting the generated lines into the state machine's input.
        """
        env = self.state.document.settings.env
        relative_go_path = self.arguments[0]
        wanted_section = self.options.get("section", None)

        # Resolve the Go file relative to the current document's directory.
        document_dir = Path(env.doc2path(env.docname)).parent
        source_path = (document_dir / relative_go_path).resolve()

        if not source_path.exists():
            warning_text = f"Go source file not found: {source_path}"
            return [nodes.warning("", nodes.paragraph("", warning_text))]

        rst_lines = generate_rst_content(parse_go_file(str(source_path)), wanted_section)

        wrapper = nodes.container()
        wrapper["classes"].append("go-api-reference")

        self.state_machine.insert_input(rst_lines, source=str(source_path))
        return [wrapper]
def parse_go_file(file_path):
    """Parse a Go file and extract documentation for its commented functions.

    A function is picked up when a run of ``//`` comment lines is immediately
    followed by ``func Name(params) <return> {`` where ``<return>`` is either a
    parenthesized list or a single identifier. Functions without a doc comment
    or without such a return type are not matched by the pattern.

    Args:
        file_path: Path of the Go source file. It is read as UTF-8 (Go source
            is UTF-8), independent of the locale's default encoding.

    Returns:
        A list of dicts, one per matched function, with keys ``name``,
        ``params``, ``return_type``, ``description``, ``input_params``,
        ``output_params``, ``example`` and ``section``. ``section`` is the
        lower-cased second ``_``-separated part of the function name, or
        ``"other"`` when the name contains no underscore.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Pattern to match function documentation and definition
    pattern = r"(\/\/[^\n]*(?:\n\/\/[^\n]*)*)\n\s*func\s+([A-Za-z0-9_]+)\s*\((.*?)\)\s*(\(.*?\)|\w+)\s*\{"

    # A doc line starting with one of these prefixes switches the current section;
    # the header line itself is stored in the section it opens.
    section_headers = (
        ("Input parameter", "input"),
        ("Output:", "output"),
        ("Example:", "example"),
    )

    functions = []
    for doc_comment, func_name, raw_params, raw_return in re.findall(pattern, content, re.DOTALL):
        # Strip the comment marker and at most one following space from each line.
        doc_lines = []
        for raw_line in doc_comment.split("\n"):
            stripped = raw_line.strip()
            if not stripped.startswith("//"):
                continue
            text = stripped[2:]
            if text.startswith(" "):
                text = text[1:]
            doc_lines.append(text)

        collected = {"description": [], "input": [], "output": [], "example": []}
        current = "description"
        for line in doc_lines:
            for prefix, key in section_headers:
                if line.startswith(prefix):
                    current = key
                    break
            collected[current].append(line)

        # Description and output are flattened to one line; blank lines are dropped.
        desc_text = " ".join(part.strip() for part in collected["description"] if part.strip())
        output_text = " ".join(part.strip() for part in collected["output"] if part.strip())

        # Determine the section based on function name (e.g. GO_gpu_xxx -> "gpu")
        name_parts = func_name.split("_")
        section = name_parts[1] if len(name_parts) > 1 else "other"

        functions.append(
            {
                "name": func_name,
                "params": raw_params.strip(),
                "return_type": raw_return.strip(),
                "description": desc_text,
                "input_params": "\n".join(collected["input"]).strip(),
                "output_params": output_text,
                "example": "\n".join(collected["example"]).strip(),
                "section": section.lower(),  # Store the section for filtering
            }
        )

    return functions
def generate_rst_content(functions, section_filter=None):
    """Generate reStructuredText lines from parsed function records.

    Args:
        functions: List of dicts as produced by ``parse_go_file``.
        section_filter: Optional section name (case-insensitive). When given
            (and truthy), only functions of that section are documented and no
            section headings are emitted.

    Returns:
        A list of reST lines. With a filter that matches nothing, the list
        holds a single "No functions found" message. Without a filter,
        functions are grouped under "<SECTION> Functions" headings ordered
        gpu, cpu, then the remaining sections alphabetically.
    """
    rst = []

    if section_filter:
        wanted = section_filter.lower()
        selected = [fn for fn in functions if fn["section"] == wanted]
        if not selected:
            rst.append(f"No functions found in section: {wanted}")
            return rst
        # Filtered output: document the functions without section headers.
        for fn in selected:
            add_function_documentation(rst, fn)
        return rst

    # Group by section, keeping the original order inside each group.
    grouped = {}
    for fn in functions:
        grouped.setdefault(fn["section"], []).append(fn)

    priority = ("gpu", "cpu")
    ordered = [name for name in priority if name in grouped]
    ordered.extend(name for name in sorted(grouped) if name not in priority)

    for name in ordered:
        heading = f"{name.upper()} Functions"
        rst.extend([heading, "-" * len(heading), ""])
        for fn in grouped[name]:
            add_function_documentation(rst, fn)

    return rst
def add_function_documentation(lines, func):
    """Append the reST documentation of one function record to ``lines``.

    Emits, in order: the name as a ``~``-underlined title, a Go code block with
    the signature, then the description, input parameters, output and example
    when present. In the example, the first line (after the first) that starts
    with ``import``, ``if`` or ``for`` opens a Go code block that runs to the
    end of the example.

    Args:
        lines: List of strings, extended in place.
        func: Dict with keys ``name``, ``params``, ``return_type``,
            ``description``, ``input_params``, ``output_params``, ``example``.
    """
    name = func["name"]
    # The underline is sized for the name wrapped in double backticks.
    lines.extend([name, "~" * len(f"``{name}``"), ""])

    # Function signature: a parenthesized return list is shown without the parentheses.
    shown_return = func["return_type"]
    if shown_return.startswith("(") and shown_return.endswith(")"):
        shown_return = shown_return[1:-1]
    lines.extend([".. code-block:: go", "", f" func {name}({func['params']}) {shown_return}", ""])

    if func["description"]:
        lines.extend([func["description"], ""])

    if func["input_params"]:
        lines.extend(func["input_params"].split("\n"))
        lines.append("")

    if func["output_params"]:
        lines.extend([func["output_params"], ""])

    if not func["example"]:
        return

    code_starters = ("import", "if", "for")
    inside_code = False
    for index, raw in enumerate(func["example"].split("\n")):
        text = raw.strip()

        if text == "Example:":
            lines.append("Example:")
            continue

        # Open the code block once, at the first code-looking line.
        if not inside_code and index > 0 and text.startswith(code_starters):
            inside_code = True
            lines.extend(["", ".. code-block:: go", ""])

        if inside_code:
            # For code blocks, add indentation
            lines.append(f" {raw}")
        elif text:  # Only add non-empty lines outside code blocks
            lines.append(raw)

    lines.append("")
def setup(app):
    """
    Sphinx entry point for this extension.

    Registers the ``go-api-ref`` directive on ``app`` and returns the
    extension metadata (version and parallel read/write safety flags).
    """
    app.add_directive("go-api-ref", GoApiRefDirective)

    metadata = dict(
        version="0.1.0",
        parallel_read_safe=True,
        parallel_write_safe=True,
    )
    return metadata
+95
Melihat File
@@ -0,0 +1,95 @@
---
myst:
html_meta:
"description lang=en": "AMD SMI for reliability, availability, serviceability."
"keywords": "system, management, interface, cper, log, error, spec, ecc, afid, fault, ras"
---
# Reliability, availability, serviceability (RAS)
RAS aims to increase the robustness of a system by detecting hardware errors, recording them, and
correcting them where possible. See [Reliability, availability, serviceability (Linux
kernel)](https://docs.kernel.org/admin-guide/RAS/main.html) for more general information.
## ECC
ECC (Error-Correcting Code) is a type of memory that automatically detects errors. Correctable 1-bit
errors are handled by the ECC logic and logged by the hardware. Uncorrectable 2-bit errors can be
detected but not reliably fixed; this is a more serious event that must be reported. See [RAS Error
Count sysfs Interface](https://docs.kernel.org/gpu/amdgpu/ras.html#ras-error-count-sysfs-interface)
to learn how AMD SMI accesses error counts.
While ECC is a mechanism to handle different errors, CPER is the standard used to report that the event
occurred.
## CPER
At its core, CPER (Common Platform Error Record) is a standard format included in the [UEFI
specification](https://uefi.org/specs/UEFI/2.10/01_Introduction.html) to report errors to the
operating system. It works as a standard error report template that different hardware components
can fill out when something goes wrong. It consists of a header, one or more section descriptors --
and for each descriptor, an associated section containing error or informational data. See [CPER
(UEFI Specification)](https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html) for
more information.
A CPER record consists of vital information for diagnostics such as:
- Error source
- Error type
- Error severity
- 0 - Recoverable (also called non-fatal uncorrected)
- 1 - Fatal
- 2 - Corrected
- 3 - Informational
- Timestamp
- Other data
A CPER record might contain an AFID in its data to help map a complex error to a more actionable service task.
## AFID
AFIDs (AMD Field IDs) are unique numerical IDs associated with specific events or errors produced by
AMD Instinct accelerators. Each AFID provides a specific identifier for a known condition, which helps
facilitate root cause analysis. Each AFID is associated with category, type, and severity fields. See
[AFID Event List](https://docs.amd.com/r/en-US/AMD_Field_ID_70122_v1.0/AFID-Event-List) for more
information.
## From concept to action
AMD SMI provides tools to programmatically monitor and manage these RAS features.
:::::{tab-set}
::::{tab-item} C/C++
The AMD SMI library provides APIs to query ECC error counts and manage CPER records
(list, decode, and clear).
See [ECC information](/doxygen/docBin/html/group__tagECCInfo) and [RAS
information](/doxygen/docBin/html/group__tagRasInfo) for available APIs.
::::
::::{tab-item} Python
See related APIs:
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_count)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_enabled)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_status)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_total_ecc_count)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_cper_entries)
- [](/reference/amdsmi-py-api.md#amdsmi_get_afids_from_cper)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ras_feature_info)
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ras_block_features_enabled)
::::
::::{tab-item} amd-smi CLI
See [`amd-smi ras --help`](/how-to/amdsmi-cli-tool.md#amd-smi-ras) for details and available options.
```shell
amd-smi ras --help
```
::::
:::::
## Further reading
- [AMD Field ID](https://docs.amd.com/r/en-US/AMD_Field_ID_70122_v1.0/Introduction)
- [CPER (UEFI specification)](https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html)
- [Reliability, availability, serviceability (Linux kernel)](https://docs.kernel.org/admin-guide/RAS/main.html)
+85
Melihat File
@@ -0,0 +1,85 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import re
import sys
from pathlib import Path
sys.path.append(str(Path("_extension").resolve()))
# get version number to print in docs
def get_version_info(filepath):
    """
    Read the AMD SMI library version from a C header.

    Looks for ``#define AMDSMI_LIB_VERSION_MAJOR``, ``..._MINOR`` and
    ``..._RELEASE`` lines, each followed by a decimal number and nothing else
    on the line.

    Each component is searched for on its own, so the result does not depend
    on the order in which the three ``#define`` lines appear in the header.

    :param filepath: path of the header file to read.
    :returns: ``(major, minor, release)`` as a tuple of digit strings.
    :raises ValueError: if any of the three defines cannot be found.
    """
    with open(filepath, "r") as f:
        content = f.read()

    components = []
    for part in ("MAJOR", "MINOR", "RELEASE"):
        found = re.search(
            rf"^#define\s+AMDSMI_LIB_VERSION_{part}\s+(\d+)\s*$",
            content,
            re.MULTILINE,
        )
        if found is None:
            raise ValueError("Couldn't find all VERSION numbers.")
        components.append(found.group(1))

    return tuple(components)
# Read the library version from the public C header. The relative path is
# resolved against the current working directory -- presumably the docs
# directory; confirm against how the build is invoked.
version_major, version_minor, version_release = get_version_info(
"../include/amd_smi/amdsmi.h"
)
# Dotted "major.minor.release" string; reused below for the Sphinx
# version/release fields and the HTML title, and by generate_doxyfile.
version_number = f"{version_major}.{version_minor}.{version_release}"
# project info
project = "AMD SMI"
author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = version_number
release = version_number
html_theme = "rocm_docs_theme"
html_theme_options = {"flavor": "rocm"}
html_title = f"AMD SMI {version_number} documentation"
suppress_warnings = ["etoc.toctree"]
external_toc_path = "./sphinx/_toc.yml"
external_projects_current_project = "amdsmi"
# NOTE(review): "go_api_ref" is presumably the local extension made importable
# by the "_extension" entry appended to sys.path above -- confirm.
extensions = ["rocm_docs", "rocm_docs.doxygen", "go_api_ref"]
# Directory, relative to the Sphinx configuration directory, in which
# generate_doxyfile looks for Doxyfile.in and writes Doxyfile.
doxygen_root = "doxygen"
doxysphinx_enabled = True
doxygen_project = {
"name": "AMD SMI C++ API reference",
"path": "doxygen/docBin/xml",
}
def generate_doxyfile(app, _):
    """
    Render ``Doxyfile`` from ``Doxyfile.in`` inside the Doxygen directory.

    Every ``@PROJECT_NUMBER@`` placeholder in the template is replaced with
    ``version_number`` and the result is written next to the template.
    The second argument is ignored.

    Raises ``sphinx.errors.ConfigError`` when the template does not exist.
    """
    doxygen_dir = Path(app.confdir) / doxygen_root
    template_path = doxygen_dir / "Doxyfile.in"
    output_path = doxygen_dir / "Doxyfile"

    if not template_path.exists():
        # Imported lazily: only needed on this failure path.
        from sphinx.errors import ConfigError

        raise ConfigError(f"Missing Doxyfile.in at {template_path}")

    with open(template_path) as template_file:
        template_text = template_file.read()

    rendered = template_text.replace("@PROJECT_NUMBER@", version_number)

    with open(output_path, "w") as output_file:
        output_file.write(rendered)
def setup(app):
    """
    Sphinx entry point for this configuration module.

    Connects ``generate_doxyfile`` to the ``config-inited`` event with
    priority 100 and reports the build as safe for parallel reads and writes.
    """
    app.connect("config-inited", generate_doxyfile, priority=100)

    parallel_flags = dict.fromkeys(("parallel_read_safe", "parallel_write_safe"), True)
    return parallel_flags
Binary file not shown.

After

Width:  |  Height:  |  Ukuran: 62 KiB

@@ -0,0 +1,2 @@
docBin/
Doxyfile
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,233 @@
---
myst:
html_meta:
"description lang=en": "Get started with the AMD SMI C++ library. Basic usage and examples."
"keywords": "api, smi, lib, c++, system, management, interface, ROCm"
---
# AMD SMI C++ library usage and examples
This section presents a brief overview and some basic examples on the AMD SMI
library's usage. Whether you are developing applications for performance
monitoring, system diagnostics, or resource allocation, the AMD SMI C++ library
serves as a valuable tool for leveraging the full potential of AMD hardware in
your projects.
```{note}
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
dynamic library. To compile code that uses the AMD SMI library API, ensure the
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
variable to the directory containing ``libamd_smi.so`` (usually
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
```
```{note}
The environment variable ``AMDSMI_GPU_METRICS_CACHE_MS`` may be set to
control the internal GPU metrics cache duration (ms).
Default 1, set to 0 to disable.
```
```{note}
The environment variable ``AMDSMI_ASIC_INFO_CACHE_MS`` may be set to
control the internal GPU asic info cache duration (ms).
Default 10000 ms, set to 0 to disable.
```
```{seealso}
Refer to the [C++ library API reference](../reference/amdsmi-cpp-api.md).
```
(device_socket_handle)=
## Device and socket handles
Many functions in the library take a _socket handle_ or _device handle_. A
_socket_ refers to a physical hardware socket, abstracted by the library to
represent the hardware more effectively to the user. While there is always one
unique GPU per socket, an APU may house both a GPU and CPU on the same socket.
For MI200 GPUs, multiple GCDs may reside within a single socket.
To identify the sockets in a system, use the `amdsmi_get_socket_handles()`
function, which returns a list of socket handles. These handles can then be used
with `amdsmi_get_processor_handles()` to query devices within each socket. The
device handle is used to differentiate between detected devices; however, it's
important to note that a device handle may change after restarting the
application, so it should not be considered a persistent identifier across
processes.
The list of socket handles obtained from `amdsmi_get_socket_handles()` can
also be used to query the CPUs in each socket by calling
`amdsmi_get_processor_handles_by_type()`. This function can then be called again
to query the cores within each CPU.
(cpp_hello_amdsmi)=
## Hello AMD SMI
An application using AMD SMI must call `amdsmi_init()` to initialize the AMD SMI
library before all other calls. This call initializes the internal data
structures required for subsequent AMD SMI operations. In the call, a flag can
be passed to indicate if the application is interested in a specific device
type.
`amdsmi_shut_down()` must be the last call to properly close connection to
driver and make sure that any resources held by AMD SMI are released.
1. A simple "Hello World" type program that displays the temperature of detected
devices.
```{note}
Sample build example:
$ g++ -I/opt/rocm/include <file_name>.cc -L/opt/rocm/lib -lamd_smi -o <filename>
Users /opt/rocm-*/bin path may differ (depending on install), please locate the path of your libamd_smi.so.*.
For example:
$ sudo find /opt/ -iname libamd_smi.so*
/opt/rocm-6.4.1/lib/libamd_smi.so.25.0
/opt/rocm-6.4.1/lib/libamd_smi.so
```
The code is as follows:
```cpp
#include <iostream>
#include <vector>
#include "amd_smi/amdsmi.h"
int main() {
amdsmi_status_t ret;
// Init amdsmi for sockets and devices. Here we are only interested in AMD_GPUS.
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
// Get all sockets
uint32_t socket_count = 0;
// Get the socket count available in the system.
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
// Allocate the memory for the sockets
std::vector<amdsmi_socket_handle> sockets(socket_count);
// Get the socket handles in the system
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
std::cout << "Total Socket: " << socket_count << std::endl;
// For each socket, get identifier and devices
for (uint32_t i=0; i < socket_count; i++) {
// Get Socket info
char socket_info[128];
ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
std::cout << "Socket " << socket_info<< std::endl;
// Get the device count for the socket.
uint32_t device_count = 0;
ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
// Allocate the memory for the device handlers on the socket
std::vector<amdsmi_processor_handle> processor_handles(device_count);
// Get all devices of the socket
ret = amdsmi_get_processor_handles(sockets[i],
&device_count, &processor_handles[0]);
// For each device of the socket, get name and temperature.
for (uint32_t j=0; j < device_count; j++) {
// Get device type. Since the amdsmi is initialized with
// AMD_SMI_INIT_AMD_GPUS, the processor_type must be AMDSMI_PROCESSOR_TYPE_AMD_GPU.
processor_type_t processor_type;
ret = amdsmi_get_processor_type(processor_handles[j], &processor_type);
if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
std::cout << "Expect AMDSMI_PROCESSOR_TYPE_AMD_GPU device type!\n";
return 1;
}
// Get device name
amdsmi_board_info_t board_info;
ret = amdsmi_get_gpu_board_info(processor_handles[j], &board_info);
std::cout << "\tdevice "
<< j <<"\n\t\tName:" << board_info.product_name << std::endl;
// Get temperature
int64_t val_i64 = 0;
ret = amdsmi_get_temp_metric(processor_handles[j], AMDSMI_TEMPERATURE_TYPE_EDGE,
AMDSMI_TEMP_CURRENT, &val_i64);
std::cout << "\t\tTemperature: " << val_i64 << "C" << std::endl;
}
}
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
// and devices pointers
ret = amdsmi_shut_down();
return 0;
}
```
2. A sample program that displays the power of detected CPUs.
```{note}
Sample build example:
$ g++ -DENABLE_ESMI -I/opt/rocm/include <file_name>.cc -L/opt/rocm/lib -lamd_smi -o <filename>
For finding available rocm include and library path, see building example on sample program 1 above.
```
The code is as follows:
```cpp
#include <iostream>
#include <vector>
#include "amd_smi/amdsmi.h"
int main(int argc, char **argv) {
amdsmi_status_t ret;
uint32_t socket_count = 0;
// Initialize amdsmi for AMD CPUs
ret = amdsmi_init(AMDSMI_INIT_AMD_CPUS);
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
// Allocate the memory for the sockets
std::vector<amdsmi_socket_handle> sockets(socket_count);
// Get the sockets of the system
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
std::cout << "Total Socket: " << socket_count << std::endl;
// For each socket, get cpus
for (uint32_t i = 0; i < socket_count; i++) {
uint32_t cpu_count = 0;
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU
processor_type_t processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &cpu_count);
// Allocate the memory for the cpus
std::vector<amdsmi_processor_handle> plist(cpu_count);
// Get the cpus for each socket
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &plist[0], &cpu_count);
for (uint32_t index = 0; index < plist.size(); index++) {
uint32_t socket_power;
std::cout<<"CPU "<<index<<"\t"<< std::endl;
std::cout<<"Power (Watts): ";
ret = amdsmi_get_cpu_socket_power(plist[index], &socket_power);
if(ret != AMDSMI_STATUS_SUCCESS)
std::cout<<"Failed to get cpu socket power"<<"["<<index<<"] , Err["<<ret<<"] "<< std::endl;
if (!ret) {
std::cout<<static_cast<double>(socket_power)/1000<<std::endl;
}
std::cout<<std::endl;
}
}
// Clean up resources allocated at amdsmi_init
ret = amdsmi_shut_down();
return 0;
}
```
@@ -0,0 +1,87 @@
---
myst:
html_meta:
"description lang=en": "Get started with the AMD SMI Go interface."
"keywords": "api, smi, lib, go, golang, system, management, interface, ROCm"
---
# AMD SMI Go interface overview
The AMD SMI Go interface provides a convenient way to interact with AMD
hardware through a simple and accessible [API](../reference/amdsmi-go-api.md).
The API is compatible with Go 1.20 and higher and requires the AMD driver to
be loaded for initialization. Review the [prerequisites](#install_reqs).
```{seealso}
Refer to the [Go library API reference](../reference/amdsmi-go-api.md).
```
(go_prereqs)=
## Prerequisites
Before getting started, make sure your environment satisfies the following prerequisites.
See the [requirements](#install_reqs) section for more information.
1. Ensure `amdgpu` drivers are installed properly for initialization.
2. Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64:
```
3. Install Go 1.20+.
Download Go from [https://go.dev/dl/](https://go.dev/dl/) and follow the
official installation documentation at [Download and
install](https://go.dev/doc/install).
Alternatively, use a third-party utility like update-golang.
```bash
git clone https://github.com/udhos/update-golang
cd update-golang
sudo ./update-golang.sh
source /etc/profile.d/golang_path.sh
go version
```
## Get started
```{note}
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
dynamic library. To compile code that uses the AMD SMI library API, ensure the
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
variable to the directory containing ``libamd_smi.so`` (usually
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
```
A Go application using AMD SMI must call `goamdsmi.GO_gpu_init()` to initialize
the AMD SMI library before all other calls. This call initializes the internal
data structures required for subsequent AMD SMI operations.
`goamdsmi.GO_gpu_shutdown()` must be the last call to properly close connection to
driver and make sure that any resources held by AMD SMI are released.
## Usage
For an example on using the AMD SMI Go API, refer to this implementation
[https://github.com/amd/amd_smi_exporter/tree/master](https://github.com/amd/amd_smi_exporter/tree/master).
```{seealso}
Refer to the [Go library API reference](../reference/amdsmi-go-api.md).
```
### Add AMD SMI library to your project
To include the AMD SMI Go API in your project, update your Makefile or Go module configuration
to fetch the appropriate version of the AMD SMI library.
```shell
go get github.com/ROCm/amdsmi@amd-staging
```
When using a Makefile, ensure you're fetching the latest AMD SMI repository
with Go API support. See
[https://github.com/amd/amd_smi_exporter/blob/master/src/Makefile](https://github.com/amd/amd_smi_exporter/blob/master/src/Makefile)
for an example implementation.
@@ -0,0 +1,150 @@
---
myst:
html_meta:
"description lang=en": "Get started with the AMD SMI Python interface."
"keywords": "api, smi, lib, py, system, management, interface, ROCm"
---
# AMD SMI Python interface overview
The AMD SMI Python interface provides a convenient way to interact with AMD
hardware through a simple and accessible [API](../reference/amdsmi-py-api.md).
```{seealso}
Refer to the [Python library API reference](../reference/amdsmi-py-api.md).
```
## Prerequisites
Before getting started, make sure your environment satisfies the following prerequisites.
See the [requirements](#install_reqs) section for more information.
1. Ensure `amdgpu` drivers are installed properly for initialization.
2. Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64:
```
3. Install Python 3.6.8+.
## Get started
```{note}
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
dynamic library. To compile code that uses the AMD SMI library API, ensure the
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
variable to the directory containing ``libamd_smi.so`` (usually
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
```
```{note}
The environment variable ``AMDSMI_GPU_METRICS_CACHE_MS`` may be set to
control the internal GPU metrics cache duration (ms).
Default 1, set to 0 to disable.
The environment variable ``AMDSMI_ASIC_INFO_CACHE_MS`` may be set to
control the internal GPU asic info cache duration (ms).
Default 10000 ms, set to 0 to disable.
You can apply them in one of two ways:
1. In Python code (before the AMDSMI library loads):
```
```python
import os
os.environ["AMDSMI_GPU_METRICS_CACHE_MS"] = "200"
from amdsmi import *
```
```{note}
2. On the shell when invoking Python:
```
```shell
AMDSMI_GPU_METRICS_CACHE_MS=200 python tools/amdsmi_quick_start.py
```
To get started, the `amdsmi` folder should be copied and placed next to
the importing script. Import it as follows:
```python
from amdsmi import *
try:
amdsmi_init()
# amdsmi calls ...
except AmdSmiException as e:
print(e)
finally:
try:
amdsmi_shut_down()
except AmdSmiException as e:
print(e)
```
(py_lib_fs)=
### Folder structure
File name | Description
----------------------|-------------------------------------------------
`__init__.py` | Python package initialization file
`amdsmi_interface.py` | Amdsmi library Python interface
`amdsmi_wrapper.py` | Python wrapper around amdsmi binary
`amdsmi_exception.py` | Amdsmi [exceptions](#py_exceptions) Python file
(py_usage)=
## Usage
An application using AMD SMI must call `amdsmi_init()` to initialize the AMD SMI
library before all other calls. This call initializes the internal data
structures required for subsequent AMD SMI operations. In the call, a flag can
be passed to indicate if the application is interested in a specific device
type.
`amdsmi_shut_down()` must be the last call to properly close connection to
driver and make sure that any resources held by AMD SMI are released.
```{seealso}
Refer to the [Python library API reference](../reference/amdsmi-py-api.md).
```
(py_exceptions)=
## Exceptions
All exceptions are in `amdsmi_exception.py` file.
Exceptions that can be thrown by AMD SMI are:
* `AmdSmiException`: base amdsmi exception class
* `AmdSmiLibraryException`: derives base `AmdSmiException` class and represents errors that can occur in amdsmi-lib.
When this exception is thrown, `err_code` and `err_info` are set. `err_code` is an integer that corresponds to errors that can occur
in amdsmi-lib and `err_info` is a string that explains the error that occurred.
For example:
```python
try:
num_of_GPUs = len(amdsmi_get_processor_handles())
if num_of_GPUs == 0:
print("No GPUs on machine")
except AmdSmiException as e:
print("Error code: {}".format(e.err_code))
if e.err_code == amdsmi_wrapper.AMDSMI_STATUS_RETRY:
print("Error info: {}".format(e.err_info))
```
* `AmdSmiRetryException` : Derives `AmdSmiLibraryException` class and signals
device is busy and call should be retried.
* `AmdSmiTimeoutException` : Derives `AmdSmiLibraryException` class and
represents that call had timed out.
* `AmdSmiParameterException`: Derives base `AmdSmiException` class and
represents errors related to invalid parameters passed to functions. When this
exception is thrown, `err_msg` is set and it explains what is the actual and
expected type of the parameters.
* `AmdSmiBdfFormatException`: Derives base `AmdSmiException` class and
represents invalid bdf format.
@@ -0,0 +1,39 @@
---
myst:
html_meta:
"description lang=en": "Docker container configuration and setup procedures for AMD SMI."
"keywords": "api, smi, lib, system, management, interface, ROCm, docker, systemd, modprobe"
---
# Using AMD SMI in a Docker container
To ensure proper functionality of AMD SMI within a Docker container, the
following configuration options must be included. These settings are
particularly important for managing memory partitions, as partitioning depends
on loading and unloading drivers (with `systemd` dependencies):
* `--cap-add=SYS_MODULE`
This option adds the `SYS_MODULE` capability to the container, allowing it to
load and interact with kernel modules.
```{note}
Granting `SYS_MODULE` increases the container's privileges and reduces
isolation from the host. Use this option only with trusted containers and
images.
```
* `-v /lib/modules:/lib/modules`
By mounting the `/lib/modules/` directory into the container, the container
gains access to the host's kernel modules, allowing it to load and interact
with them. Without this access, operations requiring module loading like
memory partitioning would fail.
For example:
```{image} ../data/how-to/setup-docker-container/docker-run-example.jpg
:alt: Command line example of running a Docker container for AMD SMI
:align: center
:width: 100%
```
+65
Melihat File
@@ -0,0 +1,65 @@
---
myst:
html_meta:
"description lang=en": "AMD SMI documentation and API reference."
"keywords": "amdsmi, lib, cli, system, management, interface, amdgpu, admin, sys"
---
# AMD SMI documentation
The AMD System Management Interface (AMD SMI) library offers a unified tool for
managing and monitoring GPUs, particularly in high-performance computing
environments. It provides a user-space interface that allows applications to
control GPU operations, monitor performance, and retrieve information about the
system's drivers and GPUs.
Find the source code at <https://github.com/ROCm/amdsmi>.
```{note}
AMD SMI is the successor to <https://github.com/ROCm/rocm_smi_lib>.
```
::::{grid} 2
:gutter: 3
:::{grid-item-card} Install
* [Library and CLI tool installation](./install/install.md)
* [Build from source](./install/build.md)
:::
:::{grid-item-card} How to
* [C++ library usage](./how-to/amdsmi-cpp-lib.md)
* [Python library usage](./how-to/amdsmi-py-lib.md)
* [Go library usage](./how-to/amdsmi-go-lib.md)
* [CLI tool usage](./how-to/amdsmi-cli-tool.md)
* [Use AMD SMI in a Docker container](./how-to/setup-docker-container.md)
:::
:::{grid-item-card} Reference
* [C++ API](./reference/amdsmi-cpp-api.md)
* [Modules](../doxygen/docBin/html/topics)
* [Files](../doxygen/docBin/html/files)
* [Globals](../doxygen/docBin/html/globals)
* [Data structures](../doxygen/docBin/html/annotated)
* [Data fields](../doxygen/docBin/html/functions_data_fields)
* [Python API](./reference/amdsmi-py-api.md)
* [Go API](./reference/amdsmi-go-api.md)
:::
:::{grid-item-card} Conceptual
* [Reliability, availability, serviceability](./conceptual/ras.md)
:::
:::{grid-item-card} Tutorials
* [AMD SMI examples (GitHub)](https://github.com/ROCm/amdsmi/tree/amd-staging/example)
* [AMD SMI CLI walkthrough](https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html)
:::
::::
To learn about contributing to AMD SMI, see [Contributing to AMD
SMI](https://github.com/ROCm/amdsmi/blob/amd-mainline/.github/CONTRIBUTING.md).
To contribute to the documentation, see
{doc}`Contributing to ROCm documentation <rocm:contribute/contributing>`.
Find ROCm licensing information on the
{doc}`Licensing <rocm:about/license>` page.
+109
Melihat File
@@ -0,0 +1,109 @@
---
myst:
html_meta:
"description lang=en": "How to build AMD SMI from source."
"keywords": "system, management, interface, contribute, contributing, ROCm, develop, testing"
---
# Building AMD SMI
This section describes the prerequisites and steps to build AMD SMI from source.
(build_reqs)=
## Required software
To build the AMD SMI library, the following components are required. Note that
the software versions specified were used during development; earlier
versions are not guaranteed to work.
* CMake (v3.15.0 or later) -- `python3 -m pip install cmake`
* g++ (v5.4.0 or later)
* libdrm-dev (for Ubuntu and Debian)
* libdrm-devel (for RPM-based distributions)
In order to build the AMD SMI Python package, the following components are
required:
* Python (3.6.8 or later)
* virtualenv -- `python3 -m pip install virtualenv`
## Build steps
1. Clone the AMD SMI repository to your local Linux machine.
```shell
git clone https://github.com/ROCm/amdsmi.git
```
2. The default installation location for the library and headers is `/opt/rocm`.
Before installation, any old ROCm directories should be deleted:
* `/opt/rocm`
* `/opt/rocm-<version_number>`
3. Build the library by following the typical CMake build sequence (run as root
user or use `sudo` before `make install` command); for instance:
```bash
mkdir -p build
cd build
cmake ..
make -j $(nproc)
make install
```
The built library is located in the `build/` directory. To build the `rpm`
and `deb` packages use the following command:
```bash
make package
```
(rebuild_py_wrapper)=
## Rebuild the Python wrapper
The Python wrapper for the AMD SMI library is found in the [auto-generated
file](#py_lib_fs) `py-interface/amdsmi_wrapper.py`. It is essential to
regenerate this wrapper whenever there are changes to the C++ API. It is not
regenerated automatically.
To regenerate the wrapper, use the following command.
```shell
./update_wrapper.sh
```
After this command, the file in `py-interface/amdsmi_wrapper.py` will be updated
on compile.
```{note}
You need Docker installed on your system to regenerate the Python wrapper.
```
(build_tests)=
## Build the tests
To verify the build and capabilities of AMD SMI on your system, as well as to
see practical examples of its usage, you can build and run the available [tests
in the repository](https://github.com/ROCm/amdsmi/tree/amd-staging/tests).
Follow these steps to build the tests:
```bash
mkdir -p build
cd build
cmake -DBUILD_TESTS=ON ..
make -j $(nproc)
```
(run_tests)=
### Run the tests
Once the tests are [built](#build_tests), you can run them by executing the
`amdsmitst` program. The executable can be found at `build/tests/amd_smi_test/`.
(build_docs)=
## Build the docs
To build the documentation, follow the instructions at [Building
documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html).
+171
Melihat File
@@ -0,0 +1,171 @@
---
myst:
html_meta:
"description lang=en": "How to install AMD SMI libraries and CLI tool."
"keywords": "system, management, interface, cpu, gpu, hsmp, versions"
---
# Install the AMD SMI library and CLI tool
This section describes how to install the AMD SMI library, Python interface,
and command line tool either as part of the
{doc}`ROCm software stack <rocm:what-is-rocm>` -- or manually.
(install_reqs)=
## Requirements
The following are required to install and use the AMD SMI library through its language interfaces and CLI.
* The `amdgpu` driver must be loaded for AMD SMI initialization to work. See
[Install the amdgpu driver](#install_amdgpu_driver).
* Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64
```
### Supported platforms
The AMD SMI library supports Linux bare metal and Linux virtual machine guest
for AMD GPUs and AMD EPYC™ CPUs via
[esmi_ib_library](https://github.com/amd/esmi_ib_library). To use AMD SMI for virtualization, refer to
the [AMD SMI for Virtualization documentation](https://instinct.docs.amd.com/projects/amd-smi-virt/en/latest/index.html).
AMD SMI library can run on AMD ROCm supported platforms. Refer to
{doc}`System requirements (Linux) <rocm-install-on-linux:reference/system-requirements>`
for more information.
<!--https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html-->
To run the AMD SMI library, the `amdgpu` driver and the `amd_hsmp` or `hsmp_acpi` driver need to be installed. Optionally, `libdrm` can be installed to query firmware
information and hardware IPs.
### Python interface and CLI tool prerequisites
* Python version 3.6.8 or greater (64-bit)
::::{note}
During the driver installation process on Azure Linux 3, you might encounter the `ModuleNotFoundError: No module named 'more_itertools'` warning. This warning is a result of the reintroduction of `python3-wheel` and `python3-setuptools` dependencies in the CMake of AMD SMI, which requires `more_itertools` to build these Python libraries. This issue will be fixed in a future ROCm release. As a workaround, use the following command before installation:
```
sudo python3 -m pip install more_itertools
```
::::
### Go interface prerequisites
* Go version 1.20 or greater
(install_amdgpu_driver)=
## Install the amdgpu driver
```{note}
As of ROCm 7.0.0, the `amdgpu` driver is distributed separately from the ROCm
software stack. See
{doc}`rocm-install-on-linux:reference/user-kernel-space-compat-matrix` for
driver to ROCm user space compatibility information.
```
Confirm that your Linux kernel version matches the system requirements described in
{ref}`rocm-install-on-linux:supported_distributions`.
For up-to-date installation instructions, see the [AMD GPU Driver (amdgpu)
documentation](https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/install/detailed-install/prerequisites.html).
(install_amdgpu_rocm)=
## Install AMD SMI with ROCm
AMD SMI is included as a core package in the ROCm software stack as part of the
`rocm-developer-tools` meta package. See [ROCm runtime
packages](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/package-manager-integration.html#id3)
for more information.
```{note}
The `amdgpu-install` script is no longer the recommended way to install ROCm.
Install using your supported Linux distribution's package manager instead.
```
For up-to-date installation instructions via package manager, see {doc}`ROCm
installation for Linux <rocm-install-on-linux:install/prerequisites>`.
After installing the `amdgpu` driver and ROCm, verify your AMD SMI installation:
```shell
amd-smi
```
(install_without_rocm)=
## Install AMD SMI without ROCm
The following are example steps to install the AMD SMI libraries and CLI tool on
Ubuntu 22.04.
1. Install the library.
```shell
sudo apt install amd-smi-lib
```
2. Add the installation directory to your PATH. If installed with ROCm, ignore
this step.
```shell
export PATH="${PATH:+${PATH}:}/opt/rocm/bin"
```
3. Verify your installation.
```shell
amd-smi --help
```
## Optionally enable CLI autocompletion
The `amd-smi` CLI application supports autocompletion. If `argcomplete` is not
installed and enabled already, do so using the following commands.
```shell
python3 -m pip install argcomplete
activate-global-python-argcomplete --user
# restart shell to enable
```
(install-manual-py-lib)=
## Install the Python library for multiple ROCm instances
If {doc}`multiple ROCm versions are installed
<rocm-install-on-linux:install/install-methods/multi-version-install-index>` and you
are not using `pyenv`, uninstall previous versions of AMD SMI before installing
the desired version from your ROCm instance.
### Manually install the Python library
The following are example AMD SMI installation steps on Ubuntu 22.04 without
ROCm.
1. Remove previous AMD SMI installation.
```shell
python3 -m pip list | grep amd
python3 -m pip uninstall amdsmi
```
2. Install the AMD SMI Python library from your target ROCm instance.
```shell
sudo apt install amd-smi-lib
cd /opt/rocm/share/amd_smi
python3 -m pip install --upgrade pip
python3 -m pip install --user .
```
3. You should now have the AMD SMI Python library in your Python path:
```shell-session
~$ python3
Python 3.8.10 (default, May 26 2023, 14:05:08)
[GCC 9.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import amdsmi
>>>
```
+9
Melihat File
@@ -0,0 +1,9 @@
.. meta::
:description: Review the AMD SMI license agreement.
:keywords: amdsmi
*******
License
*******
.. include:: ../LICENSE
@@ -0,0 +1,21 @@
---
myst:
html_meta:
"description lang=en": "Explore the AMD SMI C++ API."
"keywords": "api, smi, lib, cpp, header, system, management, interface, ROCm"
---
# AMD SMI C++ API reference
This section provides comprehensive documentation for the AMD SMI C++ API.
Explore these sections to understand the full scope of available
functionalities and how to implement them in your applications.
- {doc}`Modules <../doxygen/docBin/html/topics>`
- {doc}`Files <../doxygen/docBin/html/files>`
- {doc}`Globals <../doxygen/docBin/html/globals>`
- {doc}`Data structures <../doxygen/docBin/html/annotated>`
@@ -0,0 +1,33 @@
---
myst:
html_meta:
"description lang=en": "Explore the AMD SMI Go API."
"keywords": "api, smi, lib, system, management, interface, ROCm, golang"
---
# AMD SMI Go API reference
The AMD SMI Go interface provides a convenient way to interact with AMD
hardware through a simple and accessible API. The API is compatible with Go
version 1.20 and higher and requires the AMD driver to be loaded for
initialization. Review the [prerequisites](#go_prereqs) before getting
started.
This section provides documentation for the AMD SMI Go API. Explore these
sections to understand the full scope of available functionalities and how to
implement them in your applications.
## GPU functions
```{eval-rst}
.. go-api-ref:: ../../goamdsmi.go
:section: gpu
```
## CPU functions
```{eval-rst}
.. go-api-ref:: ../../goamdsmi.go
:section: cpu
```
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,9 @@
---
myst:
html_meta:
"description lang=en": "A summary of changes to AMD SMI APIs. The changelog is listed for reference and subject to change."
"keywords": "api, smi, lib, changes, system, management, interface, ROCm"
---
```{include} ../../CHANGELOG.md
```
@@ -0,0 +1,68 @@
# Variables of the form ${<variable>} are substituted, currently the following
# list is supported:
# - ${branch} (or {branch}) the name of the current branch
# - ${url} (or {url}) github url of the current project
# - ${project:<project_name>} base url of the documentation of <project_name>
# based on intersphinx_mapping.
# These comments will also be removed.
defaults:
numbered: false
root: index
subtrees:
- caption: Install
entries:
- file: install/install.md
title: Library and CLI tool installation
- file: install/build.md
title: Build from source
- caption: How to
entries:
- file: how-to/amdsmi-cpp-lib.md
title: C++ library usage
- file: how-to/amdsmi-py-lib.md
title: Python library usage
- file: how-to/amdsmi-go-lib.md
title: Go library usage
- file: how-to/amdsmi-cli-tool.md
title: CLI tool usage
- file: how-to/setup-docker-container.md
title: Use AMD SMI in a Docker container
- caption: Reference
entries:
- file: reference/amdsmi-cpp-api.md
title: C++ API
entries:
- file: doxygen/docBin/html/topics
title: Modules
- file: doxygen/docBin/html/files
title: Files
- file: doxygen/docBin/html/globals
title: Globals
- file: doxygen/docBin/html/annotated
title: Data structures
- file: doxygen/docBin/html/functions_data_fields
title: Data fields
- file: reference/amdsmi-py-api.md
title: Python API
- file: reference/amdsmi-go-api.md
title: Go API
- file: reference/changelog.md
title: Changelog
- caption: Conceptual
entries:
- file: conceptual/ras.md
- caption: Tutorials
entries:
- url: https://github.com/ROCm/amdsmi/tree/${branch}/example
title: AMD SMI examples (GitHub)
- url: https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html
title: AMD SMI CLI walkthrough
- caption: About
entries:
- file: license.md
@@ -0,0 +1 @@
rocm-docs-core[api_reference]==1.27.0
@@ -0,0 +1,313 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile docs/sphinx/requirements.in
#
accessible-pygments==0.0.5
# via pydata-sphinx-theme
alabaster==1.0.0
# via sphinx
asttokens==3.0.0
# via stack-data
attrs==25.3.0
# via
# jsonschema
# jupyter-cache
# referencing
babel==2.17.0
# via
# pydata-sphinx-theme
# sphinx
beautifulsoup4==4.13.5
# via pydata-sphinx-theme
breathe==4.36.0
# via rocm-docs-core
certifi==2025.8.3
# via requests
cffi==2.0.0
# via
# cryptography
# pynacl
charset-normalizer==3.4.3
# via requests
click==8.3.0
# via
# click-log
# doxysphinx
# jupyter-cache
# sphinx-external-toc
click-log==0.4.0
# via doxysphinx
comm==0.2.3
# via ipykernel
contourpy==1.3.3
# via matplotlib
cryptography==46.0.1
# via pyjwt
cycler==0.12.1
# via matplotlib
debugpy==1.8.17
# via ipykernel
decorator==5.2.1
# via ipython
docutils==0.21.2
# via
# myst-parser
# pydata-sphinx-theme
# sphinx
doxysphinx==3.3.12
# via rocm-docs-core
executing==2.2.1
# via stack-data
fastjsonschema==2.21.2
# via
# nbformat
# rocm-docs-core
fonttools==4.60.0
# via matplotlib
gitdb==4.0.12
# via gitpython
gitpython==3.1.45
# via rocm-docs-core
greenlet==3.2.4
# via sqlalchemy
idna==3.10
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==8.7.0
# via
# jupyter-cache
# myst-nb
ipykernel==6.30.1
# via myst-nb
ipython==9.5.0
# via
# ipykernel
# myst-nb
ipython-pygments-lexers==1.1.1
# via ipython
jedi==0.19.2
# via ipython
jinja2==3.1.6
# via
# myst-parser
# sphinx
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-cache==1.0.1
# via myst-nb
jupyter-client==8.6.3
# via
# ipykernel
# nbclient
jupyter-core==5.8.1
# via
# ipykernel
# jupyter-client
# nbclient
# nbformat
kiwisolver==1.4.9
# via matplotlib
libsass==0.22.0
# via doxysphinx
lxml==5.2.1
# via doxysphinx
markdown-it-py==3.0.0
# via
# mdit-py-plugins
# myst-parser
markupsafe==3.0.2
# via jinja2
matplotlib==3.10.6
# via doxysphinx
matplotlib-inline==0.1.7
# via
# ipykernel
# ipython
mdit-py-plugins==0.5.0
# via myst-parser
mdurl==0.1.2
# via markdown-it-py
mpire==2.10.2
# via doxysphinx
myst-nb==1.3.0
# via rocm-docs-core
myst-parser==4.0.1
# via myst-nb
nbclient==0.10.2
# via
# jupyter-cache
# myst-nb
nbformat==5.10.4
# via
# jupyter-cache
# myst-nb
# nbclient
nest-asyncio==1.6.0
# via ipykernel
numpy==1.26.4
# via
# contourpy
# doxysphinx
# matplotlib
packaging==25.0
# via
# ipykernel
# matplotlib
# sphinx
parso==0.8.5
# via jedi
pexpect==4.9.0
# via ipython
pillow==11.3.0
# via matplotlib
platformdirs==4.4.0
# via jupyter-core
prompt-toolkit==3.0.52
# via ipython
psutil==7.1.0
# via ipykernel
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pycparser==2.23
# via cffi
pydata-sphinx-theme==0.16.1
# via
# rocm-docs-core
# sphinx-book-theme
pygithub==2.8.1
# via rocm-docs-core
pygments==2.19.2
# via
# accessible-pygments
# ipython
# ipython-pygments-lexers
# mpire
# pydata-sphinx-theme
# sphinx
pyjson5==1.6.9
# via doxysphinx
pyjwt[crypto]==2.10.1
# via pygithub
pynacl==1.6.0
# via pygithub
pyparsing==3.2.5
# via
# doxysphinx
# matplotlib
python-dateutil==2.9.0.post0
# via
# jupyter-client
# matplotlib
pyyaml==6.0.3
# via
# jupyter-cache
# myst-nb
# myst-parser
# rocm-docs-core
# sphinx-external-toc
pyzmq==27.1.0
# via
# ipykernel
# jupyter-client
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
requests==2.32.5
# via
# pygithub
# sphinx
rocm-docs-core[api-reference]==1.27.0
# via -r requirements.in
roman-numerals-py==3.1.0
# via sphinx
rpds-py==0.27.1
# via
# jsonschema
# referencing
six==1.17.0
# via python-dateutil
smmap==5.0.2
# via gitdb
snowballstemmer==3.0.1
# via sphinx
soupsieve==2.8
# via beautifulsoup4
sphinx==8.2.3
# via
# breathe
# myst-nb
# myst-parser
# pydata-sphinx-theme
# rocm-docs-core
# sphinx-book-theme
# sphinx-copybutton
# sphinx-design
# sphinx-external-toc
# sphinx-notfound-page
sphinx-book-theme==1.1.3
# via rocm-docs-core
sphinx-copybutton==0.5.2
# via rocm-docs-core
sphinx-design==0.6.1
# via rocm-docs-core
sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-notfound-page==1.1.0
# via rocm-docs-core
sphinxcontrib-applehelp==2.0.0
# via sphinx
sphinxcontrib-devhelp==2.0.0
# via sphinx
sphinxcontrib-htmlhelp==2.1.0
# via sphinx
sphinxcontrib-jsmath==1.0.1
# via sphinx
sphinxcontrib-qthelp==2.0.0
# via sphinx
sphinxcontrib-serializinghtml==2.0.0
# via sphinx
sqlalchemy==2.0.43
# via jupyter-cache
stack-data==0.6.3
# via ipython
tabulate==0.9.0
# via jupyter-cache
tornado==6.5.2
# via
# ipykernel
# jupyter-client
tqdm==4.67.1
# via mpire
traitlets==5.14.3
# via
# ipykernel
# ipython
# jupyter-client
# jupyter-core
# matplotlib-inline
# nbclient
# nbformat
typing-extensions==4.15.0
# via
# beautifulsoup4
# myst-nb
# pydata-sphinx-theme
# pygithub
# referencing
# sqlalchemy
urllib3==2.5.0
# via
# pygithub
# requests
wcwidth==0.2.14
# via prompt-toolkit
zipp==3.23.0
# via importlib-metadata
+63
Melihat File
@@ -0,0 +1,63 @@
# Standalone build script for the AMD SMI example programs.
# It locates an installed amd_smi CMake package and builds the example
# executables against it; nothing is installed by this script.
cmake_minimum_required(VERSION 3.20)

# These cache options are declared ahead of project() on purpose: project()
# runs CMake's platform setup, and declaring them first makes sure the
# defaults chosen here are the ones that end up in the cache.
option(ENABLE_ESMI_LIB "Build ESMI Library" ON)
option(CMAKE_VERBOSE_MAKEFILE "Enable verbose output" ON)
option(CMAKE_EXPORT_COMPILE_COMMANDS "Export compile commands for linters and autocompleters" ON)

# project() enables the languages and detects the compiler. Variables such as
# CMAKE_COMPILER_IS_GNUCC and CMAKE_CXX_COMPILER_VERSION are only defined after
# this call, so the compiler version check below has to come after it.
project(amd_smi_example)

# Compiler flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -m64 -msse -msse2")
if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
endif()

set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard to use")
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Reject GCC releases that are too old to build the examples.
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)
    message("Compiler version is " ${CMAKE_CXX_COMPILER_VERSION})
    message(FATAL_ERROR "Require at least gcc-5.4.0")
endif()

# required variables
if(DEFINED ENV{ROCM_PATH})
    set(ROCM_DIR "$ENV{ROCM_PATH}" CACHE STRING "ROCm directory.")
else()
    set(ROCM_DIR "/opt/rocm" CACHE STRING "ROCm directory.")
endif()

include(GNUInstallDirs)

# add package search paths
# ../../../ should resolve to /opt/rocm or another rocm install path
# fall back to ROCM_DIR
list(APPEND CMAKE_PREFIX_PATH ../../../ ${ROCM_DIR})
list(APPEND CMAKE_LIBRARY_PATH ${ROCM_DIR}/${CMAKE_INSTALL_LIBDIR})
find_package(amd_smi CONFIG REQUIRED)

message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Finished Cmake Example ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")

# compile example files but do not install
# this is only useful if running from build directory
set(SMI_DRM_EXAMPLE_EXE "amd_smi_drm_ex")
add_executable(${SMI_DRM_EXAMPLE_EXE} "amd_smi_drm_example.cc")
target_link_libraries(${SMI_DRM_EXAMPLE_EXE} amd_smi)

set(SMI_NODRM_EXAMPLE_EXE "amd_smi_nodrm_ex")
add_executable(${SMI_NODRM_EXAMPLE_EXE} "amd_smi_nodrm_example.cc")
target_link_libraries(${SMI_NODRM_EXAMPLE_EXE} amd_smi)

# The ESMI sample is only built when ENABLE_ESMI_LIB is ON; the same name is
# passed to the sample as a compile definition.
if(ENABLE_ESMI_LIB)
    set(ESMI_SAMPLE_EXE "amd_smi_esmi_ex")
    add_executable(${ESMI_SAMPLE_EXE} "amdsmi_esmi_intg_example.cc")
    target_link_libraries(${ESMI_SAMPLE_EXE} amd_smi)
    target_compile_definitions(${ESMI_SAMPLE_EXE} PUBLIC ENABLE_ESMI_LIB)
endif()
@@ -0,0 +1,49 @@
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from amdsmi import *
import os
# Initialize the AMD SMI library at import time, before the example below
# issues any other amdsmi call.
amdsmi_init()
def amdsmi_get_afids_from_cper():
    """Print the AFIDs decoded from every CPER dump found in /tmp/cper_dump/.

    Each regular file directly inside the directory whose path contains
    ".bin" is read in binary mode and handed to
    amdsmi_interface.amdsmi_get_afids_from_cper(); the returned AFID list is
    printed. Nothing is returned.
    """
    dump_dir = "/tmp/cper_dump/"
    print(f"Searching for cper file in {dump_dir}")
    with os.scandir(dump_dir) as dir_entries:
        for dir_entry in dir_entries:
            # Skip sub-directories and anything whose path lacks ".bin".
            if not dir_entry.is_file() or ".bin" not in dir_entry.path:
                continue
            print(f"Found {dir_entry.path}")
            with open(dir_entry.path, "rb") as cper_stream:
                cper_bytes = cper_stream.read()
            # Go through amdsmi_interface explicitly: this function has the
            # same name as the library call it wraps.
            afids, _num_afids = amdsmi_interface.amdsmi_get_afids_from_cper(cper_bytes)
            print(f"afids: {afids}")
# Run the example as soon as the script is executed.
amdsmi_get_afids_from_cper()
"""
Sample output:
sudo python3 afid.py
Searching for cper file in /tmp/cper_dump/
Found /tmp/cper_dump/cper_entry_0.bin
afids: [17]
Found /tmp/cper_dump/cper_entry_1.bin
afids: [17]
"""
@@ -0,0 +1,126 @@
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from amdsmi import *
import os
# Initialize the AMD SMI library at import time, before the helpers below
# issue any other amdsmi call.
amdsmi_init()
def get_severity_mask(severity):
    """Translate a severity keyword into a CPER severity bit mask.

    Bit 0 selects non-fatal uncorrected errors, bit 1 fatal errors and bit 2
    non-fatal corrected errors. "all" sets the three bits together.

    Args:
        severity: one of "all", "fatal", "nonfatal", "nonfatal-uncorrected",
            "nonfatal-corrected" or "corrected".

    Returns:
        The integer mask for the keyword, or 0 when the keyword is not
        recognized.
    """
    uncorrected_bit = 1 << 0  # AMDSMI_CPER_SEV_NON_FATAL_UNCORRECTED (0)
    fatal_bit = 1 << 1        # AMDSMI_CPER_SEV_FATAL (1)
    corrected_bit = 1 << 2    # AMDSMI_CPER_SEV_NON_FATAL_CORRECTED (2)

    keyword_groups = (
        (("all",), uncorrected_bit | fatal_bit | corrected_bit),
        (("fatal",), fatal_bit),
        (("nonfatal", "nonfatal-uncorrected"), uncorrected_bit),
        (("nonfatal-corrected", "corrected"), corrected_bit),
    )
    for keywords, mask in keyword_groups:
        if severity in keywords:
            return mask
    return 0
def gpuid(device):
    """Return the position of `device` in the list of processor handles.

    The handles come from amdsmi_interface.amdsmi_get_processor_handles() and
    are matched on their `.value` attribute. Returns None when no handle
    matches.
    """
    all_handles = amdsmi_interface.amdsmi_get_processor_handles()
    return next(
        (position for position, handle in enumerate(all_handles)
         if device.value == handle.value),
        None,
    )
def dump_cper_entry(entry, cper_data, key):
    """Write one CPER record and its metadata to /tmp/cper_dump/.

    Two files are produced, both named after `key`:
      * cper_entry_<key>.bin  - the first cper_data[key]["size"] values of
        cper_data[key]["bytes"], each reduced modulo 256 to a single byte.
      * cper_entry_<key>.json - str(entry).

    Args:
        entry: metadata object for the record; only its str() form is stored.
        cper_data: mapping from key to a dict with "size" and "bytes" items.
        key: selects the record in `cper_data` and names the output files.
    """
    try:
        os.mkdir("/tmp/cper_dump", mode=0o777, dir_fd=None)
    except FileExistsError:
        # The directory is already there; reuse it.
        pass

    bin_path = f"/tmp/cper_dump/cper_entry_{key}.bin"
    with open(bin_path, "wb") as bin_out:
        record = cper_data[key]
        valid_len = record["size"]
        # "% 256" maps every value (including negative ones) into byte range.
        bin_out.write(bytes(value % 256 for value in record["bytes"][:valid_len]))
    print(f" Wrote cper data to file: {bin_path}")

    meta_path = f"/tmp/cper_dump/cper_entry_{key}.json"
    with open(meta_path, "wt") as meta_out:
        meta_out.write(str(entry))
def get_gpu_cper_entries():
    """Fetch, print and dump the CPER entries of the first GPU handle.

    Entries are requested with severity "all" in buffers of 1024*100 bytes.
    The cursor returned by each call is fed into the next one until it stops
    advancing. Every entry is printed and written out via dump_cper_entry().
    The device loop is left after the first handle. Any AmdSmiException is
    printed instead of being propagated.
    """
    try:
        handles = amdsmi_interface.amdsmi_get_processor_handles()
        chunk_bytes = 1024*100
        cursor = 0
        severity = "all"
        for handle in handles:
            while True:
                entries, next_cursor, cper_data, _status = amdsmi_get_gpu_cper_entries(
                    handle, get_severity_mask(severity), chunk_bytes, cursor)
                gpu_index = gpuid(handle)
                print("#############################################################################")
                print(f"cper entries for severity: '{severity}', gpu #{gpu_index}, cursor: {cursor}-{next_cursor - 1}")
                for key, entry in entries.items():
                    print("----------------")
                    # Keys are relative to this batch; add the cursor to get
                    # the running entry number.
                    print("Entry", cursor + key)
                    print(" Error Severity:", entry.get("error_severity", "Unknown"))
                    print(" Notify Type:", entry.get("notify_type", "Unknown"))
                    print(" Timestamp:", entry.get("timestamp", ""))
                    print(f" Cper entry metadata: {entry}")
                    dump_cper_entry(entry, cper_data, key)
                # A cursor that did not move means there is nothing left.
                if cursor == next_cursor:
                    break
                cursor = next_cursor
            # Only the first device is processed.
            break
    except AmdSmiException as e:
        print(e)
# Run the example as soon as the script is executed.
get_gpu_cper_entries()
"""
Sample output:
cper entries for severity: 'all', gpu #0, cursor: 0-3
----------------
Entry 0
Error Severity: non_fatal_corrected
Notify Type: CMC
Timestamp: 2025/09/07 00:14:22
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/07 00:14:22', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:1', 'flags': 0, 'persistence_info': 0}
Wrote cper data to file: /tmp/cper_dump/cper_entry_0.bin
----------------
Entry 1
Error Severity: non_fatal_corrected
Notify Type: CMC
Timestamp: 2025/09/07 00:14:26
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/07 00:14:26', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:2', 'flags': 0, 'persistence_info': 0}
Wrote cper data to file: /tmp/cper_dump/cper_entry_1.bin
----------------
Entry 2
Error Severity: non_fatal_corrected
Notify Type: CMC
Timestamp: 2025/09/08 06:12:11
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/08 06:12:11', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:3', 'flags': 0, 'persistence_info': 0}
Wrote cper data to file: /tmp/cper_dump/cper_entry_2.bin
----------------
Entry 3
Error Severity: non_fatal_corrected
Notify Type: CMC
Timestamp: 2025/09/08 06:13:59
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/08 06:13:59', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:4', 'flags': 0, 'persistence_info': 0}
Wrote cper data to file: /tmp/cper_dump/cper_entry_3.bin
#############################################################################
cper entries for severity: 'all', gpu #0, cursor: 4-3
"""
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,374 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <pwd.h>
#include <cinttypes>
#include <sys/stat.h>
#include <unistd.h>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>
#include "amd_smi/amdsmi.h"
// Checks an AMD SMI status code.
//
// On anything other than AMDSMI_STATUS_SUCCESS the numeric code, the source
// line and the library's description of the error are written to std::cout.
// AMDSMI_STATUS_NOT_SUPPORTED and AMDSMI_STATUS_INVAL are tolerated (execution
// continues); every other failure makes the *enclosing function* return the
// status code.
//
// RET is evaluated exactly once and copied into a local, so passing an
// expression with side effects is safe. The macro deliberately expands to a
// plain brace block rather than do { } while (0): call sites in this file
// invoke it without a trailing semicolon.
#define CHK_AMDSMI_RET(RET)                                                    \
  {                                                                            \
    const auto chk_status_ = (RET);                                            \
    if (chk_status_ != AMDSMI_STATUS_SUCCESS) {                                \
      const char *err_str = "";                                                \
      amdsmi_status_code_to_string(chk_status_, &err_str);                     \
      std::cout << "AMDSMI call returned " << chk_status_ << " at line "       \
                << __LINE__ << ": " << err_str << std::endl;                   \
      if (chk_status_ != AMDSMI_STATUS_NOT_SUPPORTED &&                        \
          chk_status_ != AMDSMI_STATUS_INVAL) {                                \
        return chk_status_;                                                    \
      }                                                                        \
    }                                                                          \
  }
// Example entry point: initializes AMD SMI for GPUs, walks every socket and
// every processor handle on it, and prints identification, firmware,
// temperature, bad-page, ECC, board, memory, power, DPM policy and link
// topology information for each device.
//
// Returns 0 on success. A failing AMD SMI call other than NOT_SUPPORTED /
// INVAL makes CHK_AMDSMI_RET return that status code from main; a device that
// is not an AMD GPU makes main return AMDSMI_STATUS_NOT_SUPPORTED.
//
// Because CHK_AMDSMI_RET lets NOT_SUPPORTED / INVAL through, every output
// structure is value-initialized so that the subsequent prints never read
// indeterminate memory.
int main() {
  amdsmi_status_t ret;

  // Init amdsmi for sockets and devices.
  // Here we are only interested in AMD_GPUS.
  ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
  CHK_AMDSMI_RET(ret)

  // Get all sockets
  uint32_t socket_count = 0;

  // Get the socket count available for the system.
  ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  CHK_AMDSMI_RET(ret)

  // Allocate the memory for the sockets
  std::vector<amdsmi_socket_handle> sockets(socket_count);

  // Get the sockets of the system.
  // data() is used instead of &sockets[0], which is undefined for an empty
  // vector.
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  CHK_AMDSMI_RET(ret)

  std::cout << "Total Socket: " << socket_count << std::endl;

  // For each socket, get identifier and devices
  for (uint32_t i = 0; i < socket_count; i++) {
    // Get Socket info
    char socket_info[128];
    ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
    CHK_AMDSMI_RET(ret)
    std::cout << "Socket " << socket_info << std::endl;

    // Get the device count available for the socket.
    uint32_t device_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
    CHK_AMDSMI_RET(ret)

    // Allocate the memory for the device handlers on the socket
    std::vector<amdsmi_processor_handle> processor_handles(device_count);

    // Get all devices of the socket (data() is safe for an empty vector).
    ret = amdsmi_get_processor_handles(sockets[i], &device_count,
                                       processor_handles.data());
    CHK_AMDSMI_RET(ret)

    // For each device of the socket, get name and temperature.
    for (uint32_t j = 0; j < device_count; j++) {
      // Get device type. Since the amdsmi is initialized with
      // AMD_SMI_INIT_AMD_GPUS, the processor_type must be
      // AMDSMI_PROCESSOR_TYPE_AMD_GPU.
      processor_type_t processor_type = {};
      ret = amdsmi_get_processor_type(processor_handles[j], &processor_type);
      CHK_AMDSMI_RET(ret)
      if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
        std::cout << "Expect AMDSMI_PROCESSOR_TYPE_AMD_GPU device type!\n";
        return AMDSMI_STATUS_NOT_SUPPORTED;
      }

      // Zero-initialized: the print below also runs when the call reported
      // AMDSMI_STATUS_INVAL, which CHK_AMDSMI_RET tolerates.
      amdsmi_ras_feature_t ras_feature = {};
      ret = amdsmi_get_gpu_ras_feature_info(processor_handles[j], &ras_feature);
      CHK_AMDSMI_RET(ret)
      if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
        printf("\tras_feature: version: %x, schema: %x\n",
               ras_feature.ras_eeprom_version,
               ras_feature.ecc_correction_schema_flag);
      }

      amdsmi_bdf_t bdf = {};
      ret = amdsmi_get_gpu_device_bdf(processor_handles[j], &bdf);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_device_bdf:\n");
      // The label is the device index (j), not the socket index.
      printf("\tDevice[%" PRIu32 "] BDF %04" PRIx64 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32 "\n\n", j,
             static_cast<uint64_t>(bdf.domain_number),
             static_cast<uint32_t>(bdf.bus_number),
             static_cast<uint32_t>(bdf.device_number),
             static_cast<uint32_t>(bdf.function_number));

      amdsmi_asic_info_t asic_info = {};
      ret = amdsmi_get_gpu_asic_info(processor_handles[j], &asic_info);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_asic_info:\n");
      printf("\tMarket Name: %s\n", asic_info.market_name);
      printf("\tDeviceID: 0x%lx\n", asic_info.device_id);
      printf("\tVendorID: 0x%x\n", asic_info.vendor_id);
      printf("\tRevisionID: 0x%x\n", asic_info.rev_id);
      printf("\tSubSystemID: 0x%x\n", asic_info.subsystem_id);
      printf("\tAsic serial: 0x%s\n", asic_info.asic_serial);
      printf("\tOAM id: 0x%x\n", asic_info.oam_id);
      printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units);

      // Get VBIOS info
      amdsmi_vbios_info_t vbios_info = {};
      ret = amdsmi_get_gpu_vbios_info(processor_handles[j], &vbios_info);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_vbios_info:\n");
      printf("\tVBIOS/IFWI Name: %s\n", vbios_info.name);
      printf("\tVBIOS/IFWI Build Date: %s\n", vbios_info.build_date);
      printf("\tVBIOS/IFWI Part Number: %s\n", vbios_info.part_number);
      printf("\tVBIOS/IFWI Version String: %s\n\n", vbios_info.version);
      printf("\tVBIOS/IFWI Boot Firmware: %s\n\n", vbios_info.boot_firmware);

      // Get engine usage info
      amdsmi_engine_usage_t engine_usage = {};
      ret = amdsmi_get_gpu_activity(processor_handles[j], &engine_usage);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_activity:\n");
      printf("\tAverage GFX Activity: %d\n", engine_usage.gfx_activity);
      printf("\tAverage MM Activity: %d\n", engine_usage.mm_activity);
      printf("\tAverage UMC Activity: %d\n\n", engine_usage.umc_activity);

      // Get firmware info
      amdsmi_fw_info_t fw_information = {};
      ret = amdsmi_get_fw_info(processor_handles[j], &fw_information);
      CHK_AMDSMI_RET(ret)
      const auto &fw_list = fw_information.fw_info_list;
      printf(" Output of amdsmi_get_fw_info:\n");
      printf("\tFirmware version: %d\n", fw_information.num_fw_info);
      printf("\tSMU: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_SMU].fw_version);
      printf("\tPM: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PM].fw_version);
      printf("\tVCN: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_VCN].fw_version);
      printf("\tCP_ME: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_ME].fw_version);
      printf("\tCP_PFP: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_PFP].fw_version);
      printf("\tCP_CE: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_CE].fw_version);
      printf("\tRLC: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC].fw_version);
      printf("\tCP_MEC1: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_MEC1].fw_version);
      printf("\tCP_MEC2: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_MEC2].fw_version);
      printf("\tSDMA0: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_SDMA0].fw_version);
      printf("\tMC: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_MC].fw_version);
      printf("\tRLC RESTORE LIST CNTL: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL]
                 .fw_version);
      printf("\tRLC RESTORE LIST GPM MEM: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM]
                 .fw_version);
      printf("\tRLC RESTORE LIST SRM MEM: %ld\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM]
                 .fw_version);
      printf("\tPSP SOSDRV: %ld\n\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PSP_SOSDRV].fw_version);
      printf("\tPLDM BUNDLE: %ld\n\n",
             fw_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PLDM_BUNDLE].fw_version);

      // Get temperature measurements.
      // Zero-initialized so that a sensor reported as unsupported prints 0
      // instead of an indeterminate value.
      int64_t temp_measurements[AMDSMI_TEMPERATURE_TYPE__MAX + 1] = {};
      amdsmi_temperature_type_t temp_types[4] = {
          AMDSMI_TEMPERATURE_TYPE_EDGE, AMDSMI_TEMPERATURE_TYPE_HOTSPOT,
          AMDSMI_TEMPERATURE_TYPE_VRAM, AMDSMI_TEMPERATURE_TYPE_PLX};
      for (const auto &temp_type : temp_types) {
        ret = amdsmi_get_temp_metric(
            processor_handles[j], temp_type, AMDSMI_TEMP_CURRENT,
            &temp_measurements[static_cast<int>(temp_type)]);
        CHK_AMDSMI_RET(ret)
      }
      printf(" Output of amdsmi_get_temp_metric:\n");
      printf("\tGPU Edge temp measurement: %" PRId64 "\n",
             temp_measurements[AMDSMI_TEMPERATURE_TYPE_EDGE]);
      printf("\tGPU Hotspot temp measurement: %" PRId64 "\n",
             temp_measurements[AMDSMI_TEMPERATURE_TYPE_HOTSPOT]);
      printf("\tGPU VRAM temp measurement: %" PRId64 "\n",
             temp_measurements[AMDSMI_TEMPERATURE_TYPE_VRAM]);
      printf("\tGPU PLX temp measurement: %" PRId64 "\n\n",
             temp_measurements[AMDSMI_TEMPERATURE_TYPE_PLX]);

      // Get bad pages
      const char *const bad_page_status_names[] = {"RESERVED", "PENDING",
                                                   "UNRESERVABLE"};
      const uint32_t bad_page_status_name_count =
          sizeof(bad_page_status_names) / sizeof(bad_page_status_names[0]);
      uint32_t num_pages = 0;
      // First call: no output buffer, only the number of records is wanted.
      ret = amdsmi_get_gpu_bad_page_info(processor_handles[j], &num_pages,
                                         nullptr);
      std::cout << "num_pages = " << num_pages << "\n";
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_bad_page_info:\n");
      if (!num_pages) {
        printf("\tNo bad pages found.\n");
      } else {
        // Second call: fetch the records into a buffer of the reported size.
        std::vector<amdsmi_retired_page_record_t> bad_page_info(num_pages);
        ret = amdsmi_get_gpu_bad_page_info(processor_handles[j], &num_pages,
                                           bad_page_info.data());
        CHK_AMDSMI_RET(ret)
        for (uint32_t page_it = 0; page_it < num_pages; page_it += 1) {
          // Guard the name lookup so an unexpected status value cannot index
          // past the end of the table.
          const uint32_t status_index =
              static_cast<uint32_t>(bad_page_info[page_it].status);
          const char *status_name =
              status_index < bad_page_status_name_count
                  ? bad_page_status_names[status_index]
                  : "UNKNOWN";
          printf(" Page[%" PRIu32 "]\n", page_it);
          printf("\tAddress: %lu\n", bad_page_info[page_it].page_address);
          printf("\tSize: %lu\n", bad_page_info[page_it].page_size);
          printf("\tStatus: %s\n", status_name);
        }
      }
      printf("\n");

      // Get ECC error counts
      amdsmi_error_count_t err_cnt_info = {};
      ret = amdsmi_get_gpu_total_ecc_count(processor_handles[j], &err_cnt_info);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_total_ecc_count:\n");
      printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
      printf("\tUncorrectable errors: %lu\n\n",
             err_cnt_info.uncorrectable_count);

      // Get device name
      amdsmi_board_info_t board_info = {};
      ret = amdsmi_get_gpu_board_info(processor_handles[j], &board_info);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_board_info:\n");
      std::cout << "\tdevice [" << j
                << "]\n\t\tProduct name: " << board_info.product_name << "\n"
                << "\t\tModel Number: " << board_info.model_number << "\n"
                << "\t\tBoard Serial: " << board_info.product_serial << "\n"
                << "\t\tManufacturer Name: " << board_info.manufacturer_name
                << "\n\n";

      // Get temperature
      int64_t val_i64 = 0;
      ret = amdsmi_get_temp_metric(processor_handles[j],
                                   AMDSMI_TEMPERATURE_TYPE_EDGE,
                                   AMDSMI_TEMP_CURRENT, &val_i64);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_temp_metric:\n");
      std::cout << "\t\tTemperature: " << val_i64 << "C"
                << "\n\n";

      // Get frame buffer
      amdsmi_vram_usage_t vram_usage = {};
      ret = amdsmi_get_gpu_vram_usage(processor_handles[j], &vram_usage);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_gpu_vram_usage:\n");
      std::cout << "\t\tFrame buffer usage (MB): " << vram_usage.vram_used
                << "/" << vram_usage.vram_total << "\n\n";

      amdsmi_power_cap_info_t cap_info = {};
      ret = amdsmi_get_power_cap_info(processor_handles[j], 0, &cap_info);
      CHK_AMDSMI_RET(ret)
      printf(" Output of amdsmi_get_power_cap_info:\n");
      std::cout << "\t\t Power Cap: " << cap_info.power_cap / 1000000
                << "W\n\n";

      // Zero-initialized: with a tolerated INVAL status the loop below would
      // otherwise iterate over an indeterminate num_supported.
      amdsmi_dpm_policy_t policy = {};
      ret = amdsmi_get_soc_pstate(processor_handles[j], &policy);
      if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
        CHK_AMDSMI_RET(ret)
        std::cout << "\t amdsmi_get_soc_pstate total:" << policy.num_supported
                  << " current:" << policy.current << "\n";
        for (uint32_t x = 0; x < policy.num_supported; x++) {
          std::cout << x << ": (" << policy.policies[x].policy_id << ","
                    << policy.policies[x].policy_description << ")\n";
        }
      }

      // Get nearest GPUs
      const char *topology_link_type_str[] = {
          "AMDSMI_LINK_TYPE_INTERNAL",
          "AMDSMI_LINK_TYPE_PCIE",
          "AMDSMI_LINK_TYPE_XGMI",
          "AMDSMI_LINK_TYPE_NOT_APPLICABLE",
          "AMDSMI_LINK_TYPE_UNKNOWN",
      };
      printf("\tOutput of amdsmi_get_link_topology_nearest:\n");
      for (uint32_t topo_link_type = AMDSMI_LINK_TYPE_INTERNAL;
           topo_link_type <= AMDSMI_LINK_TYPE_UNKNOWN; topo_link_type++) {
        auto topology_nearest_info = amdsmi_topology_nearest_t();

        // Call once without an output structure, then once with it; both
        // status codes go through CHK_AMDSMI_RET.
        ret = amdsmi_get_link_topology_nearest(
            processor_handles[j],
            static_cast<amdsmi_link_type_t>(topo_link_type), nullptr);
        CHK_AMDSMI_RET(ret);

        ret = amdsmi_get_link_topology_nearest(
            processor_handles[j],
            static_cast<amdsmi_link_type_t>(topo_link_type),
            &topology_nearest_info);
        CHK_AMDSMI_RET(ret);

        printf("\tNearest GPUs found at %s\n",
               topology_link_type_str[topo_link_type]);
        for (uint32_t k = 0; k < topology_nearest_info.count; k++) {
          // Separate name so the device's own bdf above is not shadowed.
          amdsmi_bdf_t nearest_bdf = {};
          ret = amdsmi_get_gpu_device_bdf(
              topology_nearest_info.processor_list[k], &nearest_bdf);
          CHK_AMDSMI_RET(ret)
          printf("\tGPU BDF %04" PRIx64 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32 "\n",
                 static_cast<uint64_t>(nearest_bdf.domain_number),
                 static_cast<uint32_t>(nearest_bdf.bus_number),
                 static_cast<uint32_t>(nearest_bdf.device_number),
                 static_cast<uint32_t>(nearest_bdf.function_number));
        }
      }
    }
  }

  // Clean up resources allocated at amdsmi_init. It will invalidate sockets
  // and devices pointers
  ret = amdsmi_shut_down();
  CHK_AMDSMI_RET(ret)

  return 0;
}
@@ -0,0 +1,301 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <cassert>
#include <cstdint>
#include <unistd.h>
#include <vector>
#include <iostream>
#include <iomanip>
#include "amd_smi/amdsmi.h"
#include <cstring>
#include <cmath>
/* Capacity, in bytes, of the scratch line buffer used when formatting table rows. */
#define SHOWLINESZ 256
/*
 * CHK_AMDSMI_RET(RET): bail out of the enclosing function when an AMD SMI call failed.
 *
 * RET is evaluated exactly once. When it differs from AMDSMI_STATUS_SUCCESS the macro
 * prints the failing status code together with the source line, prints the message
 * obtained from amdsmi_get_esmi_err_msg() (or a fallback text when that lookup itself
 * fails), and returns the failing status from the enclosing function.
 *
 * The expansion is a plain brace block, not do { } while (0), because the call sites in
 * this file invoke the macro without a trailing semicolon.
 */
#define CHK_AMDSMI_RET(RET)                                                              \
  {                                                                                      \
    const amdsmi_status_t chk_amdsmi_ret_ = (RET);                                       \
    if (chk_amdsmi_ret_ != AMDSMI_STATUS_SUCCESS) {                                      \
      const char *err_str = nullptr;                                                     \
      if (amdsmi_get_esmi_err_msg(chk_amdsmi_ret_, &err_str) != AMDSMI_STATUS_SUCCESS || \
          err_str == nullptr) {                                                          \
        err_str = "No error message available";                                          \
      }                                                                                  \
      std::cout << "AMDSMI call returned " << chk_amdsmi_ret_ << " at line "             \
                << __LINE__ << std::endl;                                                \
      std::cout << err_str << std::endl;                                                 \
      return chk_amdsmi_ret_;                                                            \
    }                                                                                    \
  }
using std::cin;
using std::cout;
using std::endl;
using std::fixed;
using std::setprecision;
using std::vector;
int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) {
amdsmi_status_t ret;
uint32_t proto_ver;
amdsmi_smu_fw_version_t smu_fw = {};
// Initialize esmi for AMD CPUs
ret = amdsmi_init(AMDSMI_INIT_AMD_CPUS);
CHK_AMDSMI_RET(ret)
// Get all sockets
uint32_t socket_count = 0;
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
CHK_AMDSMI_RET(ret)
// Allocate the memory for the sockets
vector<amdsmi_socket_handle> sockets(socket_count);
// Get the sockets of the system
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
CHK_AMDSMI_RET(ret)
cout << "Total Socket: " << socket_count << endl;
// For each socket, get cpus and cores
for (uint32_t i = 0; i < socket_count; i++) {
cout << endl << "Socket " << i << endl;
uint32_t cpu_count = 0;
uint32_t core_count = 0;
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU
processor_type_t processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &cpu_count);
CHK_AMDSMI_RET(ret)
// Allocate the memory for the cpus
vector<amdsmi_processor_handle> plist(cpu_count);
// Get the cpus for each socket
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &plist[0], &cpu_count);
CHK_AMDSMI_RET(ret)
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE
processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &core_count);
CHK_AMDSMI_RET(ret)
// Allocate the memory for the cpu cores
vector<amdsmi_processor_handle> core_list(core_count);
// Get the cpu cores for each socket
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &core_list[0], &core_count);
CHK_AMDSMI_RET(ret)
for (uint32_t index = 0; index < plist.size(); index++) {
ret = amdsmi_get_cpu_hsmp_proto_ver(plist[index], &proto_ver);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get hsmp proto version"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
cout<<"\n------------------------------------------";
cout<<"\n| HSMP Proto Version | "<< proto_ver <<"\t\t |"<< endl;
cout<<"------------------------------------------\n";
ret = amdsmi_get_cpu_smu_fw_version(plist[index], &smu_fw);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get smu fw version"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
cout<<"\n------------------------------------------";
cout<<"\n| SMU FW Version | "
<<(unsigned)smu_fw.major<<"."
<<(unsigned)smu_fw.minor<<"."
<<(unsigned)smu_fw.debug
<<"\t\t |"<<endl;
cout<<"------------------------------------------\n";
uint32_t err_bits = 0;
uint32_t prochot;
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
cout<<"\n-------------------------------------------------";
cout<<"\n| ProchotStatus:\t\t |";
ret = amdsmi_get_cpu_prochot_status(plist[index], &prochot);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get prochot status"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<setprecision(7)<< (prochot ? "active" : "inactive")<<"\t|";
} else {
err_bits |= 1 << ret;
cout<<" NA (Err:" <<ret<<" |";
}
cout<<"\n-------------------------------------------------\n";
size_t len;
char str[SHOWLINESZ] = {};
int retVal = 0;
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
cout<<"\n-------------------------------------------------";
cout<<"\n| fclk (Mhz)\t\t\t |";
retVal = snprintf(str, SHOWLINESZ, "\n| mclk (Mhz)\t\t\t |");
len = strlen(str);
uint32_t fclk, mclk;
err_bits = 0;
ret = amdsmi_get_cpu_fclk_mclk(plist[index], &fclk, &mclk);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu fclk mclk"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<setprecision(7)<<" "<<fclk<<"\t\t|";
retVal = snprintf(str + len, SHOWLINESZ - len, " %d\t\t|", mclk);
} else {
err_bits |= 1 << ret;
cout<<" NA (Err: "<<setprecision(2)<<ret<<" |";
retVal = snprintf(str + len, SHOWLINESZ - len, " NA (Err: %-2d) |", ret);
}
if (retVal > 0 && retVal < SHOWLINESZ)
cout << str;
else
cout <<"error writing to buffer" << endl;
cout<<"\n-------------------------------------------------\n";
uint32_t socket_power;
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
cout<<"\n-------------------------------------------------";
cout<<"\n| Power (Watts)\t\t\t | ";
ret = amdsmi_get_cpu_socket_power(plist[index], &socket_power);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu socket power"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<fixed<<setprecision(3)<<static_cast<double>(socket_power)/1000<<"\t|";
} else {
err_bits |= 1 << ret;
cout<<" NA (Err:" <<ret<<" |";
}
uint32_t power_limit = 0;
cout<<"\n| PowerLimit (Watts)\t\t | ";
ret = amdsmi_get_cpu_socket_power_cap(plist[index], &power_limit);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<fixed<<setprecision(3)<<static_cast<double>(power_limit)/1000<<"\t|";
} else {
err_bits |= 1 << ret;
cout<<" NA (Err:" <<ret<<" |";
}
uint32_t power_max = 0;
cout<<"\n| PowerLimitMax (Watts)\t\t | ";
ret = amdsmi_get_cpu_socket_power_cap_max(plist[index], &power_max);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu socket power cap max"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<fixed<<setprecision(3)<<static_cast<double>(power_max)/1000<<"\t|";
} else {
err_bits |= 1 << ret;
cout<<" NA (Err:" <<ret<<" |";
}
cout<<"\n-------------------------------------------------\n";
uint32_t input_power;
power_max = 0;
cout<<"\nEnter the max power to be set:\n";
cin>>input_power;
ret = amdsmi_get_cpu_socket_power_cap_max(plist[index], &power_max);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu socket power cap max"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if ((ret == AMDSMI_STATUS_SUCCESS) && (input_power > power_max)) {
cout<<"Input power is more than max power limit,"
" limiting to "<<static_cast<double>(power_max)/1000<<"Watts\n";
input_power = power_max;
}
ret = amdsmi_set_cpu_socket_power_cap(plist[index], input_power);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to set cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<"CPU ["<<index<<"] power_limit set to "
<<fixed<<setprecision(3)<<static_cast<double>(input_power)/1000<<" Watts successfully\n";
}
power_limit = 0;
cout<<"\n| PowerLimit (Watts) \t\t | ";
ret = amdsmi_get_cpu_socket_power_cap(plist[index], &power_limit);
if(ret != AMDSMI_STATUS_SUCCESS)
cout<<"Failed to get cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
if (!ret) {
cout<<fixed<<setprecision(3)<<static_cast<double>(power_limit)/1000<<"\t|";
} else {
err_bits |= 1 << ret;
cout<<" NA (Err:" <<ret<<" |";
}
cout<<"\n-------------------------------------------------\n";
double fraction_q10 = 1/pow(2,10);
double fraction_uq10 = fraction_q10;
amdsmi_hsmp_metrics_table_t mtbl = {};
ret = amdsmi_get_hsmp_metrics_table(plist[index], &mtbl);
if (ret != AMDSMI_STATUS_SUCCESS) {
cout<<"Failed to get Metrics Table for CPU["<<index<<"], Err["<<ret<<"]" << endl;
} else {
cout<<"\n| METRICS TABLE \t\t\t\t |\n";
cout<<"\n| ACCUMULATOR COUNTER | "<<mtbl.accumulation_counter<<"\t\t|";
cout<<"\n| SOCKET POWER LIMIT | "<<(mtbl.socket_power_limit * fraction_uq10)<<" W\t\t|";
cout<<"\n| MAX SOCKET POWER LIMIT | "<<(mtbl.max_socket_power_limit * fraction_uq10)<<" W\t\t|";
cout<<"\n| SOCKET POWER | "<<(mtbl.socket_power * fraction_uq10)<<" W\t\t|\n";
cout<<"\n| Effective frequency per AID: \t\t\t\t\t\t|";
cout<<"\n-------------------------------------------------------------------------";
cout<<"\n| AID | SOCCLK \t\t| VCLK \t\t| DCLK \t\t| LCLK \t\t|";
cout<<"\n-------------------------------------------------------------------------";
for(uint32_t j = 0; j < 4 ; j++){
cout<<fixed<<setprecision(3)<<"\n| ["<<j<<"] | "
<<(mtbl.socclk_frequency[j] * fraction_uq10)<<"MHz\t| "
<<(mtbl.vclk_frequency[j] * fraction_uq10)<<"MHz\t| "
<<(mtbl.dclk_frequency[j] * fraction_uq10)<<"MHz\t| "
<<(mtbl.lclk_frequency[j] * fraction_uq10)<<"MHz\t| ";
}
cout<<"\n-------------------------------------------------------------------------\n";
cout<<"\n-------------------------------------------------------------------------\n";
}
}
}
// Clean up resources allocated at amdsmi_init
ret = amdsmi_shut_down();
CHK_AMDSMI_RET(ret)
return 0;
}
+724
Melihat File
@@ -0,0 +1,724 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package goamdsmi
/*
#cgo CFLAGS: -Wall -I/opt/rocm/include
#cgo LDFLAGS: -L/opt/rocm/lib -L/opt/rocm/lib64 -lgoamdsmi_shim64 -Wl,--unresolved-symbols=ignore-in-object-files
#include <cstdint>
#include <amdsmi_go_shim.h>
*/
import "C"
// ``GO_gpu_init`` performs the GPU initialization through the shim library and
// tells the caller whether it succeeded. Call it before any other AMD SMI
// GPU function.
//
// Output: ``bool``, true when initialization succeeded, false otherwise.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		// GPU initialization is successful...
//	}
func GO_gpu_init() bool {
	initialized := C.goamdsmi_gpu_init()
	return bool(initialized)
}
// ``GO_gpu_shutdown`` performs the GPU shutdown through the shim library and
// tells the caller whether it succeeded.
//
// Output: ``bool``, true when the shutdown succeeded, false otherwise.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_shutdown() {
//		// GPU shutdown is successful...
//	}
func GO_gpu_shutdown() bool {
	stopped := C.goamdsmi_gpu_shutdown()
	return bool(stopped)
}
// ``GO_gpu_num_monitor_devices`` reports how many GPU monitor devices are
// available.
//
// Output: ``uint``, the number of GPU monitor devices on success, or 0 on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//	}
func GO_gpu_num_monitor_devices() uint {
	deviceCount := C.goamdsmi_gpu_num_monitor_devices()
	return uint(deviceCount)
}
// ``GO_gpu_dev_name_get`` looks up the name of the GPU device selected by the
// given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``char*``, the GPU device name on success, or "NA" on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			goamdsmi.GO_gpu_dev_name_get(i)
//		}
//	}
func GO_gpu_dev_name_get(i int) *C.char {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_name_get(gpuIndex)
}
// ``GO_gpu_dev_id_get`` looks up the device ID of the GPU device selected by
// the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint16``, the GPU device ID on success, or ``0xFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			value16 := goamdsmi.GO_gpu_dev_id_get(i)
//		}
//	}
func GO_gpu_dev_id_get(i int) C.uint16_t {
	gpuIndex := C.uint(i)
	deviceID := C.goamdsmi_gpu_dev_id_get(gpuIndex)
	return C.uint16_t(deviceID)
}
// ``GO_gpu_dev_pci_id_get`` looks up the PCI ID of the GPU device selected by
// the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU device PCI ID on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_pci_id := goamdsmi.GO_gpu_dev_pci_id_get(i)
//		}
//	}
func GO_gpu_dev_pci_id_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_pci_id_get(gpuIndex)
}
// ``GO_gpu_dev_vbios_version_get`` looks up the VBIOS version of the GPU
// device selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``char*``, the VBIOS version on success, or "NA" on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			goamdsmi.GO_gpu_dev_vbios_version_get(i)
//		}
//	}
func GO_gpu_dev_vbios_version_get(i int) *C.char {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_vbios_version_get(gpuIndex)
}
// ``GO_gpu_dev_vendor_name_get`` looks up the vendor name of the GPU device
// selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``char*``, the GPU vendor name on success, or "NA" on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			goamdsmi.GO_gpu_dev_vendor_name_get(i)
//		}
//	}
func GO_gpu_dev_vendor_name_get(i int) *C.char {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_vendor_name_get(gpuIndex)
}
// ``GO_gpu_dev_power_cap_get`` looks up the power cap of the GPU selected by
// the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU power cap on success, or ``0xFFFFFFFFFFFFFFFF``
// on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_power_cap := int(goamdsmi.GO_gpu_dev_power_cap_get(i))
//		}
//	}
func GO_gpu_dev_power_cap_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_power_cap_get(gpuIndex)
}
// ``GO_gpu_dev_power_get`` looks up the power of the GPU selected by the given
// GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU power on success, or ``0xFFFFFFFFFFFFFFFF`` on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_power := int(goamdsmi.GO_gpu_dev_power_get(i))
//		}
//	}
func GO_gpu_dev_power_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_power_get(gpuIndex)
}
// ``GO_gpu_dev_temp_metric_get`` looks up a temperature reading of the GPU
// selected by the given GPU index, for the requested sensor and metric.
//
// Input parameters:
//   - int, GPU index.
//   - int, sensor number.
//   - int, metric number.
//
// Output: ``uint64``, the GPU temperature on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			temp := int(goamdsmi.GO_gpu_dev_temp_metric_get(i, 1, 0))
//		}
//	}
func GO_gpu_dev_temp_metric_get(i int, sensor int, metric int) C.uint64_t {
	gpuIndex := C.uint(i)
	sensorID := C.uint(sensor)
	metricID := C.uint(metric)
	return C.goamdsmi_gpu_dev_temp_metric_get(gpuIndex, sensorID, metricID)
}
// ``GO_gpu_dev_perf_level_get`` looks up the perf level of the GPU selected by
// the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint32``, the GPU perf level on success, or ``0xFFFFFFFF`` on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_perf_level := int(goamdsmi.GO_gpu_dev_perf_level_get(i))
//		}
//	}
func GO_gpu_dev_perf_level_get(i int) C.uint32_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_perf_level_get(gpuIndex)
}
// ``GO_gpu_dev_overdrive_level_get`` returns the overdrive level of the GPU at the
// specified GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint32``, returns GPU overdrive level on success or ``0xFFFFFFFF`` on
// fail.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_overdrive_level := int(goamdsmi.GO_gpu_dev_overdrive_level_get(i))
//		}
//	}
func GO_gpu_dev_overdrive_level_get(i int) C.uint32_t {
	// Query the overdrive shim; the previous code called the perf-level shim by
	// mistake and therefore reported the perf level under this name.
	return C.goamdsmi_gpu_dev_overdrive_level_get(C.uint(i))
}
// ``GO_gpu_dev_mem_overdrive_level_get`` returns the memory overdrive level of the
// GPU at the specified GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint32``, returns GPU memory overdrive level on success or
// ``0xFFFFFFFF`` on fail.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			mem_overdrive_level := int(goamdsmi.GO_gpu_dev_mem_overdrive_level_get(i))
//		}
//	}
func GO_gpu_dev_mem_overdrive_level_get(i int) C.uint32_t {
	// Query the memory-specific shim; the previous code called the (non-memory)
	// overdrive shim and so duplicated GO_gpu_dev_overdrive_level_get.
	// NOTE(review): relies on amdsmi_go_shim.h declaring
	// goamdsmi_gpu_dev_mem_overdrive_level_get — confirm against the shim header.
	return C.goamdsmi_gpu_dev_mem_overdrive_level_get(C.uint(i))
}
// ``GO_gpu_dev_gpu_clk_freq_get_sclk`` looks up the system clock (SCLK)
// frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU SCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_sclk_freq := int(goamdsmi.GO_gpu_dev_gpu_clk_freq_get_sclk(i))
//		}
//	}
func GO_gpu_dev_gpu_clk_freq_get_sclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(gpuIndex)
}
// ``GO_gpu_dev_gpu_clk_freq_get_mclk`` looks up the memory clock (MCLK)
// frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU MCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_mclk_freq := int(goamdsmi.GO_gpu_dev_gpu_clk_freq_get_mclk(i))
//		}
//	}
func GO_gpu_dev_gpu_clk_freq_get_mclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(gpuIndex)
}
// ``GO_gpu_od_volt_freq_range_min_get_sclk`` looks up the minimum system clock
// (SCLK) frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU minimum SCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_min_sclk := int(goamdsmi.GO_gpu_od_volt_freq_range_min_get_sclk(i))
//		}
//	}
func GO_gpu_od_volt_freq_range_min_get_sclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_od_volt_freq_range_min_get_sclk(gpuIndex)
}
// ``GO_gpu_od_volt_freq_range_min_get_mclk`` looks up the minimum memory clock
// (MCLK) frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU minimum MCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_min_mclk := int(goamdsmi.GO_gpu_od_volt_freq_range_min_get_mclk(i))
//		}
//	}
func GO_gpu_od_volt_freq_range_min_get_mclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_od_volt_freq_range_min_get_mclk(gpuIndex)
}
// ``GO_gpu_od_volt_freq_range_max_get_sclk`` looks up the maximum system clock
// (SCLK) frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU maximum SCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_max_sclk := int(goamdsmi.GO_gpu_od_volt_freq_range_max_get_sclk(i))
//		}
//	}
func GO_gpu_od_volt_freq_range_max_get_sclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_od_volt_freq_range_max_get_sclk(gpuIndex)
}
// ``GO_gpu_od_volt_freq_range_max_get_mclk`` looks up the maximum memory clock
// (MCLK) frequency of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU maximum MCLK frequency level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_max_mclk := int(goamdsmi.GO_gpu_od_volt_freq_range_max_get_mclk(i))
//		}
//	}
func GO_gpu_od_volt_freq_range_max_get_mclk(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_od_volt_freq_range_max_get_mclk(gpuIndex)
}
// ``GO_gpu_dev_gpu_busy_percent_get`` looks up the busy percentage of the GPU
// selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint32``, the GPU busy percentage on success, or ``0xFFFFFFFF`` on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			dev_busy_perc := int(goamdsmi.GO_gpu_dev_gpu_busy_percent_get(i))
//		}
//	}
func GO_gpu_dev_gpu_busy_percent_get(i int) C.uint32_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_busy_percent_get(gpuIndex)
}
// ``GO_gpu_dev_gpu_memory_busy_percent_get`` looks up the memory busy
// percentage of the GPU selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU memory busy percentage on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			mem_busy_perc := int(goamdsmi.GO_gpu_dev_gpu_memory_busy_percent_get(i))
//		}
//	}
func GO_gpu_dev_gpu_memory_busy_percent_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_memory_busy_percent_get(gpuIndex)
}
// ``GO_gpu_dev_gpu_memory_usage_get`` looks up the memory usage of the GPU
// selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU memory usage on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			mem_usage := int(goamdsmi.GO_gpu_dev_gpu_memory_usage_get(i))
//		}
//	}
func GO_gpu_dev_gpu_memory_usage_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_memory_usage_get(gpuIndex)
}
// ``GO_gpu_dev_gpu_memory_total_get`` looks up the total memory of the GPU
// selected by the given GPU index.
//
// Input parameter: ``int``, GPU index.
//
// Output: ``uint64``, the GPU total memory on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_gpu_init() {
//		num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
//		for i := 0; i < num_gpus; i++ {
//			mem_total := int(goamdsmi.GO_gpu_dev_gpu_memory_total_get(i))
//		}
//	}
func GO_gpu_dev_gpu_memory_total_get(i int) C.uint64_t {
	gpuIndex := C.uint(i)
	return C.goamdsmi_gpu_dev_gpu_memory_total_get(gpuIndex)
}
// CPU ESMI or AMDSMI calls

// ``GO_cpu_init`` performs the CPU initialization through the shim library and
// tells the caller whether it succeeded.
//
// Output: ``bool``, true when initialization succeeded, false otherwise.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		// CPU initialization is successful...
//	}
func GO_cpu_init() bool {
	initialized := C.goamdsmi_cpu_init()
	return bool(initialized)
}
// ``GO_cpu_number_of_sockets_get`` reports how many CPU sockets are available.
//
// Output: ``uint``, the number of CPU sockets on success, or 0 on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
//	}
func GO_cpu_number_of_sockets_get() uint {
	socketCount := C.goamdsmi_cpu_number_of_sockets_get()
	return uint(socketCount)
}
// ``GO_cpu_number_of_threads_get`` reports how many CPU threads are available.
//
// Output: ``uint``, the number of CPU threads on success, or 0 on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
//	}
func GO_cpu_number_of_threads_get() uint {
	threadCount := C.goamdsmi_cpu_number_of_threads_get()
	return uint(threadCount)
}
// ``GO_cpu_threads_per_core_get`` reports the thread count per available CPU
// core.
//
// Output: ``uint``, the CPU thread count on success, or 0 on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_threads_per_core := int(goamdsmi.GO_cpu_threads_per_core_get())
//	}
func GO_cpu_threads_per_core_get() uint {
	threadsPerCore := C.goamdsmi_cpu_threads_per_core_get()
	return uint(threadsPerCore)
}
// ``GO_cpu_core_energy_get`` looks up the CPU core energy for the given thread
// index.
//
// Input parameter: ``int``, thread index.
//
// Output: ``uint64``, the CPU core energy on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
//		for i := 0; i < num_threads; i++ {
//			core_energy := int(goamdsmi.GO_cpu_core_energy_get(i))
//		}
//	}
func GO_cpu_core_energy_get(i int) C.uint64_t {
	threadIndex := C.uint(i)
	return C.goamdsmi_cpu_core_energy_get(threadIndex)
}
// ``GO_cpu_core_boostlimit_get`` looks up the CPU core boost limit for the
// given thread index.
//
// Input parameter: ``int``, thread index.
//
// Output: ``uint32``, the CPU core boost limit on success, or ``0xFFFFFFFF``
// on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
//		for i := 0; i < num_threads; i++ {
//			core_boost_limit := int(goamdsmi.GO_cpu_core_boostlimit_get(i))
//		}
//	}
func GO_cpu_core_boostlimit_get(i int) C.uint32_t {
	threadIndex := C.uint(i)
	return C.goamdsmi_cpu_core_boostlimit_get(threadIndex)
}
// ``GO_cpu_socket_energy_get`` looks up the CPU socket energy for the given
// socket index.
//
// Input parameter: ``int``, socket index.
//
// Output: ``uint64``, the socket energy level on success, or
// ``0xFFFFFFFFFFFFFFFF`` on failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
//		for i := 0; i < num_sockets; i++ {
//			socket_energy := int(goamdsmi.GO_cpu_socket_energy_get(i))
//		}
//	}
func GO_cpu_socket_energy_get(i int) C.uint64_t {
	socketIndex := C.uint(i)
	return C.goamdsmi_cpu_socket_energy_get(socketIndex)
}
// ``GO_cpu_socket_power_get`` looks up the socket power for the given socket
// index.
//
// Input parameter: ``int``, socket index.
//
// Output: ``uint32``, the socket power on success, or ``0xFFFFFFFF`` on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
//		for i := 0; i < num_sockets; i++ {
//			socket_power := int(goamdsmi.GO_cpu_socket_power_get(i))
//		}
//	}
func GO_cpu_socket_power_get(i int) C.uint32_t {
	socketIndex := C.uint(i)
	return C.goamdsmi_cpu_socket_power_get(socketIndex)
}
// ``GO_cpu_socket_power_cap_get`` looks up the socket power cap for the given
// socket index.
//
// Input parameter: ``int``, socket index.
//
// Output: ``uint32``, the socket power cap on success, or ``0xFFFFFFFF`` on
// failure.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if goamdsmi.GO_cpu_init() {
//		num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
//		for i := 0; i < num_sockets; i++ {
//			socket_power_cap := int(goamdsmi.GO_cpu_socket_power_cap_get(i))
//		}
//	}
func GO_cpu_socket_power_cap_get(i int) C.uint32_t {
	socketIndex := C.uint(i)
	return C.goamdsmi_cpu_socket_power_cap_get(socketIndex)
}
// ``GO_cpu_prochot_status_get`` returns the PROCHOT status for the specified
// socket index. It forwards to the C shim function
// ``goamdsmi_cpu_prochot_status_get``.
//
// Input parameter: ``int``, socket index.
//
// Output: ``uint32``, returns PROCHOT status on success or ``0xFFFFFFFF`` on fail.
//
// Example:
//
//	import "github.com/ROCm/amdsmi"
//
//	if true == goamdsmi.GO_cpu_init() {
//		num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
//		for i := 0; i < num_sockets; i++ {
//			prochot_status := int(goamdsmi.GO_cpu_prochot_status_get(i))
//		}
//	}
func GO_cpu_prochot_status_get(i int) (C.uint32_t) {
	return C.goamdsmi_cpu_prochot_status_get(C.uint(i))
}
@@ -0,0 +1,115 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2024, Advanced Micro Devices, Inc.
#
# Build script for the goamdsmi shim: a shared library that wraps the AMD SMI
# C API for consumption from Go (cgo).
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.5.0)
# Banner printed at configure time.
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" CMake AMD goamdsmi_shim Library ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
# Root of the amdsmi installation; its lib/ and lib64/ directories are added to
# the linker search path further down.
set(AMDSMI_DIR "" CACHE PATH "path to amdsmi installation")
# ENABLE_DEBUG_LEVEL is forwarded to the compiler as a preprocessor definition;
# when it is not supplied the definition defaults to 0.
if(ENABLE_DEBUG_LEVEL)
add_definitions(-DENABLE_DEBUG_LEVEL=${ENABLE_DEBUG_LEVEL})
message("**** Enabling Debug Level=${ENABLE_DEBUG_LEVEL} ****")
else()
add_definitions(-DENABLE_DEBUG_LEVEL=0)
endif()
# Names derived from the shim base name: install component and build target.
# NOTE(review): GOAMDSMI_SHIM_LIB is not referenced again in this file.
set(GOAMDSMI_SHIM "goamdsmi_shim")
set(GOAMDSMI_SHIM_LIB "goamdsmi")
set(GOAMDSMI_SHIM_COMPONENT "lib${GOAMDSMI_SHIM}")
set(GOAMDSMI_SHIM_TARGET "${GOAMDSMI_SHIM}64")
# The following default version values should be updated as appropriate for
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
# NOTE(review): an earlier version of this comment said VERSION_MAJOR stays 0
# until the ABI stabilizes and that git tags ("git describe") override these
# values; this file sets MAJOR to 1 and contains no git-describe logic, so
# confirm where (if anywhere) the override happens.
set(VERSION_MAJOR 1)
set(VERSION_MINOR 0)
set(VERSION_PATCH 0)
set(VERSION_NUM_COMMIT 0)
# The shared-object version string is MAJOR.MINOR only.
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
set(${GOAMDSMI_SHIM}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${GOAMDSMI_SHIM}_VERSION_MINOR "${VERSION_MINOR}")
set(${GOAMDSMI_SHIM}_VERSION_PATCH "0")
set(${GOAMDSMI_SHIM}_VERSION_BUILD "0")
message("SOVERSION: ${SO_VERSION_STRING}")
project(${GOAMDSMI_SHIM_TARGET})
# CPack metadata; each value is only set when the caller has not provided one.
if(NOT DEFINED CPACK_PACKAGE_VENDOR)
set(CPACK_PACKAGE_VENDOR "AMD")
endif()
if(NOT DEFINED CPACK_PACKAGE_CONTACT)
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
endif()
if(NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD CGO wrapper")
endif()
if(NOT GOAMDSMI_SHIM_PACKAGE)
set(GOAMDSMI_SHIM_PACKAGE goamdsmi_shim_lib64)
endif()
set(CPACK_PACKAGE_FILE_NAME "${GOAMDSMI_SHIM_PACKAGE}-${SO_VERSION_STRING}")
## Compiler flags
# NOTE(review): only CMAKE_CXX_FLAGS is modified here, while the source added
# below is a .c file; confirm whether these flags are meant to reach it.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -fpic -fno-rtti -m64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -std=c++11 ")
# Use this instead of above for 32 bit
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
# Release builds optimize; every other build type gets debug info and -DDEBUG.
if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
endif()
# Clear the local list variables before the subdirectory is processed.
set(go_amd_smi_headers)
set(go_amd_smi_sources)
set(go_amd_smi_includes)
add_subdirectory(smiwrapper)
# Collect headers, sources and include directories for the shared library.
# NOTE(review): each header APPEND also re-appends the current list contents,
# so entries end up duplicated; presumably harmless, verify if it matters.
list(APPEND go_amd_smi_headers smiwrapper/goamdsmi.h ${go_amd_smi_headers})
list(APPEND go_amd_smi_headers smiwrapper/amdsmi_go_shim.h ${go_amd_smi_headers})
list(APPEND go_amd_smi_sources smiwrapper/amdsmi_go_shim.c)
list(APPEND go_amd_smi_includes ${CMAKE_CURRENT_SOURCE_DIR}/smiwrapper)
# NOTE(review): the include directory list is passed to add_library as if it
# were a source entry; confirm this is intended.
add_library(${GOAMDSMI_SHIM_TARGET} SHARED ${go_amd_smi_sources} ${go_amd_smi_headers} ${go_amd_smi_includes})
# Link against system libraries and amd_smi, searching AMDSMI_DIR/lib and
# AMDSMI_DIR/lib64 for the latter.
target_link_libraries(${GOAMDSMI_SHIM_TARGET} pthread rt m)
target_link_libraries(${GOAMDSMI_SHIM_TARGET} amd_smi)
target_link_libraries(${GOAMDSMI_SHIM_TARGET} -L${AMDSMI_DIR}/lib)
target_link_libraries(${GOAMDSMI_SHIM_TARGET} -L${AMDSMI_DIR}/lib64)
## Set the VERSION and SOVERSION values
set_property(TARGET ${GOAMDSMI_SHIM_TARGET} PROPERTY SOVERSION "${VERSION_MAJOR}")
set_property(TARGET ${GOAMDSMI_SHIM_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}")
## If the library is a release, strip the target library
if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
add_custom_command(TARGET ${GOAMDSMI_SHIM_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} lib${GOAMDSMI_SHIM_TARGET}.so)
endif()
set(go_amd_smi_install_headers smiwrapper/goamdsmi.h smiwrapper/amdsmi_go_shim.h)
## Add the install directives for the runtime library.
# NOTE(review): CMAKE_INSTALL_LIBDIR is not set in this file and GNUInstallDirs
# is not included here; presumably it is supplied by the caller -- confirm.
install(
TARGETS ${GOAMDSMI_SHIM_TARGET}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT ${GOAMDSMI_SHIM_COMPONENT})
# Public headers are installed under <prefix>/include.
install(
FILES ${go_amd_smi_install_headers}
DESTINATION include)
include_directories(${go_amd_smi_includes})
@@ -0,0 +1,14 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2024, Advanced Micro Devices, Inc.
# Publish the wrapper's header and source lists through the cache so they are
# visible outside this directory's variable scope.
set(go_amd_smi_headers ${CMAKE_CURRENT_SOURCE_DIR}/goamdsmi.h ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_go_shim.h
    CACHE INTERNAL "")
set(go_amd_smi_sources ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_go_shim.c CACHE INTERNAL "")
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${go_amd_smi_amdsmi_includes})
# Object library holding the compiled shim sources.
add_library(go_amd_smi_ OBJECT ${go_amd_smi_sources} ${go_amd_smi_headers})
### Shared libraries need PIC
# The target must be named literally: `${go_amd_smi_}` is an undefined variable
# that expands to nothing, which left the property unset on every target.
set_property(TARGET go_amd_smi_ PROPERTY POSITION_INDEPENDENT_CODE 1)
@@ -0,0 +1,661 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdint.h>
#include <string.h>
#include "amdsmi_go_shim.h"
#include <amd_smi/amdsmi.h>
#include <unistd.h>
/* Null pointer constant used when querying counts from the AMD SMI API.
 * NOTE(review): `nullptr` is a keyword in C23; this macro presumably assumes an
 * older language standard -- confirm the build's -std setting. */
#define nullptr ((void*)0)
/* Capacities of the fixed-size handle tables declared below. */
#define MAX_SOCKET_ACROSS_SYSTEM 4
/* NOTE(review): CPU_0 is not referenced in the visible part of this file. */
#define CPU_0 0
/* Sensor index passed to amdsmi_get_power_cap_info(). */
#define GPU_SENSOR_0 0
#define MAX_CPU_PER_SOCKET 4
#define MAX_PHYSICALCORE_ACROSS_SYSTEM 384
/* NOTE(review): MAX_LOGICALCORE_ACROSS_SYSTEM is not referenced in the visible part of this file. */
#define MAX_LOGICALCORE_ACROSS_SYSTEM 768
#define MAX_GPU_DEVICE_ACROSS_SYSTEM 24
/* When the average GPU power reading equals this value the power getter falls
 * back to the current-power reading instead. */
#define MAX_GPU_POWER_FROM_DRIVER 0xFFFF
/* Driver names (used to tag debug output) and the filesystem paths whose
 * existence is used to detect each driver.
 * NOTE(review): the AMDSMI_* library names/paths are not referenced in the
 * visible part of this file. */
#define AMDSMI_DRIVER_NAME "AMDSMI"
#define AMDSMI_LIB_FILE "/opt/rocm/lib/libamd_smi.so"
#define AMDSMI_LIB64_FILE "/opt/rocm/lib64/libamd_smi.so"
#define AMDGPU_DRIVER_NAME "AMDGPUDriver"
#define AMDGPU_INITSTATE_FILE "/sys/module/amdgpu/initstate"
#define AMDHSMP_DRIVER_NAME "AMDHSMPDriver"
#define AMDHSMP_INITSTATE_FILE "/dev/hsmp"
/* Socket counts discovered by go_shim_amdsmiapu_init(). */
static uint32_t num_apuSockets = GOAMDSMI_VALUE_0;
static uint32_t num_cpuSockets = GOAMDSMI_VALUE_0;
static uint32_t num_gpuSockets = GOAMDSMI_VALUE_0;
/* Set once enumeration has been attempted for the respective domain; later
 * init calls then return a result derived from the cached counts. */
static uint32_t cpuInitCompleted = false;
static uint32_t gpuInitCompleted = false;
static uint32_t apuInitCompleted = false;
/* Running totals of processor handles stored in the tables below. */
static uint32_t num_cpu_inAllSocket = GOAMDSMI_VALUE_0;
static uint32_t num_cpu_physicalCore_inAllSocket = GOAMDSMI_VALUE_0;
static uint32_t num_gpu_devices_inAllSocket = GOAMDSMI_VALUE_0;
/* Socket handle tables, one per enumeration mode (APU, CPU-only, GPU-only). */
static amdsmi_socket_handle amdsmi_apusocket_handle_all_socket[MAX_SOCKET_ACROSS_SYSTEM+MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
static amdsmi_socket_handle amdsmi_cpusocket_handle_all_socket[MAX_SOCKET_ACROSS_SYSTEM] = {0};
static amdsmi_socket_handle amdsmi_gpusocket_handle_all_socket[MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
/* Processor handle tables, filled contiguously across all sockets and indexed
 * by the getters further down in this file. */
static amdsmi_processor_handle amdsmi_processor_handle_all_cpu_across_socket[MAX_SOCKET_ACROSS_SYSTEM*MAX_CPU_PER_SOCKET] = {0};
static amdsmi_processor_handle amdsmi_processor_handle_all_cpu_physicalCore_across_socket[MAX_PHYSICALCORE_ACROSS_SYSTEM] = {0};
static amdsmi_processor_handle amdsmi_processor_handle_all_gpu_device_across_socket[MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
/*
 * Checks whether `file_name` exists (access() with F_OK).
 *
 * driver_name: label used only in the debug output.
 * file_name:   path to probe.
 *
 * Returns GOAMDSMI_STATUS_SUCCESS when the path exists and
 * GOAMDSMI_STATUS_FAILURE otherwise.
 */
goamdsmi_status_t is_file_present(const char* driver_name, const char* file_name)
{
    const bool found = (access(file_name, F_OK) == 0);

    if (!found)
    {
        if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, %s not found, missing \"%s\" and returns:%d\n", driver_name, file_name, GOAMDSMI_STATUS_FAILURE);}
        return GOAMDSMI_STATUS_FAILURE;
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, %s found \"%s\" and returns:%d\n", driver_name, file_name, GOAMDSMI_STATUS_SUCCESS);}
    return GOAMDSMI_STATUS_SUCCESS;
}
/*
 * Detects the GPU driver by testing for AMDGPU_INITSTATE_FILE.
 * Returns the status reported by is_file_present().
 */
goamdsmi_status_t check_amdgpu_driver()
{
    const goamdsmi_status_t driver_status = is_file_present(AMDGPU_DRIVER_NAME, AMDGPU_INITSTATE_FILE);
    return driver_status;
}
/*
 * Detects the HSMP (CPU) driver by testing for AMDHSMP_INITSTATE_FILE.
 * Returns the status reported by is_file_present().
 */
goamdsmi_status_t check_hsmp_driver()
{
    const goamdsmi_status_t driver_status = is_file_present(AMDHSMP_DRIVER_NAME, AMDHSMP_INITSTATE_FILE);
    return driver_status;
}
goamdsmi_status_t go_shim_amdsmiapu_init(goamdsmi_Init_t goamdsmi_Init)
{
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) && (true == cpuInitCompleted))
{
if((GOAMDSMI_VALUE_0 == num_cpuSockets)||(GOAMDSMI_VALUE_0 == num_cpu_inAllSocket)||(GOAMDSMI_VALUE_0 == num_cpu_physicalCore_inAllSocket))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, Returns previous enumurated AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
return GOAMDSMI_STATUS_FAILURE;
}
else
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Returns previous enumurated AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
return GOAMDSMI_STATUS_SUCCESS;
}
}
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) && (true == gpuInitCompleted))
{
if((GOAMDSMI_VALUE_0 == num_gpuSockets)||(GOAMDSMI_VALUE_0 == num_gpu_devices_inAllSocket))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, Returns previous enumurated AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_gpuSockets, num_gpu_devices_inAllSocket);}
return GOAMDSMI_STATUS_FAILURE;
}
else
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Returns previous enumurated AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_gpuSockets, num_gpu_devices_inAllSocket);}
return GOAMDSMI_STATUS_SUCCESS;
}
}
if ((GOAMDSMI_STATUS_SUCCESS == check_amdgpu_driver()) && (GOAMDSMI_STATUS_SUCCESS == check_hsmp_driver()))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified APU machine and going to enumurate APU\n");}
if( (AMDSMI_STATUS_SUCCESS == amdsmi_init(AMDSMI_INIT_AMD_APUS)) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_socket_handles(&num_apuSockets, nullptr)) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_socket_handles(&num_apuSockets, &amdsmi_apusocket_handle_all_socket[0])) &&
(GOAMDSMI_VALUE_0 != num_apuSockets))
{
cpuInitCompleted = true;
gpuInitCompleted = true;
apuInitCompleted = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Identified APU machine ApuNumSockets=%d\n",num_apuSockets);}
for(uint32_t socket_counter = 0; socket_counter < num_apuSockets; socket_counter++)
{
uint32_t num_cpu = GOAMDSMI_VALUE_0;
uint32_t num_cpu_physicalCores = GOAMDSMI_VALUE_0;
uint32_t num_gpu_devices = GOAMDSMI_VALUE_0;
//CPU
processor_type_t cpu_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
processor_type_t cpu_core_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_processor_type, nullptr, &num_cpu)) &&
(GOAMDSMI_VALUE_0 != num_cpu) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_processor_type, &amdsmi_processor_handle_all_cpu_across_socket[num_cpu_inAllSocket], &num_cpu)))
{
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_core_processor_type, nullptr, &num_cpu_physicalCores)) &&
(GOAMDSMI_VALUE_0 != num_cpu_physicalCores) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_core_processor_type, &amdsmi_processor_handle_all_cpu_physicalCore_across_socket[num_cpu_physicalCore_inAllSocket], &num_cpu_physicalCores)))
{
num_cpu_physicalCore_inAllSocket = num_cpu_physicalCore_inAllSocket+num_cpu_physicalCores;
}
num_cpu_inAllSocket = num_cpu_inAllSocket+num_cpu;
num_cpuSockets = num_cpuSockets+1;
}
//GPU
processor_type_t gpu_device_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU;
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], gpu_device_processor_type, nullptr, &num_gpu_devices)) &&
(GOAMDSMI_VALUE_0 != num_gpu_devices) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], gpu_device_processor_type, &amdsmi_processor_handle_all_gpu_device_across_socket[num_gpu_devices_inAllSocket], &num_gpu_devices)))
{
num_gpu_devices_inAllSocket = num_gpu_devices_inAllSocket+num_gpu_devices;
num_gpuSockets = num_gpuSockets+1;
}
}
}
}
else if(GOAMDSMI_CPU_INIT == goamdsmi_Init)
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Going to enumurate only CPU\n");}
cpuInitCompleted = true;
if (GOAMDSMI_STATUS_SUCCESS == check_hsmp_driver())
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified CPU Driver and going to enumurate only CPU\n");}
if( (AMDSMI_STATUS_SUCCESS != amdsmi_init(AMDSMI_INIT_AMD_CPUS)) ||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_cpuSockets, nullptr)) ||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_cpuSockets, &amdsmi_cpusocket_handle_all_socket[0])) ||
(GOAMDSMI_VALUE_0 == num_cpuSockets))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, AMDSMICPUInit:0, CpuNumSockets=0\n");}
return GOAMDSMI_STATUS_FAILURE;
}
}
else
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_3)) {printf("AMDSMI, Status, Missing CPU Driver and not going to enumurate only CPU\n");}
}
//CPU
for(uint32_t cpu_socket_counter = 0; cpu_socket_counter < num_cpuSockets; cpu_socket_counter++)
{
uint32_t num_cpu = GOAMDSMI_VALUE_0;
uint32_t num_cpu_physicalCores = GOAMDSMI_VALUE_0;
processor_type_t cpu_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
processor_type_t cpu_core_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_processor_type, nullptr, &num_cpu)) &&
(GOAMDSMI_VALUE_0 != num_cpu) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_processor_type, &amdsmi_processor_handle_all_cpu_across_socket[num_cpu_inAllSocket], &num_cpu)))
{
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_core_processor_type, nullptr, &num_cpu_physicalCores)) &&
(GOAMDSMI_VALUE_0 != num_cpu_physicalCores) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_core_processor_type, &amdsmi_processor_handle_all_cpu_physicalCore_across_socket[num_cpu_physicalCore_inAllSocket], &num_cpu_physicalCores)))
{
num_cpu_physicalCore_inAllSocket = num_cpu_physicalCore_inAllSocket+num_cpu_physicalCores;
}
num_cpu_inAllSocket = num_cpu_inAllSocket+num_cpu;
}
}
}
else if(GOAMDSMI_GPU_INIT == goamdsmi_Init)
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Going to enumurate only GPU\n");}
gpuInitCompleted = true;
if (GOAMDSMI_STATUS_SUCCESS == check_amdgpu_driver())
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified GPU Driver and going to enumurate only GPU\n");}
if( (AMDSMI_STATUS_SUCCESS != amdsmi_init(AMDSMI_INIT_AMD_GPUS)) ||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_gpuSockets, nullptr)) ||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_gpuSockets, &amdsmi_gpusocket_handle_all_socket[0])) ||
(GOAMDSMI_VALUE_0 == num_gpuSockets))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, AMDSMIGPUInit:0, GpuNumSockets=0\n");}
return GOAMDSMI_STATUS_FAILURE;
}
}
else
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_3)) {printf("AMDSMI, Status, Missing GPU Driver and not going to enumurate only GPU\n");}
}
//GPU
for(uint32_t gpu_socket_counter = 0; gpu_socket_counter < num_gpuSockets; gpu_socket_counter++)
{
uint32_t num_gpu_devices = GOAMDSMI_VALUE_0;
processor_type_t gpu_device_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU;
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_gpusocket_handle_all_socket[gpu_socket_counter], gpu_device_processor_type, nullptr, &num_gpu_devices)) &&
(GOAMDSMI_VALUE_0 != num_gpu_devices) &&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_gpusocket_handle_all_socket[gpu_socket_counter], gpu_device_processor_type, &amdsmi_processor_handle_all_gpu_device_across_socket[num_gpu_devices_inAllSocket], &num_gpu_devices)))
{
num_gpu_devices_inAllSocket = num_gpu_devices_inAllSocket+num_gpu_devices;
}
}
}
//CPU
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) && ((GOAMDSMI_VALUE_0 == num_cpuSockets)||(GOAMDSMI_VALUE_0 == num_cpu_inAllSocket)||(GOAMDSMI_VALUE_0 == num_cpu_physicalCore_inAllSocket)))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, CPU Enumuration Failed AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", GOAMDSMI_STATUS_FAILURE, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
return GOAMDSMI_STATUS_FAILURE;
}
//GPU
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) && ((GOAMDSMI_VALUE_0 == num_gpuSockets)||(GOAMDSMI_VALUE_0 == num_gpu_devices_inAllSocket)))
{
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, GPU Enumuration Failed AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_gpuSockets, num_gpu_devices_inAllSocket);}
return GOAMDSMI_STATUS_FAILURE;
}
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1))
{
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) || apuInitCompleted) printf("AMDSMI, Status, AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", GOAMDSMI_STATUS_SUCCESS, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) || apuInitCompleted) printf("AMDSMI, Status, AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_gpuSockets, num_gpu_devices_inAllSocket);
}
return GOAMDSMI_STATUS_SUCCESS;
}
////////////////////////////////////////////////------------CPU------------////////////////////////////////////////////////
/*
 * Runs the CPU enumeration and reports whether it produced usable data.
 *
 * Returns true when go_shim_amdsmiapu_init(GOAMDSMI_CPU_INIT) succeeds and
 * both the CPU count and the physical core count are non-zero.
 */
bool goamdsmi_cpu_init()
{
    const bool enumerated = (GOAMDSMI_STATUS_SUCCESS == go_shim_amdsmiapu_init(GOAMDSMI_CPU_INIT));
    const bool cpu_init_success = enumerated && (0 != num_cpu_inAllSocket) && (0 != num_cpu_physicalCore_inAllSocket);

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, InitAMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", cpu_init_success?"Success":"Failed", cpu_init_success?1:0, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
    return cpu_init_success;
}
/*
 * Queries the number of threads per core via amdsmi_get_threads_per_core().
 *
 * Returns the value left in the output variable by the library; it starts at
 * GOAMDSMI_VALUE_0, so that is what is returned if the call does not fill it.
 */
uint32_t goamdsmi_cpu_threads_per_core_get()
{
    uint32_t threads_per_core = GOAMDSMI_VALUE_0;
    const bool query_ok = (AMDSMI_STATUS_SUCCESS == amdsmi_get_threads_per_core(&threads_per_core));

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, CpuThreadsPerCore:%lu\n", query_ok?"Success":"Failed", (unsigned long)(threads_per_core));}
    return threads_per_core;
}
/*
 * Computes the total number of CPU threads as
 * (enumerated physical cores) * (threads per core).
 *
 * Returns GOAMDSMI_VALUE_0 when the threads-per-core query yields 0.
 */
uint32_t goamdsmi_cpu_number_of_threads_get()
{
    const uint32_t threads_per_core = goamdsmi_cpu_threads_per_core_get();
    const bool have_threads_per_core = (0 != threads_per_core);
    uint32_t total_threads = GOAMDSMI_VALUE_0;

    if (have_threads_per_core)
    {
        total_threads = num_cpu_physicalCore_inAllSocket*threads_per_core;
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, CpuNumThreads:%lu\n", have_threads_per_core?"Success":"Failed", (unsigned long)(total_threads));}
    return total_threads;
}
/* Returns the number of CPU sockets recorded during enumeration. */
uint32_t goamdsmi_cpu_number_of_sockets_get()
{
    const uint32_t socket_count = num_cpuSockets;

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1))
    {
        printf("AMDSMI, Success, CpuNumSockets:%lu\n", (unsigned long)(socket_count));
    }
    return socket_count;
}
uint64_t goamdsmi_cpu_core_energy_get(uint32_t thread_index)
{
bool readSuccess = false;
uint64_t core_energy_temp = GOAMDSMI_UINT64_MAX;
uint32_t physicalCore_index = thread_index%num_cpu_physicalCore_inAllSocket;
if (AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_core_energy(amdsmi_processor_handle_all_cpu_physicalCore_across_socket[physicalCore_index], &core_energy_temp)) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Thread:%d PC:%d, CpuCoreEnergy:%llu, CpuCoreEnergyJoules:%.6f, CpuCoreEnergyKJoules:%.9f\n", readSuccess?"Success":"Failed", thread_index, physicalCore_index, (unsigned long long)(core_energy_temp), ((double)(core_energy_temp))/1000000, ((double)(core_energy_temp))/1000000000);}
return core_energy_temp;
}
uint64_t goamdsmi_cpu_socket_energy_get(uint32_t socket_index)
{
bool readSuccess = false;
uint64_t socket_energy_temp = GOAMDSMI_UINT64_MAX;
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_energy(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_energy_temp))) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketEnergy:%llu, CpuSocketEnergyJoules:%.6f, CpuSocketEnergyKJoules:%.9f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long long)(socket_energy_temp), ((double)(socket_energy_temp))/1000000, ((double)(socket_energy_temp))/1000000000);}
return socket_energy_temp;
}
uint32_t goamdsmi_cpu_prochot_status_get(uint32_t socket_index)
{
bool readSuccess = false;
uint32_t prochot_temp = GOAMDSMI_UINT32_MAX;
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_prochot_status(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &prochot_temp))) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuProchotStatus:%lu\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(prochot_temp));}
return prochot_temp;
}
uint32_t goamdsmi_cpu_socket_power_get(uint32_t socket_index)
{
bool readSuccess = false;
uint32_t socket_power_temp = GOAMDSMI_UINT32_MAX;
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_power(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_power_temp))) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketPower:%lu, CpuSocketPowerWatt:%.3f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(socket_power_temp), ((double)(socket_power_temp))/1000);}
return socket_power_temp;
}
uint32_t goamdsmi_cpu_socket_power_cap_get(uint32_t socket_index)
{
bool readSuccess = false;
uint32_t socket_power_cap_temp = GOAMDSMI_UINT32_MAX;
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_power_cap(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_power_cap_temp))) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketPowerCap:%lu, CpuSocketPowerCapWatt:%.3f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(socket_power_cap_temp), ((double)(socket_power_cap_temp))/1000);}
return socket_power_cap_temp;
}
uint32_t goamdsmi_cpu_core_boostlimit_get(uint32_t thread_index)
{
bool readSuccess = false;
uint32_t core_boostlimit_temp = GOAMDSMI_UINT32_MAX;
uint32_t physicalCore_index = thread_index%num_cpu_physicalCore_inAllSocket;
if (AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_core_boostlimit(amdsmi_processor_handle_all_cpu_physicalCore_across_socket[physicalCore_index], &core_boostlimit_temp)) readSuccess = true;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Thread:%d PC:%d, CpuCoreBoostLimit:%lu\n", readSuccess?"Success":"Failed", thread_index, physicalCore_index, (unsigned long)(core_boostlimit_temp));}
return core_boostlimit_temp;
}
////////////////////////////////////////////////------------GPU------------////////////////////////////////////////////////
/*
 * Runs the GPU enumeration and reports whether it produced usable data.
 *
 * Returns true when go_shim_amdsmiapu_init(GOAMDSMI_GPU_INIT) succeeds and
 * at least one GPU device was enumerated.
 */
bool goamdsmi_gpu_init()
{
    const bool enumerated = (GOAMDSMI_STATUS_SUCCESS == go_shim_amdsmiapu_init(GOAMDSMI_GPU_INIT));
    const bool gpu_init_success = enumerated && (0 != num_gpu_devices_inAllSocket);

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, InitAMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", gpu_init_success?"Success":"Failed", gpu_init_success?1:0, num_gpuSockets, num_gpu_devices_inAllSocket);}
    return gpu_init_success;
}
/*
 * Placeholder for GPU shutdown: performs no work and always returns false.
 */
bool goamdsmi_gpu_shutdown()
{
    const bool shutdown_performed = false;
    return shutdown_performed;
}
/* Returns the number of GPU devices recorded during enumeration. */
uint32_t goamdsmi_gpu_num_monitor_devices()
{
    const uint32_t device_count = num_gpu_devices_inAllSocket;

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1))
    {
        printf("AMDSMI, Success, GpuNumMonitorDevices:%lu\n", (unsigned long)(device_count));
    }
    return device_count;
}
/*
 * Returns the GPU device name. The name lookup is not implemented in this
 * shim: the result is always a copy of GOAMDSMI_STRING_NA and `dv_ind` is
 * ignored.
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(), or NULL when the allocation fails (previously the unchecked
 * allocation result was dereferenced).
 */
char* goamdsmi_gpu_dev_name_get(uint32_t dv_ind)
{
    const uint32_t len = 256;
    char* dev_name = (char*)malloc(sizeof(char)*len);

    (void)dv_ind;
    if (NULL == dev_name)
    {
        return NULL;
    }
    /* Bounded copy: always NUL-terminates within `len` bytes. */
    snprintf(dev_name, len, "%s", GOAMDSMI_STRING_NA);
    return dev_name;
}
/*
 * Reads the device id of the GPU at `dv_ind` via amdsmi_get_gpu_id().
 *
 * Returns the id on success; if `dv_ind` is out of range the library is not
 * called and GOAMDSMI_UINT16_MAX is returned.
 */
uint16_t goamdsmi_gpu_dev_id_get(uint32_t dv_ind)
{
    uint16_t device_id = GOAMDSMI_UINT16_MAX;
    bool query_ok = false;

    if (dv_ind < num_gpu_devices_inAllSocket)
    {
        query_ok = (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_id(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &device_id));
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuDevId:%d\n", query_ok?"Success":"Failed", dv_ind, device_id);}
    return device_id;
}
/*
 * PCI id lookup is not implemented in this shim: `dv_ind` is ignored and
 * GOAMDSMI_UINT64_MAX is always returned.
 */
uint64_t goamdsmi_gpu_dev_pci_id_get(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT64_MAX;
}
/*
 * Returns the GPU vendor name. The lookup is not implemented in this shim:
 * the result is always a copy of GOAMDSMI_STRING_NA and `dv_ind` is ignored.
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(), or NULL when the allocation fails (previously the unchecked
 * allocation result was dereferenced).
 */
char* goamdsmi_gpu_dev_vendor_name_get(uint32_t dv_ind)
{
    const uint32_t len = 256;
    char* gpu_vendor_name = (char*)malloc(sizeof(char)*len);

    (void)dv_ind;
    if (NULL == gpu_vendor_name)
    {
        return NULL;
    }
    /* Bounded copy: always NUL-terminates within `len` bytes. */
    snprintf(gpu_vendor_name, len, "%s", GOAMDSMI_STRING_NA);
    return gpu_vendor_name;
}
/*
 * Returns the GPU VBIOS version. The lookup is not implemented in this shim:
 * the result is always a copy of GOAMDSMI_STRING_NA and `dv_ind` is ignored.
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(), or NULL when the allocation fails (previously the unchecked
 * allocation result was dereferenced).
 */
char* goamdsmi_gpu_dev_vbios_version_get(uint32_t dv_ind)
{
    const uint32_t len = 256;
    char* vbios_version = (char*)malloc(sizeof(char)*len);

    (void)dv_ind;
    if (NULL == vbios_version)
    {
        return NULL;
    }
    /* Bounded copy: always NUL-terminates within `len` bytes. */
    snprintf(vbios_version, len, "%s", GOAMDSMI_STRING_NA);
    return vbios_version;
}
/*
 * Reads the power cap of the GPU at `dv_ind` (sensor GPU_SENSOR_0) via
 * amdsmi_get_power_cap_info().
 *
 * Returns the `power_cap` field on success, or GOAMDSMI_UINT64_MAX when
 * `dv_ind` is out of range or the library call fails.
 */
uint64_t goamdsmi_gpu_dev_power_cap_get(uint32_t dv_ind)
{
    uint64_t power_cap = GOAMDSMI_UINT64_MAX;
    bool query_ok = false;
    amdsmi_power_cap_info_t cap_info = {0};

    if (dv_ind < num_gpu_devices_inAllSocket)
    {
        if (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_cap_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], GPU_SENSOR_0, &cap_info))
        {
            query_ok = true;
            power_cap = cap_info.power_cap;
        }
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuPowerCap:%llu, GpuPowerCapInWatt:%.6f\n", query_ok?"Success":"Failed", dv_ind, (unsigned long long)(power_cap), ((double)(power_cap))/1000000);}
    return power_cap;
}
/*
 * Reads the power of the GPU at `dv_ind`.
 *
 * Two sources are tried in order: amdsmi_get_power_info() and, if that is
 * unavailable, amdsmi_get_gpu_metrics_info(). For whichever source answers,
 * the average socket power is used unless it equals
 * MAX_GPU_POWER_FROM_DRIVER, in which case the current socket power is used
 * instead. The chosen reading is multiplied by 1000000 to stay compatible
 * with the values the old ROCm SMI based shim returned.
 *
 * Returns the scaled power, or GOAMDSMI_UINT64_MAX when `dv_ind` is out of
 * range or both sources fail.
 */
uint64_t goamdsmi_gpu_dev_power_get(uint32_t dv_ind)
{
    uint64_t scaled_power = GOAMDSMI_UINT64_MAX;
    uint64_t raw_power = GOAMDSMI_UINT64_MAX;

    /* First source: dedicated power info query. */
    amdsmi_power_info_t power_info = {0};
    if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &power_info)))
    {
        raw_power = power_info.average_socket_power;
        if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerAverage:%llu, GpuPowerAverageinWatt:%.6f\n", dv_ind, (unsigned long long)(raw_power), ((double)(raw_power))/1000000);}
        if(MAX_GPU_POWER_FROM_DRIVER == raw_power)
        {
            raw_power = power_info.current_socket_power;
            if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerCurrent:%llu, GpuPowerCurrentinWatt:%.6f\n", dv_ind, (unsigned long long)(raw_power), ((double)(raw_power))/1000000);}
        }
        scaled_power = raw_power*1000000;//to maintain backward compatibity with old ROCM SMI
        if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success for Gpu:%d, GpuPower:%llu, GpuPowerinWatt:%.6f\n", dv_ind, (unsigned long long)(scaled_power), ((double)(scaled_power))/1000000);}
        return scaled_power;
    }

    /* Second source: the GPU metrics table. */
    amdsmi_gpu_metrics_t gpu_metrics = {0};
    if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_metrics_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &gpu_metrics)))
    {
        raw_power = gpu_metrics.average_socket_power;
        if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerAverageFromMetrics:%llu, GpuPowerAverageFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)raw_power, ((double)(raw_power))/1000000);}
        if(MAX_GPU_POWER_FROM_DRIVER == raw_power)
        {
            raw_power = gpu_metrics.current_socket_power;
            if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerCurrentFromMetrics:%llu, GpuPowerCurrentFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)raw_power, ((double)(raw_power))/1000000);}
        }
        scaled_power = raw_power*1000000;//to maintain backward compatibity with old ROCM SMI
        if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerFromMetrics:%llu, GpuPowerFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)(scaled_power), ((double)(scaled_power))/1000000);}
        return scaled_power;
    }

    /* Neither source answered: report the sentinel. */
    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed for Gpu:%d, GpuPower:%llu, GpuPowerinWatt:%.6f\n", dv_ind, (unsigned long long)(scaled_power), ((double)(scaled_power))/1000000);}
    return scaled_power;
}
/*
 * Reads a temperature metric of the GPU at `dv_ind` via
 * amdsmi_get_temp_metric().
 *
 * dv_ind: GPU device index.
 * sensor: sensor selector, passed through to the library.
 * metric: metric selector, passed through to the library.
 *
 * Returns the reading multiplied by 1000 (to stay compatible with the values
 * the old ROCm SMI based shim returned), or GOAMDSMI_UINT64_MAX when `dv_ind`
 * is out of range or the library call fails.
 *
 * The debug line is emitted for both outcomes, like the other getters;
 * previously it sat inside the success branch so failures were never logged.
 */
uint64_t goamdsmi_gpu_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor, uint32_t metric)
{
    bool readSuccess = false;
    uint64_t gpu_temperature = GOAMDSMI_UINT64_MAX;
    uint64_t gpu_temperature_temp = GOAMDSMI_UINT64_MAX;

    if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_temp_metric(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], sensor, metric, &gpu_temperature_temp)))
    {
        readSuccess = true;
        gpu_temperature = gpu_temperature_temp;
        gpu_temperature = (gpu_temperature)*1000;//to maintain backward compatibity with old ROCM SMI
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d Sensor:%d Metric:%d, GpuTemperature:%llu, GpuTemperatureInDegree:%.3f\n", readSuccess?"Success":"Failed", dv_ind, sensor, metric, (unsigned long long)(gpu_temperature), ((double)(gpu_temperature))/1000);}
    return gpu_temperature;
}
/*
 * Overdrive level lookup is not implemented in this shim: `dv_ind` is ignored
 * and GOAMDSMI_UINT32_MAX is always returned.
 */
uint32_t goamdsmi_gpu_dev_overdrive_level_get(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT32_MAX;
}
/*
 * Memory overdrive level lookup is not implemented in this shim: `dv_ind` is
 * ignored and GOAMDSMI_UINT32_MAX is always returned.
 */
uint32_t goamdsmi_gpu_dev_mem_overdrive_level_get(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT32_MAX;
}
/*
 * Performance level lookup is not implemented in this shim: `dv_ind` is
 * ignored and GOAMDSMI_UINT32_MAX is always returned.
 */
uint32_t goamdsmi_gpu_dev_perf_level_get(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT32_MAX;
}
/*
 * Reads the current system clock frequency of the GPU at `dv_ind` via
 * amdsmi_get_clk_freq(AMDSMI_CLK_TYPE_SYS).
 *
 * Returns frequency[current] from the reported table, or GOAMDSMI_UINT64_MAX
 * when `dv_ind` is out of range, the library call fails, or the reported
 * `current` index lies outside the frequency table (checked so that a bad
 * index cannot cause an out-of-bounds read).
 */
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(uint32_t dv_ind)
{
    bool readSuccess = false;
    uint64_t gpu_sclk_freq = GOAMDSMI_UINT64_MAX;
    amdsmi_frequencies_t freq = {0};

    if((dv_ind < num_gpu_devices_inAllSocket) &&
       (AMDSMI_STATUS_SUCCESS == amdsmi_get_clk_freq(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_CLK_TYPE_SYS, &freq)) &&
       (freq.current < (sizeof(freq.frequency)/sizeof(freq.frequency[0]))))
    {
        readSuccess = true;
        gpu_sclk_freq = freq.frequency[freq.current];
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuSclkFreq:%llu, GpuSclkFreqMhz:%.6f\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_sclk_freq), ((double)(gpu_sclk_freq))/1000000);}
    return gpu_sclk_freq;
}
/**
 * @brief Return the currently selected memory-clock (MCLK) frequency of a GPU.
 *
 * Calls amdsmi_get_clk_freq() with AMDSMI_CLK_TYPE_MEM and returns the entry of
 * the reported frequency table selected by its `current` index.
 *
 * @param[in] dv_ind index of the GPU in the global processor-handle table
 *
 * @return frequency[current] on success; GOAMDSMI_UINT64_MAX when @p dv_ind is
 *         out of range, the library call does not succeed, or the reported
 *         `current` index lies outside the frequency table.
 */
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(uint32_t dv_ind)
{
    bool readSuccess = false;
    uint64_t gpu_memclk_freq = GOAMDSMI_UINT64_MAX;
    amdsmi_frequencies_t freq = {0};

    if ((dv_ind < num_gpu_devices_inAllSocket) &&
        (AMDSMI_STATUS_SUCCESS == amdsmi_get_clk_freq(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_CLK_TYPE_MEM, &freq)) &&
        /* `current` comes from the library/driver: never subscript past the table. */
        (freq.current < (sizeof(freq.frequency) / sizeof(freq.frequency[0]))))
    {
        readSuccess = true;
        gpu_memclk_freq = freq.frequency[freq.current];
    }

    if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMclkFreq:%llu, GpuMclkFreqMhz:%.6f\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memclk_freq), ((double)(gpu_memclk_freq))/1000000);}
    return gpu_memclk_freq;
}
/**
 * @brief Placeholder getter for the minimum supported SCLK frequency.
 *
 * No library query is made here; the "no value" sentinel is returned for every
 * device index.
 *
 * @param[in] dv_ind device index (not consulted)
 *
 * @return always GOAMDSMI_UINT64_MAX
 */
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_sclk(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT64_MAX;
}
/**
 * @brief Placeholder getter for the minimum supported MCLK frequency.
 *
 * No library query is made here; the "no value" sentinel is returned for every
 * device index.
 *
 * @param[in] dv_ind device index (not consulted)
 *
 * @return always GOAMDSMI_UINT64_MAX
 */
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_mclk(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT64_MAX;
}
/**
 * @brief Placeholder getter for the maximum supported SCLK frequency.
 *
 * No library query is made here; the "no value" sentinel is returned for every
 * device index.
 *
 * @param[in] dv_ind device index (not consulted)
 *
 * @return always GOAMDSMI_UINT64_MAX
 */
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_sclk(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT64_MAX;
}
/**
 * @brief Placeholder getter for the maximum supported MCLK frequency.
 *
 * No library query is made here; the "no value" sentinel is returned for every
 * device index.
 *
 * @param[in] dv_ind device index (not consulted)
 *
 * @return always GOAMDSMI_UINT64_MAX
 */
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_mclk(uint32_t dv_ind)
{
    (void)dv_ind;
    return GOAMDSMI_UINT64_MAX;
}
uint32_t goamdsmi_gpu_dev_gpu_busy_percent_get(uint32_t dv_ind)
{
bool readSuccess = false;
uint32_t gpu_busy_percent = GOAMDSMI_UINT32_MAX;
amdsmi_engine_usage_t amdsmi_engine_usage_temp;
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_activity(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &amdsmi_engine_usage_temp))
{
readSuccess = true;
gpu_busy_percent = amdsmi_engine_usage_temp.gfx_activity;
}
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuBusyPerc:%lu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long)(gpu_busy_percent));}
return gpu_busy_percent;
}
uint64_t goamdsmi_gpu_dev_gpu_memory_busy_percent_get(uint32_t dv_ind)
{
bool readSuccess = false;
uint64_t gpu_memory_busy_percent = GOAMDSMI_UINT64_MAX;
uint64_t gpu_memory_usage_temp = GOAMDSMI_UINT64_MAX;
uint64_t gpu_memory_total_temp = GOAMDSMI_UINT64_MAX;
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_usage(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_usage_temp))&&
(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_total(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_total_temp)))
{
readSuccess = true;
gpu_memory_busy_percent = (uint64_t)(gpu_memory_usage_temp*100)/gpu_memory_total_temp;
}
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryBusyPerc:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_busy_percent));}
return gpu_memory_busy_percent;
}
uint64_t goamdsmi_gpu_dev_gpu_memory_usage_get(uint32_t dv_ind)
{
bool readSuccess = false;
uint64_t gpu_memory_usage = GOAMDSMI_UINT64_MAX;
uint64_t gpu_memory_usage_temp = GOAMDSMI_UINT64_MAX;
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_usage(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_usage_temp))
{
readSuccess = true;
gpu_memory_usage = (uint64_t)gpu_memory_usage_temp;
}
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryUsage:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_usage));}
return gpu_memory_usage;
}
uint64_t goamdsmi_gpu_dev_gpu_memory_total_get(uint32_t dv_ind)
{
bool readSuccess = false;
uint64_t gpu_memory_total = GOAMDSMI_UINT64_MAX;
uint64_t gpu_memory_total_temp = GOAMDSMI_UINT64_MAX;
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_total(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_total_temp))
{
readSuccess = true;
gpu_memory_total = (uint64_t)gpu_memory_total_temp;
}
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryTotal:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_total));}
return gpu_memory_total;
}
@@ -0,0 +1,559 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "goamdsmi.h"
////////////////////////////////////////////////------------CPU------------////////////////////////////////////////////////
/**
* @brief Go language stub to initialize the AMDSMI library
*
* @retval ::bool value of true upon success
* @retval false is returned upon failure.
*
*/
bool goamdsmi_cpu_init();
/**
* @brief Go language stub to get the core energy for a given core
*
* @details Given a core index @p num, this function will call the
* esmi_core_energy_get() function to update the @p penergy in micro Joules.
* This value is then passed as a uint64_t val to the Go routine that called it.
*
* @param[in] num is the core index
*
* @retval ::uint64_t value of the penergy in micro Joules.
* @retval zero is returned upon failure.
*
*/
uint64_t goamdsmi_cpu_core_energy_get(uint32_t num);
/**
* @brief Go language stub to get the socket energy for a given socket
*
* @details Given a socket index @p socket_idx, this function will call the
* esmi_socket_energy_get() function to get the socket energy counter of an
* online cpu in that socket. This value is then passed as a uint64_t val to
* the Go routine that called it.
*
* @param[in] socket_idx is the socket index
*
* @retval ::uint64_t value of the socket energy counter
* @retval zero is returned upon failure.
*
*/
uint64_t goamdsmi_cpu_socket_energy_get(uint32_t socket_idx);
/**
* @brief Go language stub to get normalized status of
* the processor's PROCHOT status.
* 1 - PROCHOT active, 0 - PROCHOT inactive
*
* @details Given a socket index @p socket_idx and this function will get
* PROCHOT at @p prochot.
*
* @param[in] socket_idx a socket index
*
* @retval ::uint32_t value of the prochot status
* @retval -1 is returned upon failure or if status is inactive.
*
*/
uint32_t goamdsmi_cpu_prochot_status_get(uint32_t socket_idx);
/**
* @brief Go language stub to get the instantaneous power
* consumption of the provided socket.
*
* @details Given a socket index @p sock_ind this function will
* get the current power consumption (in milliwatts).
*
* @param[in] sock_ind a socket index
*
* @retval ::uint32_t value of the socket power
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_cpu_socket_power_get(uint32_t sock_ind);
/**
* @brief Go language stub to get the current power cap value
* for a given socket.
*
* @details This function will return the valid power cap @p pcap for a given
* socket @p sock_ind, this value will be used by the system to limit
* the power usage (in milliwatts).
*
* @param[in] sock_ind a socket index
*
* @retval ::uint32_t value of the socket power cap
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_cpu_socket_power_cap_get(uint32_t sock_ind);
/**
* @brief Go language stub to get the boostlimit value for a given core
*
* @details This function will return the core's current boost limit
* @p boostlimit for a particular @p socket
*
* @param[in] socket a socket index
*
* @retval ::uint32_t value of the boostlimit
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_cpu_core_boostlimit_get(uint32_t socket);
/**
 * @brief Go stub to get the number of threads per core in the system
 *
 * @retval ::Number of threads per core
 * @retval Zero is returned upon failure.
 */
uint32_t goamdsmi_cpu_threads_per_core_get();
/**
 * @brief Go stub to get the number of threads available in the system
 *
 * @retval ::Number of threads
 * @retval Zero is returned upon failure.
 */
uint32_t goamdsmi_cpu_number_of_threads_get();
/**
 * @brief Go stub to get the total number of processor sockets
 * available in the system
 *
 * @retval ::uint32_t value of the socket number
 * @retval Zero is returned upon failure.
 */
uint32_t goamdsmi_cpu_number_of_sockets_get();
////////////////////////////////////////////////------------GPU------------////////////////////////////////////////////////
/**
* @brief Go language stub to initialize the ROCm-SMI library
*
* @retval ::bool value of true upon success
* @retval false is returned upon failure.
*
*/
bool goamdsmi_gpu_init();
/**
* @brief Go language stub to shut down the ROCm-SMI library
* and do necessary clean up
*
* @retval ::bool value of true upon success
* @retval false is returned upon failure.
*
*/
bool goamdsmi_gpu_shutdown();
/**
* @brief Go language stub to get the number of GPU devices
*
* @details This function will call the rsmi_num_monitor_devices()
* function to return the number of GPU devices to be monitored.
* This value is then passed as a uint val to the Go routine that
* called it.
*
* @retval ::uint32_t value of num GPUs
* @retval zero is returned upon failure.
*
*/
uint32_t goamdsmi_gpu_num_monitor_devices();
/**
* @brief Go language stub to get the gpu device name string
*
* @details This function will call the rsmi_dev_name_get()
* function to write the gpu device name string (up to len characters)
* for device dv_ind and return a char pointer. This value is then
* passed as char * to the Go routine that called it. The caller of this
* function must free the allocated buffer for the device name.
*
* @param[in] ::uint32_t device index
*
* @retval ::char* GPU device name
* @retval NA is returned upon failure.
*
*/
char* goamdsmi_gpu_dev_name_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU device id
*
* @details This function will call the rsmi_dev_id_get()
* function to return the GPU device id. This value is then
* passed as a uint16_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint16_t GPU device id
* @retval -1 is returned upon failure.
*
*/
uint16_t goamdsmi_gpu_dev_id_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU unique pci id
*
* @details This function will call the rsmi_dev_pci_id_get()
* function to return the unique PCI device identifier
* associated for a device. This value is then passed as
* a uint64_t val to the Go routine that called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t value of pci id
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_pci_id_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the VBIOS identifier string
*
* @details This function will call the rsmi_dev_vbios_ver_get()
* function to write the VBIOS char array (up to len characters)
* for device dv_ind and return a char pointer. This value is then
* passed as char pointer to the Go routine that called it. The caller
* of this function must free the allocated buffer for the vbios
* identifier
*
* @param[in] ::uint32_t device index
* @param[in] ::char* vbios buffer of length
*
* @retval ::char* VBIOS identifier
* @retval NA is returned upon failure
*
*/
char* goamdsmi_gpu_dev_vbios_version_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the vendor
*
* @details This function will call the rsmi_dev_vendor_name_get()
* function to write the name of the vendor char array (up to len
* characters) for a device dv_ind and return a char pointer. This
* value is then passed as a char pointer to the Go routine that
* called it. The caller of this function must free the allocated
* buffer for the vendor name
*
* @param[in] ::uint32_t device index
*
* @retval ::char* vendor name
* @retval NA is returned upon failure.
*
*/
char* goamdsmi_gpu_dev_vendor_name_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU power cap
*
* @details This function will call the rsmi_dev_power_cap_get()
* function to return the gpu power cap. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t GPU power cap
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_power_cap_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU power
*
* @details This function will call the rsmi_dev_power_get()
* function to return the gpu power. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t GPU power
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_power_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU current temperature
*
* @details This function will call the rsmi_dev_temp_metric_get()
* function to return the gpu current temperature. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index, uint32_t sensor, uint32_t metric
*
* @retval ::uint64_t GPU current temperature
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor, uint32_t metric);
/**
* @brief Go language stub to get the overdrive level of the device
*
* @details This function will call the rsmi_dev_overdrive_level_get()
* function to return the overdrive percentage. This value is then
* passed as a uint32_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint32_t overdrive level
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_gpu_dev_overdrive_level_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the memory overdrive level of the device
*
* @details This function will call the rsmi_dev_mem_overdrive_level_get()
* function to return the memory overdrive percentage. This value is then
* passed as a uint32_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint32_t memory overdrive level
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_gpu_dev_mem_overdrive_level_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the performance level of the device
*
* @details This function will call the rsmi_dev_perf_level_get()
* function to return the rsmi_dev_perf_level_t. This value is then
* passed as a uint32_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint32_t performance level (rsmi_dev_perf_level_t)
* @retval -1 is returned upon failure.
*
*/
uint32_t goamdsmi_gpu_dev_perf_level_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU SCLK limit
*
* @details This function will call the rsmi_dev_gpu_clk_freq_get()
* function to return the gpu SCLK Limit. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
*
* @retval ::uint64_t GPU SCLK Limit
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU MCLK limit
*
* @details This function will call the rsmi_dev_gpu_clk_freq_get()
* function to return the gpu MCLK Limit. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
*
* @retval ::uint64_t GPU MCLK Limit
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(uint32_t dv_ind);
/**
* @brief Go language stub to get the minimum supported SCLK frequency
*
* @details This function will call the rsmi_od_volt_freq_data_get()
* function to return the minimum supported SCLK frequency.
* This value is then passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t minimum supported sclk frequency
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_sclk(uint32_t dv_ind);
/**
* @brief Go language stub to get the minimum supported MCLK frequency
*
* @details This function will call the rsmi_od_volt_freq_data_get()
* function to return the minimum supported MCLK frequency.
* This value is then passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t minimum supported mclk frequency
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_mclk(uint32_t dv_ind);
/**
* @brief Go language stub to get the maximum supported SCLK frequency
*
* @details This function will call the rsmi_od_volt_freq_data_get()
* function to return the maximum supported SCLK frequency.
* This value is then passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t maximum supported sclk frequency
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_sclk(uint32_t dv_ind);
/**
* @brief Go language stub to get the maximum supported MCLK frequency
*
* @details This function will call the rsmi_od_volt_freq_data_get()
* function to return the maximum supported MCLK frequency.
* This value is then passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index
*
* @retval ::uint64_t maximum supported mclk frequency
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_mclk(uint32_t dv_ind);
/**
 * @brief Go language stub to get the GPU Activity
 *
 * @details This function queries the underlying SMI library for the
 * current GPU use. This value is then passed as a uint32_t val to the
 * Go routine that called it.
 *
 * @param[in] dv_ind device index
 *
 * @retval ::uint32_t GPU Activity use
 * @retval -1 (all bits set) is returned upon failure.
 *
 */
uint32_t goamdsmi_gpu_dev_gpu_busy_percent_get(uint32_t dv_ind);
/**
 * @brief Go language stub to get the GPU Memory Use percent
 *
 * @details This function queries the underlying SMI library for the
 * current device memory use percent. This value is then passed as a
 * uint64_t val to the Go routine that called it.
 *
 * @param[in] dv_ind device index
 *
 * @retval ::uint64_t GPU memory use percent
 * @retval -1 (all bits set) is returned upon failure.
 *
 */
uint64_t goamdsmi_gpu_dev_gpu_memory_busy_percent_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the GPU Memory Usage
*
* @details This function will call the rsmi_dev_memory_usage_get()
* function to return the amount of memory currently being used. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
*
* @retval ::uint64_t GPU memory usage
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_gpu_memory_usage_get(uint32_t dv_ind);
/**
* @brief Go language stub to get the Total amount of GPU Memory
*
* @details This function will call the rsmi_dev_memory_total_get()
* function to return the total amount of memory. This value is then
* passed as a uint64_t val to the Go routine that
* called it.
*
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
*
* @retval ::uint64_t Total GPU memory
* @retval -1 is returned upon failure.
*
*/
uint64_t goamdsmi_gpu_dev_gpu_memory_total_get(uint32_t dv_ind);
@@ -0,0 +1,63 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef GO_AMD_SMI_H_
#define GO_AMD_SMI_H_
#include <stdbool.h>
#include <stdio.h>
/* Plain zero constant. */
#define GOAMDSMI_VALUE_0 0
/*
* All-bits-set values for 16-, 32- and 64-bit unsigned results.
* The shim getters initialise their result to the matching constant and
* return it unchanged when no valid reading could be obtained.
*/
#define GOAMDSMI_UINT16_MAX 0xFFFF
#define GOAMDSMI_UINT32_MAX 0xFFFFFFFF
#define GOAMDSMI_UINT64_MAX 0xFFFFFFFFFFFFFFFF
/* Placeholder text; presumably what the string getters hand back on failure (verify against the implementation). */
#define GOAMDSMI_STRING_NA "NA"
/**
 * @brief Compile-time gate for the debug prints of the Go shim
 *
 * The threshold is ENABLE_DEBUG_LEVEL, normally supplied at cmake time with
 * -DENABLE_DEBUG_LEVEL=<Enable_Debug_level_number>. When the build does not
 * define it, it defaults to 0 here so that including this header never fails
 * on an undeclared identifier.
 *
 * @param debug_level level of the message about to be printed; the argument is
 *        parenthesised in the expansion, so any integer expression may be
 *        passed without operator-precedence surprises.
 *
 * @retval non-zero (true) when ENABLE_DEBUG_LEVEL >= debug_level, i.e. the
 *         message should be printed
 * @retval zero (false) when the message should be suppressed
 *
 */
#ifndef ENABLE_DEBUG_LEVEL
#define ENABLE_DEBUG_LEVEL 0
#endif
#define enable_debug_level(debug_level) ((ENABLE_DEBUG_LEVEL) >= (debug_level))
/** Outcome of a shim operation. */
typedef enum {
    GOAMDSMI_STATUS_SUCCESS = 0, /**< Operation successful */
    GOAMDSMI_STATUS_FAILURE = 1, /**< Operation failed */
} goamdsmi_status_t;
/** Selects which side of the shim (CPU or GPU) an init request refers to. */
typedef enum {
    GOAMDSMI_CPU_INIT = 0, /**< CPU Init */
    GOAMDSMI_GPU_INIT = 1, /**< GPU Init */
} goamdsmi_Init_t;
/** Verbosity levels accepted by enable_debug_level(). */
typedef enum {
    GOAMDSMI_DEBUG_LEVEL_0 = 0, /**< Debug Level as 0 */
    GOAMDSMI_DEBUG_LEVEL_1 = 1, /**< Debug Level as 1 */
    GOAMDSMI_DEBUG_LEVEL_2 = 2, /**< Debug Level as 2 */
    GOAMDSMI_DEBUG_LEVEL_3 = 3, /**< Debug Level as 3 */
} goamdsmi_Enable_Debug_Level_t;
#endif
File diff ditekan karena terlalu besar Load Diff
@@ -0,0 +1,476 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_X86_AMD_HSMP_H_
#define _UAPI_ASM_X86_AMD_HSMP_H_
#include <linux/types.h>
/*
* Every type declared from here down to the matching "#pragma pack()" is
* laid out with a maximum member alignment of 4 bytes.
*/
#pragma pack(4)
/* Number of 32-bit words in the args[] buffer of struct hsmp_message. */
#define HSMP_MAX_MSG_LEN 8
/*
 * HSMP Messages supported
 *
 * Each ID is spelled out explicitly; IDs 0x29-0x2F are unassigned.
 * HSMP_MSG_ID_MAX is left implicit so it always follows the last message.
 */
enum hsmp_message_ids {
	HSMP_TEST                       = 0x01,	/* Increments input value by 1 */
	HSMP_GET_SMU_VER                = 0x02,	/* SMU FW version */
	HSMP_GET_PROTO_VER              = 0x03,	/* HSMP interface version */
	HSMP_GET_SOCKET_POWER           = 0x04,	/* average package power consumption */
	HSMP_SET_SOCKET_POWER_LIMIT     = 0x05,	/* Set the socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT     = 0x06,	/* Get current socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT_MAX = 0x07,	/* Get maximum socket power value */
	HSMP_SET_BOOST_LIMIT            = 0x08,	/* Set a core maximum frequency limit */
	HSMP_SET_BOOST_LIMIT_SOCKET     = 0x09,	/* Set socket maximum frequency level */
	HSMP_GET_BOOST_LIMIT            = 0x0A,	/* Get current frequency limit */
	HSMP_GET_PROC_HOT               = 0x0B,	/* Get PROCHOT status */
	HSMP_SET_XGMI_LINK_WIDTH        = 0x0C,	/* Set max and min width of xGMI Link */
	HSMP_SET_DF_PSTATE              = 0x0D,	/* Alter APEnable/Disable messages behavior */
	HSMP_SET_AUTO_DF_PSTATE         = 0x0E,	/* Enable DF P-State Performance Boost algorithm */
	HSMP_GET_FCLK_MCLK              = 0x0F,	/* Get FCLK and MEMCLK for current socket */
	HSMP_GET_CCLK_THROTTLE_LIMIT    = 0x10,	/* Get CCLK frequency limit in socket */
	HSMP_GET_C0_PERCENT             = 0x11,	/* Get average C0 residency in socket */
	HSMP_SET_NBIO_DPM_LEVEL         = 0x12,	/* Set max/min LCLK DPM Level for a given NBIO */
	HSMP_GET_NBIO_DPM_LEVEL         = 0x13,	/* Get LCLK DPM level min and max for a given NBIO */
	HSMP_GET_DDR_BANDWIDTH          = 0x14,	/* Get theoretical maximum and current DDR Bandwidth */
	HSMP_GET_TEMP_MONITOR           = 0x15,	/* Get socket temperature */
	HSMP_GET_DIMM_TEMP_RANGE        = 0x16,	/* Get per-DIMM temperature range and refresh rate */
	HSMP_GET_DIMM_POWER             = 0x17,	/* Get per-DIMM power consumption */
	HSMP_GET_DIMM_THERMAL           = 0x18,	/* Get per-DIMM thermal sensors */
	HSMP_GET_SOCKET_FREQ_LIMIT      = 0x19,	/* Get current active frequency per socket */
	HSMP_GET_CCLK_CORE_LIMIT        = 0x1A,	/* Get CCLK frequency limit per core */
	HSMP_GET_RAILS_SVI              = 0x1B,	/* Get SVI-based Telemetry for all rails */
	HSMP_GET_SOCKET_FMAX_FMIN       = 0x1C,	/* Get Fmax and Fmin per socket */
	HSMP_GET_IOLINK_BANDWITH        = 0x1D,	/* Get current bandwidth on IO Link */
	HSMP_GET_XGMI_BANDWITH          = 0x1E,	/* Get current bandwidth on xGMI Link */
	HSMP_SET_GMI3_WIDTH             = 0x1F,	/* Set max and min GMI3 Link width */
	HSMP_SET_PCI_RATE               = 0x20,	/* Control link rate on PCIe devices */
	HSMP_SET_POWER_MODE             = 0x21,	/* Select power efficiency profile policy */
	HSMP_SET_PSTATE_MAX_MIN         = 0x22,	/* Set the max and min DF P-State */
	HSMP_GET_METRIC_TABLE_VER       = 0x23,	/* Get metrics table version */
	HSMP_GET_METRIC_TABLE           = 0x24,	/* Get metrics table */
	HSMP_GET_METRIC_TABLE_DRAM_ADDR = 0x25,	/* Get metrics table dram address */
	HSMP_SET_XGMI_PSTATE_RANGE      = 0x26,	/* Set xGMI P-state range */
	HSMP_CPU_RAIL_ISO_FREQ_POLICY   = 0x27,	/* Get/Set Cpu Iso frequency policy */
	HSMP_DFC_ENABLE_CTRL            = 0x28,	/* Enable/Disable DF C-state */
	HSMP_GET_RAPL_UNITS             = 0x30,	/* Get scaling factor for energy */
	HSMP_GET_RAPL_CORE_COUNTER      = 0x31,	/* Get core energy counter value */
	HSMP_GET_RAPL_PACKAGE_COUNTER   = 0x32,	/* Get package energy counter value */
	HSMP_MSG_ID_MAX,
};
/*
* One HSMP request/response exchange; this is the argument type named in
* HSMP_IOCTL_CMD. args[] carries the input words on the way in and the
* response words on the way out. Declared inside the "#pragma pack(4)"
* region, so the member order and sizes below define the binary layout
* and must not be changed.
*/
struct hsmp_message {
__u32 msg_id; /* Message ID */
__u16 num_args; /* Number of input argument words in message */
__u16 response_sz; /* Number of expected output/response words */
__u32 args[HSMP_MAX_MSG_LEN];/* argument/response buffer */
__u16 sock_ind; /* socket number */
};
/* Kind of access a message performs, as recorded in hsmp_msg_desc_table. */
enum hsmp_msg_type {
	HSMP_RSVD    = -1,	/* reserved slot, no message assigned */
	HSMP_SET     = 0,	/* message writes a setting */
	HSMP_GET     = 1,	/* message reads a value */
	HSMP_SET_GET = 2,	/* message can either set or get */
};
/* Known HSMP protocol versions; each enumerator equals its version number. */
enum hsmp_proto_versions {
	HSMP_PROTO_VER2 = 2,
	HSMP_PROTO_VER3 = 3,
	HSMP_PROTO_VER4 = 4,
	HSMP_PROTO_VER5 = 5,
	HSMP_PROTO_VER6 = 6,
	HSMP_PROTO_VER7 = 7
};
/*
* Shape of one HSMP message, used as the element type of
* hsmp_msg_desc_table. The table fills these members positionally
* ({num_args, response_sz, type}), so their order must not change.
*/
struct hsmp_msg_desc {
/* number of input argument words */
int num_args;
/* number of response words */
int response_sz;
/* set / get / set-get / reserved */
enum hsmp_msg_type type;
};
/*
* User may use these comments as reference, please find the
* supported list of messages and message definition in the
* HSMP chapter of respective family/model PPR.
*
* Not supported messages would return -ENOMSG.
*/
static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
/* RESERVED */
{0, 0, HSMP_RSVD},
/*
* HSMP_TEST, num_args = 1, response_sz = 1
* input: args[0] = xx
* output: args[0] = xx + 1
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_SMU_VER, num_args = 0, response_sz = 1
* output: args[0] = smu fw ver
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1
* output: args[0] = proto version
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1
* output: args[0] = socket power in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0
* input: args[0] = power limit value in mWatts
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = socket power limit value in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1
* output: args[0] = maximum socket power limit in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0
* input: args[0] = apic id[31:16] + boost limit value in MHz[15:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0
* input: args[0] = boost limit value in MHz
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1
* input: args[0] = apic id
* output: args[0] = boost limit value in MHz
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1
* output: args[0] = proc hot status
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0
* input: args[0] = min link width[15:8] + max link width[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0
* input: args[0] = df pstate[7:0]
*/
{1, 0, HSMP_SET},
/* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */
{0, 0, HSMP_SET},
/*
* HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2
* output: args[0] = fclk in MHz, args[1] = mclk in MHz
*/
{0, 2, HSMP_GET},
/*
* HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = core clock in MHz
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1
* output: args[0] = average c0 residency
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0
* input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
* input: args[0] = nbioid[23:16]
* output: args[0] = max dpm level[15:8] + min dpm level[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
* output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] +
* bw in percentage[7:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1
* output: args[0] = temperature in degree celsius. [15:8] integer part +
* [7:5] fractional part
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = refresh rate[3] + temperature range[2:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
* DIMM address[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
* DIMM address[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
* input: args[0] = apic id of the core[31:0]
* output: args[0] = frequency in MHz[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
* output: args[0] = power in mW[31:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
* output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
* input: args[0] = link id[15:8] + bw type[2:0]
* output: args[0] = io bandwidth in Mbps[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
* input: args[0] = link id[15:8] + bw type[2:0]
* output: args[0] = xgmi bandwidth in Mbps[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
* input: args[0] = min link width[15:8] + max link width[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
* input: args[0] = link rate control value
* output: args[0] = previous link rate control value
*/
{1, 1, HSMP_SET},
/*
* HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0/1
* input: args[0] = set/get power mode[31] + power efficiency mode[2:0]
* output: args[0] = current power efficiency mode[2:0]
*/
{1, 1, HSMP_SET_GET},
/*
* HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
* input: args[0] = min df pstate[15:8] + max df pstate[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
* output: args[0] = metrics table version
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
*/
{0, 0, HSMP_GET},
/*
* HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
* output: args[0] = lower 32 bits of the address
* output: args[1] = upper 32 bits of the address
*/
{0, 2, HSMP_GET},
/*
* HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0
* input: args[0] = min xGMI p-state[15:8] + max xGMI state[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1
* input: args[0] = set/get policy[31] +
* disable/enable independent control[0]
* output: args[0] = current policy[0]
*/
{1, 1, HSMP_SET_GET},
/*
* HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1
* input: args[0] = set/get policy[31] + enable/disable DFC[0]
* output: args[0] = current policy[0]
*/
{1, 1, HSMP_SET_GET},
/* RESERVED(0x29-0x2f) */
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
{0, 0, HSMP_RSVD},
/*
* HSMP_GET_RAPL_UNITS, num_args = 0, response_sz = 1
* output: args[0] = tu value[19:16] + esu value[12:8]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 2
* input: args[0] = Apic id[15:0]
* output: args[0] = lower 32 bits of energy
* output: args[1] = upper 32 bits of energy
*/
{1, 2, HSMP_GET},
/*
* HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 2
* output: args[0] = lower 32 bits of energy
* output: args[1] = upper 32 bits of energy
*/
{0, 2, HSMP_GET},
};
/* Metrics table (supported only with proto version 6) */
/*
* Declared inside the "#pragma pack(4)" region: members are aligned to at
* most 4 bytes, so the __u64 fields that follow an odd number of __u32
* fields are not padded to 8 bytes. Member order, types and array sizes
* define the binary layout and must not be changed; as noted at the end of
* the struct, new items are appended to keep driver compatibility.
*/
struct hsmp_metric_table {
__u32 accumulation_counter;
/* TEMPERATURE */
__u32 max_socket_temperature;
__u32 max_vr_temperature;
__u32 max_hbm_temperature;
__u64 max_socket_temperature_acc;
__u64 max_vr_temperature_acc;
__u64 max_hbm_temperature_acc;
/* POWER */
__u32 socket_power_limit;
__u32 max_socket_power_limit;
__u32 socket_power;
/* ENERGY */
__u64 timestamp;
__u64 socket_energy_acc;
__u64 ccd_energy_acc;
__u64 xcd_energy_acc;
__u64 aid_energy_acc;
__u64 hbm_energy_acc;
/* FREQUENCY */
__u32 cclk_frequency_limit;
__u32 gfxclk_frequency_limit;
__u32 fclk_frequency;
__u32 uclk_frequency;
__u32 socclk_frequency[4];
__u32 vclk_frequency[4];
__u32 dclk_frequency[4];
__u32 lclk_frequency[4];
__u64 gfxclk_frequency_acc[8];
__u64 cclk_frequency_acc[96];
/* FREQUENCY RANGE */
__u32 max_cclk_frequency;
__u32 min_cclk_frequency;
__u32 max_gfxclk_frequency;
__u32 min_gfxclk_frequency;
__u32 fclk_frequency_table[4];
__u32 uclk_frequency_table[4];
__u32 socclk_frequency_table[4];
__u32 vclk_frequency_table[4];
__u32 dclk_frequency_table[4];
__u32 lclk_frequency_table[4];
__u32 max_lclk_dpm_range;
__u32 min_lclk_dpm_range;
/* XGMI */
__u32 xgmi_width;
__u32 xgmi_bitrate;
__u64 xgmi_read_bandwidth_acc[8];
__u64 xgmi_write_bandwidth_acc[8];
/* ACTIVITY */
__u32 socket_c0_residency;
__u32 socket_gfx_busy;
__u32 dram_bandwidth_utilization;
__u64 socket_c0_residency_acc;
__u64 socket_gfx_busy_acc;
__u64 dram_bandwidth_acc;
__u32 max_dram_bandwidth;
__u64 dram_bandwidth_utilization_acc;
__u64 pcie_bandwidth_acc[4];
/* THROTTLERS */
__u32 prochot_residency_acc;
__u32 ppt_residency_acc;
__u32 socket_thm_residency_acc;
__u32 vr_thm_residency_acc;
__u32 hbm_thm_residency_acc;
__u32 spare;
/* New items at the end to maintain driver compatibility */
__u32 gfxclk_frequency[8];
};
/* Reset to default packing */
#pragma pack()
/*
 * Define unique ioctl command for hsmp msgs using generic _IOWR.
 * HSMP_BASE_IOCTL_NR is passed as the first _IOWR argument, 0 as the command
 * number, and struct hsmp_message as the argument type of the request.
 */
#define HSMP_BASE_IOCTL_NR 0xF8
#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message)
#endif /*_ASM_X86_AMD_HSMP_H_*/
@@ -0,0 +1,118 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
#define AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
#include <map>
#include "rocm_smi/rocm_smi.h"
#include "amd_smi/amdsmi.h"
#ifdef ENABLE_ESMI_LIB
// <cstdint> is a C++ standard header and must be included outside any
// declaration, so it stays out of the extern "C" linkage block. Only the C
// e_smi header is wrapped. <cstdint> still comes first so the fixed-width
// integer types are visible to e_smi.h.
#include <cstdint>
extern "C" {
#include <e_smi/e_smi.h>
}
#endif
namespace amd::smi {
// Translation table from rsmi_status_t codes to amdsmi_status_t codes.
//
// Declared `inline` so that every translation unit including this header
// shares one map instance, instead of each one constructing its own
// internal-linkage copy during static initialization.
//
// Both RSMI_STATUS_INIT_ERROR and RSMI_INITIALIZATION_ERROR translate to
// AMDSMI_STATUS_NOT_INIT.
inline const std::map<rsmi_status_t, amdsmi_status_t> rsmi_status_map = {
    {RSMI_STATUS_SUCCESS, AMDSMI_STATUS_SUCCESS},
    {RSMI_STATUS_INVALID_ARGS, AMDSMI_STATUS_INVAL},
    {RSMI_STATUS_NOT_SUPPORTED, AMDSMI_STATUS_NOT_SUPPORTED},
    {RSMI_STATUS_FILE_ERROR, AMDSMI_STATUS_FILE_ERROR},
    {RSMI_STATUS_PERMISSION, AMDSMI_STATUS_NO_PERM},
    {RSMI_STATUS_OUT_OF_RESOURCES, AMDSMI_STATUS_OUT_OF_RESOURCES},
    {RSMI_STATUS_INTERNAL_EXCEPTION, AMDSMI_STATUS_INTERNAL_EXCEPTION},
    {RSMI_STATUS_INPUT_OUT_OF_BOUNDS, AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS},
    {RSMI_STATUS_INIT_ERROR, AMDSMI_STATUS_NOT_INIT},
    {RSMI_INITIALIZATION_ERROR, AMDSMI_STATUS_NOT_INIT},
    {RSMI_STATUS_NOT_YET_IMPLEMENTED, AMDSMI_STATUS_NOT_YET_IMPLEMENTED},
    {RSMI_STATUS_NOT_FOUND, AMDSMI_STATUS_NOT_FOUND},
    {RSMI_STATUS_INSUFFICIENT_SIZE, AMDSMI_STATUS_INSUFFICIENT_SIZE},
    {RSMI_STATUS_INTERRUPT, AMDSMI_STATUS_INTERRUPT},
    {RSMI_STATUS_UNEXPECTED_SIZE, AMDSMI_STATUS_UNEXPECTED_SIZE},
    {RSMI_STATUS_NO_DATA, AMDSMI_STATUS_NO_DATA},
    {RSMI_STATUS_UNEXPECTED_DATA, AMDSMI_STATUS_UNEXPECTED_DATA},
    {RSMI_STATUS_BUSY, AMDSMI_STATUS_BUSY},
    {RSMI_STATUS_REFCOUNT_OVERFLOW, AMDSMI_STATUS_REFCOUNT_OVERFLOW},
    {RSMI_STATUS_DIRECTORY_NOT_FOUND, AMDSMI_STATUS_DIRECTORY_NOT_FOUND},
    {RSMI_STATUS_SETTING_UNAVAILABLE, AMDSMI_STATUS_SETTING_UNAVAILABLE},
    {RSMI_STATUS_AMDGPU_RESTART_ERR, AMDSMI_STATUS_AMDGPU_RESTART_ERR},
    {RSMI_STATUS_UNKNOWN_ERROR, AMDSMI_STATUS_UNKNOWN_ERROR},
};
// Translation table from an unsigned VRAM type code to amdsmi_vram_type_t.
//
// Declared `inline` so that every translation unit including this header
// shares one map instance instead of building a private copy at start-up.
//
// NOTE(review): the numeric keys presumably mirror the kernel driver's VRAM
// type numbering - confirm against the driver header before adding entries.
inline const std::map<unsigned, amdsmi_vram_type_t> vram_type_map = {
    {0, AMDSMI_VRAM_TYPE_UNKNOWN},
    {1, AMDSMI_VRAM_TYPE_GDDR1},
    {2, AMDSMI_VRAM_TYPE_DDR2},
    {3, AMDSMI_VRAM_TYPE_GDDR3},
    {4, AMDSMI_VRAM_TYPE_GDDR4},
    {5, AMDSMI_VRAM_TYPE_GDDR5},
    {6, AMDSMI_VRAM_TYPE_HBM},
    {7, AMDSMI_VRAM_TYPE_DDR3},
    {8, AMDSMI_VRAM_TYPE_DDR4},
    {9, AMDSMI_VRAM_TYPE_GDDR6},
    {10, AMDSMI_VRAM_TYPE_DDR5},
    {11, AMDSMI_VRAM_TYPE_LPDDR4},
    {12, AMDSMI_VRAM_TYPE_LPDDR5},
    {13, AMDSMI_VRAM_TYPE_HBM3E},
};
// Converts an rsmi_status_t value into an amdsmi_status_t value.
// NOTE(review): defined outside this header; presumably a lookup in
// rsmi_status_map - confirm what is returned for codes missing from the map.
amdsmi_status_t rsmi_to_amdsmi_status(rsmi_status_t status);
// Converts an unsigned VRAM type code into an amdsmi_vram_type_t value.
// NOTE(review): defined outside this header; presumably a lookup in
// vram_type_map - confirm what is returned for codes missing from the map.
amdsmi_vram_type_t vram_type_value(unsigned type);
#ifdef ENABLE_ESMI_LIB
// Define a map of esmi status codes to amdsmi status codes
const std::map<esmi_status_t, amdsmi_status_t> esmi_status_map = {
{ESMI_SUCCESS, AMDSMI_STATUS_SUCCESS},
{ESMI_INITIALIZED, AMDSMI_STATUS_SUCCESS},
{ESMI_INVALID_INPUT, AMDSMI_STATUS_INVAL},
{ESMI_NOT_SUPPORTED, AMDSMI_STATUS_NOT_SUPPORTED},
{ESMI_PERMISSION, AMDSMI_STATUS_NO_PERM},
{ESMI_INTERRUPTED, AMDSMI_STATUS_INTERRUPT},
{ESMI_IO_ERROR, AMDSMI_STATUS_IO},
{ESMI_FILE_ERROR, AMDSMI_STATUS_FILE_ERROR},
{ESMI_NO_MEMORY, AMDSMI_STATUS_OUT_OF_RESOURCES},
{ESMI_DEV_BUSY, AMDSMI_STATUS_BUSY},
{ESMI_NOT_INITIALIZED, AMDSMI_STATUS_NOT_INIT},
{ESMI_UNEXPECTED_SIZE, AMDSMI_STATUS_UNEXPECTED_SIZE},
{ESMI_UNKNOWN_ERROR, AMDSMI_STATUS_UNKNOWN_ERROR},
{ESMI_NO_ENERGY_DRV, AMDSMI_STATUS_NO_ENERGY_DRV},
{ESMI_NO_MSR_DRV, AMDSMI_STATUS_NO_MSR_DRV},
{ESMI_NO_HSMP_DRV, AMDSMI_STATUS_NO_HSMP_DRV},
{ESMI_NO_HSMP_SUP, AMDSMI_STATUS_NO_HSMP_SUP},
{ESMI_NO_DRV, AMDSMI_STATUS_NO_DRV},
{ESMI_FILE_NOT_FOUND, AMDSMI_STATUS_FILE_NOT_FOUND},
{ESMI_ARG_PTR_NULL, AMDSMI_STATUS_ARG_PTR_NULL},
{ESMI_HSMP_TIMEOUT, AMDSMI_STATUS_HSMP_TIMEOUT},
{ESMI_NO_HSMP_MSG_SUP, AMDSMI_STATUS_NO_HSMP_MSG_SUP},
};
amdsmi_status_t esmi_to_amdsmi_status(esmi_status_t status);
#endif
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
@@ -0,0 +1,224 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#pragma once
#include <vector>
#include "amd_smi/amdsmi.h"
#pragma pack(1)
/*
 * Number of 64-bit message slots in cper_sec_crashdump_data::dump.boot_err.
 * NOTE(review): the name suggests one slot per OAM - confirm.
 */
#define CPER_MAX_OAM_COUNT (8)
/*
 * Severity codes for CPER records.
 * NOTE(review): the UEFI CPER severity encoding is 0 = recoverable,
 * 1 = fatal, 2 = corrected. The enumerators for 0 and 2 carry a "FATAL"
 * prefix here - confirm the intended meaning before relying on the names.
 */
enum cper_error_severity {
CPER_SEV_FATAL_UNCORRECTED = 0,
CPER_SEV_FATAL = 1,
CPER_SEV_FATAL_CORRECTED = 2,
CPER_SEV_UNUSED = 10,
};
/*
 * ACA register word identifiers. Each register is listed as a _LO and a _HI
 * entry; values 0-13 are named.
 * CPER_ACA_REG_COUNT (32) is the element count of
 * cper_sec_nonstd_err_ctx::reg_dump, so it is larger than the number of named
 * entries.
 * NOTE(review): the named enumerators are presumably indices into reg_dump -
 * confirm against the code that fills or decodes the dump.
 */
enum cper_aca_reg {
CPER_ACA_REG_CTL_LO = 0,
CPER_ACA_REG_CTL_HI = 1,
CPER_ACA_REG_STATUS_LO = 2,
CPER_ACA_REG_STATUS_HI = 3,
CPER_ACA_REG_ADDR_LO = 4,
CPER_ACA_REG_ADDR_HI = 5,
CPER_ACA_REG_MISC0_LO = 6,
CPER_ACA_REG_MISC0_HI = 7,
CPER_ACA_REG_CONFIG_LO = 8,
CPER_ACA_REG_CONFIG_HI = 9,
CPER_ACA_REG_IPID_LO = 10,
CPER_ACA_REG_IPID_HI = 11,
CPER_ACA_REG_SYND_LO = 12,
CPER_ACA_REG_SYND_HI = 13,
CPER_ACA_REG_COUNT = 32,
};
/*
 * Section descriptor of a CPER entry.
 * Declared under #pragma pack(1), so members are laid out without padding;
 * member order and widths must not change.
 */
struct cper_sec_desc {
uint32_t sec_offset; /* Offset from the start of CPER entry */
uint32_t sec_length;
uint8_t revision_minor; /* CPER_SEC_MINOR_REV_1 */
uint8_t revision_major; /* CPER_SEC_MAJOR_REV_22 */
/* valid_mask aliases the valid_bits bit-fields as a single byte */
union {
struct {
uint8_t fru_id : 1;
uint8_t fru_text : 1;
uint8_t reserved : 6;
} valid_bits;
uint8_t valid_mask;
};
uint8_t reserved;
/* flags_mask aliases the flags_bits bit-fields as one 32-bit word */
union {
struct {
uint32_t primary : 1;
uint32_t reserved1 : 2;
uint32_t exceed_err_threshold : 1;
uint32_t latent_err : 1; /* "Deferred" error Creation*/
uint32_t reserved2 : 27;
} flags_bits;
uint32_t flags_mask;
};
amdsmi_cper_guid_t sec_type; /* AMD non-Standard, AMD Crashdump */
char fru_id[16]; /* FRU Serial ID */
amdsmi_cper_sev_t severity;
char fru_text[20]; /* "OAM%d" */
};
/*
 * Error-information part of a non-standard error section body.
 * Declared under #pragma pack(1). Each anonymous union exposes the same
 * 64 bits either as named bit-fields or as one raw mask.
 */
struct cper_sec_nonstd_err_info {
amdsmi_cper_guid_t error_type;
/* One bit per member that follows ms_chk; the remaining 59 bits are reserved */
union {
struct {
uint64_t ms_chk : 1;
uint64_t target_addr_id : 1;
uint64_t req_id : 1;
uint64_t resp_id : 1;
uint64_t instr_ptr : 1;
uint64_t reserved : 59;
} valid_bits;
uint64_t valid_mask;
};
/*
 * Bits 0-5 are *_valid flags, bits 6-15 are reserved, bits 16-22 carry the
 * values themselves, and the upper 41 bits are reserved.
 */
union {
struct {
uint64_t err_type_valid : 1;
uint64_t pcc_valid : 1;
uint64_t uncorr_valid : 1;
uint64_t precise_ip_valid : 1;
uint64_t restartable_ip_valid : 1;
uint64_t overflow_valid : 1;
uint64_t reserved1 : 10;
uint64_t err_type : 2;
uint64_t pcc : 1;
uint64_t uncorr : 1;
uint64_t precised_ip : 1;
uint64_t restartable_ip : 1;
uint64_t overflow : 1;
uint64_t reserved2 : 41;
} ms_chk_bits;
uint64_t ms_chk_mask;
};
uint64_t target_addr_id;
uint64_t req_id;
uint64_t resp_id;
uint64_t instr_ptr;
};
/*
 * Register-context part of a non-standard error section body.
 * Declared under #pragma pack(1). reg_dump holds CPER_ACA_REG_COUNT 32-bit
 * words.
 * NOTE(review): reg_arr_size presumably describes how much of reg_dump is
 * in use - confirm units (bytes vs. words) with the decoder.
 */
struct cper_sec_nonstd_err_ctx {
uint16_t reg_ctx_type;
uint16_t reg_arr_size;
uint32_t msr_addr;
uint64_t mm_reg_addr;
uint32_t reg_dump[CPER_ACA_REG_COUNT]; /* This buffer can grow */
};
/*
 * Header of a non-standard error section.
 * Declared under #pragma pack(1). valid_mask aliases the valid_bits
 * bit-fields; only the low 14 bits are named.
 */
struct cper_sec_nonstd_err_hdr {
union {
struct {
uint64_t apic_id : 1;
uint64_t fw_id : 1;
uint64_t err_info_cnt : 6; /* should match context_cnt */
uint64_t err_context_cnt : 6; /* should match info_cnt */
} valid_bits;
uint64_t valid_mask;
};
uint64_t apic_id;
char fw_id[48];
};
/* One body entry of a non-standard error section: error info followed by its register context. */
struct cper_sec_nonstd_err_body {
struct cper_sec_nonstd_err_info err_info;
struct cper_sec_nonstd_err_ctx err_ctx;
};
/*
 * Non-standard error section: a header followed by a variable number of body
 * entries. body[] is a flexible array member, which C++ compilers accept
 * only as an extension, so sizeof(struct cper_sec_nonstd_err) covers the
 * header alone.
 */
struct cper_sec_nonstd_err {
struct cper_sec_nonstd_err_hdr hdr;
struct cper_sec_nonstd_err_body body[]; /* Variable Size, today only 1 entry */
};
/*
 * Payload of a crashdump section.
 * Declared under #pragma pack(1). The dump union holds either the
 * fatal_err register words (eight 32-bit values) or the boot_err messages
 * (CPER_MAX_OAM_COUNT 64-bit values).
 * NOTE(review): which union member is active is presumably selected by
 * reg_ctx_type - confirm with the decoder.
 */
struct cper_sec_crashdump_data {
uint16_t reg_ctx_type;
uint16_t reg_arr_size;
uint32_t reserved1;
uint64_t reserved2;
union {
struct {
uint32_t status_lo;
uint32_t status_hi;
uint32_t addr_lo;
uint32_t addr_hi;
uint32_t ipid_lo;
uint32_t ipid_hi;
uint32_t synd_lo;
uint32_t synd_hi;
} fatal_err;
struct {
uint64_t msg[CPER_MAX_OAM_COUNT];
} boot_err;
} dump;
};
/*
 * Crashdump section: reserved words, a 48-byte firmware id, more reserved
 * words, then the crashdump payload. Declared under #pragma pack(1).
 */
struct cper_sec_crashdump {
uint64_t reserved1;
uint64_t reserved2;
char fw_id[48];
uint64_t reserved3[8];
struct cper_sec_crashdump_data data;
};
/*
 * One CPER section: a validity byte followed by either a crashdump section
 * or a runtime (non-standard) error section. Declared under #pragma pack(1).
 * runtime_err ends in a flexible array member, so the usable size of this
 * union depends on the data that backs it.
 */
struct cper_sec {
/* valid_mask aliases the valid_bits bit-fields as a single byte */
union {
struct {
uint8_t fru_id : 1;
uint8_t fru_text : 1;
uint8_t reserved : 6;
} valid_bits;
uint8_t valid_mask;
};
union {
struct cper_sec_crashdump crashdump;
struct cper_sec_nonstd_err runtime_err;
};
};
/*
 * General CPER record structure.
 * Holds pointers to the three parts of a record rather than the data itself.
 * struct cper_hdr is only named through a pointer here; its definition is
 * not part of this excerpt.
 */
struct cper_1_0 {
struct cper_hdr *hdr;
struct cper_sec_desc *sec_desc; /* Variable Size */
struct cper_sec *sec; /* Variable Size */
};
#pragma pack()
/*
 * Declarations only; both functions are implemented outside this header and
 * are declared after the packing pragma has been reset.
 * NOTE(review): amdsmi_get_gpu_cper_entries_by_path presumably reads CPER
 * entries from the file named by amdgpu_ring_cper_file into cper_data and
 * reports them through cper_hdrs / entry_count - confirm the in/out meaning
 * of buf_size, entry_count and cursor against the implementation.
 */
amdsmi_status_t amdsmi_get_gpu_cper_entries_by_path(const char *amdgpu_ring_cper_file, uint32_t severity_mask,
char *cper_data, uint64_t *buf_size, amdsmi_cper_hdr_t **cper_hdrs,
uint64_t *entry_count, uint64_t *cursor, uint64_t product_serial);
/* NOTE(review): the meaning of the returned integers is defined by the implementation - confirm. */
std::vector<int> cper_decode(const amdsmi_cper_hdr_t *cper);
@@ -0,0 +1,68 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
#include <unistd.h>
#include <vector>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_lib_loader.h"
#include "amd_smi/impl/amdgpu_drm.h"
#include "amd_smi/impl/xf86drm.h"
#include "amd_smi/impl/scoped_fd.h"
namespace amd::smi {
// Keeps the DRM paths and PCI BDFs of the discovered GPUs and offers
// index-based lookups over them. libdrm is reached through lib_loader_.
//
// All member functions are defined outside this header.
class AMDSmiDrm {
 public:
    amdsmi_status_t init();
    amdsmi_status_t cleanup();
    // Look up the BDF / DRM path stored for gpu_index and write it to the
    // output pointer.
    amdsmi_status_t get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const;
    amdsmi_status_t get_drm_path_by_index(uint32_t gpu_index, std::string *drm_path) const;
    std::vector<amdsmi_bdf_t> get_bdfs();
    std::vector<std::string>& get_drm_paths();
    bool check_if_drm_is_supported();
    uint32_t get_vendor_id();

 private:
    // when file is not found, the empty string will be returned
    std::string find_file_in_folder(const std::string& folder,
                                    const std::string& regex);

    std::vector<std::string> drm_paths_;  // drm path (renderD128 for example)
    std::vector<amdsmi_bdf_t> drm_bdfs_;  // bdf
    // Zero-initialized so the value is never indeterminate. The class has no
    // user-provided constructor, so without this a read before assignment
    // would be undefined behaviour.
    uint32_t vendor_id = 0;
    AMDSmiLibraryLoader lib_loader_;  // lazy load libdrm
    std::mutex drm_mutex_;
};
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
@@ -0,0 +1,95 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
#include <map>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_processor.h"
#include "amd_smi/impl/amd_smi_drm.h"
namespace amd::smi {
// PID, amdsmi_proc_info_t
// Ordered map from a process handle to the process information kept for it.
using GPUComputeProcessList_t = std::map<amdsmi_process_handle_t, amdsmi_proc_info_t>;
// Underlying integer type of ComputeProcessListType_t.
using ComputeProcessListClassType_t = uint16_t;
// Selects which processes AMDSmiGPUDevice::amdgpu_get_compute_process_list
// collects; kAllProcessesOnDevice is that function's default argument.
enum class ComputeProcessListType_t : ComputeProcessListClassType_t
{
kAllProcesses,
kAllProcessesOnDevice,
};
class AMDSmiGPUDevice: public AMDSmiProcessor {
public:
AMDSmiGPUDevice(uint32_t gpu_id, std::string path, amdsmi_bdf_t bdf, AMDSmiDrm& drm):
AMDSmiProcessor(AMDSMI_PROCESSOR_TYPE_AMD_GPU), gpu_id_(gpu_id), path_(path), bdf_(bdf), drm_(drm) {}
AMDSmiGPUDevice(uint32_t gpu_id, AMDSmiDrm& drm):
AMDSmiProcessor(AMDSMI_PROCESSOR_TYPE_AMD_GPU), gpu_id_(gpu_id), drm_(drm) {
if (check_if_drm_is_supported()) this->get_drm_data();
}
~AMDSmiGPUDevice() {
}
amdsmi_status_t get_drm_data();
pthread_mutex_t* get_mutex();
uint32_t get_gpu_id() const;
uint32_t get_card_id(); // -e feature + we can get card_id for our internal functions
uint32_t get_drm_render_minor(); // -e feature + we can get card_id for our internal functions
uint64_t get_kfd_gpu_id(); // Used to decode vram usage for KFD processes
std::string& get_gpu_path();
amdsmi_bdf_t get_bdf();
bool check_if_drm_is_supported() { return drm_.check_if_drm_is_supported(); }
uint32_t get_vendor_id();
const GPUComputeProcessList_t& amdgpu_get_compute_process_list(ComputeProcessListType_t list_type = ComputeProcessListType_t::kAllProcessesOnDevice);
// New methods for -e feature
std::string bdf_to_string() const; // -e feature
std::vector<uint64_t> get_bitmask_from_numa_node(int32_t node_id, uint32_t size) const;
std::vector<uint64_t> get_bitmask_from_local_cpulist(uint32_t drm_card, uint32_t size) const;
private:
uint32_t gpu_id_;
std::string path_;
amdsmi_bdf_t bdf_;
uint32_t vendor_id_;
AMDSmiDrm& drm_;
uint32_t card_index_;
uint32_t drm_render_minor_;
uint64_t kfd_gpu_id_; // Used to decode vram usage for KFD processes
GPUComputeProcessList_t compute_process_list_;
int32_t get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
ComputeProcessListType_t list_type);
};
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
@@ -0,0 +1,81 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
#include <dlfcn.h>
#include <cstring>
#include <iostream>
#include <mutex> // NOLINT(build/c++11)
#include "amd_smi/amdsmi.h"
namespace amd::smi {
// Wrapper around a dynamically loaded library handle.
// load(), unload(), the constructor and the destructor are defined outside
// this header; load_symbol() is a template and is defined below the class.
class AMDSmiLibraryLoader {
 public:
    AMDSmiLibraryLoader();
    amdsmi_status_t load(const char* filename);

    // Resolves func_name in the loaded library and stores it in *func_handler.
    template<typename T> amdsmi_status_t load_symbol(T* func_handler,
                                                     const char* func_name);
    amdsmi_status_t unload();

    ~AMDSmiLibraryLoader();

 private:
    // Null means "no library loaded" (load_symbol() tests for it). The default
    // initializer matches library_loaded_ and keeps the pointer well-defined
    // regardless of what the out-of-line constructor does.
    void* libHandler_ = nullptr;
    std::mutex library_mutex_;
    bool library_loaded_ = false;
};
// Resolves `func_name` in the loaded library and stores the address in
// `*func_handler`.
//
// Returns:
//   AMDSMI_STATUS_FAIL_LOAD_MODULE - no library handle is set.
//   AMDSMI_STATUS_FAIL_LOAD_SYMBOL - `func_handler` or `func_name` is null, or
//                                    dlsym() produced a null address.
//   AMDSMI_STATUS_SUCCESS          - `*func_handler` holds the symbol address.
template<typename T> amdsmi_status_t AMDSmiLibraryLoader::load_symbol(
                  T* func_handler,
                  const char* func_name) {
    // Hold the mutex before looking at libHandler_, so the null check and the
    // dlsym() call below operate on the same handle value.
    std::lock_guard<std::mutex> guard(library_mutex_);

    if (!libHandler_) {
        return AMDSMI_STATUS_FAIL_LOAD_MODULE;
    }
    if (!func_handler || !func_name) {
        return AMDSMI_STATUS_FAIL_LOAD_SYMBOL;
    }

    // Discard any error left over from an earlier dl* call, so the message
    // reported below belongs to this lookup.
    dlerror();

    *reinterpret_cast<void**>(func_handler) =
        dlsym(libHandler_, func_name);

    if (*func_handler == nullptr) {
        // dlerror() returns NULL when dlsym() legitimately resolved the symbol
        // to a null address; streaming a null char* would be undefined
        // behaviour, so substitute a fixed description in that case.
        const char* error = dlerror();
        std::cerr << "AMDSmiLibraryLoader: Fail to load the symbol "
                  << func_name << ": "
                  << (error != nullptr ? error : "symbol resolved to a null address")
                  << std::endl;
        return AMDSMI_STATUS_FAIL_LOAD_SYMBOL;
    }

    return AMDSMI_STATUS_SUCCESS;
}
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
@@ -0,0 +1,47 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
#define AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
#include <string>
#include "amd_smi/amdsmi.h"
namespace amd::smi {
// Base class for processors. Stores a processor type, an index and a string
// identifier; each constructor sets a subset of them and the rest keep the
// default member initializers below, so every getter returns a defined value.
class AMDSmiProcessor {
 public:
    explicit AMDSmiProcessor(processor_type_t type) : processor_type_(type) {}
    explicit AMDSmiProcessor(processor_type_t type, uint32_t index) : processor_type_(type), pindex_(index) {}
    explicit AMDSmiProcessor(const std::string& id) : processor_identifier_(id) {}
    virtual ~AMDSmiProcessor() = default;

    processor_type_t get_processor_type() const { return processor_type_; }
    const std::string& get_processor_id() const { return processor_identifier_; }
    uint32_t get_processor_index() const { return pindex_; }

 private:
    // Value-initialized to the zero enumerator when a constructor does not
    // set it. NOTE(review): zero is presumably the "unknown" processor type -
    // confirm against the processor_type_t definition in amdsmi.h.
    processor_type_t processor_type_{};
    uint32_t pindex_ = 0;
    std::string processor_identifier_;
};
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
@@ -0,0 +1,80 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
#define AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
#include <string>
#include <vector>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_processor.h"
namespace amd::smi {
// A socket groups processors into three lists keyed by processor type:
// AMD GPUs, AMD CPUs and AMD CPU cores. The socket stores raw pointers.
// NOTE(review): ownership of the stored processors depends on the
// out-of-line destructor - confirm there.
class AMDSmiSocket {
 public:
    explicit AMDSmiSocket(const std::string& id) : socket_identifier_(id) {}
    explicit AMDSmiSocket(uint32_t index) : sindex_(index) {}
    ~AMDSmiSocket();

    const std::string& get_socket_id() const { return socket_identifier_; }
    uint32_t get_socket_index() const { return sindex_; }

    // Appends `processor` to the list matching its type. Null pointers and
    // processors of any other type are ignored.
    void add_processor(AMDSmiProcessor* processor) {
        if (processor == nullptr) {
            return;  // nothing to classify; avoids dereferencing a null pointer
        }
        switch (processor->get_processor_type()) {
            case AMDSMI_PROCESSOR_TYPE_AMD_GPU:
                processors_.push_back(processor);
                break;
            case AMDSMI_PROCESSOR_TYPE_AMD_CPU:
                cpu_processors_.push_back(processor);
                break;
            case AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE:
                cpu_core_processors_.push_back(processor);
                break;
            default:
                break;
        }
    }

    // Returns the GPU processor list.
    std::vector<AMDSmiProcessor*>& get_processors() { return processors_; }

    // Returns the list for `type`; any type without its own list falls back
    // to the GPU processor list.
    std::vector<AMDSmiProcessor*>& get_processors(processor_type_t type) {
        switch (type) {
            case AMDSMI_PROCESSOR_TYPE_AMD_GPU:
                return processors_;
            case AMDSMI_PROCESSOR_TYPE_AMD_CPU:
                return cpu_processors_;
            case AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE:
                return cpu_core_processors_;
            default:
                return processors_;
        }
    }

    amdsmi_status_t get_processor_count(uint32_t* processor_count) const;
    amdsmi_status_t get_processor_count(processor_type_t type, uint32_t* processor_count) const;

 private:
    // Zero when the socket was created from a string identifier; previously
    // left indeterminate in that case.
    uint32_t sindex_ = 0;
    std::string socket_identifier_;
    std::vector<AMDSmiProcessor*> processors_;
    std::vector<AMDSmiProcessor*> cpu_processors_;
    std::vector<AMDSmiProcessor*> cpu_core_processors_;
};
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
@@ -0,0 +1,84 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
#define AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
#include <vector>
#include <set>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_socket.h"
#include "amd_smi/impl/amd_smi_processor.h"
#include "amd_smi/impl/amd_smi_drm.h"
namespace amd::smi {
// Singleton: Only one system in an application
// The single instance is the function-local static inside getInstance(); the
// constructor is private, so that accessor is the only way to reach it.
class AMDSmiSystem {
public:
// Returns the process-wide instance, created on first use.
static AMDSmiSystem& getInstance() {
static AMDSmiSystem instance;
return instance;
}
amdsmi_status_t init(uint64_t flags);
amdsmi_status_t cleanup();
// Gives direct, mutable access to the socket list.
std::vector<AMDSmiSocket*>& get_sockets() {return sockets_;}
// The handle_to_* / gpu_index_to_handle functions write their result through
// the output pointer and report success or failure in the return value.
amdsmi_status_t handle_to_socket(amdsmi_socket_handle socket_handle,
AMDSmiSocket** socket);
amdsmi_status_t handle_to_processor(amdsmi_processor_handle processor_handle,
AMDSmiProcessor** device);
amdsmi_status_t gpu_index_to_handle(uint32_t gpu_index,
amdsmi_processor_handle* processor_handle);
amdsmi_status_t get_cpu_family(uint32_t *cpu_family);
amdsmi_status_t get_cpu_model(uint32_t *cpu_model);
amdsmi_status_t get_cpu_model_name(uint32_t socket_id, std::string *model_name);
amdsmi_status_t get_sys_cpu_cores_per_socket(uint32_t *core_num) ;
amdsmi_status_t get_sys_num_of_cpu_sockets(uint32_t *sock_num);
std::vector<uint32_t> get_cpu_sockets_from_numa_node(int32_t numa_node);
private:
// init_flag_ starts as AMDSMI_INIT_AMD_GPUS.
AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {}
/* The GPU socket id is used to identify the socket, so that the XCDs
on the same physical device will be collected under the same socket.
The BD part of the BDF is used as GPU socket to represent a physical device.
*/
amdsmi_status_t get_gpu_socket_id(uint32_t index, std::string& socketid);
amdsmi_status_t populate_amd_gpu_devices();
amdsmi_status_t populate_amd_cpus();
uint64_t init_flag_;
AMDSmiDrm drm_;
std::vector<AMDSmiSocket*> sockets_;
std::set<AMDSmiProcessor*> processors_; // Track valid processors
};
} // namespace amd::smi
#endif // AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
@@ -0,0 +1,201 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
#define AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
#include <dirent.h>
#include <limits>
#include <type_traits>
#include <string>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_gpu_device.h"
// Declares two locals in the caller's scope: `_pw`, an amd::smi::pthread_wrap
// built from *(MUTEX), and `_lock`, an amd::smi::ScopedPthread built from
// `_pw`. The enclosing function returns AMDSMI_STATUS_BUSY when
// _lock.mutex_not_acquired() is true.
//
// Because it declares named locals and may return, the macro is not wrapped
// in do { } while (0) and can be used at most once per scope, inside a
// function whose return type accepts AMDSMI_STATUS_BUSY.
// NOTE(review): `_lock` presumably releases the mutex when it goes out of
// scope, and the `true` argument presumably selects a non-blocking attempt -
// confirm in the ScopedPthread definition.
#define SMIGPUDEVICE_MUTEX(MUTEX) \
amd::smi::pthread_wrap _pw(*(MUTEX)); \
amd::smi::ScopedPthread _lock(_pw, true); \
if (_lock.mutex_not_acquired()) { \
return AMDSMI_STATUS_BUSY; \
}
// Declared with C linkage so the symbol name is not mangled.
// NOTE(review): presumably releases name/value pair storage previously handed
// out by the library - confirm the expected pointer type in the definition.
extern "C" {
void amdsmi_free_name_value_pairs(void *p);
}
/*
* Helper prototypes; the definitions are not part of this header.
*
* Most of these take an amd::smi::AMDSmiGPUDevice* and return an
* amdsmi_status_t. The exceptions visible here are
* smi_amdgpu_get_pcie_speed_from_pcie_type() and
* smi_clear_char_and_reinitialize(), which take no device, and
* smi_split_string() / smi_amdgpu_get_status_string(), which return a
* std::string by value.
*
* NOTE(review): the non-const pointer parameters (e.g. full_path, info, cap,
* enabled_blocks) look like output parameters filled in on success — confirm
* against the implementations, including whether null pointers are tolerated.
*/
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice* device, std::string* full_path);
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info);
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, uint32_t sensor_ind, int *cap);
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain, int *max_freq, int *min_freq, int *num_dpm, int *sleep_state_freq);
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks);
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device, uint32_t *num_pages, amdsmi_retired_page_record_t *info);
amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* device, uint32_t *threshold);
amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device);
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt);
amdsmi_status_t smi_amdgpu_get_driver_version(amd::smi::AMDSmiGPUDevice* device, int *length, char *version);
amdsmi_status_t smi_amdgpu_get_pcie_speed_from_pcie_type(uint16_t pcie_type, uint32_t *pcie_speed);
amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice* device, char *market_name);
amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDevice* device, bool *enabled);
std::string smi_split_string(std::string str, char delim);
std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus);
amdsmi_status_t smi_clear_char_and_reinitialize(char buffer[], uint32_t len,
std::string newString);
/**
* @brief Get the device index for a processor handle.
*
* @details Given a processor handle @p processor_handle and a pointer to a
* uint32_t @p device_index, the index of the matching device is stored in
* the location pointed to by @p device_index.
*
* @param[in] processor_handle Handle of the device to query
*
* @param[inout] device_index Pointer to a uint32_t in which the matching
* device index will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if the user provides a null pointer
* for device_index.
* ::AMDSMI_STATUS_API_FAILED is returned if the corresponding device
* index for the processor handle cannot be found.
*/
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
uint32_t* device_index);
/**
* @brief Get the total number of devices.
*
* @details Given a pointer to a uint32_t @p total_num_devices, the total
* number of devices is stored in the location it points to.
*
* @param[inout] total_num_devices Pointer to a uint32_t in which the total
* number of devices will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if the user provides a null pointer
* for total_num_devices.
*/
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices);
/**
* @brief Get the processor handle for a device index.
*
* @details Given a uint32_t @p device_index and a pointer to a processor
* handle @p processor_handle, the device index is used to find the processor
* handle of the device, which is then stored through the provided pointer.
*
* @param[in] device_index Index of the device whose processor handle is
* requested
*
* @param[inout] processor_handle Pointer to an amdsmi_processor_handle in
* which the corresponding processor handle will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if the user provides a null pointer
* for processor_handle.
* ::AMDSMI_STATUS_API_FAILED is returned if the device_index cannot
* be found.
*/
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
uint32_t device_index,
amdsmi_processor_handle *processor_handle);
/**
* @brief Read an int environment variable, or return a default if it does
* not exist.
*
* @details Calls getenv with @p name and returns the variable's value as an
* int. On any error, the default @p def is returned instead.
*
* @param[in] name a const char* containing the environment variable name
*
* @param[in] def default int to return in case of error
*
* @return The int value of the environment variable, or @p def if the
* variable does not exist or an error occurs.
*
* NOTE(review): the `_ms` suffix suggests the value is a duration in
* milliseconds — confirm against the implementation and its callers.
*/
int read_env_ms(const char* name, int def);
/**
 * @brief Constant that is always false but is spelled as a template, so its
 * value formally depends on a template argument.
 *
 * Used as the condition of `static_assert(is_dependent_false_v<T>, ...)` in
 * the helper templates of this header, so that the assertion is tied to the
 * template argument instead of being an unconditional `static_assert(false)`.
 */
template<typename Unused>
constexpr bool is_dependent_false_v{false};
/**
 * @brief True when T, after removing references and cv-qualifiers, is one of
 * std::uint8_t, std::uint16_t, std::uint32_t or std::uint64_t.
 *
 * @tparam T Type to classify.
 */
template<typename T>
inline constexpr bool is_supported_type_v = std::disjunction_v<
    std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, std::uint8_t>,
    std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, std::uint16_t>,
    std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, std::uint32_t>,
    std::is_same<std::remove_cv_t<std::remove_reference_t<T>>, std::uint64_t>>;
template<typename T>
constexpr T get_std_num_limit()
{
if constexpr (is_supported_type_v<T>) {
return std::numeric_limits<T>::max();
} else {
return std::numeric_limits<T>::min();
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
}
}
/**
 * @brief Tell whether @p value equals get_std_num_limit<T>().
 *
 * @tparam T    Type of the value; it is forwarded to get_std_num_limit<T>(),
 *              which only accepts the supported unsigned integer types.
 * @param value Value to compare against the limit.
 *
 * @return true if @p value compares equal to get_std_num_limit<T>(),
 *         false otherwise.
 */
template<typename T>
constexpr bool is_std_num_limit(T value)
{
    const auto limit = get_std_num_limit<T>();
    return value == limit;
}
/**
 * @brief Carry a "maximum value" marker from one unsigned type to another,
 * or otherwise return the target value converted to T.
 *
 * If @p source_value equals the maximum of its type U, the maximum of T is
 * returned. Otherwise @p target_value is returned, converted with
 * static_cast<T>. Note that @p source_value is only inspected for the
 * maximum check; the value that is converted is @p target_value.
 *
 * @tparam T Result type; must satisfy is_supported_type_v.
 * @tparam U Type of @p source_value; must satisfy is_supported_type_v.
 * @tparam V Type of @p target_value (defaults to T).
 *
 * @param source_value Value tested against the maximum of U.
 * @param target_value Value returned (as T) when @p source_value is not the
 *                     maximum of U.
 *
 * @return get_std_num_limit<T>() or static_cast<T>(target_value), as above.
 */
template<typename T, typename U, typename V = T>
constexpr T translate_umax_or_assign_value(U source_value, V target_value)
{
    static_assert(is_supported_type_v<T> && is_supported_type_v<U>,
                  "Error: Type not supported...");

    // If the source value is uint<U>::max(), then return is uint<T>::max()
    if (is_std_num_limit(source_value)) {
        return get_std_num_limit<T>();
    }
    return static_cast<T>(target_value);
}
/**
 * @brief Assign @p value to every element of a two-level container.
 *
 * @tparam A Outer range type; each of its elements must itself be a range
 *           usable with std::begin / std::end (e.g. a built-in 2-D array or
 *           a std::array of std::array).
 * @tparam T Type of the fill value; it must be assignable to the inner
 *           element type.
 *
 * @param arr   Container whose elements are overwritten in place.
 * @param value Value copied into every element.
 */
template<typename A, typename T>
void fill_2d_array(A& arr, T value) {
    const auto rows_end = std::end(arr);
    for (auto row_it = std::begin(arr); row_it != rows_end; ++row_it) {
        std::fill(std::begin(*row_it), std::end(*row_it), value);
    }
}
/**
* @brief Get the product serial number for a processor handle.
*
* @param[in] processor_handle Handle of the processor to query (passed by
* value; nothing is written back through it)
*
* @return The serial number, or 0 if it cannot be determined.
*/
uint64_t get_product_serial_number(amdsmi_processor_handle processor_handle);
#endif // AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
@@ -0,0 +1,41 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef GPUVSMI_UUID_H_
#define GPUVSMI_UUID_H_
/**
* \brief Generates uuid for device with specified parameters
*
* \param [out] str String buffer where to output generated uuid
* (NOTE(review): the required buffer size is not visible from this
* header — confirm against the implementation)
*
* \param [in] serial Asic serial
*
* \param [in] did Device ID
*
* \param [in] idx PF/VF index
*
* \return amdsmi_status_t indicating result.
*/
amdsmi_status_t amdsmi_uuid_gen(char *str, uint64_t serial, uint16_t did, uint8_t idx);
#endif

Some files were not shown because too many files have changed in this diff Show More