diff --git a/projects/rocm-smi-lib/.gitignore b/projects/rocm-smi-lib/.gitignore index 3e67867c25..4618c88520 100644 --- a/projects/rocm-smi-lib/.gitignore +++ b/projects/rocm-smi-lib/.gitignore @@ -3,12 +3,14 @@ # any tracked files which get ignored after the change. # VisualStudioCode +.venv/ .vscode/ +_build # below files are generated via CMake include/rocm_smi/rocm_smi64Config.h oam/include/oam/oamConfig.h -python_smi_tools/rsmiBindings.py +python_smi_tools/rsmiBindingsInit.py # Build directory build/ @@ -28,3 +30,6 @@ README.html !.clang-format !.clang-tidy !.clangd + +# ignore the copy of CHANGELOG.md that docs/conf.py creates at build time +docs/CHANGELOG.md \ No newline at end of file diff --git a/projects/rocm-smi-lib/docs/c++_tutorials.rst b/projects/rocm-smi-lib/docs/c++_tutorials.rst new file mode 100644 index 0000000000..8c8d82aacd --- /dev/null +++ b/projects/rocm-smi-lib/docs/c++_tutorials.rst @@ -0,0 +1,34 @@ +==================== +C++ Tutorials +==================== + +This chapter contains the ROCm SMI C++ API tutorials. + +.. code-block:: c++ + + #include <stdint.h> + #include "rocm_smi/rocm_smi.h" + int main() { + + rsmi_status_t ret; + uint32_t num_devices; + uint16_t dev_id; + + // We will skip return code checks for this example, but it + // is recommended to always check this as some calls may not + // apply for some devices or ROCm releases + + ret = rsmi_init(0); + ret = rsmi_num_monitor_devices(&num_devices); + + for (int i=0; i < num_devices; ++i) { + ret = rsmi_dev_id_get(i, &dev_id); + // dev_id holds the device ID of device i, upon a + // successful call + } + ret = rsmi_shut_down(); + return 0; + } + +For more examples please check the `C++ example `_ +or `tests. 
`_ diff --git a/projects/rocm-smi-lib/docs/c++_usage.md b/projects/rocm-smi-lib/docs/c++_usage.md new file mode 100644 index 0000000000..d1393125a3 --- /dev/null +++ b/projects/rocm-smi-lib/docs/c++_usage.md @@ -0,0 +1,2 @@ +```{include} ../README.md +``` \ No newline at end of file diff --git a/projects/rocm-smi-lib/docs/conf.py b/projects/rocm-smi-lib/docs/conf.py index a4c84ace61..b0a99742a0 100755 --- a/projects/rocm-smi-lib/docs/conf.py +++ b/projects/rocm-smi-lib/docs/conf.py @@ -5,9 +5,18 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html import re +import pathlib +import shutil +import sys from rocm_docs import ROCmDocs +# We need to add the location of the rocm_smi Python module to sys.path +# in order to build the documentation of that module +docs_dir_path = pathlib.Path(__file__).parent +python_dir_path = docs_dir_path.parent / 'python_smi_tools' +sys.path.append(str(python_dir_path)) + with open('../CMakeLists.txt', encoding='utf-8') as f: match = re.search(r'get_package_version_number\(\"?([0-9.]+)[^0-9.]+', f.read()) if not match: @@ -15,6 +24,8 @@ with open('../CMakeLists.txt', encoding='utf-8') as f: version_number = match[1] left_nav_title = f"ROCm SMI LIB {version_number} Documentation" +shutil.copy2('../CHANGELOG.md','./CHANGELOG.md') + # for PDF output on Read the Docs project = "ROCm SMI LIB Documentation" author = "Advanced Micro Devices, Inc." @@ -31,5 +42,9 @@ docs_core.setup() external_projects_current_project = "rocm_smi_lib" +suppress_warnings = ["etoc.toctree"] + for sphinx_var in ROCmDocs.SPHINX_VARS: globals()[sphinx_var] = getattr(docs_core, sphinx_var) + +extensions += ['sphinx.ext.mathjax'] \ No newline at end of file diff --git a/projects/rocm-smi-lib/docs/doxygen/Doxyfile b/projects/rocm-smi-lib/docs/doxygen/Doxyfile index 1d22ab9965..de59e8e333 100644 --- a/projects/rocm-smi-lib/docs/doxygen/Doxyfile +++ b/projects/rocm-smi-lib/docs/doxygen/Doxyfile @@ -81,17 +81,6 @@ OUTPUT_DIRECTORY = . 
CREATE_SUBDIRS = NO -# Controls the number of sub-directories that will be created when -# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every -# level increment doubles the number of directories, resulting in 4096 -# directories at level 8 which is the default and also the maximum value. The -# sub-directories are organized in 2 levels, the first level always has a fixed -# number of 16 directories. -# Minimum value: 0, maximum value: 8, default value: 8. -# This tag requires that the tag CREATE_SUBDIRS is set to YES. - -CREATE_SUBDIRS_LEVEL = 8 - # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -231,14 +220,6 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO -# By default Python docstrings are displayed as preformatted text and doxygen's -# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the -# doxygen's special commands can be used and the contents of the docstring -# documentation blocks is shown as doxygen documentation. -# The default value is: YES. - -PYTHON_DOCSTRING = YES - # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -464,19 +445,6 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 -# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use -# during processing. When set to 0 doxygen will based this on the number of -# cores available in the system. You can set it explicitly to a value larger -# than 0 to get more control over the balance between CPU load and processing -# speed. At this moment only the input processing can be done using multiple -# threads. 
Since this is still an experimental feature the default is set to 1, -# which effectively disables parallel processing. Please report any issues you -# encounter. Generating dot graphs in parallel is controlled by the -# DOT_NUM_THREADS setting. -# Minimum value: 0, maximum value: 32, default value: 1. - -NUM_PROC_THREADS = 1 - #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -540,12 +508,6 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO -# If this flag is set to YES, the name of an unnamed parameter in a declaration -# will be determined by the corresponding definition. By default unnamed -# parameters remain unnamed in the output. -# The default value is: YES. - -RESOLVE_UNNAMED_PARAMS = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these @@ -616,12 +578,6 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO -# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class -# will show which file needs to be included to use the class. -# The default value is: YES. - -SHOW_HEADERFILE = YES - # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -833,13 +789,6 @@ WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES -# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete -# function parameter documentation. If set to NO, doxygen will accept that some -# parameters have no documentation without warning. -# The default value is: YES. - -WARN_IF_INCOMPLETE_DOC = YES - # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. 
If set to NO, doxygen will only warn about wrong parameter @@ -850,14 +799,6 @@ WARN_IF_INCOMPLETE_DOC = YES WARN_NO_PARAMDOC = NO -# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about -# undocumented enumeration values. If set to NO, doxygen will accept -# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: NO. - -WARN_IF_UNDOC_ENUM_VAL = NO - # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but @@ -878,16 +819,6 @@ WARN_AS_ERROR = NO WARN_FORMAT = "$file:$line: $text" -# In the $text part of the WARN_FORMAT command it is possible that a reference -# to a more specific place is given. To make it easier to jump to this place -# (outside of doxygen) the user can define a custom "cut" / "paste" string. -# Example: -# WARN_LINE_FORMAT = "'vi $file +$line'" -# See also: WARN_FORMAT -# The default value is: at line $line of file $file. - -WARN_LINE_FORMAT = "at line $line of file $file" - # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). In case the file specified cannot be opened for writing the @@ -907,8 +838,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = ../../README.md \ - ../../include/rocm_smi/rocm_smi.h +INPUT = ../../include/rocm_smi/rocm_smi.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. 
Doxygen uses @@ -920,16 +850,6 @@ INPUT = ../../README.md \ INPUT_ENCODING = UTF-8 -# This tag can be used to specify the character encoding of the source files -# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify -# character encoding on a per file pattern basis. Doxygen will compare the file -# name with each pattern and apply the encoding instead of the default -# INPUT_ENCODING) if there is a match. The character encodings are a list of the -# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding -# "INPUT_ENCODING" for further information on supported encodings. - -INPUT_FILE_ENCODING = - # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -1077,16 +997,7 @@ FILTER_SOURCE_PATTERNS = # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. -USE_MDFILE_AS_MAINPAGE = ../../README.md - -# The Fortran standard specifies that for fixed formatted Fortran code all -# characters from position 72 are to be considered as comment. A common -# extension is to allow longer lines before the automatic comment starts. The -# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can -# be processed before the automatic comment starts. -# Minimum value: 7, maximum value: 10000, default value: 72. - -FORTRAN_COMMENT_AFTER = 72 +USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing @@ -1288,19 +1199,6 @@ HTML_EXTRA_STYLESHEET = ../_doxygen/extra_stylesheet.css HTML_EXTRA_FILES = -# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output -# should be rendered with a dark or light theme. 
-# Possible values are: LIGHT always generate light mode output, DARK always -# generate dark mode output, AUTO_LIGHT automatically set the mode according to -# the user preference, use light mode if no preference is set (the default), -# AUTO_DARK automatically set the mode according to the user preference, use -# dark mode if no preference is set and TOGGLE allow to user to switch between -# light and dark mode via a button. -# The default value is: AUTO_LIGHT. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE = AUTO_LIGHT - # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a color-wheel, see @@ -1395,13 +1293,6 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" -# This tag determines the URL of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDURL = - # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1601,18 +1492,6 @@ DISABLE_INDEX = NO GENERATE_TREEVIEW = NO -# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the -# FULL_SIDEBAR option determines if the side bar is limited to only the treeview -# area (value NO) or if it should extend to the full height of the window (value -# YES). Setting this to YES gives a layout similar to -# https://docs.readthedocs.io with more room for contents, but less room for the -# project logo, title, and description. If either GENERATE_TREEVIEW or -# DISABLE_INDEX is set to NO, this option has no effect. -# The default value is: NO. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -FULL_SIDEBAR = NO - # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1637,24 +1516,6 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO -# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email -# addresses. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -OBFUSCATE_EMAILS = YES - -# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg -# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see -# https://inkscape.org) to generate formulas as SVG images instead of PNGs for -# the HTML output. These images will generally look nicer at scaled resolutions. -# Possible values are: png (the default) and svg (looks nicer but requires the -# pdf2svg or inkscape tool). -# The default value is: png. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FORMULA_FORMAT = png - # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1681,17 +1542,6 @@ FORMULA_MACROFILE = USE_MATHJAX = NO -# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. -# Note that the different versions of MathJax have different requirements with -# regards to the different settings, so it is possible that also other MathJax -# settings have to be changed when switching between the different MathJax -# versions. -# Possible values are: MathJax_2 and MathJax_3. -# The default value is: MathJax_2. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_VERSION = MathJax_2 - # When MathJax is enabled you can set the default output format to be used for # the MathJax output. 
For more details about the output format see MathJax # version 2 (see: @@ -2379,36 +2229,6 @@ HAVE_DOT = NO DOT_NUM_THREADS = 0 -# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of -# subgraphs. When you want a differently looking font in the dot files that -# doxygen generates you can specify fontname, fontcolor and fontsize attributes. -# For details please see Node, -# Edge and Graph Attributes specification You need to make sure dot is able -# to find the font, which can be done by putting it in a standard location or by -# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the -# directory containing the font. Default graphviz fontsize is 14. -# The default value is: fontname=Helvetica,fontsize=10. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" - -# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can -# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about -# arrows shapes. -# The default value is: labelfontname=Helvetica,labelfontsize=10. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" - -# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes -# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification -# The default value is: shape=box,height=0.2,width=0.4. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" - # You can set the path where dot can find font specified with fontname in # DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2464,28 +2284,6 @@ UML_LOOK = NO UML_LIMIT_NUM_FIELDS = 10 -# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and -# methods without types and arguments in the UML graphs. 
If the DOT_UML_DETAILS -# tag is set to YES, doxygen will add type and arguments for attributes and -# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen -# will not generate fields with class member information in the UML graphs. The -# class diagrams will look similar to the default class diagrams but using UML -# notation for the relationships. -# Possible values are: NO, YES and NONE. -# The default value is: NO. -# This tag requires that the tag UML_LOOK is set to YES. - -DOT_UML_DETAILS = NO - -# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters -# to display on a single line. If the actual line length exceeds this threshold -# significantly it will wrapped across multiple lines. Some heuristics are apply -# to avoid ugly line breaks. -# Minimum value: 0, maximum value: 1000, default value: 17. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_WRAP_THRESHOLD = 17 - # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their # instances. @@ -2552,13 +2350,6 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES -# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels -# of child directories generated in directory dependency graphs by dot. -# Minimum value: 1, maximum value: 25, default value: 1. -# This tag requires that the tag DIRECTORY_GRAPH is set to YES. - -DIR_GRAPH_MAX_DEPTH = 1 - # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. 
For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: diff --git a/projects/rocm-smi-lib/docs/python_api.rst b/projects/rocm-smi-lib/docs/python_api.rst new file mode 100644 index 0000000000..604803b9c0 --- /dev/null +++ b/projects/rocm-smi-lib/docs/python_api.rst @@ -0,0 +1,269 @@ +==================== +Python API Reference +==================== + +This chapter describes the ROCm SMI Python module API. + +.. default-domain:: py +.. py:currentmodule:: rocm_smi + +Functions +--------- + +.. autofunction:: rocm_smi.driverInitialized + +.. autofunction:: rocm_smi.formatJson + +.. autofunction:: rocm_smi.formatCsv + +.. autofunction:: rocm_smi.formatMatrixToJSON + +.. autofunction:: rocm_smi.getBus + +.. autofunction:: rocm_smi.getFanSpeed + +.. autofunction:: rocm_smi.getGpuUse + +.. autofunction:: rocm_smi.getDRMDeviceId + +.. autofunction:: rocm_smi.getSubsystemId + +.. autofunction:: rocm_smi.getVendor + +.. autofunction:: rocm_smi.getGUID + +.. autofunction:: rocm_smi.getTargetGfxVersion + +.. autofunction:: rocm_smi.getNodeId + +.. autofunction:: rocm_smi.getDeviceName + +.. autofunction:: rocm_smi.getRev + +.. autofunction:: rocm_smi.getMaxPower + +.. autofunction:: rocm_smi.getMemInfo + +.. autofunction:: rocm_smi.getProcessName + +.. autofunction:: rocm_smi.getPerfLevel + +.. autofunction:: rocm_smi.getPid + +.. autofunction:: rocm_smi.getPidList + +.. autofunction:: rocm_smi.getPower + +.. autofunction:: rocm_smi.getRasEnablement + +.. autofunction:: rocm_smi.getTemp + +.. autofunction:: rocm_smi.findFirstAvailableTemp + +.. autofunction:: rocm_smi.getTemperatureLabel + +.. autofunction:: rocm_smi.getPowerLabel + +.. autofunction:: rocm_smi.getVbiosVersion + +.. autofunction:: rocm_smi.getVersion + +.. autofunction:: rocm_smi.getComputePartition + +.. autofunction:: rocm_smi.getMemoryPartition + +.. autofunction:: rocm_smi.print2DArray + +.. autofunction:: rocm_smi.printEmptyLine + +.. 
autofunction:: rocm_smi.printErrLog + +.. autofunction:: rocm_smi.printInfoLog + +.. autofunction:: rocm_smi.printEventList + +.. autofunction:: rocm_smi.printLog + +.. autofunction:: rocm_smi.printListLog + +.. autofunction:: rocm_smi.printLogSpacer + +.. autofunction:: rocm_smi.printSysLog + +.. autofunction:: rocm_smi.printTableLog + +.. autofunction:: rocm_smi.printTableRow + +.. autofunction:: rocm_smi.checkIfSecondaryDie + +.. autofunction:: rocm_smi.resetClocks + +.. autofunction:: rocm_smi.resetFans + +.. autofunction:: rocm_smi.resetPowerOverDrive + +.. autofunction:: rocm_smi.resetProfile + +.. autofunction:: rocm_smi.resetXgmiErr + +.. autofunction:: rocm_smi.resetPerfDeterminism + +.. autofunction:: rocm_smi.resetComputePartition + +.. autofunction:: rocm_smi.resetMemoryPartition + +.. autofunction:: rocm_smi.setClockRange + +.. autofunction:: rocm_smi.setClockExtremum + +.. autofunction:: rocm_smi.setVoltageCurve + +.. autofunction:: rocm_smi.setPowerPlayTableLevel + +.. autofunction:: rocm_smi.setClockOverDrive + +.. autofunction:: rocm_smi.setClocks + +.. autofunction:: rocm_smi.setPerfDeterminism + +.. autofunction:: rocm_smi.resetGpu + +.. autofunction:: rocm_smi.isRasControlAvailable + +.. autofunction:: rocm_smi.setRas + +.. autofunction:: rocm_smi.setFanSpeed + +.. autofunction:: rocm_smi.setPerformanceLevel + +.. autofunction:: rocm_smi.setPowerOverDrive + +.. autofunction:: rocm_smi.setProfile + +.. autofunction:: rocm_smi.setComputePartition + +.. autofunction:: rocm_smi.progressbar + +.. autofunction:: rocm_smi.showProgressbar + +.. autofunction:: rocm_smi.setMemoryPartition + +.. autofunction:: rocm_smi.showVersion + +.. autofunction:: rocm_smi.showAllConcise + +.. autofunction:: rocm_smi.showAllConciseHw + +.. autofunction:: rocm_smi.showBus + +.. autofunction:: rocm_smi.showClocks + +.. autofunction:: rocm_smi.showCurrentClocks + +.. autofunction:: rocm_smi.showCurrentFans + +.. autofunction:: rocm_smi.showCurrentTemps + +.. 
autofunction:: rocm_smi.showFwInfo + +.. autofunction:: rocm_smi.showGpusByPid + +.. autofunction:: rocm_smi.getCoarseGrainUtil + +.. autofunction:: rocm_smi.showGpuUse + +.. autofunction:: rocm_smi.showEnergy + +.. autofunction:: rocm_smi.showId + +.. autofunction:: rocm_smi.showMaxPower + +.. autofunction:: rocm_smi.showMemInfo + +.. autofunction:: rocm_smi.showMemUse + +.. autofunction:: rocm_smi.showMemVendor + +.. autofunction:: rocm_smi.showOverDrive + +.. autofunction:: rocm_smi.showPcieBw + +.. autofunction:: rocm_smi.showPcieReplayCount + +.. autofunction:: rocm_smi.showPerformanceLevel + +.. autofunction:: rocm_smi.showPids + +.. autofunction:: rocm_smi.showPower + +.. autofunction:: rocm_smi.showPowerPlayTable + +.. autofunction:: rocm_smi.showProduct + +.. autofunction:: rocm_smi.showProfile + +.. autofunction:: rocm_smi.showRange + +.. autofunction:: rocm_smi.showRasInfo + +.. autofunction:: rocm_smi.showRetiredPages + +.. autofunction:: rocm_smi.showSerialNumber + +.. autofunction:: rocm_smi.showUId + +.. autofunction:: rocm_smi.showVbiosVersion + +.. autofunction:: rocm_smi.showEvents + +.. autofunction:: rocm_smi.showDriverVersion + +.. autofunction:: rocm_smi.showVoltage + +.. autofunction:: rocm_smi.showVoltageCurve + +.. autofunction:: rocm_smi.showXgmiErr + +.. autofunction:: rocm_smi.showAccessibleTopology + +.. autofunction:: rocm_smi.showWeightTopology + +.. autofunction:: rocm_smi.showHopsTopology + +.. autofunction:: rocm_smi.showTypeTopology + +.. autofunction:: rocm_smi.showNumaTopology + +.. autofunction:: rocm_smi.showHwTopology + +.. autofunction:: rocm_smi.showNodesBw + +.. autofunction:: rocm_smi.showComputePartition + +.. autofunction:: rocm_smi.showMemoryPartition + +.. autofunction:: rocm_smi.checkAmdGpus + +.. autofunction:: rocm_smi.component_str + +.. autofunction:: rocm_smi.confirmOutOfSpecWarning + +.. autofunction:: rocm_smi.doesDeviceExist + +.. autofunction:: rocm_smi.initializeRsmi + +.. 
autofunction:: rocm_smi.isAmdDevice + +.. autofunction:: rocm_smi.listDevices + +.. autofunction:: rocm_smi.load + +.. autofunction:: rocm_smi.padHexValue + +.. autofunction:: rocm_smi.profileString + +.. autofunction:: rocm_smi.relaunchAsSudo + +.. autofunction:: rocm_smi.rsmi_ret_ok + +.. autofunction:: rocm_smi.save diff --git a/projects/rocm-smi-lib/docs/python_tutorials.rst b/projects/rocm-smi-lib/docs/python_tutorials.rst new file mode 100644 index 0000000000..78a4a43db2 --- /dev/null +++ b/projects/rocm-smi-lib/docs/python_tutorials.rst @@ -0,0 +1,29 @@ +==================== +Python Tutorials +==================== + +This chapter contains the ROCm SMI Python API tutorials. + +.. code-block:: python + + import sys + sys.path.append("/opt/rocm/libexec/rocm_smi/") + try: + import rocm_smi + except ImportError: + raise ImportError("Could not import /opt/rocm/libexec/rocm_smi/rocm_smi.py") + + class prof_utils: + def __init__(self, mode) -> None: + rocm_smi.initializeRsmi() + + def getPower(self, device): + return rocm_smi.getPower(device) + + def listDevices(self): + return rocm_smi.listDevices() + + def getMemInfo(self, device): + (memUsed, memTotal) = rocm_smi.getMemInfo(device, "vram") + return round(float(memUsed)/float(memTotal) * 100, 2) + diff --git a/projects/rocm-smi-lib/docs/python_usage.md b/projects/rocm-smi-lib/docs/python_usage.md new file mode 100644 index 0000000000..ff73ae5a48 --- /dev/null +++ b/projects/rocm-smi-lib/docs/python_usage.md @@ -0,0 +1,2 @@ +```{include} ../python_smi_tools/README.md +``` \ No newline at end of file diff --git a/projects/rocm-smi-lib/docs/sphinx/_toc.yml.in b/projects/rocm-smi-lib/docs/sphinx/_toc.yml.in index 94ade1c5b7..45a4096a6f 100644 --- a/projects/rocm-smi-lib/docs/sphinx/_toc.yml.in +++ b/projects/rocm-smi-lib/docs/sphinx/_toc.yml.in @@ -5,9 +5,27 @@ defaults: maxdepth: 6 root: index subtrees: -- caption: API +- caption: Tutorials entries: - - file: doxygen/html/index + - file: c++_tutorials + title: C++ 
Tutorials + - file: python_tutorials + title: Python Tutorials +- caption: How to Guide + entries: + - file: c++_usage + title: C++ How to Guide + - file: python_usage + title: Python How to Guide +- caption: Reference + entries: + - file: doxygen/html/index + title: C++ Reference + - file: python_api + title: Python Reference - caption: About entries: - file: license + title: License + - file: CHANGELOG + title: Changelog \ No newline at end of file diff --git a/projects/rocm-smi-lib/docs/sphinx/requirements.in b/projects/rocm-smi-lib/docs/sphinx/requirements.in index e290475dc5..f55321431c 100644 --- a/projects/rocm-smi-lib/docs/sphinx/requirements.in +++ b/projects/rocm-smi-lib/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]>=0.30.3 +rocm-docs-core[api_reference]>=0.31.0 diff --git a/projects/rocm-smi-lib/docs/sphinx/requirements.txt b/projects/rocm-smi-lib/docs/sphinx/requirements.txt index 4fecda2643..e8ac65f0ad 100644 --- a/projects/rocm-smi-lib/docs/sphinx/requirements.txt +++ b/projects/rocm-smi-lib/docs/sphinx/requirements.txt @@ -41,7 +41,7 @@ docutils==0.16 # myst-parser # pydata-sphinx-theme # sphinx -doxysphinx==3.3.4 +doxysphinx==3.3.7 # via rocm-docs-core fastjsonschema==2.18.0 # via rocm-docs-core @@ -116,7 +116,7 @@ requests==2.28.2 # via # pygithub # sphinx -rocm-docs-core[api_reference]>=0.30.3 +rocm-docs-core[api_reference]>=0.31.0 # via -r requirements.in smmap==5.0.0 # via gitdb diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 120669d7cf..d58cf14408 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -815,7 +815,7 @@ typedef rsmi_pcie_bandwidth_t rsmi_pcie_bandwidth; */ typedef struct { /* Utilization */ - uint16_t average_gfx_activity; + uint16_t average_gfx_activity; //!< Average graphics activity uint16_t average_umc_activity; //!< memory controller uint16_t average_mm_activity; 
//!< UVD or VCN } rsmi_activity_metric_counter_t; @@ -1616,7 +1616,7 @@ rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id); * * @param[in] dv_ind a device index * - * @param[inout] revision a pointer to uint32_t to which the XGMI physical id + * @param[inout] id a pointer to uint32_t to which the XGMI physical id * will be written * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. @@ -1634,7 +1634,7 @@ rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id); * * @param[in] dv_ind a device index * - * @param[inout] gpu_id a pointer to uint64_t to which the KFD gpu id will be + * @param[inout] guid a pointer to uint64_t to which the KFD gpu id will be * written. If the @p guid parameter is nullptr, this function will return * ::RSMI_STATUS_INVALID_ARGS. If the GPU ID is not supported with * the device index queried, gpu_id will return MAX UINT64 value an @@ -4541,6 +4541,7 @@ rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t* xcd_counter_value); rsmi_status_t rsmi_dev_metrics_log_get(uint32_t dv_ind); +/** @} */ // end of DevMetricsHeaderInfoGet #ifdef __cplusplus } diff --git a/projects/rocm-smi-lib/python_smi_tools/README.md b/projects/rocm-smi-lib/python_smi_tools/README.md index d9cc2a2a6b..a567302c73 100644 --- a/projects/rocm-smi-lib/python_smi_tools/README.md +++ b/projects/rocm-smi-lib/python_smi_tools/README.md @@ -1,4 +1,4 @@ -## Radeon Open Compute (ROCm) - System Management Interface - Command Line Tool +# Radeon Open Compute (ROCm) - System Management Interface - Command Line Tool This tool acts as a command line interface for manipulating and monitoring the amdgpu kernel, and is intended to replace diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index 97936951a4..53bf297c28 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -67,7 +67,6 @@ 
validClockNames = clk_type_names[1:-2] validClockNames.append('pcie') validClockNames.sort() - def driverInitialized(): """ Returns true if amdgpu is found in the list of initialized modules """ @@ -84,8 +83,8 @@ def driverInitialized(): def formatJson(device, log): """ Print out in JSON format - @param device: DRM device identifier - @param log: String to parse and output into JSON format + :param device: DRM device identifier + :param log: String to parse and output into JSON format """ global JSON_DATA for line in log.splitlines(): @@ -155,13 +154,19 @@ def formatCsv(deviceList): def formatMatrixToJSON(deviceList, matrix, metricName): """ Format symmetric matrix of GPU permutations to become JSON print-ready. - @param deviceList: List of DRM devices (can be a single-item list) - @param metricName: Title of the item to print to the log - @param matrix: symmetric matrix full of values of every permutation of DRM devices. - example: - GPU0 GPU1 - GPU0 0 40 - GPU1 40 0 + :param deviceList: List of DRM devices (can be a single-item list) + :param metricName: Title of the item to print to the log + :param matrix: symmetric matrix full of values of every permutation of DRM devices. + + Matrix example: + + .. math:: + + \\begin{bmatrix} + & GPU0 & GPU1 \\\\ + GPU0 & 0 & 40 \\\\ + GPU1 & 40 & 0 + \\end{bmatrix} Where matrix content is: [[0, 40], [40, 0]] """ @@ -180,9 +185,9 @@ def formatMatrixToJSON(deviceList, matrix, metricName): def getBus(device, silent=False): """ Return the bus identifier of a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. 
""" bdfid = c_uint64(0) ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid)) @@ -203,9 +208,9 @@ def getFanSpeed(device, silent=True): or (None,None) if either current fan speed or max fan speed cannot be obtained - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is on. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is on. """ fanLevel = c_int64() fanMax = c_int64() @@ -243,9 +248,9 @@ def getFanSpeed(device, silent=True): def getGpuUse(device, silent=False): """ Return the current GPU usage as a percentage - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ percent = c_uint32() ret = rocmsmi.rsmi_dev_busy_percent_get(device, byref(percent)) @@ -257,9 +262,9 @@ def getGpuUse(device, silent=False): def getDRMDeviceId(device, silent=False): """ Return the hexadecimal value of a device's ID - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ dv_id = c_short() ret = rocmsmi.rsmi_dev_id_get(device, byref(dv_id)) @@ -272,9 +277,9 @@ def getDRMDeviceId(device, silent=False): def getRev(device, silent=False): """ Return the hexadecimal value of a device's Revision - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. 
""" dv_rev = c_short() ret = rocmsmi.rsmi_dev_revision_get(device, byref(dv_rev)) @@ -286,9 +291,9 @@ def getRev(device, silent=False): def getSubsystemId(device, silent=False): """ Return the a device's subsystem id - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ model = create_string_buffer(MAX_BUFF_SIZE) ret = rocmsmi.rsmi_dev_subsystem_name_get(device, model, MAX_BUFF_SIZE) @@ -302,9 +307,9 @@ def getSubsystemId(device, silent=False): def getVendor(device, silent=False): """ Return the a device's vendor id - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ vendor = create_string_buffer(MAX_BUFF_SIZE) device_vendor = "N/A" @@ -319,9 +324,9 @@ def getGUID(device, silent=False): """ Return the uint64 value of device's GUID, also referred as GPU ID - reported by KFD. - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ guid = c_uint64() ret = rocmsmi.rsmi_dev_guid_get(device, byref(guid)) @@ -334,9 +339,9 @@ def getTargetGfxVersion(device, silent=False): """ Return the uint64 value of device's target graphics version as reported by KFD - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). 
Default is off. """ gfx_version = c_uint64() gfx_ver_ret = "N/A" @@ -349,9 +354,9 @@ def getNodeId(device, silent=False): """ Return the uint32 value of device's node id reported by KFD. - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ node_id = c_uint32() ret = rocmsmi.rsmi_dev_node_id_get(device, byref(node_id)) @@ -362,11 +367,11 @@ def getNodeId(device, silent=False): def getDeviceName(device, silent=False): """ Return the uint64 value of device's target - graphics version as reported by KFD + graphics version as reported by KFD - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ # Retrieve the device series series = create_string_buffer(MAX_BUFF_SIZE) @@ -379,9 +384,9 @@ def getDeviceName(device, silent=False): def getMaxPower(device, silent=False): """ Return the maximum power cap of a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. 
""" power_cap = c_uint64() ret = rocmsmi.rsmi_dev_power_cap_get(device, 0, byref(power_cap)) @@ -392,14 +397,14 @@ def getMaxPower(device, silent=False): def getMemInfo(device, memType, silent=False): """ Returns a tuple of (memory_used, memory_total) of - the requested memory type usage for the device specified + the requested memory type usage for the device specified - @param device: DRM device identifier - @param type: [vram|vis_vram|gtt] Memory type to return - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off, - which exposes any issue accessing the different - memory types. + :param device: DRM device identifier + :param type: [vram|vis_vram|gtt] Memory type to return + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off, + which exposes any issue accessing the different + memory types. """ memType = memType.upper() if memType not in memory_type_l: @@ -424,7 +429,7 @@ def getMemInfo(device, memType, silent=False): def getProcessName(pid): """ Get the process name of a specific pid - @param pid: Process ID of a program to be parsed + :param pid: Process ID of a program to be parsed """ if int(pid) < 1: logging.debug('PID must be greater than 0') @@ -449,9 +454,9 @@ def getProcessName(pid): def getPerfLevel(device, silent=False): """ Return the current performance level of a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. 
""" perf = rsmi_dev_perf_level_t() ret = rocmsmi.rsmi_dev_perf_level_get(device, byref(perf)) @@ -463,7 +468,7 @@ def getPerfLevel(device, silent=False): def getPid(name): """ Get the process id of a specific application - @param name: Process name of a program to be parsed + :param name: Process name of a program to be parsed """ return check_output(['pidof', name]) @@ -485,17 +490,18 @@ def getPidList(): def getPower(device): """ Return dictionary of power responses. + Response power dictionary: + + .. code-block:: python + + { + 'power': string wattage response or 'N/A' (for not RSMI_STATUS_SUCCESS), + 'power_type': power type string - 'Current Socket' or 'Average', + 'unit': W (Watt) + 'ret': response of rsmi_dev_power_get(device, byref(power), byref(power_type)) + } - @param device: DRM device identifier - - Response power dictionary: - { - 'power': string wattage response or 'N/A' (for not RSMI_STATUS_SUCCESS), - 'power_type': power type string - 'Current Socket' or 'Average', - 'unit': W (Watt) - 'ret': response of rsmi_dev_power_get(device, byref(power), byref(power_type)) - } - + :param device: DRM device identifier """ power = c_int64(0) @@ -522,10 +528,10 @@ def getPower(device): def getRasEnablement(device, block, silent=True): """ Return RAS enablement state for a given device - @param device: DRM device identifier - @param block: RAS block identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is on. + :param device: DRM device identifier + :param block: RAS block identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is on. 
""" state = rsmi_ras_err_state_t() ret = rocmsmi.rsmi_dev_ecc_status_get(device, rsmi_gpu_block_d[block], byref(state)) @@ -538,10 +544,10 @@ def getRasEnablement(device, block, silent=True): def getTemp(device, sensor, silent=True): """ Display the current temperature from a given device's sensor - @param device: DRM device identifier - @param sensor: Temperature sensor identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is on. + :param device: DRM device identifier + :param sensor: Temperature sensor identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is on. """ temp = c_int64(0) metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT @@ -553,8 +559,9 @@ def getTemp(device, sensor, silent=True): def findFirstAvailableTemp(device): """ Discovers the first available device temperature to display - Returns a tuple of (temp_type, temp_value) for the device specified - @param device: DRM device identifier + Returns a tuple of (temp_type, temp_value) for the device specified + + :param device: DRM device identifier """ temp = c_int64(0) metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT @@ -572,9 +579,9 @@ def findFirstAvailableTemp(device): def getTemperatureLabel(deviceList): """ Discovers the the first identified power label + Returns a string label value - Returns a string label value - @param device: DRM device identifier + :param device: DRM device identifier """ # Default label is Edge tempLabel = temp_type_lst[0].lower() @@ -587,8 +594,9 @@ def getTemperatureLabel(deviceList): def getPowerLabel(deviceList): """ Discovers the the first identified power label - Returns a string label value - @param device: DRM device identifier + Returns a string label value + + :param device: DRM device identifier """ power = c_int64(0) # Default label is AvgPower @@ -605,9 +613,9 @@ def getPowerLabel(deviceList): def getVbiosVersion(device, silent=False): """ Returns the VBIOS 
version for a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ vbios = create_string_buffer(256) ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256) @@ -622,10 +630,10 @@ def getVbiosVersion(device, silent=False): def getVersion(deviceList, component, silent=False): """ Return the software version for the specified component - @param deviceList: List of DRM devices (can be a single-item list) - @param component: Component (currently only driver) - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. + :param deviceList: List of DRM devices (can be a single-item list) + :param component: Component (currently only driver) + :param silent: Turn on to silence error output + (you plan to handle manually). Default is off. """ ver_str = create_string_buffer(256) ret = rocmsmi.rsmi_version_str_get(component, ver_str, 256) @@ -637,9 +645,9 @@ def getVersion(deviceList, component, silent=False): def getComputePartition(device, silent=True): """ Return the current compute partition of a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is on. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is on. 
""" currentComputePartition = create_string_buffer(MAX_BUFF_SIZE) ret = rocmsmi.rsmi_dev_compute_partition_get(device, currentComputePartition, MAX_BUFF_SIZE) @@ -651,9 +659,9 @@ def getComputePartition(device, silent=True): def getMemoryPartition(device, silent=True): """ Return the current memory partition of a given device - @param device: DRM device identifier - @param silent=Turn on to silence error output - (you plan to handle manually). Default is on. + :param device: DRM device identifier + :param silent: Turn on to silence error output + (you plan to handle manually). Default is on. """ currentMemoryPartition = create_string_buffer(MAX_BUFF_SIZE) ret = rocmsmi.rsmi_dev_memory_partition_get(device, currentMemoryPartition, MAX_BUFF_SIZE) @@ -703,8 +711,8 @@ def printEmptyLine(): def printErrLog(device, err): """ Print out an error to the SMI log - @param device: DRM device identifier - @param err: Error string to print + :param device: DRM device identifier + :param err: Error string to print """ global PRINT_JSON devName = device @@ -719,9 +727,9 @@ def printErrLog(device, err): def printInfoLog(device, metricName, value): """ Print out an info line to the SMI log - @param device: DRM device identifier - @param metricName: Title of the item to print to the log - @param value: The item's value to print to the log + :param device: DRM device identifier + :param metricName: Title of the item to print to the log + :param value: The item's value to print to the log """ global PRINT_JSON @@ -739,9 +747,9 @@ def printInfoLog(device, metricName, value): def printEventList(device, delay, eventList): """ Print out notification events for a specified device - @param device: DRM device identifier - @param delay: Notification delay in ms - @param eventList: List of event type names (can be a single-item list) + :param device: DRM device identifier + :param delay: Notification delay in ms + :param eventList: List of event type names (can be a single-item list) """ mask = 
0 ret = rocmsmi.rsmi_event_notification_init(device) @@ -765,9 +773,9 @@ def printEventList(device, delay, eventList): def printLog(device, metricName, value=None, extraSpace=False, useItalics=False): """ Print out to the SMI log - @param device: DRM device identifier - @param metricName: Title of the item to print to the log - @param value: The item's value to print to the log + :param device: DRM device identifier + :param metricName: Title of the item to print to the log + :param value: The item's value to print to the log """ red = '\033[91m' green = '\033[92m' @@ -815,8 +823,8 @@ def printLog(device, metricName, value=None, extraSpace=False, useItalics=False) def printListLog(metricName, valuesList): """ Print out to the SMI log for the lists - @param metricName: Title of the item to print to the log - @param valuesList: The item's list of values to print to the log + :param metricName: Title of the item to print to the log + :param valuesList: The item's list of values to print to the log """ global PRINT_JSON listStr = '' @@ -838,13 +846,13 @@ def printListLog(metricName, valuesList): def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0): """ Prints [name of the option]/[name of the program] in the spacer to explain data below - If no parameters are given, a default fill of the '=' string is used in the spacer + If no parameters are given, a default fill of the '=' string is used in the spacer - @param displayString: name of item to be displayed inside of the log spacer - @param fill: padding string which surrounds the given display string - @param contentSizeToFit: providing an integer > 0 allows - ability to dynamically change output padding/fill based on this value - instead of appWidth. Handy for concise info output. 
+ :param displayString: name of item to be displayed inside of the log spacer + :param fill: padding string which surrounds the given display string + :param contentSizeToFit: providing an integer > 0 allows + ability to dynamically change output padding/fill based on this value + instead of appWidth. Handy for concise info output. """ global appWidth, PRINT_JSON resizeValue = appWidth @@ -870,8 +878,8 @@ def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0): def printSysLog(SysComponentName, value): """ Print out to the SMI log for repeated features - @param SysComponentName: Title of the item to print to the log - @param value: The item's value to print to the log + :param SysComponentName: Title of the item to print to the log + :param value: The item's value to print to the log """ global PRINT_JSON, JSON_DATA if PRINT_JSON: @@ -888,12 +896,12 @@ def printSysLog(SysComponentName, value): def printTableLog(column_headers, data_matrix, device=None, tableName=None, anchor='>', v_delim=' '): """ Print out to the SMI log for the lists - @param column_headers: Header names for each column - @param data_matrix: Matrix of values - @param device: DRM device identifier - @param tableName: Title of the table to print to the log - @param anchor: Alignment direction of the print output - @param v_delim: Boundary String delimiter for the print output + :param column_headers: Header names for each column + :param data_matrix: Matrix of values + :param device: DRM device identifier + :param tableName: Title of the table to print to the log + :param anchor: Alignment direction of the print output + :param v_delim: Boundary String delimiter for the print output """ # Usage: the length of col_Names would be determining column width. 
# If additional space is needed, please pad corresponding column name with spaces @@ -925,9 +933,9 @@ def printTableLog(column_headers, data_matrix, device=None, tableName=None, anch def printTableRow(space, displayString, v_delim=" "): """ Print out a line of a matrix table - @param space: The item's spacing to print - @param displayString: The item's value to print - @param v_delim: Boundary String delimiter for the print output + :param space: The item's spacing to print + :param displayString: The item's value to print + :param v_delim: Boundary String delimiter for the print output """ if space: print(space % (displayString), end=v_delim) @@ -940,7 +948,7 @@ def checkIfSecondaryDie(device): MI200 device specific feature check. The secondary dies lacks power management features. - @param device: The device to check + :param device: The device to check """ energy_count = c_uint64() counter_resoution = c_float() @@ -959,7 +967,7 @@ def resetClocks(deviceList): Reset clocks to default values by setting performance level to auto, as well as setting OverDrive back to 0 - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Reset Clocks ') for device in deviceList: @@ -983,7 +991,7 @@ def resetClocks(deviceList): def resetFans(deviceList): """ Reset fans to driver control for a list of devices. 
- @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Reset GPU Fan Speed ') for device in deviceList: @@ -999,7 +1007,7 @@ def resetFans(deviceList): def resetPowerOverDrive(deviceList, autoRespond): """ Reset Power OverDrive to the default value - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ setPowerOverDrive(deviceList, 0, autoRespond) @@ -1007,7 +1015,7 @@ def resetPowerOverDrive(deviceList, autoRespond): def resetProfile(deviceList): """ Reset profile for a list of a devices. - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Reset Profile ') for device in deviceList: @@ -1027,7 +1035,7 @@ def resetProfile(deviceList): def resetXgmiErr(deviceList): """ Reset the XGMI Error value - @param deviceList: Reset XGMI error count for these devices + :param deviceList: Reset XGMI error count for these devices """ printLogSpacer('Reset XGMI Error Status ') for device in deviceList: @@ -1042,7 +1050,7 @@ def resetXgmiErr(deviceList): def resetPerfDeterminism(deviceList): """ Reset Performance Determinism - @param deviceList: Disable Performance Determinism for these devices + :param deviceList: Disable Performance Determinism for these devices """ printLogSpacer('Disable Performance Determinism') for device in deviceList: @@ -1057,7 +1065,7 @@ def resetPerfDeterminism(deviceList): def resetComputePartition(deviceList): """ Reset Compute Partition to its boot state - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(" Reset compute partition to its boot state ") for device in deviceList: @@ -1084,7 +1092,7 @@ def resetComputePartition(deviceList): def 
resetMemoryPartition(deviceList): """ Reset current memory partition to its boot state - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(" Reset memory partition to its boot state ") for device in deviceList: @@ -1123,12 +1131,11 @@ def resetMemoryPartition(deviceList): def setClockRange(deviceList, clkType, minvalue, maxvalue, autoRespond): """ Set the range for the specified clktype in the PowerPlay table for a list of devices. - Parameters: - deviceList -- List of DRM devices (can be a single-item list) - clktype -- [sclk|mclk] Which clock type to apply the range to - minvalue -- Minimum value to apply to the clock range - maxvalue -- Maximum value to apply to the clock range - autoRespond -- Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param clktype: [sclk|mclk] Which clock type to apply the range to + :param minvalue: Minimum value to apply to the clock range + :param maxvalue: Maximum value to apply to the clock range + :param autoRespond: Response to automatically provide for all prompts """ global RETCODE if clkType not in {'sclk', 'mclk'}: @@ -1158,12 +1165,11 @@ def setClockRange(deviceList, clkType, minvalue, maxvalue, autoRespond): def setClockExtremum(deviceList, level, clkType, clkValue, autoRespond): """ Set the range for the specified clktype in the PowerPlay table for a list of devices. 
- Parameters: - deviceList -- List of DRM devices (can be a single-item list) - level -- [min|max] Minimum value or Maximum value - clktype -- [sclk|mclk] Which clock type to apply the range to - clkValue -- clock value to apply to the level - autoRespond -- Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param level: [min|max] Minimum value or Maximum value + :param clktype: [sclk|mclk] Which clock type to apply the range to + :param clkValue: clock value to apply to the level + :param autoRespond: Response to automatically provide for all prompts """ global RETCODE if level not in {'min', 'max'}: @@ -1204,12 +1210,11 @@ def setClockExtremum(deviceList, level, clkType, clkValue, autoRespond): def setVoltageCurve(deviceList, point, clk, volt, autoRespond): """ Set voltage curve for a point in the PowerPlay table for a list of devices. - Parameters: - deviceList -- List of DRM devices (can be a single-item list) - point -- Point on the voltage curve to modify - clk -- Clock speed specified for this curve point - volt -- Voltage specified for this curve point - autoRespond -- Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param point: Point on the voltage curve to modify + :param clk: Clock speed specified for this curve point + :param volt: Voltage specified for this curve point + :param autoRespond: Response to automatically provide for all prompts """ global RETCODE value = '%s %s %s' % (point, clk, volt) @@ -1233,13 +1238,12 @@ def setVoltageCurve(deviceList, point, clk, volt, autoRespond): def setPowerPlayTableLevel(deviceList, clkType, point, clk, volt, autoRespond): """ Set clock frequency and voltage for a level in the PowerPlay table for a list of devices. 
- Parameters: - deviceList -- List of DRM devices (can be a single-item list) - clktype -- [sclk|mclk] Which clock type to apply the range to - point -- Point on the voltage curve to modify - clk -- Clock speed specified for this curve point - volt -- Voltage specified for this curve point - autoRespond -- Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param clktype: [sclk|mclk] Which clock type to apply the range to + :param point: Point on the voltage curve to modify + :param clk: Clock speed specified for this curve point + :param volt: Voltage specified for this curve point + :param autoRespond: Response to automatically provide for all prompts """ global RETCODE value = '%s %s %s' % (point, clk, volt) @@ -1278,10 +1282,10 @@ def setPowerPlayTableLevel(deviceList, clkType, point, clk, volt, autoRespond): def setClockOverDrive(deviceList, clktype, value, autoRespond): """ Set clock speed to OverDrive for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param type: [sclk|mclk] Clock type to set - @param value: [0-20] OverDrive percentage - @param autoRespond: Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param type: [sclk|mclk] Clock type to set + :param value: [0-20] OverDrive percentage + :param autoRespond: Response to automatically provide for all prompts """ printLogSpacer(' Set Clock OverDrive (Range: 0% to 20%) ') global RETCODE @@ -1341,9 +1345,9 @@ def setClockOverDrive(deviceList, clktype, value, autoRespond): def setClocks(deviceList, clktype, clk): """ Set clock frequency levels for a list of devices. 
- @param deviceList: List of DRM devices (can be a single-item list) - @param clktype: [validClockNames] Clock type to set - @param clk: Clock frequency level to set + :param deviceList: List of DRM devices (can be a single-item list) + :param clktype: [validClockNames] Clock type to set + :param clk: Clock frequency level to set """ global RETCODE if not clk: @@ -1432,8 +1436,8 @@ def setPerfDeterminism(deviceList, clkvalue): """ Set clock frequency level for a list of devices to enable performance determinism. - @param deviceList: List of DRM devices (can be a single-item list) - @param value: Clock frequency level to set + :param deviceList: List of DRM devices (can be a single-item list) + :param value: Clock frequency level to set """ global RETCODE try: @@ -1455,7 +1459,7 @@ def setPerfDeterminism(deviceList, clkvalue): def resetGpu(device): """ Perform a GPU reset on the specified device - @param device: DRM device identifier + :param device: DRM device identifier """ printLogSpacer(' Reset GPU ') global RETCODE @@ -1480,9 +1484,7 @@ def resetGpu(device): def isRasControlAvailable(device): """ Check if RAS control is available for a specified device. 
- Parameters: - device -- DRM device identifier - + :param device: DRM device identifier """ path = os.path.join('/sys/kernel/debug/dri', 'card%d' % device, 'device', 'ras_ctrl') @@ -1497,13 +1499,11 @@ def isRasControlAvailable(device): def setRas(deviceList, rasAction, rasBlock, rasType): """ Perform a RAS action on the devices - Parameters: - deviceList -- List of DRM devices (can be a single-item list) - rasAction -- [enable|disable|inject] RAS Action to perform - rasBlock -- [$validRasBlocks] RAS block - rasType -- [ce|ue] Error type to enable/disable - + :param deviceList: List of DRM devices (can be a single-item list) + :param rasAction: [enable|disable|inject] RAS Action to perform + :param rasBlock: [$validRasBlocks] RAS block + :param rasType: [ce|ue] Error type to enable/disable """ global RETCODE printLog(None, "This is experimental feature, use 'amdgpuras' tool for ras error manipulations for newer vbios") @@ -1551,8 +1551,8 @@ def setRas(deviceList, rasAction, rasBlock, rasType): def setFanSpeed(deviceList, fan): """ Set fan speed for a list of devices. - @param deviceList: List of DRM devices (can be a single-item list) - @param level: [0-255] Fan speed level + :param deviceList: List of DRM devices (can be a single-item list) + :param level: [0-255] Fan speed level """ printLogSpacer(' Set GPU Fan Speed ') for device in deviceList: @@ -1574,8 +1574,8 @@ def setFanSpeed(deviceList, fan): def setPerformanceLevel(deviceList, level): """ Set the Performance Level for a specified device. - @param deviceList: List of DRM devices (can be a single-item list) - @param level: Performance Level to set + :param deviceList: List of DRM devices (can be a single-item list) + :param level: Performance Level to set """ printLogSpacer(' Set Performance Level ') validLevels = ['auto', 'low', 'high', 'manual'] @@ -1595,9 +1595,9 @@ def setPowerOverDrive(deviceList, value, autoRespond): available to the GPU in Watts. 
May be limited by the maximum power the VBIOS is configured to allow this card to use in OverDrive mode. - @param deviceList: List of DRM devices (can be a single-item list) - @param value: New maximum power to assign to the target device, in Watts - @param autoRespond: Response to automatically provide for all prompts + :param deviceList: List of DRM devices (can be a single-item list) + :param value: New maximum power to assign to the target device, in Watts + :param autoRespond: Response to automatically provide for all prompts """ global RETCODE, PRINT_JSON try: @@ -1695,8 +1695,8 @@ def setPowerOverDrive(deviceList, value, autoRespond): def setProfile(deviceList, profile): """ Set Power Profile, or set CUSTOM Power Profile values for a list of devices. - @param deviceList: List of DRM devices (can be a single-item list) - @param profile: Profile to set + :param deviceList: List of DRM devices (can be a single-item list) + :param profile: Profile to set """ printLogSpacer(' Set Power Profile ') status = rsmi_power_profile_status_t() @@ -1735,8 +1735,8 @@ def setProfile(deviceList, profile): def setComputePartition(deviceList, computePartitionType): """ Sets compute partitioning for a list of device - @param deviceList: List of DRM devices (can be a single-item list) - @param computePartition: Compute Partition type to set as + :param deviceList: List of DRM devices (can be a single-item list) + :param computePartition: Compute Partition type to set as """ printLogSpacer(' Set compute partition to %s ' % (str(computePartitionType).upper())) for device in deviceList: @@ -1797,8 +1797,8 @@ def showProgressbar(title="", timeInSeconds=13): def setMemoryPartition(deviceList, memoryPartition): """ Sets memory partition (memory partition) for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param memoryPartition: Memory Partition type to set as + :param deviceList: List of DRM devices (can be a single-item list) + :param 
memoryPartition: Memory Partition type to set as """ printLogSpacer(' Set memory partition to %s ' % (str(memoryPartition).upper())) for device in deviceList: @@ -1871,7 +1871,7 @@ def showVersion(isCSV=False): def showAllConcise(deviceList): """ Display critical info for all devices in a concise format - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON, appWidth if PRINT_JSON: @@ -1979,7 +1979,7 @@ def showAllConcise(deviceList): def showAllConciseHw(deviceList): """ Display critical Hardware info - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON if PRINT_JSON: @@ -2032,7 +2032,7 @@ def showAllConciseHw(deviceList): def showBus(deviceList): """ Display PCI Bus info - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' PCI Bus ID ') for device in deviceList: @@ -2045,7 +2045,7 @@ def showClocks(deviceList): Current clocks marked with a '*' symbol - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ freq = rsmi_frequencies_t() bw = rsmi_pcie_bandwidth_t() @@ -2099,8 +2099,8 @@ def showClocks(deviceList): def showCurrentClocks(deviceList, clk_defined=None, concise=False): """ Display all clocks for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param clk-type: Clock type to display + :param deviceList: List of DRM devices (can be a single-item list) + :param clk-type: Clock type to display """ global PRINT_JSON freq = rsmi_frequencies_t() @@ -2176,7 +2176,7 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False): def showCurrentFans(deviceList): """ Display the current fan speed for a list of devices - 
@param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON printLogSpacer(' Current Fan Metric ') @@ -2209,7 +2209,7 @@ def showCurrentFans(deviceList): def showCurrentTemps(deviceList): """ Display all available temperatures for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Temperature ') for device in deviceList: @@ -2225,8 +2225,8 @@ def showCurrentTemps(deviceList): def showFwInfo(deviceList, fwType): """ Show the requested FW information for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param fwType: [$validFwBlocks] FW block version to display (all if left empty) + :param deviceList: List of DRM devices (can be a single-item list) + :param fwType: [$validFwBlocks] FW block version to display (all if left empty) """ if not fwType or 'all' in fwType: firmware_blocks = fw_block_names_l @@ -2269,7 +2269,7 @@ def showGpusByPid(pidList): Print out the GPU(s) used by a specific KFD process If pidList is empty, print all used GPUs for all KFD processes - @param pidList: List of PIDs to check + :param pidList: List of PIDs to check """ printLogSpacer(' GPUs Indexed by PID ') # If pidList is empty then we were given 0 arguments, so they want all PIDs @@ -2303,13 +2303,16 @@ def showGpusByPid(pidList): def getCoarseGrainUtil(device, typeName=None): """ Find Coarse Grain Utilization - If typeName is not given, will return array with of all available sensors, - where sensor type and value could be addressed like this: + If typeName is not given, will return array with of all available sensors, + where sensor type and value could be addressed like this: + + .. 
code-block:: python for ut_counter in utilization_counters: printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val) - @param device: DRM device identifier - @param typeName: 'GFX Activity', 'Memory Activity' + + :param device: DRM device identifier + :param typeName: 'GFX Activity', 'Memory Activity' """ timestamp = c_uint64(0) @@ -2340,7 +2343,7 @@ def getCoarseGrainUtil(device, typeName=None): def showGpuUse(deviceList): """ Display GPU use for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' % time GPU is busy ') for device in deviceList: @@ -2363,7 +2366,7 @@ def showEnergy(deviceList): Default counter value is 10000b, indicating energy status unit is 15.3 micro-Joules increment. - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ power = c_uint64() timestamp = c_uint64() @@ -2380,7 +2383,7 @@ def showEnergy(deviceList): def showId(deviceList): """ Display the device IDs for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' ID ') for device in deviceList: @@ -2396,7 +2399,7 @@ def showMaxPower(deviceList): """ Display the maximum Graphics Package Power that this GPU will attempt to consume before it begins throttling performance - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Power Cap ') for device in deviceList: @@ -2410,8 +2413,8 @@ def showMaxPower(deviceList): def showMemInfo(deviceList, memType): """ Display Memory information for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param memType: [$validMemTypes] Type of memory information to display 
+ :param deviceList: List of DRM devices (can be a single-item list) + :param memType: [$validMemTypes] Type of memory information to display """ # Python will pass in a list of values as a single-value list # If we get 'all' as the string, just set the list to all supported types @@ -2436,7 +2439,7 @@ def showMemInfo(deviceList, memType): def showMemUse(deviceList): """ Display GPU memory usage for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ memoryUse = c_uint64() avgMemBandwidth = c_uint16() @@ -2463,7 +2466,7 @@ def showMemUse(deviceList): def showMemVendor(deviceList): """ Display GPU memory vendor for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ vendor = create_string_buffer(256) printLogSpacer(' Memory Vendor ') @@ -2482,8 +2485,8 @@ def showMemVendor(deviceList): def showOverDrive(deviceList, odtype): """ Display current OverDrive level for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param odtype: [sclk|mclk] OverDrive type + :param deviceList: List of DRM devices (can be a single-item list) + :param odtype: [sclk|mclk] OverDrive type """ rsmi_od = c_uint32() printLogSpacer(' OverDrive Level ') @@ -2512,7 +2515,7 @@ def showOverDrive(deviceList, odtype): def showPcieBw(deviceList): """ Display estimated PCIe bandwidth usage for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ sent = c_uint64() received = c_uint64() @@ -2534,7 +2537,7 @@ def showPcieBw(deviceList): def showPcieReplayCount(deviceList): """ Display number of PCIe replays for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a 
single-item list) """ counter = c_uint64() printLogSpacer(' PCIe Replay Counter ') @@ -2548,7 +2551,7 @@ def showPcieReplayCount(deviceList): def showPerformanceLevel(deviceList): """ Display current Performance Level for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Show Performance Level ') for device in deviceList: @@ -2619,9 +2622,9 @@ def showPids(verbose): def showPower(deviceList): """ Display Current (also known as instant) Socket or Average - Graphics Package Power Consumption for a list of devices + Graphics Package Power Consumption for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ secondaryPresent=False printLogSpacer(' Power Consumption ') @@ -2647,7 +2650,7 @@ def showPower(deviceList): def showPowerPlayTable(deviceList): """ Display current GPU Memory clock frequencies and voltages for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON if PRINT_JSON: @@ -2690,7 +2693,7 @@ def showPowerPlayTable(deviceList): def showProduct(deviceList): """ Show the requested product information for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Product Info ') for device in deviceList: @@ -2725,7 +2728,7 @@ def showProduct(deviceList): def showProfile(deviceList): """ Display available Power Profiles for a list of devices. 
- @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON if PRINT_JSON: @@ -2754,8 +2757,8 @@ def showProfile(deviceList): def showRange(deviceList, rangeType): """ Show the range for either the sclk or voltage for the specified devices - @param deviceList: List of DRM devices (can be a single-item list) - @param rangeType: [sclk|voltage] Type of range to return + :param deviceList: List of DRM devices (can be a single-item list) + :param rangeType: [sclk|voltage] Type of range to return """ global RETCODE if rangeType not in {'sclk', 'mclk', 'voltage'}: @@ -2794,8 +2797,8 @@ def showRange(deviceList, rangeType): def showRasInfo(deviceList, rasType): """ Show the requested RAS information for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param rasType: [$validRasBlocks] RAS counter to display (all if left empty) + :param deviceList: List of DRM devices (can be a single-item list) + :param rasType: [$validRasBlocks] RAS counter to display (all if left empty) """ state = rsmi_ras_err_state_t() if not rasType or 'all' in rasType: @@ -2835,8 +2838,8 @@ def showRasInfo(deviceList, rasType): def showRetiredPages(deviceList, retiredType='all'): """ Show retired pages of a specified type for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param retiredType: Type of retired pages to show (default = all) + :param deviceList: List of DRM devices (can be a single-item list) + :param retiredType: Type of retired pages to show (default = all) """ printLogSpacer(' Pages Info ') num_pages = c_uint32() @@ -2864,7 +2867,7 @@ def showRetiredPages(deviceList, retiredType='all'): def showSerialNumber(deviceList): """ Display the serial number for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) 
""" printLogSpacer(' Serial Number ') for device in deviceList: @@ -2886,7 +2889,7 @@ def showSerialNumber(deviceList): def showUId(deviceList): """ Display the unique device ID for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Unique ID ') for device in deviceList: @@ -2902,7 +2905,7 @@ def showUId(deviceList): def showVbiosVersion(deviceList): """ Display the VBIOS version for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' VBIOS ') for device in deviceList: @@ -2933,8 +2936,8 @@ class _Getch: def showEvents(deviceList, eventTypes): """ Display a blocking list of events for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) - @param eventTypes: List of event type names (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) + :param eventTypes: List of event type names (can be a single-item list) """ printLogSpacer(' Show Events ') printLog(None, 'press \'q\' or \'ctrl + c\' to quit', None) @@ -3079,8 +3082,8 @@ def showTempGraph(deviceList): def showDriverVersion(deviceList, component): """ Display the software version for the specified component - @param deviceList: List of DRM devices (can be a single-item list) - @param component: Component (currently only driver) + :param deviceList: List of DRM devices (can be a single-item list) + :param component: Component (currently only driver) """ printLogSpacer(' Version of System Component ') printSysLog(component_str(component) + ' version', getVersion(deviceList, component)) @@ -3090,7 +3093,7 @@ def showDriverVersion(deviceList, component): def showVoltage(deviceList): """ Display the current voltage (in millivolts) for a list of devices - @param deviceList: List of DRM devices (can 
be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Current voltage ') for device in deviceList: @@ -3108,7 +3111,7 @@ def showVoltage(deviceList): def showVoltageCurve(deviceList): """ Show the voltage curve points for the specified devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Voltage Curve Points ') odvf = rsmi_od_volt_freq_data_t() @@ -3129,7 +3132,7 @@ def showXgmiErr(deviceList): This reads the XGMI error file, and interprets the return value from the sysfs file - @param deviceList: Show XGMI error state for these devices + :param deviceList: Show XGMI error state for these devices """ printLogSpacer('XGMI Error status') xe = rsmi_xgmi_status_t() @@ -3162,7 +3165,7 @@ def showAccessibleTopology(deviceList): This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ devices_ind = range(len(deviceList)) accessible = c_bool() @@ -3197,7 +3200,7 @@ def showWeightTopology(deviceList): This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ global PRINT_JSON devices_ind = range(len(deviceList)) @@ -3244,7 +3247,7 @@ def showHopsTopology(deviceList): This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ linktype = c_char_p() devices_ind = range(len(deviceList)) @@ -3290,7 +3293,7 @@ def showTypeTopology(deviceList): This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can 
be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ devices_ind = range(len(deviceList)) hops = c_uint64() @@ -3339,7 +3342,7 @@ def showNumaTopology(deviceList): This reads the HW Topology file and display the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' Numa Nodes ') numa_numbers = c_int32() @@ -3362,7 +3365,7 @@ def showHwTopology(deviceList): This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ showWeightTopology(deviceList) printEmptyLine() @@ -3377,7 +3380,7 @@ def showNodesBw(deviceList): """ Display max and min bandwidth between nodes. Currently supports XGMI only. This reads the HW Topology file and displays the matrix for the nodes - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ devices_ind = range(len(deviceList)) minBW = c_uint32() @@ -3426,7 +3429,7 @@ def showNodesBw(deviceList): def showComputePartition(deviceList): """ Returns the current compute partitioning for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ currentComputePartition = create_string_buffer(256) printLogSpacer(' Current Compute Partition ') @@ -3444,7 +3447,7 @@ def showComputePartition(deviceList): def showMemoryPartition(deviceList): """ Returns the current memory partition for a list of devices - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ memoryPartition = create_string_buffer(256) printLogSpacer(' Current Memory Partition ') @@ -3464,7 +3467,7 @@ def 
checkAmdGpus(deviceList): """ Check if there are any AMD GPUs being queried, return False if there are none - @param deviceList: List of DRM devices (can be a single-item list) + :param deviceList: List of DRM devices (can be a single-item list) """ for device in deviceList: if isAmdDevice(device): @@ -3475,7 +3478,7 @@ def checkAmdGpus(deviceList): def component_str(component): """ Returns the component String value - @param component: Component (currently only driver) + :param component: Component (currently only driver) """ switcher = { 0: 'Driver' @@ -3486,7 +3489,7 @@ def component_str(component): def confirmOutOfSpecWarning(autoRespond): """ Print the warning for running outside of specification and prompt user to accept the terms. - @param autoRespond: Response to automatically provide for all prompts + :param autoRespond: Response to automatically provide for all prompts """ print(''' ******WARNING******\n @@ -3514,7 +3517,7 @@ def confirmOutOfSpecWarning(autoRespond): def doesDeviceExist(device): """ Check whether the specified device exists - @param device: DRM device identifier + :param device: DRM device identifier """ availableDevices = listDevices() filePath = '/sys/kernel/debug/dri/%d/' % (int(device)) @@ -3543,7 +3546,7 @@ def initializeRsmi(): def isAmdDevice(device): """ Return whether the specified device is an AMD device or not - @param device: DRM device identifier + :param device: DRM device identifier """ vendorID = c_uint16() # Retrieve card vendor @@ -3568,8 +3571,8 @@ def listDevices(): def load(savefilepath, autoRespond): """ Load clock frequencies and fan speeds from a specified file. 
- @param savefilepath: Path to the save file - @param autoRespond: Response to automatically provide for all prompts + :param savefilepath: Path to the save file + :param autoRespond: Response to automatically provide for all prompts """ printLogSpacer(' Load Settings ') if not os.path.isfile(savefilepath): @@ -3606,8 +3609,8 @@ def load(savefilepath, autoRespond): def padHexValue(value, length): """ Pad a hexadecimal value with a given length of zeros - @param value: A hexadecimal value to be padded with zeros - @param length: Number of zeros to pad the hexadecimal value + :param value: A hexadecimal value to be padded with zeros + :param length: Number of zeros to pad the hexadecimal value """ # Ensure value entered meets the minimum length and is hexadecimal if len(value) > 2 and length > 1 and value[:2].lower() == '0x' \ @@ -3643,13 +3646,13 @@ def relaunchAsSudo(): def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False): """ Returns true if RSMI call status is 0 (success) - If status is not 0, error logs are written to the debug log and false is returned + If status is not 0, error logs are written to the debug log and false is returned - @param device: DRM device identifier - @param my_ret: Return of RSMI call (rocm_smi_lib API) - @param metric: Parameter of GPU currently being analyzed - @param silent: Echo verbose error reponse. - True silences err output, False does not silence err output (default). + :param device: DRM device identifier + :param my_ret: Return of RSMI call (rocm_smi_lib API) + :param metric: Parameter of GPU currently being analyzed + :param silent: Echo verbose error reponse. + True silences err output, False does not silence err output (default). """ global RETCODE global PRINT_JSON @@ -3681,8 +3684,8 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False): def save(deviceList, savefilepath): """ Save clock frequencies and fan speeds for a list of devices to a specified file path. 
- @param deviceList: List of DRM devices (can be a single-item list) - @param savefilepath: Path to use to create the save file + :param deviceList: List of DRM devices (can be a single-item list) + :param savefilepath: Path to use to create the save file """ perfLevels = {} clocks = {} diff --git a/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py b/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py new file mode 100644 index 0000000000..6a55370c0a --- /dev/null +++ b/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py @@ -0,0 +1,642 @@ +#!/usr/bin/env python3 +"""ROCm_SMI_LIB CLI Tool Python Bindings""" +# NOTE: You MUST call rsmiBindings.initRsmiBindings() when using this library! +# TODO: Get most (or all) of these from rocm_smi.h to avoid mismatches and redundancy + +from __future__ import print_function +from ctypes import * +from enum import Enum + +import sys + +if 'sphinx' in sys.modules: + path_librocm = str() + def initRsmiBindings(silent=False): + # Empty function for document generation + exit() + + SMI_HASH = '@PKG_VERSION_HASH@' +else: + from rsmiBindingsInit import * + + +# Device ID +dv_id = c_uint64() +# GPU ID +gpu_id = c_uint32(0) + + +# Policy enums +RSMI_MAX_NUM_FREQUENCIES = 33 +RSMI_MAX_FAN_SPEED = 255 +RSMI_NUM_VOLTAGE_CURVE_POINTS = 3 + + +class rsmi_status_t(c_int): + RSMI_STATUS_SUCCESS = 0x0 + RSMI_STATUS_INVALID_ARGS = 0x1 + RSMI_STATUS_NOT_SUPPORTED = 0x2 + RSMI_STATUS_FILE_ERROR = 0x3 + RSMI_STATUS_PERMISSION = 0x4 + RSMI_STATUS_OUT_OF_RESOURCES = 0x5 + RSMI_STATUS_INTERNAL_EXCEPTION = 0x6 + RSMI_STATUS_INPUT_OUT_OF_BOUNDS = 0x7 + RSMI_STATUS_INIT_ERROR = 0x8 + RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR + RSMI_STATUS_NOT_YET_IMPLEMENTED = 0x9 + RSMI_STATUS_NOT_FOUND = 0xA + RSMI_STATUS_INSUFFICIENT_SIZE = 0xB + RSMI_STATUS_INTERRUPT = 0xC + RSMI_STATUS_UNEXPECTED_SIZE = 0xD + RSMI_STATUS_NO_DATA = 0xE + RSMI_STATUS_UNEXPECTED_DATA = 0xF + RSMI_STATUS_BUSY = 0x10 + RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11 + 
RSMI_STATUS_SETTING_UNAVAILABLE = 0x12 + RSMI_STATUS_AMDGPU_RESTART_ERR = 0x13 + RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF + + +#Dictionary of rsmi ret codes and it's verbose output +rsmi_status_verbose_err_out = { + rsmi_status_t.RSMI_STATUS_SUCCESS: 'Operation was successful', + rsmi_status_t.RSMI_STATUS_INVALID_ARGS: 'Invalid arguments provided', + rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED: 'Not supported on the given system', + rsmi_status_t.RSMI_STATUS_FILE_ERROR: 'Problem accessing a file', + rsmi_status_t.RSMI_STATUS_PERMISSION: 'Permission denied', + rsmi_status_t.RSMI_STATUS_OUT_OF_RESOURCES: 'Unable to acquire memory or other resource', + rsmi_status_t.RSMI_STATUS_INTERNAL_EXCEPTION: 'An internal exception was caught', + rsmi_status_t.RSMI_STATUS_INPUT_OUT_OF_BOUNDS: 'Provided input is out of allowable or safe range', + rsmi_status_t.RSMI_INITIALIZATION_ERROR: 'Error occured during rsmi initialization', + rsmi_status_t.RSMI_STATUS_NOT_YET_IMPLEMENTED: 'Requested function is not implemented on this setup', + rsmi_status_t.RSMI_STATUS_NOT_FOUND: 'Item searched for but not found', + rsmi_status_t.RSMI_STATUS_INSUFFICIENT_SIZE: 'Insufficient resources available', + rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occured during execution', + rsmi_status_t.RSMI_STATUS_UNEXPECTED_SIZE: 'Unexpected amount of data read', + rsmi_status_t.RSMI_STATUS_NO_DATA: 'No data found for the given input', + rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA: 'Unexpected data received', + rsmi_status_t.RSMI_STATUS_BUSY: 'Busy - resources are preventing call the ability to execute', + rsmi_status_t.RSMI_STATUS_REFCOUNT_OVERFLOW: 'Data overflow - data exceeded INT32_MAX', + rsmi_status_t.RSMI_STATUS_SETTING_UNAVAILABLE: 'Requested setting is unavailable for current device', + rsmi_status_t.RSMI_STATUS_AMDGPU_RESTART_ERR: 'Could not successfully restart the amdgpu driver', + rsmi_status_t.RSMI_STATUS_UNKNOWN_ERROR: 'Unknown error occured' +} + + +class rsmi_init_flags_t(c_int): + 
RSMI_INIT_FLAG_ALL_GPUS = 0x1 + + +class rsmi_dev_perf_level_t(c_int): + RSMI_DEV_PERF_LEVEL_AUTO = 0 + RSMI_DEV_PERF_LEVEL_FIRST = RSMI_DEV_PERF_LEVEL_AUTO + RSMI_DEV_PERF_LEVEL_LOW = 1 + RSMI_DEV_PERF_LEVEL_HIGH = 2 + RSMI_DEV_PERF_LEVEL_MANUAL = 3 + RSMI_DEV_PERF_LEVEL_STABLE_STD = 4 + RSMI_DEV_PERF_LEVEL_STABLE_PEAK = 5 + RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK = 6 + RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK = 7 + RSMI_DEV_PERF_LEVEL_DETERMINISM = 8 + RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_DETERMINISM + RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 + + +notification_type_names = ['VM_FAULT', 'THERMAL_THROTTLE', 'GPU_RESET'] + + +class rsmi_evt_notification_type_t(c_int): + RSMI_EVT_NOTIF_VMFAULT = 0 + RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT + RSMI_EVT_NOTIF_THERMAL_THROTTLE = 1 + RSMI_EVT_NOTIF_GPU_PRE_RESET = 2 + RSMI_EVT_NOTIF_GPU_POST_RESET = 3 + RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET + + +class rsmi_voltage_metric_t(c_int): + RSMI_VOLT_CURRENT = 0 + RSMI_VOLT_FIRST = RSMI_VOLT_CURRENT + RSMI_VOLT_MAX = 1 + RSMI_VOLT_MIN_CRIT = 2 + RSMI_VOLT_MIN = 3 + RSMI_VOLT_MAX_CRIT = 4 + RSMI_VOLT_AVERAGE = 5 + RSMI_VOLT_LOWEST = 6 + RSMI_VOLT_HIGHEST = 7 + RSMI_VOLT_LAST = RSMI_VOLT_HIGHEST + RSMI_VOLT_UNKNOWN = 0x100 + + +class rsmi_voltage_type_t(c_int): + RSMI_VOLT_TYPE_FIRST = 0 + RSMI_VOLT_TYPE_VDDGFX = RSMI_VOLT_TYPE_FIRST + RSMI_VOLT_TYPE_LAST = RSMI_VOLT_TYPE_VDDGFX + RSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF + + +# The perf_level_string is correlated to rsmi_dev_perf_level_t +def perf_level_string(i): + switcher = { + 0: 'AUTO', + 1: 'LOW', + 2: 'HIGH', + 3: 'MANUAL', + 4: 'STABLE_STD', + 5: 'STABLE_PEAK', + 6: 'STABLE_MIN_MCLK', + 7: 'STABLE_MIN_SCLK', + 8: 'PERF_DETERMINISM', + } + return switcher.get(i, 'UNKNOWN') + + +rsmi_dev_perf_level = rsmi_dev_perf_level_t + + +class rsmi_sw_component_t(c_int): + RSMI_SW_COMP_FIRST = 0x0 + RSMI_SW_COMP_DRIVER = RSMI_SW_COMP_FIRST + RSMI_SW_COMP_LAST = RSMI_SW_COMP_DRIVER + + + +rsmi_event_handle_t = POINTER(c_uint) + 
+
+class rsmi_event_group_t(Enum):
+    """Event groups; each value is the id of the first event of the group."""
+    RSMI_EVNT_GRP_XGMI = 0
+    RSMI_EVNT_GRP_XGMI_DATA_OUT = 10
+    RSMI_EVNT_GRP_INVALID = 0xFFFFFFFF
+
+
+class rsmi_event_type_t(c_int):
+    """Event ids, laid out as one contiguous range per event group.
+
+    NOTE(review): the members assigned from rsmi_event_group_t
+    (RSMI_EVNT_FIRST, the *_FIRST aliases, RSMI_EVNT_XGMI_0_NOP_TX and
+    RSMI_EVNT_XGMI_DATA_OUT_0) are Enum members, not plain ints like the
+    remaining constants; use ``.value`` where an integer is required.
+    """
+    RSMI_EVNT_FIRST = rsmi_event_group_t.RSMI_EVNT_GRP_XGMI
+    RSMI_EVNT_XGMI_FIRST = rsmi_event_group_t.RSMI_EVNT_GRP_XGMI
+    RSMI_EVNT_XGMI_0_NOP_TX = RSMI_EVNT_XGMI_FIRST
+    RSMI_EVNT_XGMI_0_REQUEST_TX = 1
+    RSMI_EVNT_XGMI_0_RESPONSE_TX = 2
+    RSMI_EVNT_XGMI_0_BEATS_TX = 3
+    RSMI_EVNT_XGMI_1_NOP_TX = 4
+    RSMI_EVNT_XGMI_1_REQUEST_TX = 5
+    RSMI_EVNT_XGMI_1_RESPONSE_TX = 6
+    RSMI_EVNT_XGMI_1_BEATS_TX = 7
+    RSMI_EVNT_XGMI_LAST = RSMI_EVNT_XGMI_1_BEATS_TX
+
+    RSMI_EVNT_XGMI_DATA_OUT_FIRST = rsmi_event_group_t.RSMI_EVNT_GRP_XGMI_DATA_OUT
+    RSMI_EVNT_XGMI_DATA_OUT_0 = RSMI_EVNT_XGMI_DATA_OUT_FIRST
+    RSMI_EVNT_XGMI_DATA_OUT_1 = 11
+    RSMI_EVNT_XGMI_DATA_OUT_2 = 12
+    RSMI_EVNT_XGMI_DATA_OUT_3 = 13
+    RSMI_EVNT_XGMI_DATA_OUT_4 = 14
+    RSMI_EVNT_XGMI_DATA_OUT_5 = 15
+    RSMI_EVNT_XGMI_DATA_OUT_LAST = RSMI_EVNT_XGMI_DATA_OUT_5
+
+    # No trailing comma: it would turn this constant into the tuple (15,).
+    RSMI_EVNT_LAST = RSMI_EVNT_XGMI_DATA_OUT_LAST
+
+
+class rsmi_counter_command_t(c_int):
+    """Counter commands: start (0) and stop (1)."""
+    RSMI_CNTR_CMD_START = 0
+    RSMI_CNTR_CMD_STOP = 1
+
+
+class rsmi_counter_value_t(Structure):
+    """ctypes struct with a counter value and its enabled/running times."""
+    _fields_ = [('value', c_uint64),
+                ('time_enabled', c_uint64),
+                ('time_running', c_uint64)]
+
+
+class rsmi_clk_type_t(c_int):
+    """Clock type ids; FIRST and LAST alias the first and last valid id."""
+    RSMI_CLK_TYPE_SYS = 0x0
+    RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS
+    RSMI_CLK_TYPE_DF = 0x1
+    RSMI_CLK_TYPE_DCEF = 0x2
+    RSMI_CLK_TYPE_SOC = 0x3
+    RSMI_CLK_TYPE_MEM = 0x4
+    RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM
+    RSMI_CLK_INVALID = 0xFFFFFFFF
+
+
+# Clock names follow the *declaration order* of the rsmi_clk_type_t members
+# above (the FIRST/LAST aliases included), so this list cannot be indexed by
+# the enum values; use rsmi_clk_names_dict to map a clock name to its value.
+clk_type_names = ['sclk', 'sclk', 'fclk', 'dcefclk',\
+                  'socclk', 'mclk', 'mclk', 'invalid']
+rsmi_clk_type_dict = {'RSMI_CLK_TYPE_SYS': 0x0, 'RSMI_CLK_TYPE_FIRST': 0x0,\
+                      'RSMI_CLK_TYPE_DF': 0x1, 'RSMI_CLK_TYPE_DCEF': 0x2,\
+                      'RSMI_CLK_TYPE_SOC': 0x3, 'RSMI_CLK_TYPE_MEM': 0x4,\
+                      'RSMI_CLK_TYPE_LAST': 0X4, 'RSMI_CLK_INVALID': 0xFFFFFFFF}
+rsmi_clk_names_dict = {'sclk': 0x0, 'fclk': 0x1, 'dcefclk': 0x2,\
+                       'socclk': 0x3, 'mclk': 0x4}
+rsmi_clk_type = rsmi_clk_type_t
+
+
+class rsmi_temperature_metric_t(c_int):
+    """Temperature metric ids; FIRST and LAST alias the range bounds."""
+    RSMI_TEMP_CURRENT = 0x0
+    RSMI_TEMP_FIRST = RSMI_TEMP_CURRENT
+    RSMI_TEMP_MAX = 0x1
+    RSMI_TEMP_MIN = 0x2
+    RSMI_TEMP_MAX_HYST = 0x3
+    RSMI_TEMP_MIN_HYST = 0x4
+    RSMI_TEMP_CRITICAL = 0x5
+    RSMI_TEMP_CRITICAL_HYST = 0x6
+    RSMI_TEMP_EMERGENCY = 0x7
+    RSMI_TEMP_EMERGENCY_HYST = 0x8
+    RSMI_TEMP_CRIT_MIN = 0x9
+    RSMI_TEMP_CRIT_MIN_HYST = 0xA
+    RSMI_TEMP_OFFSET = 0xB
+    RSMI_TEMP_LOWEST = 0xC
+    RSMI_TEMP_HIGHEST = 0xD
+    RSMI_TEMP_LAST = RSMI_TEMP_HIGHEST
+
+
+rsmi_temperature_metric = rsmi_temperature_metric_t
+
+
+class rsmi_temperature_type_t(c_int):
+    """Temperature sensor ids; values index temp_type_lst."""
+    RSMI_TEMP_TYPE_FIRST = 0
+    RSMI_TEMP_TYPE_EDGE = RSMI_TEMP_TYPE_FIRST
+    RSMI_TEMP_TYPE_JUNCTION = 1
+    RSMI_TEMP_TYPE_MEMORY = 2
+    RSMI_TEMP_TYPE_HBM_0 = 3
+    RSMI_TEMP_TYPE_HBM_1 = 4
+    RSMI_TEMP_TYPE_HBM_2 = 5
+    RSMI_TEMP_TYPE_HBM_3 = 6
+    RSMI_TEMP_TYPE_LAST = RSMI_TEMP_TYPE_HBM_3
+
+
+# temp_type_lst list correlates to rsmi_temperature_type_t
+temp_type_lst = ['edge', 'junction', 'memory', 'HBM 0', 'HBM 1', 'HBM 2', 'HBM 3']
+
+
+class rsmi_power_profile_preset_masks_t(c_uint64):
+    """Power profile preset bit masks (one bit per preset)."""
+    RSMI_PWR_PROF_PRST_CUSTOM_MASK = 0x1
+    RSMI_PWR_PROF_PRST_VIDEO_MASK = 0x2
+    RSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 0x4
+    RSMI_PWR_PROF_PRST_COMPUTE_MASK = 0x8
+    RSMI_PWR_PROF_PRST_VR_MASK = 0x10
+    RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 0x20
+    RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT = 0x40
+    RSMI_PWR_PROF_PRST_LAST = RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
+    RSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF
+
+
+rsmi_power_profile_preset_masks = rsmi_power_profile_preset_masks_t
+
+
+class rsmi_gpu_block_t(c_int):
+    """GPU block ids; every block is a distinct single bit."""
+    RSMI_GPU_BLOCK_INVALID = 0x0000000000000000
+    RSMI_GPU_BLOCK_FIRST = 0x0000000000000001
+    RSMI_GPU_BLOCK_UMC = RSMI_GPU_BLOCK_FIRST
+    RSMI_GPU_BLOCK_SDMA = 0x0000000000000002
+    RSMI_GPU_BLOCK_GFX = 0x0000000000000004
+    RSMI_GPU_BLOCK_MMHUB = 0x0000000000000008
+    RSMI_GPU_BLOCK_ATHUB = 0x0000000000000010
+    RSMI_GPU_BLOCK_PCIE_BIF = 0x0000000000000020
+    RSMI_GPU_BLOCK_HDP = 0x0000000000000040
+    RSMI_GPU_BLOCK_XGMI_WAFL = 0x0000000000000080
+    RSMI_GPU_BLOCK_DF = 0x0000000000000100
+    RSMI_GPU_BLOCK_SMN = 0x0000000000000200
+    RSMI_GPU_BLOCK_SEM = 0x0000000000000400
+    RSMI_GPU_BLOCK_MP0 = 0x0000000000000800
+    RSMI_GPU_BLOCK_MP1 = 0x0000000000001000
+    RSMI_GPU_BLOCK_FUSE = 0x0000000000002000
+    RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_FUSE
+    RSMI_GPU_BLOCK_RESERVED = 0x8000000000000000
+
+
+rsmi_gpu_block = rsmi_gpu_block_t
+
+
+# The following dictionary correlates with rsmi_gpu_block_t enum
+rsmi_gpu_block_d = {
+    'UMC' : 0x0000000000000001,
+    'SDMA' : 0x0000000000000002,
+    'GFX' : 0x0000000000000004,
+    'MMHUB': 0x0000000000000008,
+    'ATHUB': 0x0000000000000010,
+    'PCIE_BIF': 0x0000000000000020,
+    'HDP': 0x0000000000000040,
+    'XGMI_WAFL': 0x0000000000000080,
+    'DF': 0x0000000000000100,
+    'SMN': 0x0000000000000200,
+    'SEM': 0x0000000000000400,
+    'MP0': 0x0000000000000800,
+    'MP1': 0x0000000000001000,
+    'FUSE': 0x0000000000002000
+}
+
+
+class rsmi_ras_err_state_t(c_int):
+    """RAS error state ids; values index the two name lists below."""
+    RSMI_RAS_ERR_STATE_NONE = 0
+    RSMI_RAS_ERR_STATE_DISABLED = 1
+    RSMI_RAS_ERR_STATE_PARITY = 2
+    RSMI_RAS_ERR_STATE_SING_C = 3
+    RSMI_RAS_ERR_STATE_MULT_UC = 4
+    RSMI_RAS_ERR_STATE_POISON = 5
+    RSMI_RAS_ERR_STATE_ENABLED = 6
+    RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_ENABLED
+    RSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
+
+
+# Error type list correlates to rsmi_ras_err_state_t
+rsmi_ras_err_stale_readable = ['no errors', 'ECC disabled',
+                               'unknown type err', 'single correctable err',
+                               'multiple uncorrectable err',
+                               'page isolated, treat as uncorrectable err',
+                               'ECC enabled', 'status invalid']
+# NOTE(review): index 5 lines up with RSMI_RAS_ERR_STATE_POISON, so 'position'
+# is presumably a typo for 'poison'; left as-is because it is runtime output.
+rsmi_ras_err_stale_machine = ['none', 'disabled', 'unknown error',
+                              'sing', 'mult', 'position', 'enabled']
+
+validRasTypes = ['ue', 'ce']
+
+validRasActions = ['disable', 'enable', 'inject']
+
+validRasBlocks = ['fuse', 'mp1', 'mp0', 'sem', 'smn', 'df', 'xgmi_wafl', 'hdp', 'pcie_bif',
+                  'athub', 'mmhub', 'gfx', 'sdma', 'umc']
+
+
+class rsmi_memory_type_t(c_int):
+    """Memory type ids; values index memory_type_l."""
+    RSMI_MEM_TYPE_FIRST = 0
+    RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST
+    RSMI_MEM_TYPE_VIS_VRAM = 1
+    RSMI_MEM_TYPE_GTT = 2
+    RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT
+
+
+# memory_type_l includes names for rsmi_memory_type_t
+# Usage example to get corresponding names:
+# memory_type_l[rsmi_memory_type_t.RSMI_MEM_TYPE_VRAM] will return string 'VRAM'
+memory_type_l = ['VRAM', 'VIS_VRAM', 'GTT']
+
+
+class rsmi_freq_ind_t(c_int):
+    """Frequency index selectors: minimum (0) and maximum (1)."""
+    RSMI_FREQ_IND_MIN = 0
+    RSMI_FREQ_IND_MAX = 1
+    RSMI_FREQ_IND_INVALID = 0xFFFFFFFF
+
+
+rsmi_freq_ind = rsmi_freq_ind_t
+
+
+class rsmi_fw_block_t(c_int):
+    """Firmware block ids; values index fw_block_names_l."""
+    RSMI_FW_BLOCK_FIRST = 0
+    RSMI_FW_BLOCK_ASD = RSMI_FW_BLOCK_FIRST
+    RSMI_FW_BLOCK_CE = 1
+    RSMI_FW_BLOCK_DMCU = 2
+    RSMI_FW_BLOCK_MC = 3
+    RSMI_FW_BLOCK_ME = 4
+    RSMI_FW_BLOCK_MEC = 5
+    RSMI_FW_BLOCK_MEC2 = 6
+    RSMI_FW_BLOCK_MES = 7
+    RSMI_FW_BLOCK_MES_KIQ = 8
+    RSMI_FW_BLOCK_PFP = 9
+    RSMI_FW_BLOCK_RLC = 10
+    RSMI_FW_BLOCK_RLC_SRLC = 11
+    RSMI_FW_BLOCK_RLC_SRLG = 12
+    RSMI_FW_BLOCK_RLC_SRLS = 13
+    RSMI_FW_BLOCK_SDMA = 14
+    RSMI_FW_BLOCK_SDMA2 = 15
+    RSMI_FW_BLOCK_SMC = 16
+    RSMI_FW_BLOCK_SOS = 17
+    RSMI_FW_BLOCK_TA_RAS = 18
+    RSMI_FW_BLOCK_TA_XGMI = 19
+    RSMI_FW_BLOCK_UVD = 20
+    RSMI_FW_BLOCK_VCE = 21
+    RSMI_FW_BLOCK_VCN = 22
+    RSMI_FW_BLOCK_LAST = RSMI_FW_BLOCK_VCN
+
+
+# The following list correlated to the rsmi_fw_block_t
+fw_block_names_l = ['ASD', 'CE', 'DMCU', 'MC', 'ME', 'MEC', 'MEC2', 'MES', 'MES KIQ', 'PFP',\
+                    'RLC', 'RLC SRLC', 'RLC SRLG', 'RLC SRLS', 'SDMA', 'SDMA2',\
+                    'SMC', 'SOS', 'TA RAS', 'TA XGMI', 'UVD', 'VCE', 'VCN']
+
+
+# NOTE: this is a c_uint64 *instance*, not the c_uint64 type; sizeof() below
+# accepts either, so RSMI_MAX_NUM_POWER_PROFILES is still computed correctly.
+rsmi_bit_field_t = c_uint64()
+rsmi_bit_field = rsmi_bit_field_t
+
+class rsmi_utilization_counter_type(c_int):
+    """Utilization counter ids; values index utilization_counter_name."""
+    RSMI_UTILIZATION_COUNTER_FIRST = 0
+    RSMI_COARSE_GRAIN_GFX_ACTIVITY = RSMI_UTILIZATION_COUNTER_FIRST
+    RSMI_COARSE_GRAIN_MEM_ACTIVITY = 1
+    RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY
+
+utilization_counter_name = ['GFX Activity', 'Memory Activity']
+
+class rsmi_utilization_counter_t(Structure):
+    """ctypes struct pairing a counter type (int) with its 64-bit value."""
+    _fields_ = [('type', c_int),
+                ('val', c_uint64)]
+
+
+class rsmi_xgmi_status_t(c_int):
+    """XGMI status ids: no errors (0), error (1), multiple errors (2)."""
+    RSMI_XGMI_STATUS_NO_ERRORS = 0
+    RSMI_XGMI_STATUS_ERROR = 1
+    RSMI_XGMI_STATUS_MULTIPLE_ERRORS = 2
+
+
+class rsmi_memory_page_status_t(c_int):
+    """Memory page status ids; values index memory_page_status_l."""
+    RSMI_MEM_PAGE_STATUS_RESERVED = 0
+    RSMI_MEM_PAGE_STATUS_PENDING = 1
+    RSMI_MEM_PAGE_STATUS_UNRESERVABLE = 2
+
+
+memory_page_status_l = ['reserved', 'pending', 'unreservable']
+
+
+class rsmi_retired_page_record_t(Structure):
+    """ctypes struct describing one retired page: address, size, status."""
+    _fields_ = [('page_address', c_uint64),
+                ('page_size', c_uint64),
+                ('status', c_int)]
+
+
+# Number of bits in rsmi_bit_field_t (8 bytes * 8 = 64).
+RSMI_MAX_NUM_POWER_PROFILES = (sizeof(rsmi_bit_field_t) * 8)
+
+
+class rsmi_power_profile_status_t(Structure):
+    """ctypes struct with available/current power profiles and their count."""
+    _fields_ = [('available_profiles', c_uint32),
+                ('current', c_uint64),
+                ('num_profiles', c_uint32)]
+
+
+rsmi_power_profile_status = rsmi_power_profile_status_t
+
+
+class rsmi_frequencies_t(Structure):
+    """ctypes struct holding up to RSMI_MAX_NUM_FREQUENCIES frequencies."""
+    _fields_ = [('has_deep_sleep', c_bool),
+                ('num_supported', c_int32),
+                ('current', c_uint32),
+                ('frequency', c_uint64 * RSMI_MAX_NUM_FREQUENCIES)]
+
+
+rsmi_frequencies = rsmi_frequencies_t
+
+
+class rsmi_pcie_bandwidth_t(Structure):
+    """ctypes struct pairing PCIe transfer rates with a lanes array."""
+    _fields_ = [('transfer_rate', rsmi_frequencies_t),
+                ('lanes', c_uint32 * RSMI_MAX_NUM_FREQUENCIES)]
+
+
+rsmi_pcie_bandwidth = rsmi_pcie_bandwidth_t
+
+
+class rsmi_version_t(Structure):
+    """ctypes struct with major/minor/patch numbers and a build string."""
+    _fields_ = [('major', c_uint32),
+                ('minor', c_uint32),
+                ('patch', c_uint32),
+                ('build', c_char_p)]
+
+
+rsmi_version = rsmi_version_t
+
+
+class rsmi_range_t(Structure):
+    """ctypes struct with a 64-bit lower and upper bound."""
+    _fields_ = [('lower_bound', c_uint64),
+                ('upper_bound', c_uint64)]
+
+
+rsmi_range = rsmi_range_t
+
+
+class rsmi_od_vddc_point_t(Structure):
+    """ctypes struct for one (frequency, voltage) point."""
+    _fields_ = [('frequency', c_uint64),
+                ('voltage', c_uint64)]
+
+
+rsmi_od_vddc_point = rsmi_od_vddc_point_t
+
+
+class rsmi_freq_volt_region_t(Structure):
+    """ctypes struct pairing a frequency range with a voltage range."""
+    _fields_ = [('freq_range', rsmi_range_t),
+                ('volt_range', rsmi_range_t)]
+
+
+rsmi_freq_volt_region = rsmi_freq_volt_region_t
+
+
+class rsmi_od_volt_curve_t(Structure):
+    """ctypes struct with RSMI_NUM_VOLTAGE_CURVE_POINTS curve points."""
+    _fields_ = [('vc_points', rsmi_od_vddc_point_t *\
+                 RSMI_NUM_VOLTAGE_CURVE_POINTS)]
+
+
+rsmi_od_volt_curve = rsmi_od_volt_curve_t
+
+
+class rsmi_od_volt_freq_data_t(Structure):
+    """ctypes struct with sclk/mclk ranges and limits, the curve, and a region count."""
+    _fields_ = [('curr_sclk_range', rsmi_range_t),
+                ('curr_mclk_range', rsmi_range_t),
+                ('sclk_freq_limits', rsmi_range_t),
+                ('mclk_freq_limits', rsmi_range_t),
+                ('curve', rsmi_od_volt_curve_t),
+                ('num_regions', c_uint32)]
+
+
+rsmi_od_volt_freq_data = rsmi_od_volt_freq_data_t
+
+
+class rsmi_error_count_t(Structure):
+    """ctypes struct with correctable and uncorrectable error counts."""
+    _fields_ = [('correctable_err', c_uint64),
+                ('uncorrectable_err', c_uint64)]
+
+
+class rsmi_evt_notification_data_t(Structure):
+    """ctypes struct for one event notification (device index, event, 64-byte message)."""
+    _fields_ = [('dv_ind', c_uint32),
+                ('event', rsmi_evt_notification_type_t),
+                ('message', c_char*64)]
+
+
+class rsmi_process_info_t(Structure):
+    """ctypes struct with per-process ids and usage figures."""
+    _fields_ = [('process_id', c_uint32),
+                ('pasid', c_uint32),
+                ('vram_usage', c_uint64),
+                ('sdma_usage', c_uint64),
+                ('cu_occupancy', c_uint32)]
+
+
+class rsmi_func_id_iter_handle(Structure):
+    """ctypes struct behind the function-id iterator handle pointer."""
+    _fields_ = [('func_id_iter', POINTER(c_uint)),
+                ('container_ptr', POINTER(c_uint)),
+                ('id_type', c_uint32)]
+
+
+rsmi_func_id_iter_handle_t = POINTER(rsmi_func_id_iter_handle)
+
+
+RSMI_DEFAULT_VARIANT = 0xFFFFFFFFFFFFFFFF
+
+
+class submodule_union(Union):
+    """ctypes union of the enum-typed sub-module ids; all are stored as c_int."""
+    _fields_ = [('memory_type', c_int),  # rsmi_memory_type_t,
+                ('temp_metric', c_int),  # rsmi_temperature_metric_t,
+                ('evnt_type', c_int),  # rsmi_event_type_t,
+                ('evnt_group', c_int),  # rsmi_event_group_t,
+                ('clk_type', c_int),  # rsmi_clk_type_t,
+                ('fw_block', c_int),  # rsmi_fw_block_t,
+                ('gpu_block_type', c_int)]  # rsmi_gpu_block_t
+
+
+class rsmi_func_id_value_t(Union):
+    """ctypes union: a 64-bit id, a name string, or a submodule_union."""
+    _fields_ = [('id', c_uint64),
+                ('name', c_char_p),
+                ('submodule', submodule_union)]
+
+class rsmi_compute_partition_type_t(c_int):
+    """Compute partition ids; 0 is the invalid value."""
+    RSMI_COMPUTE_PARTITION_INVALID = 0
+    RSMI_COMPUTE_PARTITION_CPX = 1
+    RSMI_COMPUTE_PARTITION_SPX = 2
+    RSMI_COMPUTE_PARTITION_DPX = 3
+    RSMI_COMPUTE_PARTITION_TPX = 4
+    RSMI_COMPUTE_PARTITION_QPX = 5
+
+rsmi_compute_partition_type_dict = {
+    #'RSMI_COMPUTE_PARTITION_INVALID': 0,
+    'CPX': 1,
+    'SPX': 2,
+    'DPX': 3,
+    'TPX': 4,
+    'QPX': 5
+}
+
+rsmi_compute_partition_type = rsmi_compute_partition_type_t
+
+# compute_partition_type_l includes string names for the rsmi_compute_partition_type_t
+# The list has no entry for RSMI_COMPUTE_PARTITION_INVALID (0), so subtract 1
+# from the enum value when indexing:
+# compute_partition_type_l[rsmi_compute_partition_type_t.RSMI_COMPUTE_PARTITION_CPX - 1]
+# will return string 'CPX'
+compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
+
+class rsmi_memory_partition_type_t(c_int):
+    """Memory partition ids; 0 is the unknown value."""
+    RSMI_MEMORY_PARTITION_UNKNOWN = 0
+    RSMI_MEMORY_PARTITION_NPS1 = 1
+    RSMI_MEMORY_PARTITION_NPS2 = 2
+    RSMI_MEMORY_PARTITION_NPS4 = 3
+    RSMI_MEMORY_PARTITION_NPS8 = 4
+
+rsmi_memory_partition_type_dict = {
+    'NPS1': 1,
+    'NPS2': 2,
+    'NPS4': 3,
+    'NPS8': 4
+}
+
+rsmi_memory_partition_type = rsmi_memory_partition_type_t
+
+# memory_partition_type_l includes string names for the rsmi_memory_partition_type_t
+# The list has no entry for RSMI_MEMORY_PARTITION_UNKNOWN (0), so subtract 1
+# from the enum value when indexing:
+# memory_partition_type_l[rsmi_memory_partition_type_t.RSMI_MEMORY_PARTITION_NPS2 - 1]
+# will return string 'NPS2'
+memory_partition_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
+
+class rsmi_power_label(str, Enum):
+    """Display suffixes for the two power readings (str-valued Enum)."""
+    AVG_POWER = '(Avg)'
+    CURRENT_SOCKET_POWER = '(Socket)'
+
+class rsmi_power_type_t(c_int):
+    """Power type ids; the values are the keys of rsmi_power_type_dict."""
+    # Plain ints, no trailing commas: `= 0,` would create the tuple (0,),
+    # which never matches the integer keys of rsmi_power_type_dict.
+    RSMI_AVERAGE_POWER = 0
+    RSMI_CURRENT_POWER = 1
+    RSMI_INVALID_POWER = 0xFFFFFFFF
+
+rsmi_power_type_dict = {
+    0: 'AVERAGE',
+    1: 'CURRENT SOCKET',
+    0xFFFFFFFF: 'INVALID_POWER_TYPE'
+}
\ No newline at end of file
diff --git a/projects/rocm-smi-lib/python_smi_tools/rsmiBindingsInit.py.in b/projects/rocm-smi-lib/python_smi_tools/rsmiBindingsInit.py.in
new file mode 100644
index 0000000000..12b92186ad
--- /dev/null
+++ b/projects/rocm-smi-lib/python_smi_tools/rsmiBindingsInit.py.in
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""ROCm_SMI_LIB CLI Tool Python Bindings"""
+# NOTE: You MUST call rsmiBindings.initRsmiBindings() when using this library!
+# TODO: Get most (or all) of these from rocm_smi.h to avoid mismatches and redundancy
+
+from __future__ import print_function
+import ctypes.util
+from ctypes import *
+from enum import Enum
+
+import os
+
+# Use ROCm installation path if running from standard installation
+# With File Reorg rsmiBindings.py and rsmiBindingsInit.py will be installed in
+# /opt/rocm/libexec/rocm_smi. relative path changed accordingly.
+# if ROCM_SMI_LIB_PATH is set, we can load 'librocm_smi64.so' from that location
+#
+# Library load is wrapped in a function so prints can be hidden for PRINT_JSON mode.
+path_librocm = str()
+def initRsmiBindings(silent=False):
+    """Locate and load the versioned librocm_smi64.so; return its CDLL handle.
+
+    The candidate path is the ROCM_SMI_LIB_PATH environment variable when it
+    is set, otherwise a path relative to this file's install location. If the
+    candidate is not a file, /opt is walked for the library file name.
+
+    :param silent: when True, suppress the informational messages routed
+        through print_silent(); failure messages are always printed.
+    :return: ctypes.CDLL for the loaded library.
+
+    When loading raises OSError a hint is printed and exit() is called.
+    """
+    def print_silent(*args):
+        """Print the given values unless `silent` was requested."""
+        if not silent:
+            # Unpack, otherwise the tuple repr ("('msg',)") is printed.
+            print(*args)
+
+    # NOTE(review): path_librocm is assigned without a `global` declaration,
+    # so it is local here and the module-level path_librocm stays ''.
+    # Confirm whether updating the module-level value was intended.
+    rocm_smi_lib_path = os.getenv('ROCM_SMI_LIB_PATH')
+    if rocm_smi_lib_path is not None:
+        path_librocm = rocm_smi_lib_path
+    else:
+        path_librocm = os.path.dirname(os.path.realpath(__file__)) + '/../../@CMAKE_INSTALL_LIBDIR@/librocm_smi64.so.@VERSION_MAJOR@'
+
+    if not os.path.isfile(path_librocm):
+        print_silent('Unable to find %s . Trying /opt/rocm*' % path_librocm)
+        # The walk is not stopped at the first hit: the last match wins.
+        for root, dirs, files in os.walk('/opt', followlinks=True):
+            if 'librocm_smi64.so.@VERSION_MAJOR@' in files:
+                path_librocm = os.path.join(os.path.realpath(root), 'librocm_smi64.so.@VERSION_MAJOR@')
+        if os.path.isfile(path_librocm):
+            print_silent('Using lib from %s' % path_librocm)
+        else:
+            print('Unable to find librocm_smi64.so.@VERSION_MAJOR@')
+
+    # ----------> TODO: Support static libs as well as SO
+    try:
+        cdll.LoadLibrary(path_librocm)
+        return CDLL(path_librocm)
+    except OSError:
+        print('Unable to load the rocm_smi library.\n'\
+              'Set LD_LIBRARY_PATH to the folder containing librocm_smi64.so.@VERSION_MAJOR@\n'\
+              '{0}Please refer to https://github.com/'\
+              'RadeonOpenCompute/rocm_smi_lib for the installation guide.{1}'\
+              .format('\33[33m', '\033[0m'))
+        exit()
+
+SMI_HASH = '@PKG_VERSION_HASH@'
diff --git a/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt b/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt
index 645e598965..012ce9a2c0 100755
--- a/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt
+++ b/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt
@@ -55,10 +55,10 @@ set(${ROCM_SMI}_VERSION_BUILD "0")
 set(${ROCM_SMI}_VERSION_HASH "${PKG_VERSION_HASH}")
 message("SOVERSION: ${SO_VERSION_STRING}")
 
-# Configure rsmiBindings.py.in with SO major version:
+# Configure rsmiBindingsInit.py.in with SO major version:
 configure_file(
-    "${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindings.py.in"
-    "${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindings.py")
+    "${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindingsInit.py.in"
+    "${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindingsInit.py")
 
 # Create a configure file to get version info from within library
 configure_file(
@@ -139,6 +139,9 @@ install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/${ROCM_SMI_TARGET}Config.h
 install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/kfd_ioctl.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocm_smi
         COMPONENT dev)
+install(PROGRAMS
${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindingsInit.py + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI} + COMPONENT dev) install(PROGRAMS ${COMMON_SRC_ROOT}/python_smi_tools/rsmiBindings.py DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI} COMPONENT dev) diff --git a/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg b/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg index 2236d5fe10..d4238cb33e 100644 --- a/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg +++ b/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg @@ -759,8 +759,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = @CMAKE_CURRENT_SOURCE_DIR@/../README.md \ - @CMAKE_CURRENT_SOURCE_DIR@/../include/rocm_smi/rocm_smi.h +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/../include/rocm_smi/rocm_smi.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses