SWDEV-342812- Add NPS support
Updates:
* Added rsmi_dev_nps_mode_set and rsmi_dev_nps_mode_get
* Added ability to set multiple SYSFS files in debug build
* Added ability to see user's env variables set for debug build
* Added tests for rsmi_dev_nps_mode_set and rsmi_dev_nps_mode_get
* Added ability to restart AMD GPU driver, used in nps_mode_set
* Updated ROCm_SMI_Manual.pdf to include new APIs
* Added progress bar for long running python_smi_tools, used
in setting nps_mode if runs longer than .1 seconds
Change-Id: I6d61bedd28d7cba6aff432ad2d127ba741b7d15a
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/amdsmi commit: 9ef376cd61]
Этот коммит содержится в:
@@ -30,10 +30,9 @@ modules.builtin
|
||||
*.lzma
|
||||
*.xz
|
||||
*.lzo
|
||||
#*.patch
|
||||
*.patch
|
||||
*.gcno
|
||||
*.pyc
|
||||
*current_compute_partition
|
||||
|
||||
#
|
||||
# Top-level generic files/folders
|
||||
@@ -121,4 +120,9 @@ _deps
|
||||
# ROCm files
|
||||
# Removes generated config headers like rocmsmi64Config.h & oamConfig.h
|
||||
#
|
||||
*Config.h
|
||||
*Config.h
|
||||
|
||||
#
|
||||
# Fake SYSFS files
|
||||
#
|
||||
/device/*
|
||||
@@ -124,8 +124,10 @@ typedef enum {
|
||||
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
|
||||
//!< acquired because it is already
|
||||
//!< being used
|
||||
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
|
||||
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
|
||||
//!< exceeded INT32_MAX
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR, //!< Could not successfully restart
|
||||
//!< the amdgpu driver
|
||||
|
||||
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
|
||||
} rsmi_status_t;
|
||||
@@ -353,7 +355,8 @@ typedef rsmi_clk_type_t rsmi_clk_type;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Compute Partition types
|
||||
* @brief Compute Partition. This enum is used to identify
|
||||
* various compute partitioning settings.
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
@@ -365,13 +368,37 @@ typedef enum {
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
RSMI_COMPUTE_PARTITION_QPX //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
} rsmi_compute_partition_type_t;
|
||||
/// \cond Ignore in docs.
|
||||
typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* @brief NPS Modes. This enum is used to identify various
|
||||
* NPS mode types.
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_MEMORY_PARTITION_UNKNOWN = 0,
|
||||
RSMI_MEMORY_PARTITION_NPS1, //!< NPS1 - All CCD & XCD data is interleaved
|
||||
//!< across all 8 HBM stacks (all stacks/1).
|
||||
RSMI_MEMORY_PARTITION_NPS2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
|
||||
//!< across the 4 HBM stacks per AID pair
|
||||
//!< (8 stacks/2).
|
||||
RSMI_MEMORY_PARTITION_NPS4, //!< NPS4 - Each XCD data is interleaved
|
||||
//!< across 2 (or single) HBM stacks
|
||||
//!< (8 stacks/8 or 8 stacks/4).
|
||||
RSMI_MEMORY_PARTITION_NPS8, //!< NPS8 - Each XCD uses a single HBM stack
|
||||
//!< (8 stacks/8). Or each XCD uses a single
|
||||
//!< HBM stack & CCDs share 2 non-interleaved
|
||||
//!< HBM stacks on its AID
|
||||
//!< (AID[1,2,3] = 6 stacks/6).
|
||||
} rsmi_nps_mode_type_t;
|
||||
/// \cond Ignore in docs.
|
||||
typedef rsmi_nps_mode_type_t rsmi_nps_mode_type;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* @brief Temperature Metrics. This enum is used to identify various
|
||||
* temperature metrics. Corresponding values will be in millidegrees
|
||||
@@ -3512,7 +3539,7 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
* @param[inout] compute_partition a pointer to a char string variable,
|
||||
* which the device's current compute partition will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p compute_partition
|
||||
* @param[in] len the length of the caller provided buffer @p compute_partition
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
@@ -3537,7 +3564,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] compute_partition using enum ::rsmi_copmpute_partition_type_t,
|
||||
* @param[in] compute_partition using enum ::rsmi_compute_partition_type_t,
|
||||
* define what the selected device's compute partition setting should be
|
||||
* updated to.
|
||||
*
|
||||
@@ -3554,6 +3581,69 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
|
||||
/** @} */ // end of ComputePartition
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup NPSMode NPS Mode Functions
|
||||
* These functions are used to query and modify the device's NPS mode (memory partition).
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Retrieves the NPS mode (memory partition) for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p nps_mode ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* nps mode string. Upon successful retrieval, the obtained device's
|
||||
* nps mode string shall be stored in the passed @p nps_mode char string
|
||||
* variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] nps_mode a pointer to a char string variable,
|
||||
* which the device's nps mode will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p nps_mode ,
|
||||
* suggested length is 5 or greater.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire nps mode value. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_nps_mode_get(uint32_t dv_ind, char *nps_mode, uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Modifies a selected device's NPS mode (memory partition) setting.
|
||||
*
|
||||
* @details Given a device index @p dv_ind and a type of nps mode
|
||||
* @p nps_mode, this function will attempt to update the selected
|
||||
* device's nps mode setting.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] nps_mode using enum ::rsmi_nps_mode_type_t,
|
||||
* define what the selected device's NPS mode setting should be updated to.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_PERMISSION function requires root access
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart
|
||||
* the amdgpu driver
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode);
|
||||
|
||||
/** @} */ // end of NPSMode
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup APISupport Supported Functions
|
||||
* API function support varies by both GPU type and the version of the
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2018, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2018-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -49,6 +49,7 @@
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
#define CHECK_DV_IND_RANGE \
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
|
||||
@@ -165,7 +166,9 @@ struct RocmSMI_env_vars {
|
||||
|
||||
// The integer value of sysfs field enum that is to be over-ridden.
|
||||
// Env. variable RSMI_DEBUG_ENUM_OVERRIDE is used to specify this.
|
||||
uint32_t enum_override;
|
||||
// A set of enum overrides, RSMI_DEBUG_ENUM_OVERRIDE now supports
|
||||
// comma delimited values.
|
||||
std::unordered_set<uint32_t> enum_overrides;
|
||||
|
||||
// Sysfs path overrides
|
||||
|
||||
|
||||
@@ -162,7 +162,8 @@ enum DevInfoTypes {
|
||||
kDevNumaNode,
|
||||
kDevGpuMetrics,
|
||||
kDevGpuReset,
|
||||
kDevComputePartition
|
||||
kDevComputePartition,
|
||||
kDevMemoryPartition
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -215,6 +216,7 @@ class Device {
|
||||
void DumpSupportedFunctions(void);
|
||||
bool DeviceAPISupported(std::string name, uint64_t variant,
|
||||
uint64_t sub_variant);
|
||||
rsmi_status_t restartAMDGpuDriver(void);
|
||||
|
||||
private:
|
||||
std::shared_ptr<Monitor> monitor_;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -113,6 +113,7 @@ class RocmSMI {
|
||||
uint64_t *weight);
|
||||
int get_node_index(uint32_t dv_ind, uint32_t *node_ind);
|
||||
const RocmSMI_env_vars& getEnv(void);
|
||||
void printEnvVarInfo(void);
|
||||
static const std::map<amd::smi::DevInfoTypes, std::string> devInfoTypesStrings;
|
||||
|
||||
private:
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2018, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2018-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -76,6 +76,8 @@ int WriteSysfsStr(std::string path, std::string val);
|
||||
|
||||
bool IsInteger(const std::string & n_str);
|
||||
|
||||
std::pair<bool, std::string> executeCommand(std::string command, bool stdOut = true);
|
||||
|
||||
rsmi_status_t handleException();
|
||||
rsmi_status_t
|
||||
GetDevValueVec(amd::smi::DevInfoTypes type,
|
||||
|
||||
@@ -18,6 +18,9 @@ import sys
|
||||
import subprocess
|
||||
import _thread
|
||||
import time
|
||||
import multiprocessing
|
||||
import trace
|
||||
from io import StringIO
|
||||
from time import ctime
|
||||
from subprocess import check_output
|
||||
from rsmiBindings import *
|
||||
@@ -509,7 +512,7 @@ def printEventList(device, delay, eventList):
|
||||
data.message.decode('utf8') + '\r']])
|
||||
|
||||
|
||||
def printLog(device, metricName, value):
|
||||
def printLog(device, metricName, value, extraSpace=False):
|
||||
""" Print out to the SMI log
|
||||
|
||||
@param device: DRM device identifier
|
||||
@@ -530,7 +533,13 @@ def printLog(device, metricName, value):
|
||||
if device is None:
|
||||
logstr = logstr[13:]
|
||||
# Force thread safe printing
|
||||
print(logstr + '\n', end='')
|
||||
lock = multiprocessing.Lock()
|
||||
lock.acquire()
|
||||
if extraSpace:
|
||||
print('\n' + logstr + '\n', end='', flush=True)
|
||||
else:
|
||||
print(logstr + '\n', end='', flush=True)
|
||||
lock.release()
|
||||
|
||||
|
||||
def printListLog(metricName, valuesList):
|
||||
@@ -1336,6 +1345,76 @@ def setComputePartition(deviceList, computePartitionType):
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
def progressbar(it, prefix="", size=60, out=sys.stdout):
    """ Generator that yields every item of a sized iterable while drawing a
    text progress bar

    The bar is redrawn in place (carriage return, no newline) before the first
    item is yielded and again each time the caller asks for the next item.
    Once the iterable is exhausted a blank line is written.

    @param it: Sized iterable to walk; len(it) is the bar's total
    @param prefix: Text printed in front of the bar
    @param size: Width of the bar in characters
    @param out: File-like object the bar is written to
    """
    count = len(it)

    def show(j):
        # An empty iterable has nothing left to do: draw a full bar instead
        # of dividing by zero.
        x = size if count == 0 else int(size*j/count)
        # A single flushed print per redraw; a lock created on the spot
        # cannot be shared with any other writer, so none is taken here.
        print("{}[{}{}] {}/{} secs remain".format(prefix, u"█"*x, "."*(size-x), j, count),
              end='\r', file=out, flush=True)

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    print("\n", flush=True, file=out)
|
||||
|
||||
def showProgressbar(title="", timeInSeconds=13):
    """ Draw a 40-character progress bar that advances once per second

    @param title: Optional label; when non-empty it is followed by ': '
    @param timeInSeconds: Number of one-second steps to display
    """
    label = title if title == "" else title + ": "
    for _ in progressbar(range(timeInSeconds), label, 40):
        time.sleep(1)
|
||||
|
||||
|
||||
def setNPSMode(deviceList, npsMode):
    """ Sets nps mode (memory partition) for a list of devices

    While the set call runs, a progress bar is drawn from a separate process;
    that process is stopped as soon as the call returns or raises.

    @param deviceList: List of DRM devices (can be a single-item list)
    @param npsMode: NPS Mode type to set as (name from nps_mode_type_l,
    matched case-insensitively)
    """
    printLogSpacer(' Set nps mode to %s ' % (str(npsMode).upper()))
    for device in deviceList:
        npsMode = npsMode.upper()
        if npsMode not in nps_mode_type_l:
            printErrLog(device, 'Invalid nps mode type %s'
                        '\nValid nps mode types are %s'
                        % (npsMode, ', '.join(map(str, nps_mode_type_l))))
            return (None, None)

        t1 = multiprocessing.Process(target=showProgressbar,
                                     args=("Updating NPS mode", 13,))
        t1.start()
        start = time.time()
        try:
            ret = rocmsmi.rsmi_dev_nps_mode_set(device,
                                                rsmi_nps_mode_type_dict[npsMode])
        finally:
            duration = time.time() - start
            # Always stop the progress bar, even when the set call raised,
            # so the helper process is never left running.
            if t1.is_alive():
                t1.terminate()
            t1.join()
        # For longer runs, add an extra line before the output so the result
        # does not overwrite the progress bar.
        addExtraLine = duration >= float(0.1)

        if rsmi_ret_ok(ret, device, silent=True):
            printLog(device,
                     'Successfully set nps mode to %s' % (npsMode),
                     None, addExtraLine)
        elif ret == rsmi_status_t.RSMI_STATUS_PERMISSION:
            printLog(device, 'Permission denied', None, addExtraLine)
        elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
            printLog(device, 'Not supported on the given system', None, addExtraLine)
        else:
            rsmi_ret_ok(ret, device)
            printErrLog(device, 'Failed to set NPS mode, even though device supports it.')
    printLogSpacer()
|
||||
|
||||
|
||||
def showAllConcise(deviceList):
|
||||
""" Display critical info for all devices in a concise format
|
||||
|
||||
@@ -2780,9 +2859,28 @@ def showComputePartition(deviceList):
|
||||
printLog(device, 'Not supported on the given system', None)
|
||||
else:
|
||||
rsmi_ret_ok(ret, device)
|
||||
printErrLog(device, 'Failed to retrieve compute partition, even though device supports it.', None)
|
||||
printErrLog(device, 'Failed to retrieve compute partition, even though device supports it.')
|
||||
printLogSpacer()
|
||||
|
||||
def showNPSMode(deviceList):
    """ Print the current NPS mode of every device in a list

    @param deviceList: List of DRM devices (can be a single-item list)
    """
    bufferLen = 256
    modeBuffer = create_string_buffer(bufferLen)
    printLogSpacer(' Current NPS Mode ')
    for device in deviceList:
        ret = rocmsmi.rsmi_dev_nps_mode_get(device, modeBuffer, bufferLen)
        # Success only counts when a non-empty mode string came back.
        if rsmi_ret_ok(ret, device, silent=True) and modeBuffer.value.decode():
            printLog(device, 'NPS Mode', modeBuffer.value.decode())
            continue
        if ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
            printLog(device, 'Not supported on the given system', None)
            continue
        # Any other outcome: report the status verbosely, then flag the failure.
        rsmi_ret_ok(ret, device)
        printErrLog(device, 'Failed to retrieve NPS mode, even though device supports it.')
    printLogSpacer()
|
||||
|
||||
|
||||
def checkAmdGpus(deviceList):
|
||||
""" Check if there are any AMD GPUs being queried,
|
||||
return False if there are none
|
||||
@@ -3130,6 +3228,7 @@ if __name__ == '__main__':
|
||||
action='store_true')
|
||||
groupDisplay.add_argument('--shownodesbw', help='Shows the numa nodes ', action='store_true')
|
||||
groupDisplay.add_argument('--showcomputepartition', help='Shows current compute partitioning ', action='store_true')
|
||||
groupDisplay.add_argument('--shownpsmode', help='Shows current nps mode ', action='store_true')
|
||||
|
||||
groupActionReset.add_argument('-r', '--resetclocks', help='Reset clocks and OverDrive to default',
|
||||
action='store_true')
|
||||
@@ -3176,8 +3275,10 @@ if __name__ == '__main__':
|
||||
metavar='SCLK', nargs=1)
|
||||
groupAction.add_argument('--setcomputepartition', help='Set compute partition',
|
||||
choices=compute_partition_type_l + [x.lower() for x in compute_partition_type_l],
|
||||
type=str, nargs=1
|
||||
)
|
||||
type=str, nargs=1)
|
||||
groupAction.add_argument('--setnpsmode', help='Set nps mode',
|
||||
choices=nps_mode_type_l + [x.lower() for x in nps_mode_type_l],
|
||||
type=str, nargs=1)
|
||||
groupAction.add_argument('--rasenable', help='Enable RAS for specified block and error type', type=str, nargs=2,
|
||||
metavar=('BLOCK', 'ERRTYPE'))
|
||||
groupAction.add_argument('--rasdisable', help='Disable RAS for specified block and error type', type=str, nargs=2,
|
||||
@@ -3215,7 +3316,8 @@ if __name__ == '__main__':
|
||||
or args.resetclocks or args.setprofile or args.resetprofile or args.setoverdrive or args.setmemoverdrive \
|
||||
or args.setpoweroverdrive or args.resetpoweroverdrive or args.rasenable or args.rasdisable or \
|
||||
args.rasinject or args.gpureset or args.setperfdeterminism or args.setslevel or args.setmlevel or \
|
||||
args.setvc or args.setsrange or args.setmrange or args.setclock or args.setcomputepartition:
|
||||
args.setvc or args.setsrange or args.setmrange or args.setclock or \
|
||||
args.setcomputepartition or args.setnpsmode:
|
||||
relaunchAsSudo()
|
||||
|
||||
# If there is one or more device specified, use that for all commands, otherwise use a
|
||||
@@ -3278,6 +3380,7 @@ if __name__ == '__main__':
|
||||
args.showreplaycount = True
|
||||
args.showvc = True
|
||||
args.showcomputepartition = True
|
||||
args.shownpsmode = True
|
||||
|
||||
if not PRINT_JSON:
|
||||
args.showprofile = True
|
||||
@@ -3408,6 +3511,8 @@ if __name__ == '__main__':
|
||||
showEnergy(deviceList)
|
||||
if args.showcomputepartition:
|
||||
showComputePartition(deviceList)
|
||||
if args.shownpsmode:
|
||||
showNPSMode(deviceList)
|
||||
if args.setclock:
|
||||
setClocks(deviceList, args.setclock[0], [int(args.setclock[1])])
|
||||
if args.setsclk:
|
||||
@@ -3448,6 +3553,8 @@ if __name__ == '__main__':
|
||||
setPerfDeterminism(deviceList, args.setperfdeterminism[0])
|
||||
if args.setcomputepartition:
|
||||
setComputePartition(deviceList, args.setcomputepartition[0])
|
||||
if args.setnpsmode:
|
||||
setNPSMode(deviceList, args.setnpsmode[0])
|
||||
if args.resetprofile:
|
||||
resetProfile(deviceList)
|
||||
if args.resetxgmierr:
|
||||
|
||||
@@ -66,6 +66,10 @@ class rsmi_status_t(c_int):
|
||||
RSMI_STATUS_INTERRUPT = 0xC
|
||||
RSMI_STATUS_UNEXPECTED_SIZE = 0xD
|
||||
RSMI_STATUS_NO_DATA = 0xE
|
||||
RSMI_STATUS_UNEXPECTED_DATA = 0xF
|
||||
RSMI_STATUS_BUSY = 0x10
|
||||
RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR = 0x12
|
||||
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF
|
||||
|
||||
|
||||
@@ -86,6 +90,10 @@ rsmi_status_verbose_err_out = {
|
||||
rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occured during execution',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_SIZE: 'Unexpected amount of data read',
|
||||
rsmi_status_t.RSMI_STATUS_NO_DATA: 'No data found for the given input',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA: 'Unexpected data received',
|
||||
rsmi_status_t.RSMI_STATUS_BUSY: 'Busy - resources are preventing call the ability to execute',
|
||||
rsmi_status_t.RSMI_STATUS_REFCOUNT_OVERFLOW: 'Data overflow - data exceeded INT32_MAX',
|
||||
rsmi_status_t.RSMI_STATUS_AMDGPU_RESTART_ERR: 'Could not successfully restart the amdgpu driver',
|
||||
rsmi_status_t.RSMI_STATUS_UNKNOWN_ERROR: 'Unknown error occured'
|
||||
}
|
||||
|
||||
@@ -606,4 +614,26 @@ rsmi_compute_partition_type = rsmi_compute_partition_type_t
|
||||
# Usage example to get corresponding names:
|
||||
# compute_partition_type_l[rsmi_compute_partition_type_t.RSMI_COMPUTE_PARTITION_CPX]
|
||||
# will return string 'CPX'
|
||||
compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
|
||||
compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
|
||||
|
||||
class rsmi_nps_mode_type_t(c_int):
|
||||
RSMI_MEMORY_PARTITION_UNKNOWN = 0
|
||||
RSMI_MEMORY_PARTITION_NPS1 = 1
|
||||
RSMI_MEMORY_PARTITION_NPS2 = 2
|
||||
RSMI_MEMORY_PARTITION_NPS4 = 3
|
||||
RSMI_MEMORY_PARTITION_NPS8 = 4
|
||||
|
||||
rsmi_nps_mode_type_dict = {
|
||||
'NPS1': 1,
|
||||
'NPS2': 2,
|
||||
'NPS4': 3,
|
||||
'NPS8': 4
|
||||
}
|
||||
|
||||
rsmi_nps_mode_type = rsmi_nps_mode_type_t
|
||||
|
||||
# nps_mode_type_l includes string names for the rsmi_nps_mode_type_t
|
||||
# Usage example to get corresponding names:
|
||||
# nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2 - 1]
|
||||
# will return string 'NPS2'
|
||||
nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
|
||||
Двоичные данные
Двоичный файл не отображается.
@@ -100,6 +100,33 @@
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(RET) { \
|
||||
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
||||
std::cout << "This function is not supported in the current environment." \
|
||||
<< std::endl; \
|
||||
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
|
||||
std::cout << "[ERROR] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
|
||||
<< std::endl; \
|
||||
} else { \
|
||||
CHK_RSMI_RET(RET) \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(RET) { \
|
||||
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
||||
std::cout << "This function is not supported in the current environment." \
|
||||
<< std::endl; \
|
||||
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
|
||||
std::cout << "[WARN] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
|
||||
<< std::endl; \
|
||||
} else if ((RET) == RSMI_STATUS_INSUFFICIENT_SIZE) { \
|
||||
std::cout << "[WARN] RSMI_STATUS_INSUFFICIENT_SIZE retrieved." \
|
||||
<< std::endl; \
|
||||
} else { \
|
||||
CHK_RSMI_RET(RET) \
|
||||
} \
|
||||
}
|
||||
|
||||
static void print_test_header(const char *str, uint32_t dv_ind) {
|
||||
std::cout << "********************************" << std::endl;
|
||||
std::cout << "*** " << str << std::endl;
|
||||
@@ -158,6 +185,30 @@ mapStringToRSMIComputePartitionTypes {
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
||||
};
|
||||
|
||||
static const std::string
|
||||
nps_mode_string(rsmi_nps_mode_type_t partition) {
|
||||
switch (partition) {
|
||||
case RSMI_MEMORY_PARTITION_NPS1:
|
||||
return "NPS1";
|
||||
case RSMI_MEMORY_PARTITION_NPS2:
|
||||
return "NPS2";
|
||||
case RSMI_MEMORY_PARTITION_NPS4:
|
||||
return "NPS4";
|
||||
case RSMI_MEMORY_PARTITION_NPS8:
|
||||
return "NPS8";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static std::map<std::string, rsmi_nps_mode_type_t>
|
||||
mapStringToRSMINpsModeTypes {
|
||||
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
|
||||
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
|
||||
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
|
||||
{"NPS8", RSMI_MEMORY_PARTITION_NPS8}
|
||||
};
|
||||
|
||||
static const char *
|
||||
perf_level_string(rsmi_dev_perf_level_t perf_lvl) {
|
||||
switch (perf_lvl) {
|
||||
@@ -184,7 +235,7 @@ static bool isUserRunningAsSudo() {
|
||||
return isRunningWithSudo;
|
||||
}
|
||||
|
||||
bool isFileWritable(rsmi_status_t response) {
|
||||
static bool isFileWritable(rsmi_status_t response) {
|
||||
// Clock files may not be writable, causing sets to
|
||||
// return RSMI_STATUS_PERMISSION. If running as sudo,
|
||||
// this means file is not writable.
|
||||
@@ -492,35 +543,23 @@ static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|
||||
rsmi_status_t ret;
|
||||
uint32_t buffer_len = 10;
|
||||
char originalComputePartition[buffer_len];
|
||||
originalComputePartition[0] = '\0';
|
||||
print_test_header("Compute Partitioning Control", dv_ind);
|
||||
/**
|
||||
typedef enum {
|
||||
RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
} rsmi_compute_partition_type_t;
|
||||
*/
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, originalComputePartition, buffer_len);
|
||||
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
||||
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, originalComputePartition,
|
||||
buffer_len);
|
||||
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
||||
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
std::cout << "Device does not support the compute partition feature."
|
||||
<< std::endl;
|
||||
std::cout << "*********************************************" << std::endl;
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
} else {
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "Original compute partition is " << originalComputePartition
|
||||
<< "." << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Original Compute Partition: "
|
||||
<< (((originalComputePartition == nullptr)
|
||||
|| ((originalComputePartition != nullptr)
|
||||
&& (originalComputePartition[0] == '\0')))
|
||||
? "UNKNOWN" : originalComputePartition)
|
||||
<< std::endl << std::endl;
|
||||
|
||||
for (int newComputePartition = RSMI_COMPUTE_PARTITION_CPX;
|
||||
newComputePartition <= RSMI_COMPUTE_PARTITION_QPX;
|
||||
newComputePartition++) {
|
||||
@@ -550,6 +589,59 @@ static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Exercises rsmi_dev_nps_mode_get()/rsmi_dev_nps_mode_set() on one device:
// records the current NPS mode, attempts to set NPS1 through NPS8 in turn,
// then restores the recorded mode.
// Returns RSMI_STATUS_SUCCESS, including when the device reports that the
// feature is not supported (the test is skipped in that case).
static rsmi_status_t test_set_nps_mode(uint32_t dv_ind) {
  rsmi_status_t ret;
  const uint32_t buffer_len = 10;
  char originalNpsMode[buffer_len];
  originalNpsMode[0] = '\0';
  print_test_header("NPS Mode Control", dv_ind);

  ret = rsmi_dev_nps_mode_get(dv_ind, originalNpsMode, buffer_len);
  CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
  if (ret == RSMI_STATUS_NOT_SUPPORTED) {
    return RSMI_STATUS_SUCCESS;
  }

  // originalNpsMode is an array, so only the empty-string case needs checking.
  std::cout << "Original NPS Mode: "
            << ((originalNpsMode[0] == '\0') ? "UNKNOWN" : originalNpsMode)
            << std::endl << std::endl;

  for (int newNpsMode = RSMI_MEMORY_PARTITION_NPS1;
       newNpsMode <= RSMI_MEMORY_PARTITION_NPS8;
       newNpsMode++) {
    rsmi_nps_mode_type_t newMemoryPartition
      = static_cast<rsmi_nps_mode_type_t>(newNpsMode);
    std::cout << "Attempting to set NPS mode to "
              << nps_mode_string(newMemoryPartition) << "..."
              << std::endl;
    ret = rsmi_dev_nps_mode_set(dv_ind, newMemoryPartition);
    CHK_RSMI_NOT_SUPPORTED_RET(ret)
    if (ret == RSMI_STATUS_NOT_SUPPORTED) {
      // do not continue attempting to set, device does not support setting
      return RSMI_STATUS_SUCCESS;
    }
    std::cout << "Done setting NPS mode to "
              << nps_mode_string(newMemoryPartition)
              << "." << std::endl;
    std::cout << std::endl << std::endl;
  }

  // Restore only when the recorded string names a known mode. A lookup with
  // find() avoids operator[] inserting a default (UNKNOWN) entry and then
  // trying to set the device to it.
  auto origIt = mapStringToRSMINpsModeTypes.find(originalNpsMode);
  if (origIt != mapStringToRSMINpsModeTypes.end()) {
    std::cout << "Resetting NPS mode to " << originalNpsMode
              << "... " << std::endl;
    ret = rsmi_dev_nps_mode_set(dv_ind, origIt->second);
    // Check the result of the restore itself before reporting completion.
    CHK_RSMI_NOT_SUPPORTED_RET(ret)
    std::cout << "Done" << std::endl;
  }
  return RSMI_STATUS_SUCCESS;
}
|
||||
|
||||
int main() {
|
||||
rsmi_status_t ret;
|
||||
|
||||
@@ -577,10 +669,31 @@ int main() {
|
||||
<< "rsmi_dev_compute_partition_get()..."
|
||||
<< std::endl;
|
||||
char current_compute_partition[256];
|
||||
current_compute_partition[0] = '\0';
|
||||
ret = rsmi_dev_compute_partition_get(i, current_compute_partition, 256);
|
||||
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
||||
std::cout << "\t**Current Compute Partition setting: "
|
||||
<< current_compute_partition << std::endl;
|
||||
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
||||
std::cout << "\t**Current Compute Partition: "
|
||||
<< (((current_compute_partition == nullptr)
|
||||
|| ((current_compute_partition != nullptr)
|
||||
&& (current_compute_partition[0] == '\0')))
|
||||
? "UNKNOWN" : current_compute_partition)
|
||||
<< std::endl;
|
||||
|
||||
std::cout << std::endl << std::endl;
|
||||
std::cout << "Starting to call "
|
||||
<< "rsmi_dev_nps_mode_get()..."
|
||||
<< std::endl;
|
||||
uint32_t len = 5;
|
||||
char nps_mode[len];
|
||||
nps_mode[0] = '\0';
|
||||
ret = rsmi_dev_nps_mode_get(i, nps_mode, len);
|
||||
CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(ret)
|
||||
std::cout << "\t**NPS Mode: "
|
||||
<< (((nps_mode == nullptr)
|
||||
|| ((nps_mode != nullptr)
|
||||
&& (nps_mode[0] == '\0')))
|
||||
? "UNKNOWN" : nps_mode)
|
||||
<< std::endl;
|
||||
|
||||
ret = rsmi_dev_gpu_metrics_info_get(i, &p);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
@@ -672,6 +785,9 @@ int main() {
|
||||
|
||||
ret = test_set_freq(i);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_set_nps_mode(i);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -62,6 +62,7 @@
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
|
||||
#include "rocm_smi/rocm_smi_common.h" // Should go before rocm_smi.h
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
@@ -1696,6 +1697,7 @@ mapStringToRSMIComputePartitionTypes {
|
||||
|
||||
std::map<rsmi_compute_partition_type_t, std::string>
|
||||
mapRSMIToStringComputePartitionTypes {
|
||||
{RSMI_COMPUTE_PARTITION_INVALID, "UNKNOWN"},
|
||||
{RSMI_COMPUTE_PARTITION_CPX, "CPX"},
|
||||
{RSMI_COMPUTE_PARTITION_SPX, "SPX"},
|
||||
{RSMI_COMPUTE_PARTITION_DPX, "DPX"},
|
||||
@@ -1703,6 +1705,23 @@ mapRSMIToStringComputePartitionTypes {
|
||||
{RSMI_COMPUTE_PARTITION_QPX, "QPX"}
|
||||
};
|
||||
|
||||
std::map<rsmi_nps_mode_type_t, std::string>
|
||||
mapRSMIToStringNPSModeTypes {
|
||||
{RSMI_MEMORY_PARTITION_UNKNOWN, "UNKNOWN"},
|
||||
{RSMI_MEMORY_PARTITION_NPS1, "NPS1"},
|
||||
{RSMI_MEMORY_PARTITION_NPS2, "NPS2"},
|
||||
{RSMI_MEMORY_PARTITION_NPS4, "NPS4"},
|
||||
{RSMI_MEMORY_PARTITION_NPS8, "NPS8"}
|
||||
};
|
||||
|
||||
std::map<std::string, rsmi_nps_mode_type_t>
|
||||
mapStringToNPSModeTypes {
|
||||
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
|
||||
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
|
||||
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
|
||||
{"NPS8", RSMI_MEMORY_PARTITION_NPS8}
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_id_name_str_from_line(uint64_t id, std::string ln,
|
||||
std::istringstream *ln_str) {
|
||||
@@ -2780,71 +2799,84 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) {
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_OUT_OF_RESOURCES:
|
||||
*status_string = "Unable to acquire memory or other resource";
|
||||
*status_string = "RSMI_STATUS_OUT_OF_RESOURCES: Unable to acquire memory "
|
||||
"or other resource";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INTERNAL_EXCEPTION:
|
||||
*status_string = "An internal exception was caught";
|
||||
*status_string = "RSMI_STATUS_INTERNAL_EXCEPTION: An internal exception "
|
||||
"was caught";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INPUT_OUT_OF_BOUNDS:
|
||||
*status_string = "The provided input is out of allowable or safe range";
|
||||
*status_string = "RSMI_STATUS_INPUT_OUT_OF_BOUNDS: The provided input is "
|
||||
"out of allowable or safe range";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INIT_ERROR:
|
||||
*status_string = "An error occurred during initialization, during "
|
||||
"monitor discovery or when when initializing internal data structures";
|
||||
*status_string = "RSMI_STATUS_INIT_ERROR: An error occurred during "
|
||||
"initialization, during monitor discovery or when when "
|
||||
"initializing internal data structures";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_NOT_YET_IMPLEMENTED:
|
||||
*status_string = "The called function has not been implemented in this "
|
||||
"system for this device type";
|
||||
*status_string = "RSMI_STATUS_NOT_YET_IMPLEMENTED: The called function "
|
||||
"has not been implemented in this system for this "
|
||||
"device type";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_NOT_FOUND:
|
||||
*status_string = "An item required to complete the call was not found";
|
||||
*status_string = "RSMI_STATUS_NOT_FOUND: An item required to complete "
|
||||
"the call was not found";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INSUFFICIENT_SIZE:
|
||||
*status_string = "Not enough resources were available to fully execute"
|
||||
" the call";
|
||||
*status_string = "RSMI_STATUS_INSUFFICIENT_SIZE: Not enough resources "
|
||||
"were available to fully execute the call";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INTERRUPT:
|
||||
*status_string = "An interrupt occurred while executing the function";
|
||||
*status_string = "RSMI_STATUS_INTERRUPT: An interrupt occurred while "
|
||||
"executing the function";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNEXPECTED_SIZE:
|
||||
*status_string = "Data (usually from reading a file) was out of"
|
||||
" range from what was expected";
|
||||
*status_string = "RSMI_STATUS_UNEXPECTED_SIZE: Data (usually from reading"
|
||||
" a file) was out of range from what was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_NO_DATA:
|
||||
*status_string = "No data was found (usually from reading a file) "
|
||||
"where data was expected";
|
||||
*status_string = "RSMI_STATUS_NO_DATA: No data was found (usually from "
|
||||
"reading a file) where data was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNEXPECTED_DATA:
|
||||
*status_string = "Data (usually from reading a file) was not of the "
|
||||
"type that was expected";
|
||||
*status_string = "RSMI_STATUS_UNEXPECTED_DATA: Data (usually from reading"
|
||||
" a file) was not of the type that was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_BUSY:
|
||||
*status_string = "A resource or mutex could not be acquired "
|
||||
"because it is already being used";
|
||||
*status_string = "RSMI_STATUS_BUSY: A resource or mutex could not be "
|
||||
"acquired because it is already being used";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_REFCOUNT_OVERFLOW:
|
||||
*status_string = "An internal reference counter exceeded INT32_MAX";
|
||||
*status_string = "RSMI_STATUS_REFCOUNT_OVERFLOW: An internal reference "
|
||||
"counter exceeded INT32_MAX";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_AMDGPU_RESTART_ERR:
|
||||
*status_string = "RSMI_STATUS_AMDGPU_RESTART_ERR: Could not successfully "
|
||||
"restart the amdgpu driver";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNKNOWN_ERROR:
|
||||
*status_string = "An unknown error prevented the call from completing"
|
||||
" successfully";
|
||||
*status_string = "RSMI_STATUS_UNKNOWN_ERROR: An unknown error prevented "
|
||||
"the call from completing successfully";
|
||||
break;
|
||||
|
||||
default:
|
||||
*status_string = "An unknown error occurred";
|
||||
*status_string = "RSMI_STATUS_UNKNOWN_ERROR: An unknown error occurred";
|
||||
return RSMI_STATUS_UNKNOWN_ERROR;
|
||||
}
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
@@ -3718,12 +3750,8 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
static rsmi_status_t
|
||||
get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
|
||||
TRY
|
||||
std::string val_str;
|
||||
|
||||
if (compute_partition.c_str() == nullptr) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
CHK_SUPPORT_NAME_ONLY(compute_partition.c_str())
|
||||
std::string val_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevComputePartition,
|
||||
@@ -3811,14 +3839,18 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
}
|
||||
|
||||
// do nothing if compute_partition is the current compute partition
|
||||
get_compute_partition(dv_ind, currentComputePartition);
|
||||
rsmi_status_t ret_get = get_compute_partition(dv_ind, currentComputePartition);
|
||||
// we can try to set, even if we get unexpected data
|
||||
if (ret_get != RSMI_STATUS_SUCCESS
|
||||
&& ret_get != RSMI_STATUS_UNEXPECTED_DATA) {
|
||||
return ret_get;
|
||||
}
|
||||
rsmi_compute_partition_type_t currRSMIComputePartition
|
||||
= mapStringToRSMIComputePartitionTypes[currentComputePartition];
|
||||
if (currRSMIComputePartition == compute_partition) {
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
newComputePartitionStr = mapRSMIToStringComputePartitionTypes[compute_partition];
|
||||
GET_DEV_FROM_INDX
|
||||
int ret = dev->writeDevInfo(amd::smi::kDevComputePartition,
|
||||
newComputePartitionStr);
|
||||
@@ -3826,6 +3858,134 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
CATCH
|
||||
}
|
||||
|
||||
// Reads the current memory partition (NPS mode) string of device dv_ind.
//
// On success nps_mode is overwritten with the string that was read and
// RSMI_STATUS_SUCCESS is returned. If the read itself fails, the status from
// get_dev_value_str() is returned unchanged. If the string read is not one of
// the modes listed in mapStringToNPSModeTypes, RSMI_STATUS_UNEXPECTED_DATA is
// returned and nps_mode is left untouched.
static rsmi_status_t get_nps_mode(uint32_t dv_ind, std::string &nps_mode) {
  TRY
  CHK_SUPPORT_NAME_ONLY(nps_mode.c_str())
  std::string val_str;

  DEVICE_MUTEX
  rsmi_status_t ret = get_dev_value_str(amd::smi::kDevMemoryPartition,
                                        dv_ind, &val_str);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // Look the string up with find(): operator[] would insert every
  // unrecognised string into the global map as a side effect.
  const auto mode_it = mapStringToNPSModeTypes.find(val_str);
  if (mode_it == mapStringToNPSModeTypes.end()) {
    // Retrieved an unknown NPS mode
    return RSMI_STATUS_UNEXPECTED_DATA;
  }

  switch (mode_it->second) {
    case RSMI_MEMORY_PARTITION_NPS1:
    case RSMI_MEMORY_PARTITION_NPS2:
    case RSMI_MEMORY_PARTITION_NPS4:
    case RSMI_MEMORY_PARTITION_NPS8:
      break;
    default:
      // The key exists but does not map to a usable mode.
      return RSMI_STATUS_UNEXPECTED_DATA;
  }

  nps_mode = val_str;
  return RSMI_STATUS_SUCCESS;
  CATCH
}
|
||||
|
||||
// Sets the memory partition (NPS mode) of device dv_ind and then restarts
// the amdgpu driver.
//
// Checks are performed in this order:
//   1. The board name must contain both "mi" and "00x" (case-insensitive),
//      otherwise RSMI_STATUS_NOT_SUPPORTED is returned.
//   2. nps_mode must be NPS1, NPS2, NPS4 or NPS8, otherwise
//      RSMI_STATUS_INVALID_ARGS is returned.
//   3. If the device already reports nps_mode, RSMI_STATUS_SUCCESS is returned
//      without writing anything or restarting the driver.
// After a successful write, the result of dev->restartAMDGpuDriver() is
// returned.
rsmi_status_t
rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode) {
  TRY
  REQUIRE_ROOT_ACCESS
  DEVICE_MUTEX

  // rsmi_dev_nps_mode_set is only available for the discrete variant,
  // others are required to update through bios settings
  char boardName[128];
  boardName[0] = '\0';
  // The status is deliberately not checked: the buffer starts out empty, and
  // an empty name cannot pass the board-name check below.
  rsmi_dev_name_get(dv_ind, boardName, 128);
  std::string myBoardName = boardName;
  // tolower() requires a value representable as unsigned char, so convert
  // before the call instead of passing a possibly negative char.
  std::transform(myBoardName.begin(), myBoardName.end(), myBoardName.begin(),
                 [](unsigned char c) {
                   return static_cast<char>(::tolower(c));
                 });
  const bool isCorrectDevice =
      (myBoardName.find("mi") != std::string::npos) &&
      (myBoardName.find("00x") != std::string::npos);
  if (!isCorrectDevice) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  switch (nps_mode) {
    case RSMI_MEMORY_PARTITION_NPS1:
    case RSMI_MEMORY_PARTITION_NPS2:
    case RSMI_MEMORY_PARTITION_NPS4:
    case RSMI_MEMORY_PARTITION_NPS8:
      break;
    case RSMI_MEMORY_PARTITION_UNKNOWN:
    default:
      // Caller asked for an unknown NPS mode
      return RSMI_STATUS_INVALID_ARGS;
  }

  // Translate only after validation, and with find(): operator[] would add
  // an entry to the global map for any out-of-range enumerator.
  const auto newModeIt = mapRSMIToStringNPSModeTypes.find(nps_mode);
  if (newModeIt == mapRSMIToStringNPSModeTypes.end()) {
    return RSMI_STATUS_INVALID_ARGS;
  }
  const std::string newNPSMode = newModeIt->second;

  // do nothing if nps_mode is the current NPS mode
  std::string currentNPSMode;
  rsmi_status_t ret_get = get_nps_mode(dv_ind, currentNPSMode);
  // we can try to set, even if we get unexpected data
  if (ret_get != RSMI_STATUS_SUCCESS
      && ret_get != RSMI_STATUS_UNEXPECTED_DATA) {
    return ret_get;
  }
  // currentNPSMode may be empty here (unexpected data); find() simply
  // reports "no match" instead of inserting an empty key into the map.
  const auto currModeIt = mapStringToNPSModeTypes.find(currentNPSMode);
  if (currModeIt != mapStringToNPSModeTypes.end()
      && currModeIt->second == nps_mode) {
    return RSMI_STATUS_SUCCESS;
  }

  GET_DEV_FROM_INDX
  const rsmi_status_t write_status = amd::smi::ErrnoToRsmiStatus(
      dev->writeDevInfo(amd::smi::kDevMemoryPartition, newNPSMode));
  if (write_status != RSMI_STATUS_SUCCESS) {
    return write_status;
  }

  return dev->restartAMDGpuDriver();
  CATCH
}
|
||||
|
||||
// Copies the current NPS mode string of device dv_ind into the caller's
// buffer nps_mode, which holds len bytes.
//
// The output is always NUL-terminated inside the len bytes provided. When the
// buffer cannot hold the whole string plus its terminator, as many characters
// as fit are copied and RSMI_STATUS_INSUFFICIENT_SIZE is returned.
// Returns RSMI_STATUS_INVALID_ARGS when len is 0, and the status from
// get_nps_mode() when reading the mode fails.
rsmi_status_t
rsmi_dev_nps_mode_get(uint32_t dv_ind, char *nps_mode,
                      uint32_t len) {
  // NOTE(review): this macro runs before the argument check below and appears
  // to treat a null nps_mode as an "is this API supported" query - confirm
  // against the macro definition.
  CHK_SUPPORT_NAME_ONLY(nps_mode)
  if ((len == 0) || (nps_mode == nullptr)) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  TRY
  std::string returning_nps_mode;
  rsmi_status_t ret = get_nps_mode(dv_ind, returning_nps_mode);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // Copy at most len - 1 characters so the terminator always lands inside
  // the buffer. Copying len characters would put it at nps_mode[len], one
  // byte past the end, whenever the string does not fit.
  const std::size_t length = returning_nps_mode.copy(nps_mode, len - 1);
  nps_mode[length] = '\0';

  if (len < (returning_nps_mode.size() + 1)) {
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  return ret;
  CATCH
}
|
||||
|
||||
enum iterator_handle_type {
|
||||
FUNC_ITER = 0,
|
||||
VARIANT_ITER,
|
||||
|
||||
@@ -122,6 +122,7 @@ static const char *kDevSerialNumberFName = "serial_number";
|
||||
static const char *kDevNumaNodeFName = "numa_node";
|
||||
static const char *kDevGpuMetricsFName = "gpu_metrics";
|
||||
static const char *kDevComputePartitionFName = "current_compute_partition";
|
||||
static const char *kDevMemoryPartitionFName = "current_memory_partition";
|
||||
|
||||
// Firmware version files
|
||||
static const char *kDevFwVersionAsdFName = "fw_version/asd_fw_version";
|
||||
@@ -292,6 +293,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevGpuMetrics, kDevGpuMetricsFName},
|
||||
{kDevGpuReset, kDevGpuResetFName},
|
||||
{kDevComputePartition, kDevComputePartitionFName},
|
||||
{kDevMemoryPartition, kDevMemoryPartitionFName},
|
||||
};
|
||||
|
||||
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
|
||||
@@ -417,6 +419,8 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
{"rsmi_dev_gpu_reset", {{kDevGpuResetFName}, {}}},
|
||||
{"rsmi_dev_compute_partition_get", {{kDevComputePartitionFName}, {}}},
|
||||
{"rsmi_dev_compute_partition_set", {{kDevComputePartitionFName}, {}}},
|
||||
{"rsmi_dev_memory_partition_get", {{kDevMemoryPartitionFName}, {}}},
|
||||
{"rsmi_dev_memory_partition_set", {{kDevMemoryPartitionFName}, {}}},
|
||||
|
||||
// These functions with variants, but no sensors/units. (May or may not
|
||||
// have mandatory dependencies.)
|
||||
@@ -564,9 +568,9 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
|
||||
auto sysfs_path = path_;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (env_->path_DRM_root_override && type == env_->enum_override) {
|
||||
if (env_->path_DRM_root_override
|
||||
&& (env_->enum_overrides.find(type) != env_->enum_overrides.end())) {
|
||||
sysfs_path = env_->path_DRM_root_override;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -698,6 +702,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
case kDevPowerODVoltage:
|
||||
case kDevSOCClk:
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
return writeDevInfoStr(type, val);
|
||||
|
||||
default:
|
||||
@@ -925,6 +930,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevPCIEThruPut:
|
||||
case kDevSerialNumber:
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
return readDevInfoStr(type, val);
|
||||
break;
|
||||
|
||||
@@ -1102,6 +1108,44 @@ bool Device::DeviceAPISupported(std::string name, uint64_t variant,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reloads the amdgpu kernel module through shell commands.
//
// Sequence: query whether gdm is active; if so, stop it; unload and reload
// amdgpu; start gdm again if it was stopped here.
// Returns RSMI_STATUS_SUCCESS when every executed command reported success,
// RSMI_STATUS_AMDGPU_RESTART_ERR otherwise.
//
// NOTE(review): the stop/modprobe/start commands end in '&', so the status
// reported for them may be that of launching the background job rather than
// of the command itself - confirm that failures are actually detected.
rsmi_status_t Device::restartAMDGpuDriver(void) {
  REQUIRE_ROOT_ACCESS
  bool allStepsOk = true;

  // sudo systemctl is-active gdm
  // A failing query only counts against us when the output still says
  // "active"; any other answer is treated as "gdm is not running".
  const auto gdmQuery = executeCommand("systemctl is-active gdm");
  const bool gdmReportsActive = (gdmQuery.second == "active");
  if (gdmReportsActive) {
    allStepsOk = allStepsOk && gdmQuery.first;
  }

  // if gdm is active -> sudo systemctl stop gdm
  // TODO: are there other display managers we need to take into account?
  // see https://en.wikipedia.org/wiki/GNOME_Display_Manager
  const bool wasGdmServiceActive = gdmQuery.first && gdmReportsActive;
  if (wasGdmServiceActive) {
    const bool stopped = executeCommand("systemctl stop gdm&", false).first;
    allStepsOk = allStepsOk && stopped;
  }

  // sudo modprobe -r amdgpu
  // sudo modprobe amdgpu
  const bool reloaded =
      executeCommand("modprobe -r amdgpu && modprobe amdgpu&", false).first;
  allStepsOk = allStepsOk && reloaded;

  // if gdm was active -> sudo systemctl start gdm
  if (wasGdmServiceActive) {
    const bool started = executeCommand("systemctl start gdm&", false).first;
    allStepsOk = allStepsOk && started;
  }

  if (allStepsOk) {
    return RSMI_STATUS_SUCCESS;
  }
  return RSMI_STATUS_AMDGPU_RESTART_ERR;
}
|
||||
|
||||
#undef RET_IF_NONZERO
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
#include <cerrno>
|
||||
#include <unordered_map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
@@ -141,7 +142,8 @@ const std::map<amd::smi::DevInfoTypes, std::string> amd::smi::RocmSMI::devInfoTy
|
||||
{amd::smi::kDevNumaNode, amdSMI + "kDevNumaNode"},
|
||||
{amd::smi::kDevGpuMetrics, amdSMI + "kDevGpuMetrics"},
|
||||
{amd::smi::kDevGpuReset, amdSMI + "kDevGpuReset"},
|
||||
{amd::smi::kDevComputePartition, amdSMI + "kDevComputePartition"}
|
||||
{amd::smi::kDevComputePartition, amdSMI + "kDevComputePartition"},
|
||||
{amd::smi::kDevMemoryPartition, amdSMI + "kDevMemoryPartition"}
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
@@ -305,6 +307,8 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
euid_ = geteuid();
|
||||
|
||||
GetEnvVariables();
|
||||
// To help debug env variable issues
|
||||
// printEnvVarInfo();
|
||||
|
||||
while (env_vars_.debug_inf_loop) {}
|
||||
|
||||
@@ -429,6 +433,31 @@ static uint32_t GetEnvVarUInteger(const char *ev_str) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parses the environment variable named ev_str as a comma-separated list of
// non-negative integers and returns them as a set.
// Outside DEBUG builds the environment is not consulted and the returned set
// is always empty. An unset or empty variable also yields an empty set.
static std::unordered_set<uint32_t> GetEnvVarUIntegerSets(const char *ev_str) {
  std::unordered_set<uint32_t> parsedValues;
#ifdef DEBUG
  const char *rawValue = getenv(ev_str);
  if (rawValue != nullptr) {
    const std::string listText = rawValue;
    std::istringstream tokenStream(listText);

    // One token per comma-separated field; an empty listText produces none.
    for (std::string token; std::getline(tokenStream, token, ',');) {
      const int asInt = std::stoi(token);
      assert(asInt >= 0);
      parsedValues.insert(static_cast<uint32_t>(asInt));
    }
  }
#else
  (void)ev_str;
#endif
  return parsedValues;
}
|
||||
|
||||
// Get and store env. variables in this method
|
||||
void RocmSMI::GetEnvVariables(void) {
|
||||
#ifndef DEBUG
|
||||
@@ -437,15 +466,15 @@ void RocmSMI::GetEnvVariables(void) {
|
||||
env_vars_.path_DRM_root_override = nullptr;
|
||||
env_vars_.path_HWMon_root_override = nullptr;
|
||||
env_vars_.path_power_root_override = nullptr;
|
||||
env_vars_.enum_override = 0;
|
||||
env_vars_.debug_inf_loop = 0;
|
||||
env_vars_.enum_overrides.clear();
|
||||
#else
|
||||
env_vars_.debug_output_bitfield = GetEnvVarUInteger("RSMI_DEBUG_BITFIELD");
|
||||
env_vars_.path_DRM_root_override = getenv("RSMI_DEBUG_DRM_ROOT_OVERRIDE");
|
||||
env_vars_.path_HWMon_root_override = getenv("RSMI_DEBUG_HWMON_ROOT_OVERRIDE");
|
||||
env_vars_.path_power_root_override = getenv("RSMI_DEBUG_PP_ROOT_OVERRIDE");
|
||||
env_vars_.enum_override = GetEnvVarUInteger("RSMI_DEBUG_ENUM_OVERRIDE");
|
||||
env_vars_.debug_inf_loop = GetEnvVarUInteger("RSMI_DEBUG_INFINITE_LOOP");
|
||||
env_vars_.enum_overrides = GetEnvVarUIntegerSets("RSMI_DEBUG_ENUM_OVERRIDE");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -453,6 +482,43 @@ const RocmSMI_env_vars& RocmSMI::getEnv(void) {
|
||||
return env_vars_;
|
||||
}
|
||||
|
||||
// Debug aid: prints every field of env_vars_ to std::cout, one per line, each
// prefixed with this function's signature. Numeric fields equal to 0 and null
// path overrides are shown as "<undefined>"; enum_overrides is shown as a
// comma-separated list in braces.
void RocmSMI::printEnvVarInfo(void) {
  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.debug_output_bitfield = "
            << ((env_vars_.debug_output_bitfield == 0) ? "<undefined>"
                : std::to_string(env_vars_.debug_output_bitfield))
            << std::endl;
  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_DRM_root_override = "
            << ((env_vars_.path_DRM_root_override == nullptr)
                ? "<undefined>" : env_vars_.path_DRM_root_override)
            << std::endl;
  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_HWMon_root_override = "
            << ((env_vars_.path_HWMon_root_override == nullptr)
                ? "<undefined>" : env_vars_.path_HWMon_root_override)
            << std::endl;
  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_power_root_override = "
            << ((env_vars_.path_power_root_override == nullptr)
                ? "<undefined>" : env_vars_.path_power_root_override)
            << std::endl;
  // Print debug_inf_loop itself here, not debug_output_bitfield.
  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.debug_inf_loop = "
            << ((env_vars_.debug_inf_loop == 0) ? "<undefined>"
                : std::to_string(env_vars_.debug_inf_loop))
            << std::endl;

  std::cout << __PRETTY_FUNCTION__ << " | env_vars_.enum_overrides = {";
  bool firstEntry = true;
  for (const auto &enumOverride : env_vars_.enum_overrides) {
    // Separator goes before every element except the first.
    if (!firstEntry) {
      std::cout << ",";
    }
    std::cout << enumOverride;
    firstEntry = false;
  }
  std::cout << "}" << std::endl;
}
|
||||
|
||||
std::shared_ptr<Monitor>
|
||||
RocmSMI::FindMonitor(std::string monitor_path) {
|
||||
std::string tmp;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2018, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2018-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -51,6 +51,7 @@
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
@@ -234,5 +235,68 @@ rsmi_status_t ErrnoToRsmiStatus(int err) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of s with its leading whitespace removed.
std::string leftTrim(const std::string &s) {
  // Built once and reused: constructing a std::regex is far more expensive
  // than applying it.
  static const std::regex kLeadingWhitespace("^\\s+");
  if (s.empty()) {
    return s;
  }
  return std::regex_replace(s, kLeadingWhitespace, "");
}
|
||||
|
||||
// Returns a copy of s with its trailing whitespace removed.
std::string rightTrim(const std::string &s) {
  // Built once and reused: constructing a std::regex is far more expensive
  // than applying it.
  static const std::regex kTrailingWhitespace("\\s+$");
  if (s.empty()) {
    return s;
  }
  return std::regex_replace(s, kTrailingWhitespace, "");
}
|
||||
|
||||
// Returns a copy of s with every '\n' character removed, wherever it occurs.
std::string removeNewLines(const std::string &s) {
  std::string withoutNewLines = s;
  // Dropping single characters needs no regex; erase/remove gives the same
  // result without compiling a pattern on every call.
  withoutNewLines.erase(
      std::remove(withoutNewLines.begin(), withoutNewLines.end(), '\n'),
      withoutNewLines.end());
  return withoutNewLines;
}
|
||||
|
||||
std::string trim(const std::string &s) {
|
||||
if (!s.empty()) {
|
||||
// remove new lines -> trim white space at ends
|
||||
std::string noNewLines = removeNewLines(s);
|
||||
return leftTrim(rightTrim(noNewLines));
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// defaults to trim stdOut
|
||||
std::pair<bool, std::string> executeCommand(std::string command, bool stdOut) {
|
||||
char buffer[128];
|
||||
std::string stdoutAndErr = "";
|
||||
bool successfulRun = true;
|
||||
command = "stdbuf -i0 -o0 -e0 " + command; // remove stdOut and err buffering
|
||||
|
||||
FILE *pipe = popen(command.c_str(), "r");
|
||||
if (!pipe) {
|
||||
stdoutAndErr = "[ERROR] popen failed to call " + command;
|
||||
successfulRun = false;
|
||||
} else {
|
||||
//read until end of process
|
||||
while (!feof(pipe)) {
|
||||
// use buffer to read and add to stdoutAndErr
|
||||
if (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
|
||||
stdoutAndErr += buffer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// any return code other than 0, is a failed execution
|
||||
if (pclose(pipe) != 0) {
|
||||
successfulRun = false;
|
||||
}
|
||||
|
||||
if (stdOut) {
|
||||
// remove leading and trailing spaces of output and new lines
|
||||
stdoutAndErr = trim(stdoutAndErr);
|
||||
}
|
||||
return std::make_pair(successfulRun, stdoutAndErr);
|
||||
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -49,6 +49,24 @@ endif()
|
||||
|
||||
set(RSMI_INC_DIR ${ROCM_DIR}/include)
|
||||
set(RSMI_LIB_DIR ${ROCM_DIR}/lib)
|
||||
|
||||
|
||||
message("")
|
||||
message("Google Test Configuration init:")
|
||||
message("-----------ROCM_DIR: " ${ROCM_DIR})
|
||||
message("-----------GOOGLE_TEST_FRWK_NAME: " ${GOOGLE_TEST_FRWK_NAME})
|
||||
message("-----------RSMITST: " ${RSMITST})
|
||||
message("-----------RSMITST_ROOT: " ${RSMITST_ROOT})
|
||||
message("-----------RSMITST_LIBS: " ${RSMITST_LIBS})
|
||||
message("-----------PROJECT_BINARY_DIR: " ${PROJECT_BINARY_DIR})
|
||||
message("-----------RSMI_LIB_DIR: " ${RSMI_LIB_DIR})
|
||||
message("-----------GTEST_LIB_DIR: " ${GTEST_LIB_DIR})
|
||||
message("-----------RSMI_INC_DIR: " ${RSMI_INC_DIR})
|
||||
message("-----------rsmitstSources: " ${rsmitstSources})
|
||||
message("-----------functionalSources: " ${functionalSources})
|
||||
message("")
|
||||
|
||||
|
||||
#
|
||||
# Determine RSMI Header files are present
|
||||
# (no external source dependencies)
|
||||
@@ -76,6 +94,21 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message("")
|
||||
message("Google Test Configuration (after lib check):")
|
||||
message("-----------ROCM_DIR: " ${ROCM_DIR})
|
||||
message("-----------GOOGLE_TEST_FRWK_NAME: " ${GOOGLE_TEST_FRWK_NAME})
|
||||
message("-----------RSMITST: " ${RSMITST})
|
||||
message("-----------RSMITST_ROOT: " ${RSMITST_ROOT})
|
||||
message("-----------RSMITST_LIBS: " ${RSMITST_LIBS})
|
||||
message("-----------PROJECT_BINARY_DIR: " ${PROJECT_BINARY_DIR})
|
||||
message("-----------RSMI_LIB_DIR: " ${RSMI_LIB_DIR})
|
||||
message("-----------GTEST_LIB_DIR: " ${GTEST_LIB_DIR})
|
||||
message("-----------RSMI_INC_DIR: " ${RSMI_INC_DIR})
|
||||
message("-----------rsmitstSources: " ${rsmitstSources})
|
||||
message("-----------functionalSources: " ${functionalSources})
|
||||
message("")
|
||||
|
||||
string(TOLOWER "${RSMITST_BLD_TYPE}" tmp)
|
||||
if("${tmp}" STREQUAL release)
|
||||
set(BUILD_TYPE "Release")
|
||||
@@ -224,3 +257,18 @@ install(TARGETS ${RSMITST}
|
||||
LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
|
||||
RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
|
||||
|
||||
message("")
|
||||
message("Google Test Configuration:")
|
||||
message("-----------ROCM_DIR: " ${ROCM_DIR})
|
||||
message("-----------GOOGLE_TEST_FRWK_NAME: " ${GOOGLE_TEST_FRWK_NAME})
|
||||
message("-----------RSMITST: " ${RSMITST})
|
||||
message("-----------RSMITST_ROOT: " ${RSMITST_ROOT})
|
||||
message("-----------RSMITST_LIBS: " ${RSMITST_LIBS})
|
||||
message("-----------PROJECT_BINARY_DIR: " ${PROJECT_BINARY_DIR})
|
||||
message("-----------RSMI_LIB_DIR: " ${RSMI_LIB_DIR})
|
||||
message("-----------GTEST_LIB_DIR: " ${GTEST_LIB_DIR})
|
||||
message("-----------RSMI_INC_DIR: " ${RSMI_INC_DIR})
|
||||
message("-----------rsmitstSources: " ${rsmitstSources})
|
||||
message("-----------functionalSources: " ${functionalSources})
|
||||
message("")
|
||||
|
||||
|
||||
@@ -204,7 +204,6 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
// Note: new_computePartition is not set
|
||||
// DISPLAY_RSMI_ERR(err)
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED));
|
||||
IF_VERB(STANDARD) {
|
||||
@@ -222,7 +221,6 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
new_computePartition
|
||||
= rsmi_compute_partition_type::RSMI_COMPUTE_PARTITION_INVALID;
|
||||
err = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
// DISPLAY_RSMI_ERR(err)
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(err == RSMI_STATUS_PERMISSION));
|
||||
|
||||
Исполняемый файл
+298
@@ -0,0 +1,298 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/functional/npsmode_read_write.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
|
||||
// Registers the title and description shown for this test.
TestNPSModeReadWrite::TestNPSModeReadWrite() : TestBase() {
  set_title("RSMI NPS Mode Read Test");
  // Description wording corrected ("parition" -> "partition",
  // "tests verifies" -> "test verifies").
  set_description("The NPS Mode test verifies that the memory "
                  "partition setting can be read and updated properly.");
}
|
||||
|
||||
// Nothing to release here; the body is intentionally empty.
TestNPSModeReadWrite::~TestNPSModeReadWrite(void) {}
|
||||
|
||||
// No test-specific setup; forwards to TestBase::SetUp().
void TestNPSModeReadWrite::SetUp(void) {
  TestBase::SetUp();
}
|
||||
|
||||
// No test-specific info to add; forwards to TestBase::DisplayTestInfo().
void TestNPSModeReadWrite::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}
|
||||
|
||||
// No test-specific results to add; forwards to TestBase::DisplayResults().
void TestNPSModeReadWrite::DisplayResults(void) const {
  TestBase::DisplayResults();
}
|
||||
|
||||
// Forwards to TestBase::Close().
void TestNPSModeReadWrite::Close() {
  // TestBase::Close() closes handles opened within rsmitst utility calls and
  // calls rsmi_shut_down(), so it must come after any other cleanup.
  TestBase::Close();
}
|
||||
|
||||
static const std::string
|
||||
npsModeString(rsmi_nps_mode_type npsModeType) {
|
||||
switch (npsModeType) {
|
||||
case RSMI_MEMORY_PARTITION_NPS1:
|
||||
return "NPS1";
|
||||
case RSMI_MEMORY_PARTITION_NPS2:
|
||||
return "NPS2";
|
||||
case RSMI_MEMORY_PARTITION_NPS4:
|
||||
return "NPS4";
|
||||
case RSMI_MEMORY_PARTITION_NPS8:
|
||||
return "NPS8";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static const std::map<std::string, rsmi_nps_mode_type_t>
|
||||
mapStringToRSMINpsModeTypes {
|
||||
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
|
||||
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
|
||||
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
|
||||
{"NPS8", RSMI_MEMORY_PARTITION_NPS8}
|
||||
};
|
||||
|
||||
void TestNPSModeReadWrite::Run(void) {
|
||||
rsmi_status_t ret, err;
|
||||
char orig_nps_mode[255];
|
||||
char current_nps_mode[255];
|
||||
orig_nps_mode[0] = '\0';
|
||||
current_nps_mode[0] = '\0';
|
||||
rsmi_nps_mode_type new_nps_mode;
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
|
||||
PrintDeviceHeader(dv_ind);
|
||||
|
||||
//Standard checks to see if API is supported, before running full tests
|
||||
ret = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 255);
|
||||
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
CHK_ERR_ASRT(ret)
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl << "\t**"
|
||||
<< "NPS Mode: "
|
||||
<< orig_nps_mode << std::endl;
|
||||
}
|
||||
|
||||
if ((orig_nps_mode == nullptr) ||
|
||||
(orig_nps_mode[0] == '\0')) {
|
||||
std::cout << "***System nps mode value is not defined or received unexpected data. "
|
||||
"Skip nps mode test." << std::endl;
|
||||
return;
|
||||
}
|
||||
EXPECT_TRUE(ret == RSMI_STATUS_SUCCESS);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
uint32_t length = 2;
|
||||
char smallBuffer[length];
|
||||
err = rsmi_dev_nps_mode_get(dv_ind, smallBuffer, length);
|
||||
size_t size = sizeof(smallBuffer)/sizeof(*smallBuffer);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INSUFFICIENT_SIZE);
|
||||
ASSERT_EQ((size_t)length, size);
|
||||
if (err == RSMI_STATUS_INSUFFICIENT_SIZE) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INSUFFICIENT_SIZE was returned "
|
||||
<< "and size matches length requested." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_nps_mode_get(dv_ind, nullptr, 255);
|
||||
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
|
||||
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_NOT_SUPPORTED was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 0);
|
||||
ASSERT_EQ(err, (RSMI_STATUS_INVALID_ARGS || RSMI_STATUS_NOT_SUPPORTED));
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/******************************/
|
||||
/* rsmi_dev_nps_mode_set(...) */
|
||||
/******************************/
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
|
||||
// Note: new_nps_mode is not set
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED));
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
} else if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "rsmi_dev_nps_mode_set not supported on this machine"
|
||||
<< "\n\t (if rsmi_dev_nps_mode_get work, then likely "
|
||||
<< "need to set in bios)"
|
||||
<< std::endl;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
}
|
||||
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
new_nps_mode = rsmi_nps_mode_type::RSMI_MEMORY_PARTITION_UNKNOWN;
|
||||
err = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(err == RSMI_STATUS_PERMISSION));
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
} else if (err == RSMI_STATUS_PERMISSION) {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
// tests should not continue if err is a permission issue
|
||||
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
|
||||
} else {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-run original get, so we can reset to later
|
||||
ret = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 255);
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
|
||||
for (int partition = RSMI_MEMORY_PARTITION_NPS1;
|
||||
partition <= RSMI_MEMORY_PARTITION_NPS8;
|
||||
partition++) {
|
||||
new_nps_mode
|
||||
= static_cast<rsmi_nps_mode_type>(partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "======== TEST RSMI_MEMORY_PARTITION_"
|
||||
<< npsModeString(new_nps_mode)
|
||||
<< " ===============" << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Attempting to set nps mode to: "
|
||||
<< npsModeString(new_nps_mode) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_nps_mode_get(dv_ind, current_nps_mode, 255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Current nps mode: " << current_nps_mode << std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
EXPECT_STREQ(npsModeString(new_nps_mode).c_str(), current_nps_mode);
|
||||
}
|
||||
|
||||
/* TEST RETURN TO ORIGINAL NPS MODE SETTING */
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "=========== TEST RETURN TO ORIGINAL NPS MODE "
|
||||
<< "SETTING ========" << std::endl;
|
||||
}
|
||||
new_nps_mode
|
||||
= mapStringToRSMINpsModeTypes.at(orig_nps_mode);
|
||||
ret = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Returning nps mode to: "
|
||||
<< npsModeString(new_nps_mode) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_nps_mode_get(dv_ind, current_nps_mode, 255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Attempted to set nps mode: "
|
||||
<< npsModeString(new_nps_mode) << std::endl
|
||||
<< "\t**" << "Current compute partition: " << current_nps_mode
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
EXPECT_STREQ(npsModeString(new_nps_mode).c_str(), current_nps_mode);
|
||||
|
||||
}
|
||||
}
|
||||
Исполняемый файл
+73
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
|
||||
#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
|
||||
|
||||
#include "rocm_smi_test/test_base.h"
|
||||
|
||||
// Test case covering the NPS mode read/write APIs; built on TestBase.
class TestNPSModeReadWrite : public TestBase {
 public:
  // @Brief: Constructor for test case of TestNPSModeReadWrite
  TestNPSModeReadWrite();

  // @Brief: Destructor for test case of TestNPSModeReadWrite
  virtual ~TestNPSModeReadWrite();

  // @Brief: Prepare the environment before the test is run
  virtual void SetUp();

  // @Brief: Execute the core of the test
  virtual void Run();

  // @Brief: Clean up and retrieve the resources
  virtual void Close();

  // @Brief: Display the results of the test
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();
};
|
||||
|
||||
#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
|
||||
@@ -87,6 +87,7 @@
|
||||
#include "rocm_smi_test/functional/gpu_metrics_read.h"
|
||||
#include "rocm_smi_test/functional/metrics_counter_read.h"
|
||||
#include "rocm_smi_test/functional/perf_determinism.h"
|
||||
#include "functional/npsmode_read_write.h"
|
||||
|
||||
static RSMITstGlobals *sRSMIGlvalues = nullptr;
|
||||
|
||||
@@ -277,7 +278,11 @@ TEST(rsmitstReadWrite, TestComputePartitionReadWrite) {
|
||||
TestComputePartitionReadWrite tst;
|
||||
RunGenericTest(&tst);
|
||||
}
|
||||
TEST(rsmitstReadOnly, TestConcurrentInit) {
|
||||
// Runs the NPS mode read/write test case through the generic test driver.
TEST(rsmitstReadWrite, TestNPSModeReadWrite) {
  TestNPSModeReadWrite nps_mode_test;
  RunGenericTest(&nps_mode_test);
}
|
||||
TEST(rsmitstReadOnly, Test) {
|
||||
TestConcurrentInit tst;
|
||||
SetFlags(&tst);
|
||||
tst.DisplayTestInfo();
|
||||
|
||||
Ссылка в новой задаче
Block a user