SWDEV-335697- Add support for dynamic partitioning
Original updates:
* Added .gitignore to help with future commits
* Updated/added copyrights on modified or added files
* Updated rocm_smi.h/.cc
- Added 3 new SMI API functions:
rsmi_dev_compute_partition_set &
rsmi_dev_compute_partition_get
- Added helpful maps/enums used in
new get/set compute_partition API calls
* Updated rocm_smi.py
- Added --showcomputepartition
- Added --setcomputepartition
- Fixed a few typos
* Updated rsmiBindings.py - added helpful class/dict/list
* Updated rocm_smi_example.cc
- Added helpful MACRO to detect if api is not supported.
- Added current_compute_partition set/get rocm lib calls
- Added helpful macro to discover future RSMI errors
- Commented out test_set_freq, was having permission issues
on a Navi21
* Updated rocm_smi_main.cc
- Added helpful map to debug API calls, left in for future use
- Added comment to better understand a non-class function returns
* Added computepartition_read_write.cc/.h
- Added get/set compute partition API test calls
- Confirmed on devices that do not support the API calls, tests pass
* Updated rocm_smi_test/main.cc
- Calls new compute partition gtests
Added following updates from review feedback:
* Updated rocm_smi.h/cc
- Removed C++ API calls, adding support for both C/C++
API calls could cause confusion and adds extra work for us
- rsmi_dev_compute_partition_get -> Fixed an edge case where
user gives a small buffer length size (smaller than data
received), but does not receive the partial buffer back.
Google Tests are updated to reflect this finding.
* Updated rocm_smi_example.cc
- Fixed test_set_freq, issue was that file was not writable.
We now indicate this warning, so prior errors make sense.
- General test code cleanup. Removed extra code,
by creating loops for tests.
* Updated rocm_smi_main.cc
- Moved and got rid of an external reference to a map used
for debugging RSMI enums, now is a const public reference.
* Updated rocm_smi.py
- Updated Python code to identify NOT_SUPPORTED, because
(currently) only a few GPUs support the feature
Change-Id: I4a567acbb59d6771fb64df08d19175fe3604fd1b
[ROCm/rocm_smi_lib commit: 4d7f3f2bc7]
Tento commit je obsažen v:
@@ -0,0 +1,124 @@
|
||||
#
|
||||
# NOTE! Don't add files that are generated in specific
|
||||
# subdirectories here. Add them in the ".gitignore" file
|
||||
# in that subdirectory instead.
|
||||
#
|
||||
# NOTE! Please use 'git ls-files -i --exclude-standard'
|
||||
# command after changing this file, to see if there are
|
||||
# any tracked files which get ignored after the change.
|
||||
#
|
||||
# Normal rules
|
||||
#
|
||||
.*
|
||||
*.o
|
||||
*.o.*
|
||||
*.a
|
||||
*.s
|
||||
*.ko
|
||||
*.so
|
||||
*.so.dbg
|
||||
*.mod.c
|
||||
*.i
|
||||
*.lst
|
||||
*.symtypes
|
||||
*.order
|
||||
modules.builtin
|
||||
*.elf
|
||||
*.bin
|
||||
*.gz
|
||||
*.bz2
|
||||
*.lzma
|
||||
*.xz
|
||||
*.lzo
|
||||
#*.patch
|
||||
*.gcno
|
||||
*.pyc
|
||||
*current_compute_partition
|
||||
|
||||
#
|
||||
# Top-level generic files/folders
|
||||
#
|
||||
/[Bb][Ui][Ll][Dd]
|
||||
*/[Bb][Ui][Ll][Dd]
|
||||
/build
|
||||
*/build
|
||||
/[Gg][Tt][Ee][Ss][Tt][Ss]
|
||||
*/[Gg][Tt][Ee][Ss][Tt][Ss]
|
||||
/tags
|
||||
/TAGS
|
||||
/linux
|
||||
/vmlinux
|
||||
/vmlinuz
|
||||
/System.map
|
||||
/Module.markers
|
||||
Module.symvers
|
||||
|
||||
#
|
||||
# Debian directory (make deb-pkg)
|
||||
#
|
||||
/debian/
|
||||
|
||||
#
|
||||
# git files that we don't want to ignore even if they are dot-files
|
||||
#
|
||||
!.gitignore
|
||||
!.mailmap
|
||||
|
||||
### VisualStudioCode ###
|
||||
!.vscode/settings.json
|
||||
|
||||
#
|
||||
# Generated include files
|
||||
#
|
||||
include/config
|
||||
include/linux/version.h
|
||||
include/generated
|
||||
arch/*/include/generated
|
||||
|
||||
# git generated dirs
|
||||
patches-*
|
||||
|
||||
# quilt's files
|
||||
patches
|
||||
series
|
||||
|
||||
# cscope files
|
||||
cscope.*
|
||||
ncscope.*
|
||||
|
||||
# gnu global files
|
||||
GPATH
|
||||
GRTAGS
|
||||
GSYMS
|
||||
GTAGS
|
||||
|
||||
*.orig
|
||||
*~
|
||||
\#*#
|
||||
|
||||
#
|
||||
# Leavings from module signing
|
||||
#
|
||||
extra_certificates
|
||||
signing_key.priv
|
||||
signing_key.x509
|
||||
x509.genkey
|
||||
|
||||
#cmake files
|
||||
CMakeLists.txt.user
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
CMakeScripts
|
||||
Testing
|
||||
Makefile
|
||||
cmake_install.cmake
|
||||
install_manifest.txt
|
||||
compile_commands.json
|
||||
CTestTestfile.cmake
|
||||
_deps
|
||||
|
||||
#
|
||||
# ROCm files
|
||||
# Removes generated config headers like rocmsmi64Config.h & oamConfig.h
|
||||
#
|
||||
*Config.h
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -352,6 +352,26 @@ typedef enum {
|
||||
typedef rsmi_clk_type_t rsmi_clk_type;
|
||||
/// \endcond
|
||||
|
||||
/**
 * @brief Compute Partition types. Values of this enum are passed to
 * rsmi_dev_compute_partition_set() to select a device's compute partition
 * setting.
 */
typedef enum {
  RSMI_COMPUTE_PARTITION_INVALID = 0,
  RSMI_COMPUTE_PARTITION_CPX = 1,  //!< Core mode (CPX)- Per-chip XCC with
                                   //!< shared memory
  RSMI_COMPUTE_PARTITION_SPX = 2,  //!< Single GPU mode (SPX)- All XCCs work
                                   //!< together with shared memory
  RSMI_COMPUTE_PARTITION_DPX = 3,  //!< Dual GPU mode (DPX)- Half XCCs work
                                   //!< together with shared memory
  RSMI_COMPUTE_PARTITION_TPX = 4,  //!< Triple GPU mode (TPX)- One-third XCCs
                                   //!< work together with shared memory
  RSMI_COMPUTE_PARTITION_QPX = 5   //!< Quad GPU mode (QPX)- Quarter XCCs
                                   //!< work together with shared memory
} rsmi_compute_partition_type_t;
/// \cond Ignore in docs.
typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
/// \endcond
|
||||
|
||||
/**
|
||||
* @brief Temperature Metrics. This enum is used to identify various
|
||||
* temperature metrics. Corresponding values will be in millidegrees
|
||||
@@ -3470,6 +3490,70 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
|
||||
/** @} */ // end of HWTopo
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup ComputePartition Compute Partition Functions
|
||||
* These functions are used to configure and query the device's
|
||||
* compute partition setting.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Retrieves the current compute partitioning for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p compute_partition ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* current compute partition setting string. Upon successful retrieval,
|
||||
* the obtained device's compute partition settings string shall be stored in
|
||||
* the passed @p compute_partition char string variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] compute_partition a pointer to a char string variable,
|
||||
* which the device's current compute partition will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p compute_partition
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire compute partition value. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Modifies a selected device's compute partition setting.
|
||||
*
|
||||
* @details Given a device index @p dv_ind, a type of compute partition
|
||||
* @p compute_partition, this function will attempt to update the selected
|
||||
* device's compute partition setting.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] compute_partition using enum ::rsmi_compute_partition_type_t,
|
||||
* define what the selected device's compute partition setting should be
|
||||
* updated to.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_PERMISSION function requires root access
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
rsmi_compute_partition_type_t compute_partition);
|
||||
|
||||
/** @} */ // end of ComputePartition
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup APISupport Supported Functions
|
||||
* API function support varies by both GPU type and the version of the
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -161,7 +161,8 @@ enum DevInfoTypes {
|
||||
kDevMemPageBad,
|
||||
kDevNumaNode,
|
||||
kDevGpuMetrics,
|
||||
kDevGpuReset
|
||||
kDevGpuReset,
|
||||
kDevComputePartition
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -113,6 +113,7 @@ class RocmSMI {
|
||||
uint64_t *weight);
|
||||
int get_node_index(uint32_t dv_ind, uint32_t *node_ind);
|
||||
const RocmSMI_env_vars& getEnv(void);
|
||||
static const std::map<amd::smi::DevInfoTypes, std::string> devInfoTypesStrings;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<Device>> devices_;
|
||||
|
||||
@@ -760,7 +760,7 @@ def resetPerfDeterminism(deviceList):
|
||||
if rsmi_ret_ok(ret, device, 'disable performance determinism'):
|
||||
printLog(device, 'Successfully disabled performance determinism', None)
|
||||
else:
|
||||
logging.error('GPU[%s]\t\t: Unable to diable performance determinism', device)
|
||||
logging.error('GPU[%s]\t\t: Unable to disable performance determinism', device)
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
@@ -1305,6 +1305,37 @@ def setProfile(deviceList, profile):
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
def setComputePartition(deviceList, computePartitionType):
    """ Sets compute partitioning for a list of devices

    @param deviceList: List of DRM devices (can be a single-item list)
    @param computePartitionType: Compute Partition type to set as
    (case-insensitive; must be one of the names in compute_partition_type_l)
    """
    # Normalize once up front: the requested type is the same for every device.
    computePartitionType = str(computePartitionType).upper()
    printLogSpacer(' Set compute partition to %s ' % (computePartitionType))
    for device in deviceList:
        if computePartitionType not in compute_partition_type_l:
            printErrLog(device, 'Invalid compute partition type %s'
                        '\nValid compute partition types are %s'
                        % (computePartitionType,
                           ', '.join(map(str, compute_partition_type_l))))
            # Kept as-is for callers: an invalid type aborts the whole request
            # (no closing spacer is printed on this path).
            return (None, None)
        ret = rocmsmi.rsmi_dev_compute_partition_set(
            device, rsmi_compute_partition_type_dict[computePartitionType])
        if rsmi_ret_ok(ret, device, silent=True):
            printLog(device,
                     'Successfully set compute partition to %s' % (computePartitionType),
                     None)
        elif ret == rsmi_status_t.RSMI_STATUS_PERMISSION:
            printLog(device, 'Permission denied', None)
        elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
            printLog(device, 'Not supported on the given system', None)
        else:
            # Non-silent call so the underlying RSMI status is reported too.
            rsmi_ret_ok(ret, device)
            # This is the *set* path; the message previously said "retrieve".
            printErrLog(device, 'Failed to set compute partition, even though device supports it.')
    printLogSpacer()
|
||||
|
||||
|
||||
def showAllConcise(deviceList):
|
||||
""" Display critical info for all devices in a concise format
|
||||
|
||||
@@ -2732,6 +2763,24 @@ def showNodesBw(deviceList):
|
||||
if nonXgmi:
|
||||
printLog(None,"Non-xGMI links detected and is currently not supported", None)
|
||||
|
||||
def showComputePartition(deviceList):
    """ Prints the current compute partitioning for a list of devices

    @param deviceList: List of DRM devices (can be a single-item list)
    """
    # Single source of truth for the buffer size handed to the library.
    bufferLen = 256
    currentComputePartition = create_string_buffer(bufferLen)
    printLogSpacer(' Current Compute Partition ')
    for device in deviceList:
        ret = rocmsmi.rsmi_dev_compute_partition_get(device, currentComputePartition, bufferLen)
        # The buffer is only decoded when the call succeeded (short-circuit).
        if rsmi_ret_ok(ret, device, silent=True) and currentComputePartition.value.decode():
            printLog(device, 'Compute Partition', currentComputePartition.value.decode())
        elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
            printLog(device, 'Not supported on the given system', None)
        else:
            # Non-silent call so the underlying RSMI status is reported too.
            rsmi_ret_ok(ret, device)
            # printErrLog is called with (device, message) at the other call
            # sites in this file; the stray third argument was dropped here.
            printErrLog(device, 'Failed to retrieve compute partition, even though device supports it.')
    printLogSpacer()
|
||||
|
||||
def checkAmdGpus(deviceList):
|
||||
""" Check if there are any AMD GPUs being queried,
|
||||
return False if there are none
|
||||
@@ -2905,6 +2954,8 @@ def relaunchAsSudo():
|
||||
"""
|
||||
if os.geteuid() != 0:
|
||||
os.execvp('sudo', ['sudo'] + sys.argv)
|
||||
#keeping below, if we want to run sudo with user's env variables
|
||||
#os.execvp('sudo', ['sudo', '-E'] + sys.argv)
|
||||
|
||||
|
||||
def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
|
||||
@@ -2936,7 +2987,6 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def save(deviceList, savefilepath):
|
||||
""" Save clock frequencies and fan speeds for a list of devices to a specified file path.
|
||||
|
||||
@@ -3077,6 +3127,7 @@ if __name__ == '__main__':
|
||||
groupDisplay.add_argument('--showenergycounter', help='Energy accumulator that stores amount of energy consumed',
|
||||
action='store_true')
|
||||
groupDisplay.add_argument('--shownodesbw', help='Shows the numa nodes ', action='store_true')
|
||||
groupDisplay.add_argument('--showcomputepartition', help='Shows current compute partitioning ', action='store_true')
|
||||
|
||||
groupActionReset.add_argument('-r', '--resetclocks', help='Reset clocks and OverDrive to default',
|
||||
action='store_true')
|
||||
@@ -3121,6 +3172,10 @@ if __name__ == '__main__':
|
||||
groupAction.add_argument('--setperfdeterminism',
|
||||
help='Set clock frequency limit to get minimal performance variation', type=int,
|
||||
metavar='SCLK', nargs=1)
|
||||
groupAction.add_argument('--setcomputepartition', help='Set compute partition',
|
||||
choices=compute_partition_type_l + [x.lower() for x in compute_partition_type_l],
|
||||
type=str, nargs=1
|
||||
)
|
||||
groupAction.add_argument('--rasenable', help='Enable RAS for specified block and error type', type=str, nargs=2,
|
||||
metavar=('BLOCK', 'ERRTYPE'))
|
||||
groupAction.add_argument('--rasdisable', help='Disable RAS for specified block and error type', type=str, nargs=2,
|
||||
@@ -3158,7 +3213,7 @@ if __name__ == '__main__':
|
||||
or args.resetclocks or args.setprofile or args.resetprofile or args.setoverdrive or args.setmemoverdrive \
|
||||
or args.setpoweroverdrive or args.resetpoweroverdrive or args.rasenable or args.rasdisable or \
|
||||
args.rasinject or args.gpureset or args.setperfdeterminism or args.setslevel or args.setmlevel or \
|
||||
args.setvc or args.setsrange or args.setmrange or args.setclock:
|
||||
args.setvc or args.setsrange or args.setmrange or args.setclock or args.setcomputepartition:
|
||||
relaunchAsSudo()
|
||||
|
||||
# If there is one or more device specified, use that for all commands, otherwise use a
|
||||
@@ -3220,6 +3275,7 @@ if __name__ == '__main__':
|
||||
args.showpidgpus = []
|
||||
args.showreplaycount = True
|
||||
args.showvc = True
|
||||
args.showcomputepartition = True
|
||||
|
||||
if not PRINT_JSON:
|
||||
args.showprofile = True
|
||||
@@ -3348,6 +3404,8 @@ if __name__ == '__main__':
|
||||
showVoltageCurve(deviceList)
|
||||
if args.showenergycounter:
|
||||
showEnergy(deviceList)
|
||||
if args.showcomputepartition:
|
||||
showComputePartition(deviceList)
|
||||
if args.setclock:
|
||||
setClocks(deviceList, args.setclock[0], [int(args.setclock[1])])
|
||||
if args.setsclk:
|
||||
@@ -3386,6 +3444,8 @@ if __name__ == '__main__':
|
||||
setClockRange(deviceList, 'mclk', args.setmrange[0], args.setmrange[1], args.autorespond)
|
||||
if args.setperfdeterminism:
|
||||
setPerfDeterminism(deviceList, args.setperfdeterminism[0])
|
||||
if args.setcomputepartition:
|
||||
setComputePartition(deviceList, args.setcomputepartition[0])
|
||||
if args.resetprofile:
|
||||
resetProfile(deviceList)
|
||||
if args.resetxgmierr:
|
||||
|
||||
@@ -582,3 +582,28 @@ class rsmi_func_id_value_t(Union):
|
||||
_fields_ = [('id', c_uint64),
|
||||
('name', c_char_p),
|
||||
('submodule', submodule_union)]
|
||||
|
||||
class rsmi_compute_partition_type_t(c_int):
    # Mirrors the C enum rsmi_compute_partition_type_t value for value.
    RSMI_COMPUTE_PARTITION_INVALID = 0
    RSMI_COMPUTE_PARTITION_CPX = 1
    RSMI_COMPUTE_PARTITION_SPX = 2
    RSMI_COMPUTE_PARTITION_DPX = 3
    RSMI_COMPUTE_PARTITION_TPX = 4
    RSMI_COMPUTE_PARTITION_QPX = 5


# Maps a partition name to its rsmi_compute_partition_type_t value.
rsmi_compute_partition_type_dict = {
    #'RSMI_COMPUTE_PARTITION_INVALID': 0,
    'CPX': 1,
    'SPX': 2,
    'DPX': 3,
    'TPX': 4,
    'QPX': 5
}

rsmi_compute_partition_type = rsmi_compute_partition_type_t

# compute_partition_type_l includes string names for the rsmi_compute_partition_type_t
# values. RSMI_COMPUTE_PARTITION_INVALID (0) has no entry, so the list index
# is the enum value minus one.
# Usage example to get corresponding names:
# compute_partition_type_l[rsmi_compute_partition_type_t.RSMI_COMPUTE_PARTITION_CPX - 1]
# will return string 'CPX'
compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -50,13 +50,14 @@
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <bitset>
|
||||
#include <map>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
#define PRINT_RSMI_ERR(RET) { \
|
||||
if (RET != RSMI_STATUS_SUCCESS) { \
|
||||
const char *err_str; \
|
||||
std::cout << "RSMI call returned " << (RET) \
|
||||
std::cout << "[ERROR] RSMI call returned " << (RET) \
|
||||
<< " at line " << __LINE__ << std::endl; \
|
||||
rsmi_status_string((RET), &err_str); \
|
||||
std::cout << err_str << std::endl; \
|
||||
@@ -70,6 +71,11 @@
|
||||
} \
|
||||
}
|
||||
|
||||
// Applies PRINT_RSMI_ERR and then CHK_RSMI_RET to the same status value RET.
// Expands to a brace-enclosed block, so call sites use it without a trailing
// semicolon. RET is expanded once per inner macro, so pass a plain variable.
// NOTE(review): CHK_RSMI_RET is defined outside this view; it presumably
// returns RET from the enclosing function on failure -- confirm.
#define CHK_AND_PRINT_RSMI_ERR_RET(RET) { \
|
||||
PRINT_RSMI_ERR(RET) \
|
||||
CHK_RSMI_RET(RET) \
|
||||
}
|
||||
|
||||
#define CHK_RSMI_RET_I(RET) { \
|
||||
PRINT_RSMI_ERR(RET) \
|
||||
if (RET != RSMI_STATUS_SUCCESS) { \
|
||||
@@ -85,6 +91,15 @@
|
||||
} \
|
||||
}
|
||||
|
||||
// Status check that tolerates RSMI_STATUS_NOT_SUPPORTED: for that status it
// only prints a notice and lets execution continue; every other status is
// handed to CHK_RSMI_RET. Expands to a brace-enclosed block, so call sites use
// it without a trailing semicolon.
// NOTE(review): CHK_RSMI_RET is defined outside this view; it presumably
// returns RET from the enclosing function on failure -- confirm.
#define CHK_RSMI_NOT_SUPPORTED_RET(RET) { \
|
||||
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
||||
std::cout << "This function is not supported in the current environment." \
|
||||
<< std::endl; \
|
||||
} else { \
|
||||
CHK_RSMI_RET(RET) \
|
||||
} \
|
||||
}
|
||||
|
||||
static void print_test_header(const char *str, uint32_t dv_ind) {
|
||||
std::cout << "********************************" << std::endl;
|
||||
std::cout << "*** " << str << std::endl;
|
||||
@@ -92,6 +107,10 @@ static void print_test_header(const char *str, uint32_t dv_ind) {
|
||||
std::cout << "Device index: " << dv_ind << std::endl;
|
||||
}
|
||||
|
||||
// Prints a sub-section banner of the form "\n>> <str> <<" followed by a
// newline (and flush) on standard output.
static void print_mini_header(const char *str) {
  std::cout << "\n>> ";
  std::cout << str;
  std::cout << " <<" << std::endl;
}
|
||||
|
||||
static const char *
|
||||
power_profile_string(rsmi_power_profile_preset_masks_t profile) {
|
||||
switch (profile) {
|
||||
@@ -112,6 +131,33 @@ power_profile_string(rsmi_power_profile_preset_masks_t profile) {
|
||||
}
|
||||
}
|
||||
|
||||
static const std::string
|
||||
compute_partition_string(rsmi_compute_partition_type partition) {
|
||||
switch (partition) {
|
||||
case RSMI_COMPUTE_PARTITION_CPX:
|
||||
return "CPX";
|
||||
case RSMI_COMPUTE_PARTITION_SPX:
|
||||
return "SPX";
|
||||
case RSMI_COMPUTE_PARTITION_DPX:
|
||||
return "DPX";
|
||||
case RSMI_COMPUTE_PARTITION_TPX:
|
||||
return "TPX";
|
||||
case RSMI_COMPUTE_PARTITION_QPX:
|
||||
return "QPX";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// File-local lookup table from a partition name ("CPX", "SPX", "DPX", "TPX",
// "QPX") to the matching rsmi_compute_partition_type_t value.
// RSMI_COMPUTE_PARTITION_INVALID has no entry. The map is not const, so
// operator[] on an unknown key inserts that key with a value-initialized
// (zero) enum value.
static std::map<std::string, rsmi_compute_partition_type_t>
|
||||
mapStringToRSMIComputePartitionTypes {
|
||||
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
||||
{"SPX", RSMI_COMPUTE_PARTITION_SPX},
|
||||
{"DPX", RSMI_COMPUTE_PARTITION_DPX},
|
||||
{"TPX", RSMI_COMPUTE_PARTITION_TPX},
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
||||
};
|
||||
|
||||
static const char *
|
||||
perf_level_string(rsmi_dev_perf_level_t perf_lvl) {
|
||||
switch (perf_lvl) {
|
||||
@@ -128,6 +174,34 @@ perf_level_string(rsmi_dev_perf_level_t perf_lvl) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether this process is running with root privileges, i.e. its
// effective user ID is 0 (which is what a default `sudo <cmd>` invocation
// produces).
//
// Returns true when the effective UID is root, false otherwise.
static bool isUserRunningAsSudo() {
  // The previous test (getuid() == geteuid()) holds for every ordinary,
  // non-setuid process, so unprivileged runs were reported as sudo runs.
  // Privilege is determined by the effective UID alone.
  return geteuid() == 0;
}
|
||||
|
||||
// Interprets the status returned by a set-style RSMI call.
//
// Clock files may not be writable, in which case a set call returns
// RSMI_STATUS_PERMISSION. When isUserRunningAsSudo() is also true, that
// status is taken to mean "file is not writable": the error and a warning are
// printed and false is returned. Every other status is passed to
// CHK_AND_PRINT_RSMI_ERR_RET and true is returned.
// NOTE(review): CHK_RSMI_RET (used inside that macro) is not visible here and
// may return early from this function -- confirm.
bool isFileWritable(rsmi_status_t response) {
  const bool deniedWhileSudo =
      isUserRunningAsSudo() && (response == RSMI_STATUS_PERMISSION);

  if (!deniedWhileSudo) {
    CHK_AND_PRINT_RSMI_ERR_RET(response)
    return true;
  }

  PRINT_RSMI_ERR(response)
  std::cout << "[WARN] User is running with sudo "
            << "permissions, file is not writable." << std::endl;
  return false;
}
|
||||
|
||||
static rsmi_status_t test_power_profile(uint32_t dv_ind) {
|
||||
rsmi_status_t ret;
|
||||
rsmi_power_profile_status_t status;
|
||||
@@ -355,13 +429,19 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
|
||||
uint32_t freq_bitmask;
|
||||
rsmi_clk_type rsmi_clk;
|
||||
|
||||
// Clock files may not be writable, causing sets to
|
||||
// return RSMI_STATUS_PERMISSION even if running with
|
||||
// sudo. See isFileWritable() for more info.
|
||||
|
||||
print_test_header("Clock Frequency Control", dv_ind);
|
||||
for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST;
|
||||
clk <= RSMI_CLK_TYPE_LAST; ++clk) {
|
||||
std::string miniHeader = "Testing clock" + std::to_string(clk);
|
||||
print_mini_header(miniHeader.c_str());
|
||||
rsmi_clk = (rsmi_clk_type)clk;
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
||||
CHK_RSMI_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
std::cout << "Initial frequency for clock" << rsmi_clk << " is " <<
|
||||
f.current << std::endl;
|
||||
@@ -380,19 +460,20 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
|
||||
" to 0b" << freq_bm_str << " ..." << std::endl;
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
|
||||
CHK_RSMI_RET(ret)
|
||||
isFileWritable(ret);
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
||||
CHK_RSMI_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
std::cout << "Frequency is now index " << f.current << std::endl;
|
||||
std::cout << "Resetting mask to all frequencies." << std::endl;
|
||||
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
|
||||
CHK_RSMI_RET(ret)
|
||||
isFileWritable(ret);
|
||||
|
||||
ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
|
||||
CHK_RSMI_RET(ret)
|
||||
isFileWritable(ret);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -406,13 +487,75 @@ static void print_frequencies(rsmi_frequencies_t *f) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the compute partition get/set API on device dv_ind:
//   1. reads and prints the current partition,
//   2. attempts to set every partition type from CPX through QPX
//      (see rsmi_compute_partition_type_t in rocm_smi.h),
//   3. restores the partition that was read in step 1.
// A device that reports RSMI_STATUS_NOT_SUPPORTED on the initial read is
// skipped and the function returns RSMI_STATUS_SUCCESS. Other failures are
// handled by the CHK_* macros.
// NOTE(review): those macros rely on CHK_RSMI_RET, which is defined outside
// this view and presumably returns the failing status -- confirm.
static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
  rsmi_status_t ret;
  // Must be a compile-time constant: variable-length arrays are not standard
  // C++. The buffer is zeroed so it always holds a valid (empty) string.
  const uint32_t buffer_len = 10;
  char originalComputePartition[buffer_len] = {0};

  print_test_header("Compute Partitioning Control", dv_ind);

  ret = rsmi_dev_compute_partition_get(dv_ind, originalComputePartition,
                                       buffer_len);
  CHK_RSMI_NOT_SUPPORTED_RET(ret)
  if (ret == RSMI_STATUS_NOT_SUPPORTED) {
    std::cout << "Device does not support the compute partition feature."
              << std::endl;
    std::cout << "*********************************************" << std::endl;
    return RSMI_STATUS_SUCCESS;
  } else {
    CHK_AND_PRINT_RSMI_ERR_RET(ret)
    std::cout << "Original compute partition is " << originalComputePartition
              << "." << std::endl;
  }

  for (int newComputePartition = RSMI_COMPUTE_PARTITION_CPX;
       newComputePartition <= RSMI_COMPUTE_PARTITION_QPX;
       newComputePartition++) {
    rsmi_compute_partition_type newPartition
        = static_cast<rsmi_compute_partition_type>(newComputePartition);
    std::cout << "Attempting to set compute partition to "
              << compute_partition_string(newPartition) << "..."
              << std::endl;
    ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
    CHK_RSMI_NOT_SUPPORTED_RET(ret)
    std::cout << "Done setting compute partition to "
              << compute_partition_string(newPartition)
              << "." << std::endl;
    std::cout << std::endl << std::endl;
  }

  std::string myComputePartition = originalComputePartition;
  if (!myComputePartition.empty()) {
    // find() instead of operator[]: an unrecognized name must not be turned
    // into RSMI_COMPUTE_PARTITION_INVALID (and inserted into the map).
    auto origEntry =
        mapStringToRSMIComputePartitionTypes.find(myComputePartition);
    if (origEntry == mapStringToRSMIComputePartitionTypes.end()) {
      std::cout << "[WARN] Unrecognized original compute partition \""
                << myComputePartition << "\", not resetting." << std::endl;
    } else {
      std::cout << "Resetting compute partition to "
                << originalComputePartition << "... " << std::endl;
      // Perform the reset first, then check its status and report completion.
      // Previously the check and "Done" ran before the call and the result of
      // the reset itself was never examined.
      ret = rsmi_dev_compute_partition_set(dv_ind, origEntry->second);
      CHK_RSMI_NOT_SUPPORTED_RET(ret)
      std::cout << "Done" << std::endl;
    }
  }
  return RSMI_STATUS_SUCCESS;
}
|
||||
|
||||
int main() {
|
||||
rsmi_status_t ret;
|
||||
|
||||
ret = rsmi_init(0);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
|
||||
std::string val_str;
|
||||
std::vector<std::string> val_vec;
|
||||
uint64_t val_ui64, val2_ui64;
|
||||
int64_t val_i64;
|
||||
@@ -424,98 +567,111 @@ int main() {
|
||||
rsmi_gpu_metrics_t p;
|
||||
|
||||
rsmi_num_monitor_devices(&num_monitor_devs);
|
||||
for (uint32_t i = 0; i< num_monitor_devs; ++i) {
|
||||
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
|
||||
ret = rsmi_dev_id_get(i, &val_ui16);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
std::cout << "\t**Device ID: 0x" << std::hex << val_ui64 << std::endl;
|
||||
|
||||
std::cout << std::endl << std::endl;
|
||||
std::cout << "Starting to call "
|
||||
<< "rsmi_dev_compute_partition_get()..."
|
||||
<< std::endl;
|
||||
char current_compute_partition[256];
|
||||
ret = rsmi_dev_compute_partition_get(i, current_compute_partition, 256);
|
||||
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
||||
std::cout << "\t**Current Compute Partition setting: "
|
||||
<< current_compute_partition << std::endl;
|
||||
|
||||
ret = rsmi_dev_gpu_metrics_info_get(i, &p);
|
||||
CHK_RSMI_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**GPU METRICS" << std::endl;
|
||||
|
||||
ret = rsmi_dev_perf_level_get(i, &pfl);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Performance Level:" <<
|
||||
perf_level_string(pfl) << std::endl;
|
||||
|
||||
ret = rsmi_dev_overdrive_level_get(i, &val_ui32);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**OverDrive Level:" << val_ui32 << std::endl;
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Supported GPU Memory clock frequencies: ";
|
||||
std::cout << f.num_supported << std::endl;
|
||||
print_frequencies(&f);
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Supported GPU clock frequencies: ";
|
||||
std::cout << f.num_supported << std::endl;
|
||||
print_frequencies(&f);
|
||||
|
||||
char name[20];
|
||||
ret = rsmi_dev_name_get(i, name, 20);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
char name[128];
|
||||
ret = rsmi_dev_name_get(i, name, 128);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Monitor name: " << name << std::endl;
|
||||
|
||||
ret = rsmi_dev_temp_metric_get(i, 0, RSMI_TEMP_CURRENT, &val_i64);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Temperature: " << val_i64/1000 << "C" << std::endl;
|
||||
|
||||
ret = rsmi_dev_volt_metric_get(i, RSMI_VOLT_TYPE_VDDGFX,
|
||||
RSMI_VOLT_CURRENT, &val_i64);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Voltage: " << val_i64 << "mV" << std::endl;
|
||||
|
||||
ret = rsmi_dev_fan_speed_get(i, 0, &val_i64);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
ret = rsmi_dev_fan_speed_max_get(i, 0, &val_ui64);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Current Fan Speed: ";
|
||||
std::cout << val_i64/static_cast<int64_t>(val_ui64)*100;
|
||||
std::cout << "% ("<< val_i64 << "/" << val_ui64 << ")" << std::endl;
|
||||
|
||||
ret = rsmi_dev_fan_rpms_get(i, 0, &val_i64);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Current fan RPMs: " << val_i64 << std::endl;
|
||||
|
||||
ret = rsmi_dev_power_cap_get(i, 0, &val_ui64);
|
||||
CHK_RSMI_PERM_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Current Power Cap: " << val_ui64 << "uW" <<std::endl;
|
||||
|
||||
ret = rsmi_dev_power_cap_range_get(i, 0, &val_ui64, &val2_ui64);
|
||||
CHK_RSMI_PERM_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Power Cap Range: " << val2_ui64 << " to " <<
|
||||
val_ui64 << " uW" << std::endl;
|
||||
|
||||
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
|
||||
CHK_RSMI_PERM_RET(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t**Averge Power Usage: ";
|
||||
std::cout << static_cast<float>(val_ui64)/1000 << " W" <<
|
||||
std::endl;
|
||||
std::cout << static_cast<float>(val_ui64)/1000 << " W" << std::endl;
|
||||
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
std::cout << "\t=======" << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "***** Testing write api's" << std::endl;
|
||||
for (uint32_t i = 0; i< num_monitor_devs; ++i) {
|
||||
ret = test_set_overdrive(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_set_perf_level(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
|
||||
ret = test_set_freq(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_set_fan_speed(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_power_cap(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_power_profile(i);
|
||||
CHK_RSMI_RET_I(ret)
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_set_compute_partitioning(i);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
|
||||
ret = test_set_freq(i);
|
||||
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -45,7 +45,7 @@
|
||||
#include <errno.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <poll.h>
|
||||
#include <fcntl.h>
|
||||
@@ -1678,13 +1678,30 @@ static std::vector<std::string> pci_name_files = {
|
||||
"/var/lib/pciutils/pci.ids"
|
||||
};
|
||||
|
||||
|
||||
enum eNameStrType {
|
||||
NAME_STR_VENDOR = 0,
|
||||
NAME_STR_DEVICE,
|
||||
NAME_STR_SUBSYS
|
||||
};
|
||||
|
||||
std::map<std::string, rsmi_compute_partition_type_t>
|
||||
mapStringToRSMIComputePartitionTypes {
|
||||
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
||||
{"SPX", RSMI_COMPUTE_PARTITION_SPX},
|
||||
{"DPX", RSMI_COMPUTE_PARTITION_DPX},
|
||||
{"TPX", RSMI_COMPUTE_PARTITION_TPX},
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
||||
};
|
||||
|
||||
std::map<rsmi_compute_partition_type_t, std::string>
|
||||
mapRSMIToStringComputePartitionTypes {
|
||||
{RSMI_COMPUTE_PARTITION_CPX, "CPX"},
|
||||
{RSMI_COMPUTE_PARTITION_SPX, "SPX"},
|
||||
{RSMI_COMPUTE_PARTITION_DPX, "DPX"},
|
||||
{RSMI_COMPUTE_PARTITION_TPX, "TPX"},
|
||||
{RSMI_COMPUTE_PARTITION_QPX, "QPX"}
|
||||
};
|
||||
|
||||
static std::string
|
||||
get_id_name_str_from_line(uint64_t id, std::string ln,
|
||||
std::istringstream *ln_str) {
|
||||
@@ -3697,6 +3714,117 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
CATCH
|
||||
}
|
||||
|
||||
/**
 * Reads the current compute partition of device dv_ind as a string.
 *
 * The raw value is fetched with get_dev_value_str() for
 * amd::smi::kDevComputePartition and is accepted only if it is one of the
 * names registered in mapStringToRSMIComputePartitionTypes.
 *
 * TRY, CATCH, CHK_SUPPORT_NAME_ONLY and DEVICE_MUTEX are project macros
 * defined outside this block; they may return early with their own status.
 *
 * @param dv_ind             index of the device to query
 * @param compute_partition  receives the partition name on success; left
 *                           untouched on failure
 * @return RSMI_STATUS_SUCCESS on success, the status reported by
 *         get_dev_value_str() if the read fails, or
 *         RSMI_STATUS_UNEXPECTED_DATA if the value read is not a known
 *         partition name.
 */
static rsmi_status_t
get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
  TRY
  std::string val_str;

  // std::string::c_str() never yields nullptr, so the argument handed to the
  // support-check macro is always non-null here.
  CHK_SUPPORT_NAME_ONLY(compute_partition.c_str())

  DEVICE_MUTEX
  rsmi_status_t ret = get_dev_value_str(amd::smi::kDevComputePartition,
                                        dv_ind, &val_str);

  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // Use find() rather than operator[]: operator[] would insert a
  // default-constructed entry into the global map for every unrecognised
  // string read from the device.
  const auto known_partition =
      mapStringToRSMIComputePartitionTypes.find(val_str);
  if (known_partition == mapStringToRSMIComputePartitionTypes.end() ||
      known_partition->second == RSMI_COMPUTE_PARTITION_INVALID) {
    // Retrieved an unknown compute partition
    return RSMI_STATUS_UNEXPECTED_DATA;
  }

  compute_partition = val_str;
  return RSMI_STATUS_SUCCESS;
  CATCH
}
|
||||
|
||||
/**
 * Copies the current compute partition name of device dv_ind into a
 * caller-provided character buffer as a NUL-terminated string.
 *
 * CHK_SUPPORT_NAME_ONLY, TRY and CATCH are project macros defined outside
 * this block. NOTE(review): the support-check macro runs before the argument
 * check below and presumably handles a null compute_partition itself --
 * confirm against the macro definition.
 *
 * @param dv_ind             index of the device to query
 * @param compute_partition  destination buffer of at least len bytes
 * @param len                capacity of compute_partition in bytes,
 *                           including room for the terminating NUL
 * @return RSMI_STATUS_SUCCESS when the whole name fits,
 *         RSMI_STATUS_INSUFFICIENT_SIZE when the buffer is too small (the
 *         buffer then holds the first len - 1 characters, NUL-terminated),
 *         RSMI_STATUS_INVALID_ARGS when len is 0 or the buffer is null, or
 *         the status reported by get_compute_partition() on failure.
 */
rsmi_status_t
rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
                               uint32_t len) {
  CHK_SUPPORT_NAME_ONLY(compute_partition)
  if ((len == 0) || (compute_partition == nullptr)) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  TRY
  std::string returning_compute_partition;
  rsmi_status_t ret = get_compute_partition(dv_ind,
                                            returning_compute_partition);

  if (ret != RSMI_STATUS_SUCCESS) { return ret; }

  // Reserve the last byte for the terminator. Copying up to len characters
  // and then writing compute_partition[len] would store one byte past the
  // end of the caller's buffer whenever the name has len or more characters.
  const std::size_t max_chars = static_cast<std::size_t>(len) - 1;
  const std::size_t copied =
      returning_compute_partition.copy(compute_partition, max_chars);
  compute_partition[copied] = '\0';

  if (len < (returning_compute_partition.size() + 1)) {
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  return ret;
  CATCH
}
|
||||
|
||||
/**
 * Requests a new compute partition mode for device dv_ind.
 *
 * The requested mode is validated first. If the device already reports that
 * mode, nothing is written. Otherwise the mode's name is written through
 * Device::writeDevInfo() for amd::smi::kDevComputePartition.
 *
 * TRY, CATCH, REQUIRE_ROOT_ACCESS, DEVICE_MUTEX and GET_DEV_FROM_INDX are
 * project macros defined outside this block; they may return early with
 * their own status.
 *
 * @param dv_ind             index of the device to modify
 * @param compute_partition  partition mode to switch to
 * @return RSMI_STATUS_INVALID_ARGS for RSMI_COMPUTE_PARTITION_INVALID or any
 *         value that is not a known partition mode, RSMI_STATUS_SUCCESS if
 *         the device is already in the requested mode, otherwise the errno
 *         from the write translated by amd::smi::ErrnoToRsmiStatus().
 */
rsmi_status_t
rsmi_dev_compute_partition_set(uint32_t dv_ind,
                               rsmi_compute_partition_type_t compute_partition) {
  TRY
  REQUIRE_ROOT_ACCESS
  DEVICE_MUTEX

  // Validate before touching the lookup tables, so an invalid request can
  // never add an entry to the global maps.
  switch (compute_partition) {
    case RSMI_COMPUTE_PARTITION_CPX:
    case RSMI_COMPUTE_PARTITION_SPX:
    case RSMI_COMPUTE_PARTITION_DPX:
    case RSMI_COMPUTE_PARTITION_TPX:
    case RSMI_COMPUTE_PARTITION_QPX:
      break;
    case RSMI_COMPUTE_PARTITION_INVALID:
    default:
      return RSMI_STATUS_INVALID_ARGS;
  }

  // find() instead of operator[]: a lookup must not mutate the global map.
  const auto requested =
      mapRSMIToStringComputePartitionTypes.find(compute_partition);
  if (requested == mapRSMIToStringComputePartitionTypes.end()) {
    return RSMI_STATUS_INVALID_ARGS;
  }
  const std::string &newComputePartitionStr = requested->second;

  // Do nothing if the device is already in the requested mode. A failed read
  // is not fatal here: the write below is still attempted, as before.
  // NOTE(review): get_compute_partition() expands DEVICE_MUTEX again while it
  // is already held here -- confirm the device mutex tolerates re-entry.
  std::string currentComputePartition;
  const rsmi_status_t read_status =
      get_compute_partition(dv_ind, currentComputePartition);
  if (read_status == RSMI_STATUS_SUCCESS &&
      currentComputePartition == newComputePartitionStr) {
    return RSMI_STATUS_SUCCESS;
  }

  GET_DEV_FROM_INDX
  int ret = dev->writeDevInfo(amd::smi::kDevComputePartition,
                              newComputePartitionStr);
  return amd::smi::ErrnoToRsmiStatus(ret);
  CATCH
}
|
||||
|
||||
enum iterator_handle_type {
|
||||
FUNC_ITER = 0,
|
||||
VARIANT_ITER,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -121,6 +121,7 @@ static const char *kDevXGMIErrorFName = "xgmi_error";
|
||||
static const char *kDevSerialNumberFName = "serial_number";
|
||||
static const char *kDevNumaNodeFName = "numa_node";
|
||||
static const char *kDevGpuMetricsFName = "gpu_metrics";
|
||||
static const char *kDevComputePartitionFName = "current_compute_partition";
|
||||
|
||||
// Firmware version files
|
||||
static const char *kDevFwVersionAsdFName = "fw_version/asd_fw_version";
|
||||
@@ -290,6 +291,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevNumaNode, kDevNumaNodeFName},
|
||||
{kDevGpuMetrics, kDevGpuMetricsFName},
|
||||
{kDevGpuReset, kDevGpuResetFName},
|
||||
{kDevComputePartition, kDevComputePartitionFName},
|
||||
};
|
||||
|
||||
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
|
||||
@@ -413,6 +415,8 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
{"rsmi_topo_numa_affinity_get", {{kDevNumaNodeFName}, {}}},
|
||||
{"rsmi_dev_gpu_metrics_info_get", {{kDevGpuMetricsFName}, {}}},
|
||||
{"rsmi_dev_gpu_reset", {{kDevGpuResetFName}, {}}},
|
||||
{"rsmi_dev_compute_partition_get", {{kDevComputePartitionFName}, {}}},
|
||||
{"rsmi_dev_compute_partition_set", {{kDevComputePartitionFName}, {}}},
|
||||
|
||||
// These functions with variants, but no sensors/units. (May or may not
|
||||
// have mandatory dependencies.)
|
||||
@@ -563,9 +567,6 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
|
||||
if (env_->path_DRM_root_override && type == env_->enum_override) {
|
||||
sysfs_path = env_->path_DRM_root_override;
|
||||
|
||||
if (str) {
|
||||
sysfs_path += ".write";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -587,7 +588,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
|
||||
fs->open(sysfs_path);
|
||||
|
||||
if (!fs->is_open()) {
|
||||
return errno;
|
||||
return errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -696,6 +697,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
case kDevPCIEClk:
|
||||
case kDevPowerODVoltage:
|
||||
case kDevSOCClk:
|
||||
case kDevComputePartition:
|
||||
return writeDevInfoStr(type, val);
|
||||
|
||||
default:
|
||||
@@ -922,6 +924,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevVBiosVer:
|
||||
case kDevPCIEThruPut:
|
||||
case kDevSerialNumber:
|
||||
case kDevComputePartition:
|
||||
return readDevInfoStr(type, val);
|
||||
break;
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2021, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -280,7 +280,7 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
// Initialize the smu fiedls to zero as some of them only valid in
|
||||
// Initialize the smu fields to zero as some of them only valid in
|
||||
// a specific version.
|
||||
*smu = {};
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -73,6 +73,77 @@ static const char *kDeviceNamePrefix = "card";
|
||||
|
||||
static const char *kAMDMonitorTypes[] = {"radeon", "amdgpu", ""};
|
||||
|
||||
static const std::string amdSMI = "amd::smi::";
|
||||
const std::map<amd::smi::DevInfoTypes, std::string> amd::smi::RocmSMI::devInfoTypesStrings = {
|
||||
{amd::smi::kDevPerfLevel, amdSMI + "kDevPerfLevel"},
|
||||
{amd::smi::kDevOverDriveLevel, amdSMI + "kDevOverDriveLevel"},
|
||||
{amd::smi::kDevMemOverDriveLevel, amdSMI + "kDevMemOverDriveLevel"},
|
||||
{amd::smi::kDevDevID, amdSMI + "kDevDevID"},
|
||||
{amd::smi::kDevDevProdName, amdSMI + "kDevDevProdName"},
|
||||
{amd::smi::kDevDevProdNum, amdSMI + "kDevDevProdNum"},
|
||||
{amd::smi::kDevVendorID, amdSMI + "kDevVendorID"},
|
||||
{amd::smi::kDevSubSysDevID, amdSMI + "kDevSubSysDevID"},
|
||||
{amd::smi::kDevSubSysVendorID, amdSMI + "kDevSubSysVendorID"},
|
||||
{amd::smi::kDevGPUMClk, amdSMI + "kDevGPUMClk"},
|
||||
{amd::smi::kDevGPUSClk, amdSMI + "kDevGPUSClk"},
|
||||
{amd::smi::kDevDCEFClk, amdSMI + "kDevDCEFClk"},
|
||||
{amd::smi::kDevFClk, amdSMI + "kDevFClk"},
|
||||
{amd::smi::kDevSOCClk, amdSMI + "kDevSOCClk"},
|
||||
{amd::smi::kDevPCIEClk, amdSMI + "kDevPCIEClk"},
|
||||
{amd::smi::kDevPowerProfileMode, amdSMI + "kDevPowerProfileMode"},
|
||||
{amd::smi::kDevUsage, amdSMI + "kDevUsage"},
|
||||
{amd::smi::kDevPowerODVoltage, amdSMI + "kDevPowerODVoltage"},
|
||||
{amd::smi::kDevVBiosVer, amdSMI + "kDevVBiosVer"},
|
||||
{amd::smi::kDevPCIEThruPut, amdSMI + "kDevPCIEThruPut"},
|
||||
{amd::smi::kDevErrCntSDMA, amdSMI + "kDevErrCntSDMA"},
|
||||
{amd::smi::kDevErrCntUMC, amdSMI + "kDevErrCntUMC"},
|
||||
{amd::smi::kDevErrCntGFX, amdSMI + "kDevErrCntGFX"},
|
||||
{amd::smi::kDevErrCntMMHUB, amdSMI + "kDevErrCntMMHUB"},
|
||||
{amd::smi::kDevErrCntPCIEBIF, amdSMI + "kDevErrCntPCIEBIF"},
|
||||
{amd::smi::kDevErrCntHDP, amdSMI + "kDevErrCntHDP"},
|
||||
{amd::smi::kDevErrCntXGMIWAFL, amdSMI + "kDevErrCntXGMIWAFL"},
|
||||
{amd::smi::kDevErrCntFeatures, amdSMI + "kDevErrCntFeatures"},
|
||||
{amd::smi::kDevMemTotGTT, amdSMI + "kDevMemTotGTT"},
|
||||
{amd::smi::kDevMemTotVisVRAM, amdSMI + "kDevMemTotVisVRAM"},
|
||||
{amd::smi::kDevMemTotVRAM, amdSMI + "kDevMemTotVRAM"},
|
||||
{amd::smi::kDevMemUsedGTT, amdSMI + "kDevMemUsedGTT"},
|
||||
{amd::smi::kDevMemUsedVisVRAM, amdSMI + "kDevMemUsedVisVRAM"},
|
||||
{amd::smi::kDevMemUsedVRAM, amdSMI + "kDevMemUsedVRAM"},
|
||||
{amd::smi::kDevVramVendor, amdSMI + "kDevVramVendor"},
|
||||
{amd::smi::kDevPCIEReplayCount, amdSMI + "kDevPCIEReplayCount"},
|
||||
{amd::smi::kDevUniqueId, amdSMI + "kDevUniqueId"},
|
||||
{amd::smi::kDevDFCountersAvailable, amdSMI + "kDevDFCountersAvailable"},
|
||||
{amd::smi::kDevMemBusyPercent, amdSMI + "kDevMemBusyPercent"},
|
||||
{amd::smi::kDevXGMIError, amdSMI + "kDevXGMIError"},
|
||||
{amd::smi::kDevFwVersionAsd, amdSMI + "kDevFwVersionAsd"},
|
||||
{amd::smi::kDevFwVersionCe, amdSMI + "kDevFwVersionCe"},
|
||||
{amd::smi::kDevFwVersionDmcu, amdSMI + "kDevFwVersionDmcu"},
|
||||
{amd::smi::kDevFwVersionMc, amdSMI + "kDevFwVersionMc"},
|
||||
{amd::smi::kDevFwVersionMe, amdSMI + "kDevFwVersionMe"},
|
||||
{amd::smi::kDevFwVersionMec, amdSMI + "kDevFwVersionMec"},
|
||||
{amd::smi::kDevFwVersionMec2, amdSMI + "kDevFwVersionMec2"},
|
||||
{amd::smi::kDevFwVersionPfp, amdSMI + "kDevFwVersionPfp"},
|
||||
{amd::smi::kDevFwVersionRlc, amdSMI + "kDevFwVersionRlc"},
|
||||
{amd::smi::kDevFwVersionRlcSrlc, amdSMI + "kDevFwVersionRlcSrlc"},
|
||||
{amd::smi::kDevFwVersionRlcSrlg, amdSMI + "kDevFwVersionRlcSrlg"},
|
||||
{amd::smi::kDevFwVersionRlcSrls, amdSMI + "kDevFwVersionRlcSrls"},
|
||||
{amd::smi::kDevFwVersionSdma, amdSMI + "kDevFwVersionSdma"},
|
||||
{amd::smi::kDevFwVersionSdma2, amdSMI + "kDevFwVersionSdma2"},
|
||||
{amd::smi::kDevFwVersionSmc, amdSMI + "kDevFwVersionSmc"},
|
||||
{amd::smi::kDevFwVersionSos, amdSMI + "kDevFwVersionSos"},
|
||||
{amd::smi::kDevFwVersionTaRas, amdSMI + "kDevFwVersionTaRas"},
|
||||
{amd::smi::kDevFwVersionTaXgmi, amdSMI + "kDevFwVersionTaXgmi"},
|
||||
{amd::smi::kDevFwVersionUvd, amdSMI + "kDevFwVersionUvd"},
|
||||
{amd::smi::kDevFwVersionVce, amdSMI + "kDevFwVersionVce"},
|
||||
{amd::smi::kDevFwVersionVcn, amdSMI + "kDevFwVersionVcn"},
|
||||
{amd::smi::kDevSerialNumber, amdSMI + "kDevSerialNumber"},
|
||||
{amd::smi::kDevMemPageBad, amdSMI + "kDevMemPageBad"},
|
||||
{amd::smi::kDevNumaNode, amdSMI + "kDevNumaNode"},
|
||||
{amd::smi::kDevGpuMetrics, amdSMI + "kDevGpuMetrics"},
|
||||
{amd::smi::kDevGpuReset, amdSMI + "kDevGpuReset"},
|
||||
{amd::smi::kDevComputePartition, amdSMI + "kDevComputePartition"}
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -179,6 +250,8 @@ static bool bdfid_from_path(const std::string in_name, uint64_t *bdfid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 0 = successful bdfid found
|
||||
// 1 = not a good bdfid found
|
||||
static uint32_t ConstructBDFID(std::string path, uint64_t *bdfid) {
|
||||
assert(bdfid != nullptr);
|
||||
char tpath[256] = {'\0'};
|
||||
|
||||
Spustitelný soubor
+324
@@ -0,0 +1,324 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/functional/computepartition_read_write.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
|
||||
// Constructs the test and registers its human-readable title and description
// with the TestBase bookkeeping.
TestComputePartitionReadWrite::TestComputePartitionReadWrite() : TestBase() {
  set_title("RSMI Compute Partition Read/Write Test");
  // Spelling ("Partition") and verb agreement corrected in the description.
  set_description("The Compute Partition test verifies that the compute "
                  "partition can be read and updated properly.");
}
|
||||
|
||||
// Nothing to release here; this class adds no cleanup of its own.
TestComputePartitionReadWrite::~TestComputePartitionReadWrite() {}
|
||||
|
||||
// No test-specific preparation is needed; defer entirely to the base class.
void TestComputePartitionReadWrite::SetUp() {
  TestBase::SetUp();
}
|
||||
|
||||
// Shows the test information by delegating to the base class.
void TestComputePartitionReadWrite::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
|
||||
|
||||
// Shows the test results by delegating to the base class.
void TestComputePartitionReadWrite::DisplayResults() const {
  TestBase::DisplayResults();
}
|
||||
|
||||
// Tears the test down. TestBase::Close() closes the handles opened by the
// rsmitst utility calls and calls rsmi_shut_down(), so any other cleanup
// must happen before this call.
void TestComputePartitionReadWrite::Close() {
  TestBase::Close();
}
|
||||
|
||||
static const std::string
|
||||
computePartitionString(rsmi_compute_partition_type computeParitionType) {
|
||||
/**
|
||||
* RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
* RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
* //!< shared memory
|
||||
* RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
* //!< together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
* //!< together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
* //!< work together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
* //!< work together with shared memory
|
||||
*/
|
||||
switch (computeParitionType) {
|
||||
case RSMI_COMPUTE_PARTITION_CPX:
|
||||
return "CPX";
|
||||
case RSMI_COMPUTE_PARTITION_SPX:
|
||||
return "SPX";
|
||||
case RSMI_COMPUTE_PARTITION_DPX:
|
||||
return "DPX";
|
||||
case RSMI_COMPUTE_PARTITION_TPX:
|
||||
return "TPX";
|
||||
case RSMI_COMPUTE_PARTITION_QPX:
|
||||
return "QPX";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static const std::map<std::string, rsmi_compute_partition_type_t>
|
||||
mapStringToRSMIComputePartitionTypes {
|
||||
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
||||
{"SPX", RSMI_COMPUTE_PARTITION_SPX},
|
||||
{"DPX", RSMI_COMPUTE_PARTITION_DPX},
|
||||
{"TPX", RSMI_COMPUTE_PARTITION_TPX},
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
||||
};
|
||||
|
||||
void TestComputePartitionReadWrite::Run(void) {
|
||||
rsmi_status_t ret, err;
|
||||
char orig_char_computePartition[255];
|
||||
char current_char_computePartition[255];
|
||||
rsmi_compute_partition_type new_computePartition;
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
|
||||
PrintDeviceHeader(dv_ind);
|
||||
|
||||
//Standard checks to see if API is supported, before running full tests
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
|
||||
255);
|
||||
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
CHK_ERR_ASRT(ret)
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl << "\t**"
|
||||
<< "Original compute partition: "
|
||||
<< orig_char_computePartition << std::endl;
|
||||
}
|
||||
|
||||
if ((orig_char_computePartition == NULL) ||
|
||||
(orig_char_computePartition[0] == '\0')) {
|
||||
std::cout << "***System compute partition value is not defined. "
|
||||
"Skip compute partition test." << std::endl;
|
||||
return;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
uint32_t length = 2;
|
||||
char smallBuffer[length];
|
||||
err = rsmi_dev_compute_partition_get(dv_ind, smallBuffer, length);
|
||||
size_t size = sizeof(smallBuffer)/sizeof(*smallBuffer);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INSUFFICIENT_SIZE);
|
||||
ASSERT_EQ((size_t)length, size);
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INSUFFICIENT_SIZE) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INSUFFICIENT_SIZE was returned "
|
||||
<< "and size matches length requested." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_compute_partition_get(dv_ind, nullptr, 255);
|
||||
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_NOT_SUPPORTED was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition, 0);
|
||||
ASSERT_EQ(err, (RSMI_STATUS_INVALID_ARGS || RSMI_STATUS_NOT_SUPPORTED));
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
// Note: new_computePartition is not set
|
||||
// DISPLAY_RSMI_ERR(err)
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED));
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
} else {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
}
|
||||
}
|
||||
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
new_computePartition
|
||||
= rsmi_compute_partition_type::RSMI_COMPUTE_PARTITION_INVALID;
|
||||
err = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
// DISPLAY_RSMI_ERR(err)
|
||||
EXPECT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(err == RSMI_STATUS_PERMISSION));
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
} else if (err == RSMI_STATUS_PERMISSION) {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
// tests should not continue if err is a permission issue
|
||||
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
|
||||
} else {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-run original get, so we can reset to later
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
|
||||
255);
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
|
||||
/**
|
||||
* RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
* RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
* //!< shared memory
|
||||
* RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
* //!< together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
* //!< together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
* //!< work together with shared memory
|
||||
* RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
* //!< work together with shared memory
|
||||
*/
|
||||
|
||||
for (int partition = RSMI_COMPUTE_PARTITION_CPX;
|
||||
partition <= RSMI_COMPUTE_PARTITION_QPX;
|
||||
partition++) {
|
||||
new_computePartition
|
||||
= static_cast<rsmi_compute_partition_type>(partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "======== TEST RSMI_COMPUTE_PARTITION_"
|
||||
<< computePartitionString(new_computePartition)
|
||||
<< " ===============" << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Attempting to set compute partition to: "
|
||||
<< computePartitionString(new_computePartition) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
|
||||
255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Current compute partition: "
|
||||
<< current_char_computePartition
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
EXPECT_STREQ(computePartitionString(new_computePartition).c_str(),
|
||||
current_char_computePartition);
|
||||
}
|
||||
|
||||
/* TEST RETURN TO ORIGINAL COMPUTE PARTITIONING SETTING */
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "=========== TEST RETURN TO ORIGINAL COMPUTE PARTITIONING "
|
||||
<< "SETTING ========" << std::endl;
|
||||
}
|
||||
new_computePartition
|
||||
= mapStringToRSMIComputePartitionTypes.at(orig_char_computePartition);
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Returning compute partition to: "
|
||||
<< computePartitionString(new_computePartition) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
|
||||
255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Attempted to set compute partition: "
|
||||
<< computePartitionString(new_computePartition) << std::endl
|
||||
<< "\t**"
|
||||
<< "Current compute partition: " << current_char_computePartition
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
EXPECT_STREQ(computePartitionString(new_computePartition).c_str(),
|
||||
current_char_computePartition);
|
||||
}
|
||||
}
|
||||
Spustitelný soubor
+73
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
|
||||
#include "rocm_smi_test/test_base.h"
|
||||
|
||||
// Functional test case for reading and changing a device's compute
// partition. Every hook below is implemented in the matching .cc file.
class TestComputePartitionReadWrite : public TestBase {
 public:
  // @Brief: Constructor for test case of TestComputePartitionReadWrite
  TestComputePartitionReadWrite();

  // @Brief: Destructor for test case of TestComputePartitionReadWrite
  virtual ~TestComputePartitionReadWrite();

  // @Brief: Prepare the environment before the test runs
  virtual void SetUp();

  // @Brief: Execute the compute partition read/write checks
  virtual void Run();

  // @Brief: Clean up and release the resources used by the test
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();
};
|
||||
|
||||
#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2018, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -81,6 +81,7 @@
|
||||
#include "functional/mutual_exclusion.h"
|
||||
#include "functional/evt_notif_read_write.h"
|
||||
#include "functional/init_shutdown_refcount.h"
|
||||
#include "functional/computepartition_read_write.h"
|
||||
#include "rocm_smi_test/functional/hw_topology_read.h"
|
||||
#include "rocm_smi_test/functional/gpu_metrics_read.h"
|
||||
#include "rocm_smi_test/functional/metrics_counter_read.h"
|
||||
@@ -267,6 +268,10 @@ TEST(rsmitstReadWrite, TestEvtNotifReadWrite) {
|
||||
TestEvtNotifReadWrite tst;
|
||||
RunGenericTest(&tst);
|
||||
}
|
||||
// Runs the compute partition read/write test through the generic test driver.
TEST(rsmitstReadWrite, TestComputePartitionReadWrite) {
  TestComputePartitionReadWrite partition_test;
  RunGenericTest(&partition_test);
}
|
||||
TEST(rsmitstReadOnly, TestConcurrentInit) {
|
||||
TestConcurrentInit tst;
|
||||
SetFlags(&tst);
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele