Rename NPS -> memory partition + compute partition node fix

* Updates:
        - rocm_smi_lib + CLI:
          Rename all "NPS mode" -> "memory partition"
          related files/functions/API/CLI to align with correct
          technical naming
        - rocm_smi_main: fixed identifying primary card's unique id
          utilize rsmi_dev_unique_id_get to map which
          KFD nodes belong to it
        - rsmi_dev_*_partition*: now have better logging output
        - compute partition tests:
          Added 20 sec delay for workaround until GPU
          busy is confirmed as the issue
        - CPPLint fixes/formatting
        - [Example] Moved all endl to "\n" for efficiency
        - [Example] Added Edge & Junction temperature examples
        - [Example] Added rsmi_minmax_bandwidth_get() example - WIP

Change-Id: Ida6db6fda7e0ac9d696a34cb15b4746e69d58d51
Signed-off-by: Charis Poag <Charis.Poag@amd.com>


[ROCm/rocm_smi_lib commit: b251bb0c9f]
Этот коммит содержится в:
Charis Poag
2023-09-21 14:53:35 -05:00
родитель fce4f5fa08
Коммит d57d65a607
12 изменённых файлов: 847 добавлений и 503 удалений
+48 -44
Просмотреть файл
@@ -40,8 +40,8 @@
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_H_
#define INCLUDE_ROCM_SMI_ROCM_SMI_H_
#ifndef ROCM_SMI_ROCM_SMI_H_
#define ROCM_SMI_ROCM_SMI_H_
#ifdef __cplusplus
extern "C" {
@@ -379,27 +379,27 @@ typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
/// \endcond
/**
* @brief NPS Modes. This enum is used to identify various
* NPS mode types.
* @brief Memory Partitions. This enum is used to identify various
* memory partition types.
*/
typedef enum {
RSMI_MEMORY_PARTITION_UNKNOWN = 0,
RSMI_MEMORY_PARTITION_NPS1, //!< NPS1 - All CCD & XCD data is interleaved
//!< accross all 8 HBM stacks (all stacks/1).
RSMI_MEMORY_PARTITION_NPS2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
//!< accross the 4 HBM stacks per AID pair
//!< (8 stacks/2).
RSMI_MEMORY_PARTITION_NPS4, //!< NPS4 - Each XCD data is interleaved accross
//!< accross 2 (or single) HBM stacks
//!< (8 stacks/8 or 8 stacks/4).
RSMI_MEMORY_PARTITION_NPS8, //!< NPS8 - Each XCD uses a single HBM stack
//!< (8 stacks/8). Or each XCD uses a single
//!< HBM stack & CCDs share 2 non-interleaved
//!< HBM stacks on its AID
//!< (AID[1,2,3] = 6 stacks/6).
} rsmi_nps_mode_type_t;
RSMI_MEMORY_PARTITION_NPS1, //!< NPS1 - All CCD & XCD data is interleaved
//!< across all 8 HBM stacks (all stacks/1).
RSMI_MEMORY_PARTITION_NPS2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
//!< across the 4 HBM stacks per AID pair
//!< (8 stacks/2).
RSMI_MEMORY_PARTITION_NPS4, //!< NPS4 - Each XCD data is interleaved
//!< across 2 (or single) HBM stacks
//!< (8 stacks/8 or 8 stacks/4).
RSMI_MEMORY_PARTITION_NPS8, //!< NPS8 - Each XCD uses a single HBM stack
//!< (8 stacks/8). Or each XCD uses a single
//!< HBM stack & CCDs share 2 non-interleaved
//!< HBM stacks on its AID
//!< (AID[1,2,3] = 6 stacks/6).
} rsmi_memory_partition_type_t;
/// \cond Ignore in docs.
typedef rsmi_nps_mode_type_t rsmi_nps_mode_type;
typedef rsmi_memory_partition_type_t rsmi_memory_partition_type;
/// \endcond
/**
@@ -2413,7 +2413,8 @@ rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind,
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
*
*/
rsmi_status_t rsmi_perf_determinism_mode_set(uint32_t dv_ind, uint64_t clkvalue);
rsmi_status_t rsmi_perf_determinism_mode_set(uint32_t dv_ind,
uint64_t clkvalue);
/**
* @brief Get the overdrive percent associated with the device with provided
@@ -3767,27 +3768,28 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
/** @} */ // end of ComputePartition
/*****************************************************************************/
/** @defgroup NPSMode NPS Mode Functions
* These functions are used to query the device's NPS mode (memory partition).
/** @defgroup memory_partition The Memory Partition Functions
* These functions are used to query and set the device's current memory
* partition.
* @{
*/
/**
* @brief Retrieves the NPS mode (memory partition) for a desired device
* @brief Retrieves the current memory partition for a desired device
*
* @details
* Given a device index @p dv_ind and a string @p nps_mode ,
* Given a device index @p dv_ind and a string @p memory_partition ,
* and uint32 @p len , this function will attempt to obtain the device's
* nps mode string. Upon successful retreival, the obtained device's
* nps mode string shall be stored in the passed @p nps_mode char string
* variable.
* memory partition string. Upon successful retrieval, the obtained device's
* memory partition string shall be stored in the passed @p memory_partition
* char string variable.
*
* @param[in] dv_ind a device index
*
* @param[inout] nps_mode a pointer to a char string variable,
* which the device's nps mode will be written to.
* @param[inout] memory_partition a pointer to a char string variable,
* which the device's memory partition will be written to.
*
* @param[in] len the length of the caller provided buffer @p nps_mode ,
* @param[in] len the length of the caller provided buffer @p memory_partition ,
* suggested length is 5 or greater.
*
* @retval ::RSMI_STATUS_SUCCESS call was successful
@@ -3796,24 +3798,25 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
* support this function
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
* large enough to hold the entire nps mode value. In this case,
* large enough to hold the entire memory partition value. In this case,
* only @p len bytes will be written.
*
*/
rsmi_status_t
rsmi_dev_nps_mode_get(uint32_t dv_ind, char *nps_mode, uint32_t len);
rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
uint32_t len);
/**
* @brief Modifies a selected device's NPS mode (memory partition) setting.
* @brief Modifies a selected device's current memory partition setting.
*
* @details Given a device index @p dv_ind and a type of nps mode
* @p nps_mode, this function will attempt to update the selected
* device's nps mode setting.
* @details Given a device index @p dv_ind and a type of memory partition
* @p memory_partition, this function will attempt to update the selected
* device's memory partition setting.
*
* @param[in] dv_ind a device index
*
* @param[in] nps_mode using enum ::rsmi_nps_mode_type_t,
* define what the selected device's NPS mode setting should be updated to.
* @param[in] memory_partition using enum ::rsmi_memory_partition_type_t,
* define what the selected device's memory partition setting should be updated to.
*
* @retval ::RSMI_STATUS_SUCCESS call was successful
* @retval ::RSMI_STATUS_PERMISSION function requires root access
@@ -3825,14 +3828,15 @@ rsmi_dev_nps_mode_get(uint32_t dv_ind, char *nps_mode, uint32_t len);
*
*/
rsmi_status_t
rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode);
rsmi_dev_memory_partition_set(uint32_t dv_ind,
rsmi_memory_partition_type_t memory_partition);
/**
* @brief Reverts a selected device's NPS mode setting back to its
* @brief Reverts a selected device's memory partition setting back to its
* boot state.
*
* @details Given a device index @p dv_ind , this function will attempt to
* revert its NPS mode setting back to its boot state.
* revert its current memory partition setting back to its boot state.
*
* @param[in] dv_ind a device index
*
@@ -3844,9 +3848,9 @@ rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode);
* the amdgpu driver
*
*/
rsmi_status_t rsmi_dev_nps_mode_reset(uint32_t dv_ind);
rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind);
/** @} */ // end of NPSMode
/** @} */ // end of memory_partition
/*****************************************************************************/
/** @defgroup APISupport Supported Functions
@@ -4193,4 +4197,4 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // INCLUDE_ROCM_SMI_ROCM_SMI_H_
#endif // ROCM_SMI_ROCM_SMI_H_
+60 -54
Просмотреть файл
@@ -43,6 +43,9 @@ JSON_DATA = {}
# Version of the JSON output used to save clocks
CLOCK_JSON_VERSION = 1
# Apply max buffer to all data allocation
MAX_BUFF_SIZE = 256
headerString = ' ROCm System Management Interface '
footerString = ' End of ROCm SMI Log '
# Output formatting
@@ -529,8 +532,8 @@ def getComputePartition(device, silent=True):
@param silent=Turn on to silence error output
(you plan to handle manually). Default is on.
"""
currentComputePartition = create_string_buffer(256)
ret = rocmsmi.rsmi_dev_compute_partition_get(device, currentComputePartition, 256)
currentComputePartition = create_string_buffer(MAX_BUFF_SIZE)
ret = rocmsmi.rsmi_dev_compute_partition_get(device, currentComputePartition, MAX_BUFF_SIZE)
if rsmi_ret_ok(ret, device, 'get_compute_partition', silent) and currentComputePartition.value.decode():
return str(currentComputePartition.value.decode())
return "N/A"
@@ -543,10 +546,10 @@ def getMemoryPartition(device, silent=True):
@param silent=Turn on to silence error output
(you plan to handle manually). Default is on.
"""
currentNPSMode = create_string_buffer(256)
ret = rocmsmi.rsmi_dev_nps_mode_get(device, currentNPSMode, 256)
if rsmi_ret_ok(ret, device, 'get_NPS_mode', silent) and currentNPSMode.value.decode():
return str(currentNPSMode.value.decode())
currentMemoryPartition = create_string_buffer(MAX_BUFF_SIZE)
ret = rocmsmi.rsmi_dev_memory_partition_get(device, currentMemoryPartition, MAX_BUFF_SIZE)
if rsmi_ret_ok(ret, device, 'get_memory_partition', silent) and currentMemoryPartition.value.decode():
return str(currentMemoryPartition.value.decode())
return "N/A"
@@ -969,20 +972,20 @@ def resetComputePartition(deviceList):
printLogSpacer()
def resetNpsMode(deviceList):
""" Reset NPS mode to its boot state
def resetMemoryPartition(deviceList):
""" Reset current memory partition to its boot state
@param deviceList: List of DRM devices (can be a single-item list)
"""
printLogSpacer(" Reset nps mode to its boot state ")
printLogSpacer(" Reset memory partition to its boot state ")
for device in deviceList:
originalPartition = getMemoryPartition(device)
t1 = multiprocessing.Process(target=showProgressbar,
args=("Resetting NPS mode",13,))
args=("Resetting memory partition",13,))
t1.start()
addExtraLine=True
start=time.time()
ret = rocmsmi.rsmi_dev_nps_mode_reset(device)
ret = rocmsmi.rsmi_dev_memory_partition_reset(device)
stop=time.time()
duration=stop-start
if t1.is_alive():
@@ -990,9 +993,9 @@ def resetNpsMode(deviceList):
t1.join()
if duration < float(0.1): # For longer runs, add extra line before output
addExtraLine=False # This is to prevent overriding progress bar
if rsmi_ret_ok(ret, device, 'reset_NPS_mode', silent=True):
if rsmi_ret_ok(ret, device, 'reset_memory_partition', silent=True):
resetBootState = getMemoryPartition(device)
printLog(device, "Successfully reset nps mode (" +
printLog(device, "Successfully reset memory partition (" +
originalPartition + ") to boot state (" +
resetBootState + ")", None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_PERMISSION:
@@ -1000,8 +1003,8 @@ def resetNpsMode(deviceList):
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None, addExtraLine)
else:
rsmi_ret_ok(ret, device, 'reset_NPS_mode')
printErrLog(device, 'Failed to reset nps mode to boot state')
rsmi_ret_ok(ret, device, 'reset_memory_partition')
printErrLog(device, 'Failed to reset memory partition to boot state')
printLogSpacer()
@@ -1631,29 +1634,29 @@ def showProgressbar(title="", timeInSeconds=13):
time.sleep(1)
def setNPSMode(deviceList, npsMode):
""" Sets nps mode (memory partition) for a list of devices
def setMemoryPartition(deviceList, memoryPartition):
""" Sets memory partition for a list of devices
@param deviceList: List of DRM devices (can be a single-item list)
@param npsMode: NPS Mode type to set as
@param memoryPartition: Memory Partition type to set as
"""
printLogSpacer(' Set nps mode to %s ' % (str(npsMode).upper()))
printLogSpacer(' Set memory partition to %s ' % (str(memoryPartition).upper()))
for device in deviceList:
npsMode = npsMode.upper()
if npsMode not in nps_mode_type_l:
printErrLog(device, 'Invalid nps mode type %s'
'\nValid nps mode types are %s'
% ( npsMode.upper(),
(', '.join(map(str, nps_mode_type_l))) ))
memoryPartition = memoryPartition.upper()
if memoryPartition not in memory_partition_type_l:
printErrLog(device, 'Invalid memory partition type %s'
'\nValid memory partition types are %s'
% ( memoryPartition.upper(),
(', '.join(map(str, memory_partition_type_l))) ))
return (None, None)
t1 = multiprocessing.Process(target=showProgressbar,
args=("Updating NPS mode",13,))
args=("Updating memory partition",13,))
t1.start()
addExtraLine=True
start=time.time()
ret = rocmsmi.rsmi_dev_nps_mode_set(device,
rsmi_nps_mode_type_dict[npsMode])
ret = rocmsmi.rsmi_dev_memory_partition_set(device,
rsmi_memory_partition_type_dict[memoryPartition])
stop=time.time()
duration=stop-start
if t1.is_alive():
@@ -1662,17 +1665,17 @@ def setNPSMode(deviceList, npsMode):
if duration < float(0.1): # For longer runs, add extra line before output
addExtraLine=False # This is to prevent overriding progress bar
if rsmi_ret_ok(ret, device, 'set_NPS_mode', silent=True):
if rsmi_ret_ok(ret, device, 'set_memory_partition', silent=True):
printLog(device,
'Successfully set nps mode to %s' % (npsMode),
'Successfully set memory partition to %s' % (memoryPartition),
None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_PERMISSION:
printLog(device, 'Permission denied', None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None, addExtraLine)
else:
rsmi_ret_ok(ret, device, 'set_NPS_mode')
printErrLog(device, 'Failed to retrieve NPS mode, even though device supports it.')
rsmi_ret_ok(ret, device, 'set_memory_partition')
printErrLog(device, 'Failed to retrieve memory partition, even though device supports it.')
printLogSpacer()
def showVersion(isCSV=False):
@@ -2580,7 +2583,6 @@ def getDevProductInfo(device, silent=False):
"""
# Retrieve card vendor
MAX_BUFF_SIZE = 256
MAX_DESC_SIZE = 20
device_series = "N/A"
device_model = "N/A"
@@ -3344,22 +3346,22 @@ def showComputePartition(deviceList):
printErrLog(device, 'Failed to retrieve compute partition, even though device supports it.')
printLogSpacer()
def showNPSMode(deviceList):
""" Returns the current NPS mode for a list of devices
def showMemoryPartition(deviceList):
""" Returns the current memory partition for a list of devices
@param deviceList: List of DRM devices (can be a single-item list)
"""
npsMode = create_string_buffer(256)
printLogSpacer(' Current NPS Mode ')
memoryPartition = create_string_buffer(256)
printLogSpacer(' Current Memory Partition ')
for device in deviceList:
ret = rocmsmi.rsmi_dev_nps_mode_get(device, npsMode, 256)
if rsmi_ret_ok(ret, device, 'get_NPS_mode',silent=True) and npsMode.value.decode():
printLog(device, 'NPS Mode', npsMode.value.decode())
ret = rocmsmi.rsmi_dev_memory_partition_get(device, memoryPartition, 256)
if rsmi_ret_ok(ret, device, 'get_memory_partition',silent=True) and memoryPartition.value.decode():
printLog(device, 'Memory Partition', memoryPartition.value.decode())
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None)
else:
rsmi_ret_ok(ret, device, 'get_NPS_mode')
printErrLog(device, 'Failed to retrieve NPS mode, even though device supports it.')
rsmi_ret_ok(ret, device, 'get_memory_partition')
printErrLog(device, 'Failed to retrieve current memory partition, even though device supports it.')
printLogSpacer()
@@ -3556,6 +3558,9 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
if my_ret != rsmi_status_t.RSMI_STATUS_SUCCESS:
err_str = c_char_p()
rocmsmi.rsmi_status_string(my_ret, byref(err_str))
# leaving the commented out prints/logs to help identify errors in the future
# print("error string = " + str(err_str))
# print("error string (w/ decode)= " + str(err_str.value.decode()))
returnString = ''
if device is not None:
returnString += '%s GPU[%s]:' % (my_ret, device)
@@ -3566,6 +3571,7 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
if err_str.value is not None:
returnString += '%s\t' % (err_str.value.decode())
if not PRINT_JSON:
# logging.debug('%s', returnString)
if not silent:
logging.debug('%s', returnString)
if my_ret in rsmi_status_verbose_err_out:
@@ -3722,7 +3728,7 @@ if __name__ == '__main__':
action='store_true')
groupDisplay.add_argument('--shownodesbw', help='Shows the numa nodes ', action='store_true')
groupDisplay.add_argument('--showcomputepartition', help='Shows current compute partitioning ', action='store_true')
groupDisplay.add_argument('--shownpsmode', help='Shows current NPS mode ', action='store_true')
groupDisplay.add_argument('--showmemorypartition', help='Shows current memory partition ', action='store_true')
groupActionReset.add_argument('-r', '--resetclocks', help='Reset clocks and OverDrive to default',
action='store_true')
@@ -3734,7 +3740,7 @@ if __name__ == '__main__':
groupActionReset.add_argument('--resetxgmierr', help='Reset XGMI error count', action='store_true')
groupActionReset.add_argument('--resetperfdeterminism', help='Disable performance determinism', action='store_true')
groupActionReset.add_argument('--resetcomputepartition', help='Resets to boot compute partition state', action='store_true')
groupActionReset.add_argument('--resetnpsmode', help='Resets to boot NPS mode state', action='store_true')
groupActionReset.add_argument('--resetmemorypartition', help='Resets to boot memory partition state', action='store_true')
groupAction.add_argument('--setclock',
help='Set Clock Frequency Level(s) for specified clock (requires manual Perf level)',
metavar=('TYPE','LEVEL'), nargs=2)
@@ -3772,8 +3778,8 @@ if __name__ == '__main__':
groupAction.add_argument('--setcomputepartition', help='Set compute partition',
choices=compute_partition_type_l + [x.lower() for x in compute_partition_type_l],
type=str, nargs=1)
groupAction.add_argument('--setnpsmode', help='Set nps mode',
choices=nps_mode_type_l + [x.lower() for x in nps_mode_type_l],
groupAction.add_argument('--setmemorypartition', help='Set memory partition',
choices=memory_partition_type_l + [x.lower() for x in memory_partition_type_l],
type=str, nargs=1)
groupAction.add_argument('--rasenable', help='Enable RAS for specified block and error type', type=str, nargs=2,
metavar=('BLOCK', 'ERRTYPE'))
@@ -3823,7 +3829,7 @@ if __name__ == '__main__':
or args.setpoweroverdrive or args.resetpoweroverdrive or args.rasenable or args.rasdisable or \
args.rasinject or args.gpureset or args.setperfdeterminism or args.setslevel or args.setmlevel or \
args.setvc or args.setsrange or args.setmrange or args.setclock or \
args.setcomputepartition or args.setnpsmode or args.resetcomputepartition or args.resetnpsmode:
args.setcomputepartition or args.setmemorypartition or args.resetcomputepartition or args.resetmemorypartition:
relaunchAsSudo()
# If there is one or more device specified, use that for all commands, otherwise use a
@@ -3886,7 +3892,7 @@ if __name__ == '__main__':
args.showreplaycount = True
args.showvc = True
args.showcomputepartition = True
args.shownpsmode = True
args.showmemorypartition = True
if not PRINT_JSON:
args.showprofile = True
@@ -4015,8 +4021,8 @@ if __name__ == '__main__':
showEnergy(deviceList)
if args.showcomputepartition:
showComputePartition(deviceList)
if args.shownpsmode:
showNPSMode(deviceList)
if args.showmemorypartition:
showMemoryPartition(deviceList)
if args.setclock:
setClocks(deviceList, args.setclock[0], [int(args.setclock[1])])
if args.setsclk:
@@ -4057,8 +4063,8 @@ if __name__ == '__main__':
setPerfDeterminism(deviceList, args.setperfdeterminism[0])
if args.setcomputepartition:
setComputePartition(deviceList, args.setcomputepartition[0])
if args.setnpsmode:
setNPSMode(deviceList, args.setnpsmode[0])
if args.setmemorypartition:
setMemoryPartition(deviceList, args.setmemorypartition[0])
if args.resetprofile:
resetProfile(deviceList)
if args.resetxgmierr:
@@ -4067,8 +4073,8 @@ if __name__ == '__main__':
resetPerfDeterminism(deviceList)
if args.resetcomputepartition:
resetComputePartition(deviceList)
if args.resetnpsmode:
resetNpsMode(deviceList)
if args.resetmemorypartition:
resetMemoryPartition(deviceList)
if args.rasenable:
setRas(deviceList, 'enable', args.rasenable[0], args.rasenable[1])
if args.rasdisable:
+6 -6
Просмотреть файл
@@ -634,27 +634,27 @@ rsmi_compute_partition_type = rsmi_compute_partition_type_t
# will return string 'CPX'
compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
class rsmi_nps_mode_type_t(c_int):
class rsmi_memory_partition_type_t(c_int):
RSMI_MEMORY_PARTITION_UNKNOWN = 0
RSMI_MEMORY_PARTITION_NPS1 = 1
RSMI_MEMORY_PARTITION_NPS2 = 2
RSMI_MEMORY_PARTITION_NPS4 = 3
RSMI_MEMORY_PARTITION_NPS8 = 4
rsmi_nps_mode_type_dict = {
rsmi_memory_partition_type_dict = {
'NPS1': 1,
'NPS2': 2,
'NPS4': 3,
'NPS8': 4
}
rsmi_nps_mode_type = rsmi_nps_mode_type_t
rsmi_memory_partition_type = rsmi_memory_partition_type_t
# nps_mode_type_l includes string names for the rsmi_compute_partition_type_t
# memory_partition_type_l includes string names for the rsmi_memory_partition_type_t
# Usage example to get corresponding names:
# nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2]
# memory_partition_type_l[rsmi_memory_partition_type_t.RSMI_MEMORY_PARTITION_NPS2 - 1]
# will return string 'NPS2' (the list is 0-based; enum values start at 1 after UNKNOWN)
nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
memory_partition_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
class rsmi_power_label(str, Enum):
AVG_POWER = '(Avg)'
+171 -149
Просмотреть файл
@@ -58,9 +58,9 @@
if (RET != RSMI_STATUS_SUCCESS) { \
const char *err_str; \
std::cout << "[ERROR] RSMI call returned " << (RET) \
<< " at line " << __LINE__ << std::endl; \
<< " at line " << __LINE__ << "\n"; \
rsmi_status_string((RET), &err_str); \
std::cout << err_str << std::endl; \
std::cout << err_str << "\n"; \
} \
}
@@ -100,10 +100,10 @@
} \
} else if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
std::cout << "Not Supported." \
<< std::endl; \
<< "\n"; \
} else if ((RET) == RSMI_STATUS_NOT_YET_IMPLEMENTED) { \
std::cout << "Not Yet Implemented." \
<< std::endl; \
<< "\n"; \
} else { \
CHK_RSMI_RET(RET) \
} \
@@ -112,7 +112,7 @@
#define CHK_RSMI_NOT_SUPPORTED_RET(RET) { \
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
std::cout << "Not Supported." \
<< std::endl; \
<< "\n"; \
} else { \
CHK_RSMI_RET(RET) \
} \
@@ -121,10 +121,10 @@
#define CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(RET) { \
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
std::cout << "Not Supported." \
<< std::endl; \
<< "\n"; \
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
std::cout << "[ERROR] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
<< std::endl; \
<< "\n"; \
} else { \
CHK_RSMI_RET(RET) \
} \
@@ -133,10 +133,10 @@
#define CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(RET) {\
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
std::cout << "Not Supported."\
<< std::endl; \
<< "\n"; \
} else if ((RET) == RSMI_STATUS_SETTING_UNAVAILABLE) { \
std::cout << "[WARN] RSMI_STATUS_SETTING_UNAVAILABLE retrieved." \
<< std::endl; \
<< "\n"; \
} else { \
CHK_RSMI_RET(RET) \
} \
@@ -145,27 +145,27 @@
#define CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(RET) { \
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
std::cout << "Not Supported." \
<< std::endl; \
<< "\n"; \
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
std::cout << "[WARN] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
<< std::endl; \
<< "\n"; \
} else if ((RET) == RSMI_STATUS_INSUFFICIENT_SIZE) { \
std::cout << "[WARN] RSMI_STATUS_INSUFFICIENT_SIZE retrieved." \
<< std::endl; \
<< "\n"; \
} else { \
CHK_RSMI_RET(RET) \
} \
}
static void print_test_header(const char *str, uint32_t dv_ind) {
std::cout << "********************************" << std::endl;
std::cout << "*** " << str << std::endl;
std::cout << "********************************" << std::endl;
std::cout << "Device index: " << dv_ind << std::endl;
std::cout << "********************************" << "\n";
std::cout << "*** " << str << "\n";
std::cout << "********************************" << "\n";
std::cout << "Device index: " << dv_ind << "\n";
}
static void print_mini_header(const char *str) {
std::cout << "\n>> " << str << " <<" << std::endl;
std::cout << "\n>> " << str << " <<" << "\n";
}
static const char *
@@ -189,7 +189,7 @@ power_profile_string(rsmi_power_profile_preset_masks_t profile) {
}
static const std::string
compute_partition_string(rsmi_compute_partition_type partition) {
compute_partition_string(rsmi_compute_partition_type_t partition) {
switch (partition) {
case RSMI_COMPUTE_PARTITION_CPX:
return "CPX";
@@ -216,7 +216,7 @@ mapStringToRSMIComputePartitionTypes {
};
static const std::string
nps_mode_string(rsmi_nps_mode_type_t partition) {
memory_partition_string(rsmi_memory_partition_type_t partition) {
switch (partition) {
case RSMI_MEMORY_PARTITION_NPS1:
return "NPS1";
@@ -231,8 +231,8 @@ nps_mode_string(rsmi_nps_mode_type_t partition) {
}
}
static std::map<std::string, rsmi_nps_mode_type_t>
mapStringToRSMINpsModeTypes {
static std::map<std::string, rsmi_memory_partition_type_t>
mapStringToRSMIMemoryPartitionTypes {
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
@@ -274,7 +274,7 @@ static bool isFileWritable(rsmi_status_t response) {
bool fileWritable = true;
if (isUserRunningAsSudo() && (response == RSMI_STATUS_PERMISSION)) {
std::cout << "[WARN] User is running with sudo "
<< "permissions, file is not writable." << std::endl;
<< "permissions, file is not writable." << "\n";
fileWritable = false;
} else {
CHK_AND_PRINT_RSMI_ERR_RET(response)
@@ -292,23 +292,23 @@ static rsmi_status_t test_power_profile(uint32_t dv_ind) {
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
CHK_RSMI_NOT_SUPPORTED_RET(ret)
if (ret != RSMI_STATUS_SUCCESS) {
std::cout << "***Skipping Power Profile test." << std::endl;
std::cout << "***Skipping Power Profile test." << "\n";
return RSMI_STATUS_SUCCESS;
}
CHK_RSMI_RET(ret)
std::cout << "The available power profiles are:" << std::endl;
std::cout << "The available power profiles are:" << "\n";
uint64_t tmp = 1;
while (tmp <= RSMI_PWR_PROF_PRST_LAST) {
if ((tmp & status.available_profiles) == tmp) {
std::cout << "\t" <<
power_profile_string((rsmi_power_profile_preset_masks_t)tmp) << std::endl;
power_profile_string((rsmi_power_profile_preset_masks_t)tmp) << "\n";
}
tmp = tmp << 1;
}
std::cout << "The current power profile is: " <<
power_profile_string(status.current) << std::endl;
power_profile_string(status.current) << "\n";
// Try setting the profile to a different power profile
rsmi_bit_field_t diff_profiles;
@@ -326,40 +326,40 @@ static rsmi_status_t test_power_profile(uint32_t dv_ind) {
} else if (diff_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) {
new_prof = RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK;
} else {
std::cout << "No other non-custom power profiles to set to" << std::endl;
std::cout << "No other non-custom power profiles to set to" << "\n";
return ret;
}
std::cout << "Setting power profile to " << power_profile_string(new_prof)
<< "..." << std::endl;
<< "..." << "\n";
ret = rsmi_dev_power_profile_set(dv_ind, 0, new_prof);
CHK_RSMI_RET(ret)
std::cout << "Done." << std::endl;
std::cout << "Done." << "\n";
rsmi_dev_perf_level_t pfl;
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
CHK_RSMI_RET(ret)
std::cout << "Performance Level is now " <<
perf_level_string(pfl) << std::endl;
perf_level_string(pfl) << "\n";
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
CHK_RSMI_RET(ret)
std::cout << "The current power profile is: " <<
power_profile_string(status.current) << std::endl;
std::cout << "Resetting perf level to auto..." << std::endl;
power_profile_string(status.current) << "\n";
std::cout << "Resetting perf level to auto..." << "\n";
ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
CHK_RSMI_RET(ret)
std::cout << "Done." << std::endl;
std::cout << "Done." << "\n";
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
CHK_RSMI_RET(ret)
std::cout << "Performance Level is now " <<
perf_level_string(pfl) << std::endl;
perf_level_string(pfl) << "\n";
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
CHK_RSMI_RET(ret)
std::cout << "The current power profile is: " <<
power_profile_string(status.current) << std::endl;
power_profile_string(status.current) << "\n";
return ret;
}
@@ -376,12 +376,12 @@ static rsmi_status_t test_power_cap(uint32_t dv_ind) {
ret = rsmi_dev_power_cap_get(dv_ind, 0, &orig);
CHK_RSMI_RET(ret)
std::cout << "Original Power Cap: " << orig << " uW" << std::endl;
std::cout << "Original Power Cap: " << orig << " uW" << "\n";
std::cout << "Power Cap Range: " << max << " uW to " << min <<
" uW" << std::endl;
" uW" << "\n";
new_cap = (max + min)/2;
std::cout << "Setting new cap to " << new_cap << "..." << std::endl;
std::cout << "Setting new cap to " << new_cap << "..." << "\n";
ret = rsmi_dev_power_cap_set(dv_ind, 0, new_cap);
CHK_RSMI_RET(ret)
@@ -389,15 +389,15 @@ static rsmi_status_t test_power_cap(uint32_t dv_ind) {
ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
CHK_RSMI_RET(ret)
std::cout << "New Power Cap: " << new_cap << " uW" << std::endl;
std::cout << "Resetting cap to " << orig << "..." << std::endl;
std::cout << "New Power Cap: " << new_cap << " uW" << "\n";
std::cout << "Resetting cap to " << orig << "..." << "\n";
ret = rsmi_dev_power_cap_set(dv_ind, 0, orig);
CHK_RSMI_RET(ret)
ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
CHK_RSMI_RET(ret)
std::cout << "Current Power Cap: " << new_cap << " uW" << std::endl;
std::cout << "Current Power Cap: " << new_cap << " uW" << "\n";
return ret;
}
@@ -407,21 +407,21 @@ static rsmi_status_t test_set_overdrive(uint32_t dv_ind) {
uint32_t val;
print_test_header("Overdrive Control", dv_ind);
std::cout << "Set Overdrive level to 0%..." << std::endl;
std::cout << "Set Overdrive level to 0%..." << "\n";
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0);
CHK_RSMI_RET(ret)
std::cout << "Set Overdrive level to 10%..." << std::endl;
std::cout << "Set Overdrive level to 10%..." << "\n";
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 10);
CHK_RSMI_RET(ret)
ret = rsmi_dev_overdrive_level_get(dv_ind, &val);
CHK_RSMI_RET(ret)
std::cout << "\t**New OverDrive Level:" << std::dec << val << std::endl;
std::cout << "Reset Overdrive level to 0%..." << std::endl;
std::cout << "\t**New OverDrive Level:" << std::dec << val << "\n";
std::cout << "Reset Overdrive level to 0%..." << "\n";
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0);
CHK_RSMI_RET(ret)
ret = rsmi_dev_overdrive_level_get(dv_ind, &val);
CHK_RSMI_RET(ret)
std::cout << "\t**New OverDrive Level:" << std::dec << val << std::endl;
std::cout << "\t**New OverDrive Level:" << std::dec << val << "\n";
return ret;
}
@@ -437,21 +437,21 @@ static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
std::cout << "Original fan speed: ";
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &orig_speed);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << orig_speed << std::endl;
std::cout << orig_speed << "\n";
} else {
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "***Skipping Fan Speed Control test." << std::endl;
std::cout << "***Skipping Fan Speed Control test." << "\n";
return RSMI_STATUS_SUCCESS;
}
if (orig_speed == 0) {
std::cout << "***System fan speed value is 0. Skip fan test." << std::endl;
std::cout << "***System fan speed value is 0. Skip fan test." << "\n";
return RSMI_STATUS_SUCCESS;
}
new_speed = 1.1 * static_cast<double>(orig_speed);
std::cout << "Setting fan speed to " << new_speed << std::endl;
std::cout << "Setting fan speed to " << new_speed << "\n";
ret = rsmi_dev_fan_speed_set(dv_ind, 0, static_cast<uint64_t>(new_speed));
CHK_RSMI_RET(ret)
@@ -461,7 +461,7 @@ static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_spd);
CHK_RSMI_RET(ret)
std::cout << "New fan speed: " << cur_spd << std::endl;
std::cout << "New fan speed: " << cur_spd << "\n";
assert(
(cur_spd > static_cast<int64_t>(0.95 * static_cast<double>(new_speed)) &&
@@ -469,7 +469,7 @@ static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
(cur_spd >
static_cast<int64_t>(0.95 * static_cast<double>(RSMI_MAX_FAN_SPEED))));
std::cout << "Resetting fan control to auto..." << std::endl;
std::cout << "Resetting fan control to auto..." << "\n";
ret = rsmi_dev_fan_reset(dv_ind, 0);
CHK_RSMI_RET(ret)
@@ -479,7 +479,7 @@ static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_spd);
CHK_RSMI_RET(ret)
std::cout << "End fan speed: " << cur_spd << std::endl;
std::cout << "End fan speed: " << cur_spd << "\n";
return ret;
}
@@ -494,29 +494,29 @@ static rsmi_status_t test_set_perf_level(uint32_t dv_ind) {
ret = rsmi_dev_perf_level_get(dv_ind, &orig_pfl);
CHK_RSMI_RET(ret)
std::cout << "\t**Original Perf Level:" << perf_level_string(orig_pfl) <<
std::endl;
"\n";
pfl =
(rsmi_dev_perf_level_t)((orig_pfl + 1) % (RSMI_DEV_PERF_LEVEL_LAST + 1));
std::cout << "Set Performance Level to " << (uint32_t)pfl << " ..." <<
std::endl;
"\n";
ret = rsmi_dev_perf_level_set_v1(dv_ind, pfl);
if (ret != RSMI_STATUS_SUCCESS) {
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "***Skipping Performance Level Control test." << std::endl;
std::cout << "***Skipping Performance Level Control test." << "\n";
return RSMI_STATUS_SUCCESS;
}
CHK_RSMI_RET(ret)
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
CHK_RSMI_RET(ret)
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << std::endl;
std::cout << "Reset Perf level to " << orig_pfl << " ..." << std::endl;
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << "\n";
std::cout << "Reset Perf level to " << orig_pfl << " ..." << "\n";
ret = rsmi_dev_perf_level_set_v1(dv_ind, orig_pfl);
CHK_RSMI_RET(ret)
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
CHK_RSMI_RET(ret)
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << std::endl;
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << "\n";
return ret;
}
@@ -541,7 +541,7 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
std::cout << "Initial frequency for clock" << rsmi_clk << " is " <<
f.current << std::endl;
f.current << "\n";
// Set clocks to something other than the usual default of the lowest
// frequency.
@@ -554,7 +554,7 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
freq_bm_str.size()-1));
std::cout << "Setting frequency mask for clock " << rsmi_clk <<
" to 0b" << freq_bm_str << " ..." << std::endl;
" to 0b" << freq_bm_str << " ..." << "\n";
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
@@ -562,15 +562,15 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
std::cout << "Frequency is now index " << f.current << std::endl;
std::cout << "Resetting mask to all frequencies." << std::endl;
std::cout << "Frequency is now index " << f.current << "\n";
std::cout << "Resetting mask to all frequencies." << "\n";
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
CHK_FILE_PERMISSIONS(ret)
}
std::cout << std::endl;
std::cout << "\n";
return RSMI_STATUS_SUCCESS;
}
@@ -581,19 +581,19 @@ static void print_frequencies(rsmi_frequencies_t *f) {
if (j == f->current) {
std::cout << " *";
}
std::cout << std::endl;
std::cout << "\n";
}
}
static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
rsmi_status_t ret;
uint32_t buffer_len = 10;
char originalComputePartition[buffer_len];
const uint32_t kLength = 10;
char originalComputePartition[kLength];
originalComputePartition[0] = '\0';
print_test_header("Compute Partitioning Control", dv_ind);
ret = rsmi_dev_compute_partition_get(dv_ind, originalComputePartition,
buffer_len);
kLength);
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
return RSMI_STATUS_SUCCESS;
@@ -604,95 +604,94 @@ static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|| ((originalComputePartition != nullptr)
&& (originalComputePartition[0] == '\0')))
? "UNKNOWN" : originalComputePartition)
<< std::endl << std::endl;
<< "\n" << "\n";
for (int newComputePartition = RSMI_COMPUTE_PARTITION_CPX;
newComputePartition <= RSMI_COMPUTE_PARTITION_QPX;
newComputePartition++) {
rsmi_compute_partition_type newPartition
= static_cast<rsmi_compute_partition_type>(newComputePartition);
rsmi_compute_partition_type_t newPartition
= static_cast<rsmi_compute_partition_type_t>(newComputePartition);
std::cout << "Attempting to set compute partition to "
<< compute_partition_string(newPartition) << "..."
<< std::endl;
<< "\n";
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
std::cout << "Done setting compute partition to "
<< compute_partition_string(newPartition) << "." << std::endl;
std::cout << std::endl << std::endl;
<< compute_partition_string(newPartition) << "." << "\n";
std::cout << "\n" << "\n";
}
std::cout << "About to initate compute partition reset..." << std::endl;
std::cout << "About to initate compute partition reset..." << "\n";
ret = rsmi_dev_compute_partition_reset(dv_ind);
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "Done resetting compute partition." << std::endl;
std::cout << "Done resetting compute partition." << "\n";
std::string myComputePartition = originalComputePartition;
if (myComputePartition.empty() == false) {
std::cout << "Resetting back to original compute partition to "
<< originalComputePartition << "... " << std::endl;
<< originalComputePartition << "... " << "\n";
rsmi_compute_partition_type origComputePartitionType
= mapStringToRSMIComputePartitionTypes[originalComputePartition];
ret = rsmi_dev_compute_partition_set(dv_ind, origComputePartitionType);
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
std::cout << "Done" << std::endl;
std::cout << "Done" << "\n";
}
return RSMI_STATUS_SUCCESS;
}
/**
 * Example/test driver: exercises the memory partition write APIs on one device.
 *
 * Steps, in order:
 *   1. Read and print the device's current memory partition.
 *   2. Try to set every partition from RSMI_MEMORY_PARTITION_NPS1 through
 *      RSMI_MEMORY_PARTITION_NPS8.
 *   3. Call rsmi_dev_memory_partition_reset().
 *   4. If step 1 produced a non-empty string, set the partition back to it.
 *
 * @param dv_ind  device index passed through to the rsmi_dev_memory_partition_*
 *                calls.
 * @return RSMI_STATUS_SUCCESS when the sequence completes, or when the device
 *         reports RSMI_STATUS_NOT_SUPPORTED for the initial get or for a set
 *         (the test is skipped in that case). Other statuses are handled by
 *         the CHK_* macros, which are defined elsewhere in this file.
 */
static rsmi_status_t test_set_memory_partition(uint32_t dv_ind) {
  rsmi_status_t ret;
  const uint32_t kLength = 10;
  char originalMemoryPartition[kLength];
  originalMemoryPartition[0] = '\0';

  print_test_header("Memory Partition Control", dv_ind);
  ret = rsmi_dev_memory_partition_get(dv_ind, originalMemoryPartition, kLength);
  CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
  if (ret == RSMI_STATUS_NOT_SUPPORTED) {
    return RSMI_STATUS_SUCCESS;
  }
  // originalMemoryPartition is a stack array and can never be null, so only
  // the empty-string case needs to be mapped to "UNKNOWN".
  std::cout << "Original Memory Partition: "
            << ((originalMemoryPartition[0] == '\0')
                ? "UNKNOWN" : originalMemoryPartition)
            << "\n\n";

  for (int newMemPartition = RSMI_MEMORY_PARTITION_NPS1;
       newMemPartition <= RSMI_MEMORY_PARTITION_NPS8;
       newMemPartition++) {
    rsmi_memory_partition_type_t newMemoryPartition
      = static_cast<rsmi_memory_partition_type_t>(newMemPartition);
    std::cout << "Attempting to set memory partition to "
              << memory_partition_string(newMemoryPartition) << "..."
              << "\n";
    ret = rsmi_dev_memory_partition_set(dv_ind, newMemoryPartition);
    CHK_RSMI_NOT_SUPPORTED_RET(ret)
    if (ret == RSMI_STATUS_NOT_SUPPORTED) {
      // do not continue attempting to set, device does not support setting
      return RSMI_STATUS_SUCCESS;
    }
    std::cout << "Done setting memory partition to "
              << memory_partition_string(newMemoryPartition)
              << "." << "\n\n\n";
  }

  std::cout << "About to initate memory partition reset...\n";
  ret = rsmi_dev_memory_partition_reset(dv_ind);
  CHK_RSMI_NOT_SUPPORTED_RET(ret)
  std::cout << "Done resetting memory partition.\n";

  // Only restore when the initial get actually produced a partition name.
  std::string myMemPart = originalMemoryPartition;
  if (myMemPart.empty() == false) {
    std::cout << "Resetting memory partition to " << originalMemoryPartition
              << "...\n";
    rsmi_memory_partition_type_t origMemoryPartitionType
      = mapStringToRSMIMemoryPartitionTypes[originalMemoryPartition];
    ret = rsmi_dev_memory_partition_set(dv_ind, origMemoryPartitionType);
    CHK_RSMI_NOT_SUPPORTED_RET(ret)
    std::cout << "Done\n";
  }
  return RSMI_STATUS_SUCCESS;
}
@@ -717,10 +716,10 @@ int main() {
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
ret = rsmi_dev_id_get(i, &val_ui16);
CHK_RSMI_RET_I(ret)
std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << std::endl;
std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << "\n";
ret = rsmi_dev_revision_get(i, &val_ui16);
CHK_RSMI_RET_I(ret)
std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << std::endl;
std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << "\n";
char current_compute_partition[256];
current_compute_partition[0] = '\0';
@@ -732,69 +731,92 @@ int main() {
? "UNKNOWN" : current_compute_partition);
if (ret != RSMI_STATUS_SUCCESS) {
std::cout << ", RSMI_STATUS = ";
} else {
std::cout << std::endl;
} else {
std::cout << "\n";
}
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
uint32_t len = 5;
char nps_mode[len];
nps_mode[0] = '\0';
ret = rsmi_dev_nps_mode_get(i, nps_mode, len);
std::cout << "\t**NPS Mode: "
<< (((nps_mode == nullptr)
|| ((nps_mode != nullptr)
&& (nps_mode[0] == '\0')))
? "UNKNOWN" : nps_mode);
const uint32_t kLength = 5;
char memory_partition[kLength];
memory_partition[0] = '\0';
ret = rsmi_dev_memory_partition_get(i, memory_partition, kLength);
std::cout << "\t**Current Memory Partition: "
<< (((memory_partition == nullptr)
|| ((memory_partition != nullptr)
&& (memory_partition[0] == '\0')))
? "UNKNOWN" : memory_partition);
if (ret != RSMI_STATUS_SUCCESS) {
std::cout << ", RSMI_STATUS = ";
} else {
std::cout << std::endl;
std::cout << "\n";
}
CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(ret)
std::cout << "\t**rsmi_minmax_bandwidth_get(0, " << i << ", ...): ";
ret = rsmi_dev_pci_id_get(0, &val_ui64);
ret = rsmi_dev_pci_id_get(i, &val2_ui64);
if (i > 0 && val_ui64 != val2_ui64) {
uint64_t min_bandwidth = 0;
uint64_t max_bandwidth = 0;
ret = rsmi_minmax_bandwidth_get(0, i, &min_bandwidth, &max_bandwidth);
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
std::cout << "\nMinimum Bandwidth: " << min_bandwidth
<< "\nMaximum Bandwidth: " << max_bandwidth;
} else {
std::cout << "Not Supported\n";
}
ret = rsmi_dev_gpu_metrics_info_get(i, &p);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**GPU METRICS" << std::endl;
std::cout << "\t**GPU METRICS" << "\n";
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**Performance Level:" <<
perf_level_string(pfl) << std::endl;
perf_level_string(pfl) << "\n";
ret = rsmi_dev_overdrive_level_get(i, &val_ui32);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**OverDrive Level:" << val_ui32 << std::endl;
std::cout << "\t**OverDrive Level:" << val_ui32 << "\n";
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**Supported GPU Memory clock frequencies: ";
std::cout << f.num_supported << std::endl;
std::cout << f.num_supported << "\n";
print_frequencies(&f);
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**Supported GPU clock frequencies: ";
std::cout << f.num_supported << std::endl;
std::cout << f.num_supported << "\n";
print_frequencies(&f);
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SOC, &f);
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
std::cout << "\t**Supported GPU clock frequencies (SOC clk): ";
std::cout << f.num_supported << std::endl;
std::cout << f.num_supported << "\n";
std::cout << "\t**Current value (SOC clk): ";
std::cout << f.current << std::endl;
std::cout << f.current << "\n";
print_frequencies(&f);
std::cout << "\t**Monitor name: ";
char name[128];
ret = rsmi_dev_name_get(i, name, 128);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << name << std::endl;
std::cout << name << "\n";
std::cout << "\t**Temperature: ";
ret = rsmi_dev_temp_metric_get(i, 0, RSMI_TEMP_CURRENT, &val_i64);
std::cout << "\t**Temperature (edge): ";
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE,
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << val_i64/1000 << "C" << std::endl;
std::cout << val_i64/1000 << "C" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t**Temperature (junction): ";
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_JUNCTION,
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << val_i64/1000 << "C" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
@@ -802,7 +824,7 @@ int main() {
ret = rsmi_dev_volt_metric_get(i, RSMI_VOLT_TYPE_VDDGFX,
RSMI_VOLT_CURRENT, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << val_i64 << "mV" << std::endl;
std::cout << val_i64 << "mV" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
@@ -813,21 +835,21 @@ int main() {
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << (static_cast<float>(val_i64)/val_ui64) * 100;
std::cout << "% (" << std::dec << val_i64 << "/"
<< std::dec << val_ui64 << ")" << std::endl;
<< std::dec << val_ui64 << ")" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t**Current fan RPMs: ";
ret = rsmi_dev_fan_rpms_get(i, 0, &val_i64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << std::dec << val_i64 << std::endl;
std::cout << std::dec << val_i64 << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t**Current Power Cap: ";
ret = rsmi_dev_power_cap_get(i, 0, &val_ui64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << std::dec << val_ui64 << "uW" <<std::endl;
std::cout << std::dec << val_ui64 << "uW" <<"\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
@@ -835,23 +857,23 @@ int main() {
ret = rsmi_dev_power_cap_range_get(i, 0, &val_ui64, &val2_ui64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << std::dec << val2_ui64 << " to "
<< std::dec << val_ui64 << " uW" << std::endl;
<< std::dec << val_ui64 << " uW" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t**Average Power Usage: ";
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
if (ret == RSMI_STATUS_SUCCESS) {
std::cout << static_cast<float>(val_ui64)/1000 << " W" << std::endl;
std::cout << static_cast<float>(val_ui64)/1000 << " W" << "\n";
}
CHK_RSMI_NOT_SUPPORTED_RET(ret)
std::cout << "\t=======" << std::endl;
std::cout << "\t=======" << "\n";
}
std::cout << "***** Testing write api's" << std::endl;
std::cout << "***** Testing write api's" << "\n";
if (isUserRunningAsSudo() == false) {
std::cout << "Write APIs require users to execute with sudo. "
<< "Cannot proceed." << std::endl;
<< "Cannot proceed." << "\n";
return 0;
}
@@ -877,7 +899,7 @@ int main() {
ret = test_set_freq(i);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
ret = test_set_nps_mode(i);
ret = test_set_memory_partition(i);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
}
+286 -47
Просмотреть файл
@@ -417,7 +417,6 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
if (ret == ENOENT) {
return rsmi_status_t::RSMI_STATUS_NOT_SUPPORTED;
}
return amd::smi::ErrnoToRsmiStatus(ret);
}
@@ -1823,8 +1822,8 @@ mapRSMIToStringComputePartitionTypes {
{RSMI_COMPUTE_PARTITION_QPX, "QPX"}
};
std::map<rsmi_nps_mode_type_t, std::string>
mapRSMIToStringNPSModeTypes {
std::map<rsmi_memory_partition_type_t, std::string>
mapRSMIToStringMemoryPartitionTypes {
{RSMI_MEMORY_PARTITION_UNKNOWN, "UNKNOWN"},
{RSMI_MEMORY_PARTITION_NPS1, "NPS1"},
{RSMI_MEMORY_PARTITION_NPS2, "NPS2"},
@@ -1832,8 +1831,8 @@ mapRSMIToStringNPSModeTypes {
{RSMI_MEMORY_PARTITION_NPS8, "NPS8"}
};
std::map<std::string, rsmi_nps_mode_type_t>
mapStringToNPSModeTypes {
std::map<std::string, rsmi_memory_partition_type_t>
mapStringToMemoryPartitionTypes {
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
@@ -4240,7 +4239,7 @@ rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst,
rsmi_status_t
rsmi_minmax_bandwidth_get(uint32_t dv_ind_src, uint32_t dv_ind_dst,
uint64_t *min_bandwidth, uint64_t *max_bandwidth){
uint64_t *min_bandwidth, uint64_t *max_bandwidth) {
TRY
uint32_t dv_ind = dv_ind_src;
@@ -4438,9 +4437,20 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
uint32_t len) {
TRY
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
ss << __PRETTY_FUNCTION__ << "| ======= start =======, dv_ind = "
<< dv_ind;
LOG_TRACE(ss);
if ((len == 0) || (compute_partition == nullptr)) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: len was 0 or compute_partition variable was null"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
LOG_ERROR(ss);
return RSMI_STATUS_INVALID_ARGS;
}
CHK_SUPPORT_NAME_ONLY(compute_partition)
@@ -4449,14 +4459,46 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
rsmi_status_t ret = get_compute_partition(dv_ind,
returning_compute_partition);
if (ret != RSMI_STATUS_SUCCESS) { return ret; }
if (ret != RSMI_STATUS_SUCCESS) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: could not retrieve current compute partition"
<< " | Returning = "
<< getRSMIStatusString(ret) << " |";
LOG_ERROR(ss);
return ret;
}
std::size_t length = returning_compute_partition.copy(compute_partition, len);
compute_partition[length]='\0';
if (len < (returning_compute_partition.size() + 1)) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: requested size was insufficient"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
LOG_ERROR(ss);
return RSMI_STATUS_INSUFFICIENT_SIZE;
}
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << compute_partition
<< " | Returning = "
<< getRSMIStatusString(ret) << " |";
LOG_TRACE(ss);
return ret;
CATCH
}
@@ -4504,6 +4546,16 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
break;
case RSMI_COMPUTE_PARTITION_INVALID:
default:
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: requested setting was invalid"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
LOG_ERROR(ss);
return RSMI_STATUS_INVALID_ARGS;
}
@@ -4512,32 +4564,78 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
rsmi_status_t available_ret =
is_available_compute_partition(dv_ind, newComputePartitionStr);
if (available_ret != RSMI_STATUS_SUCCESS) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: not an available compute partition setting"
<< " | Returning = "
<< getRSMIStatusString(available_ret) << " |";
LOG_ERROR(ss);
return available_ret;
}
// do nothing if compute_partition is the current compute partition
rsmi_status_t ret_get = get_compute_partition(dv_ind, currentComputePartition);
rsmi_status_t ret_get =
get_compute_partition(dv_ind, currentComputePartition);
// we can try to set, even if we get unexpected data
if (ret_get != RSMI_STATUS_SUCCESS
&& ret_get != RSMI_STATUS_UNEXPECTED_DATA) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Cause: could retrieve current compute partition or retrieved"
<< " unexpected data"
<< " | Returning = "
<< getRSMIStatusString(ret_get) << " |";
LOG_ERROR(ss);
return ret_get;
}
rsmi_compute_partition_type_t currRSMIComputePartition
= mapStringToRSMIComputePartitionTypes[currentComputePartition];
if (currRSMIComputePartition == compute_partition) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success - compute partition was already set at requested value"
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << newComputePartitionStr
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |";
LOG_TRACE(ss);
return RSMI_STATUS_SUCCESS;
}
GET_DEV_FROM_INDX
int ret = dev->writeDevInfo(amd::smi::kDevComputePartition,
newComputePartitionStr);
return amd::smi::ErrnoToRsmiStatus(ret);
rsmi_status_t returnResponse = amd::smi::ErrnoToRsmiStatus(ret);
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << newComputePartitionStr
<< " | Returning = "
<< getRSMIStatusString(returnResponse) << " |";
LOG_TRACE(ss);
// TODO(charpoag): investigate providing GPU busy state occured with
return returnResponse;
CATCH
}
static rsmi_status_t get_nps_mode(uint32_t dv_ind, std::string &nps_mode) {
static rsmi_status_t get_memory_partition(uint32_t dv_ind,
std::string &memory_partition) {
TRY
CHK_SUPPORT_NAME_ONLY(nps_mode.c_str())
CHK_SUPPORT_NAME_ONLY(memory_partition.c_str())
std::string val_str;
DEVICE_MUTEX
@@ -4548,7 +4646,7 @@ static rsmi_status_t get_nps_mode(uint32_t dv_ind, std::string &nps_mode) {
return ret;
}
switch (mapStringToNPSModeTypes[val_str]) {
switch (mapStringToMemoryPartitionTypes[val_str]) {
case RSMI_MEMORY_PARTITION_NPS1:
case RSMI_MEMORY_PARTITION_NPS2:
case RSMI_MEMORY_PARTITION_NPS4:
@@ -4556,16 +4654,17 @@ static rsmi_status_t get_nps_mode(uint32_t dv_ind, std::string &nps_mode) {
break;
case RSMI_MEMORY_PARTITION_UNKNOWN:
default:
// Retrieved an unknown NPS mode
// Retrieved an unknown memory partition
return RSMI_STATUS_UNEXPECTED_DATA;
}
nps_mode = val_str;
memory_partition = val_str;
return RSMI_STATUS_SUCCESS;
CATCH
}
rsmi_status_t
rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode) {
rsmi_dev_memory_partition_set(uint32_t dv_ind,
rsmi_memory_partition_type_t memory_partition) {
TRY
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
@@ -4575,7 +4674,7 @@ rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode) {
bool isCorrectDevice = false;
char boardName[128];
boardName[0] = '\0';
// rsmi_dev_nps_mode_set is only available for for discrete variant,
// rsmi_dev_memory_partition_set is only available for for discrete variant,
// others are required to update through bios settings
rsmi_dev_name_get(dv_ind, boardName, 128);
std::string myBoardName = boardName;
@@ -4589,14 +4688,24 @@ rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode) {
}
if (!isCorrectDevice) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Cause: device board name does not support this action"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
LOG_ERROR(ss);
return RSMI_STATUS_NOT_SUPPORTED;
}
std::string newNPSMode
= mapRSMIToStringNPSModeTypes[nps_mode];
std::string currentNPSMode;
std::string newMemoryPartition
= mapRSMIToStringMemoryPartitionTypes[memory_partition];
std::string currentMemoryPartition;
switch (nps_mode) {
switch (memory_partition) {
case RSMI_MEMORY_PARTITION_NPS1:
case RSMI_MEMORY_PARTITION_NPS2:
case RSMI_MEMORY_PARTITION_NPS4:
@@ -4604,57 +4713,156 @@ rsmi_dev_nps_mode_set(uint32_t dv_ind, rsmi_nps_mode_type_t nps_mode) {
break;
case RSMI_MEMORY_PARTITION_UNKNOWN:
default:
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Cause: requested setting was invalid"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
LOG_ERROR(ss);
return RSMI_STATUS_INVALID_ARGS;
}
// do nothing if nps_mode is the current NPS mode
rsmi_status_t ret_get = get_nps_mode(dv_ind, currentNPSMode);
// do nothing if memory_partition is the current mode
rsmi_status_t ret_get = get_memory_partition(dv_ind, currentMemoryPartition);
// we can try to set, even if we get unexpected data
if (ret_get != RSMI_STATUS_SUCCESS
&& ret_get != RSMI_STATUS_UNEXPECTED_DATA) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Cause: could retrieve current memory partition or retrieved"
<< " unexpected data"
<< " | Returning = "
<< getRSMIStatusString(ret_get) << " |";
LOG_ERROR(ss);
return ret_get;
}
rsmi_nps_mode_type_t currRSMINpsMode
= mapStringToNPSModeTypes[currentNPSMode];
if (currRSMINpsMode == nps_mode) {
rsmi_memory_partition_type_t currRSMIMemoryPartition
= mapStringToMemoryPartitionTypes[currentMemoryPartition];
if (currRSMIMemoryPartition == memory_partition) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success - no change, current memory partition was already requested"
<< " setting"
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Data: " << newMemoryPartition
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |";
LOG_TRACE(ss);
return RSMI_STATUS_SUCCESS;
}
GET_DEV_FROM_INDX
int ret = dev->writeDevInfo(amd::smi::kDevMemoryPartition, newNPSMode);
int ret = dev->writeDevInfo(amd::smi::kDevMemoryPartition,
newMemoryPartition);
if (amd::smi::ErrnoToRsmiStatus(ret) != RSMI_STATUS_SUCCESS) {
return amd::smi::ErrnoToRsmiStatus(ret);
rsmi_status_t err = amd::smi::ErrnoToRsmiStatus(ret);
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Cause: issue writing reqested setting of " + newMemoryPartition
<< " | Returning = "
<< getRSMIStatusString(err) << " |";
LOG_ERROR(ss);
return err;
}
return dev->restartAMDGpuDriver();
rsmi_status_t restartRet = dev->restartAMDGpuDriver();
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success - if restart completed successfully"
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Data: " << newMemoryPartition
<< " | Returning = "
<< getRSMIStatusString(restartRet) << " |";
LOG_TRACE(ss);
return restartRet;
CATCH
}
/**
 * Copies the device's current memory partition name into a caller buffer.
 *
 * @param dv_ind            device index.
 * @param memory_partition  caller-owned output buffer; receives a
 *                          NUL-terminated string.
 * @param len               size of @p memory_partition in bytes, including
 *                          room for the terminating NUL.
 *
 * @return RSMI_STATUS_INVALID_ARGS when @p len is 0 or @p memory_partition is
 *         null; the status from get_memory_partition() when that call fails;
 *         RSMI_STATUS_INSUFFICIENT_SIZE when the buffer cannot hold the whole
 *         name plus NUL (the buffer then holds a truncated, NUL-terminated
 *         prefix); otherwise RSMI_STATUS_SUCCESS.
 */
rsmi_status_t
rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
                              uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start =======";
  LOG_TRACE(ss);
  if ((len == 0) || (memory_partition == nullptr)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
       << " | Cause: user sent invalid arguments, len = 0 or memory partition"
       << " was a null ptr"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }
  CHK_SUPPORT_NAME_ONLY(memory_partition)

  std::string returning_memory_partition;
  rsmi_status_t ret = get_memory_partition(dv_ind,
                                           returning_memory_partition);

  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
       << " | Cause: could not successfully retrieve current memory partition "
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy at most len - 1 characters so the terminating NUL always lands
  // inside the caller's buffer (len >= 1 is guaranteed by the check above).
  const std::size_t buff_size =
      returning_memory_partition.copy(memory_partition, len - 1);
  memory_partition[buff_size] = '\0';

  if (len < (returning_memory_partition.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
     << " | Data: " << memory_partition
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
@@ -4668,22 +4876,37 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) {
DEVICE_MUTEX
GET_DEV_FROM_INDX
rsmi_status_t ret = RSMI_STATUS_NOT_SUPPORTED;
// read temp file
// Only use 1st index, rest are there in-case of future issues
// NOTE: Partitions sets cause rocm-smi indexes to fluctuate
// since the nodes are grouped in respect to primary node - why we only use
// 1st node/device id to reset
std::string bootState =
dev->readBootPartitionState<rsmi_compute_partition_type_t>(dv_ind);
dev->readBootPartitionState<rsmi_compute_partition_type_t>(0);
// Initiate reset
// If bootState is UNKNOWN, we cannot reset - return RSMI_STATUS_NOT_SUPPORTED
// Likely due to device not supporting it
if (bootState != "UNKNOWN") {
rsmi_compute_partition_type_t compute_partition =
mapStringToRSMIComputePartitionTypes[bootState];
mapStringToRSMIComputePartitionTypes[bootState];
ret = rsmi_dev_compute_partition_set(dv_ind, compute_partition);
}
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success - if original boot state was not unknown or valid setting"
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << bootState
<< " | Returning = "
<< getRSMIStatusString(ret) << " |";
LOG_TRACE(ss);
return ret;
CATCH
}
rsmi_status_t rsmi_dev_nps_mode_reset(uint32_t dv_ind) {
rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) {
TRY
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
@@ -4692,16 +4915,32 @@ rsmi_status_t rsmi_dev_nps_mode_reset(uint32_t dv_ind) {
DEVICE_MUTEX
GET_DEV_FROM_INDX
rsmi_status_t ret = RSMI_STATUS_NOT_SUPPORTED;
// read temp file
// Only use 1st index, rest are there in-case of future issues
// NOTE: Partitions sets cause rocm-smi indexes to fluctuate.
// Since the nodes are grouped in respect to primary node - why we only use
// 1st node/device id to reset
std::string bootState =
dev->readBootPartitionState<rsmi_nps_mode_type_t>(dv_ind);
dev->readBootPartitionState<rsmi_memory_partition_type_t>(0);
// Initiate reset
// If bootState is UNKNOWN, we cannot reset - return RSMI_STATUS_NOT_SUPPORTED
// Likely due to device not supporting it
if (bootState != "UNKNOWN") {
rsmi_nps_mode_type_t nps_mode = mapStringToNPSModeTypes[bootState];
ret = rsmi_dev_nps_mode_set(dv_ind, nps_mode);
rsmi_memory_partition_type_t memory_partition =
mapStringToMemoryPartitionTypes[bootState];
ret = rsmi_dev_memory_partition_set(dv_ind, memory_partition);
}
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Success - if original boot state was not unknown or valid setting"
<< " | Device #: " << dv_ind
<< " | Type: "
<< RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition)
<< " | Data: " << bootState
<< " | Returning = "
<< getRSMIStatusString(ret) << " |";
LOG_TRACE(ss);
return ret;
CATCH
}
+20 -18
Просмотреть файл
@@ -1272,7 +1272,7 @@ template <typename T> rsmi_status_t storeParameter(uint32_t dv_ind);
// Uses template specialization, to restrict types to identify
// calls needed to complete the function.
// typename - restricted to
// rsmi_compute_partition_type_t or rsmi_compute_partition_type_t
// rsmi_compute_partition_type_t or rsmi_memory_partition_type_t
// dv_ind - device index
// tempFileName - base file name
template <>
@@ -1286,9 +1286,9 @@ rsmi_status_t storeParameter<rsmi_compute_partition_type_t>(uint32_t dv_ind) {
if (doesFileExist) {
return returnStatus;
}
uint32_t length = 128;
char data[length];
rsmi_status_t ret = rsmi_dev_compute_partition_get(dv_ind, data, length);
const uint32_t kLen = 128;
char data[kLen];
rsmi_status_t ret = rsmi_dev_compute_partition_get(dv_ind, data, kLen);
rsmi_status_t storeRet;
if (ret == RSMI_STATUS_SUCCESS) {
@@ -1312,31 +1312,32 @@ rsmi_status_t storeParameter<rsmi_compute_partition_type_t>(uint32_t dv_ind) {
// Uses template specialization, to restrict types to identify
// calls needed to complete the function.
// typename - restricted to
// rsmi_compute_partition_type_t or rsmi_compute_partition_type_t
// rsmi_compute_partition_type_t or rsmi_memory_partition_type_t
// dv_ind - device index
// tempFileName - base file name
template <> rsmi_status_t storeParameter<rsmi_nps_mode_type_t>(uint32_t dv_ind) {
template <>
rsmi_status_t storeParameter<rsmi_memory_partition_type_t>(uint32_t dv_ind) {
rsmi_status_t returnStatus = RSMI_STATUS_SUCCESS;
uint32_t length = 128;
char data[length];
uint32_t kDatalength = 128;
char data[kDatalength];
bool doesFileExist;
std::tie(doesFileExist, std::ignore) = readTmpFile(dv_ind, "boot",
"nps_mode");
"memory_partition");
// if temporary file exists -> we do not need to store anything new
// if not, read & store the state value
if (doesFileExist) {
return returnStatus;
}
rsmi_status_t ret = rsmi_dev_nps_mode_get(dv_ind, data, length);
rsmi_status_t ret = rsmi_dev_memory_partition_get(dv_ind, data, kDatalength);
rsmi_status_t storeRet;
if (ret == RSMI_STATUS_SUCCESS) {
storeRet = storeTmpFile(dv_ind, "nps_mode", "boot", data);
storeRet = storeTmpFile(dv_ind, "memory_partition", "boot", data);
} else if (ret == RSMI_STATUS_NOT_SUPPORTED) {
// not supported is ok
storeRet = storeTmpFile(dv_ind, "nps_mode", "boot", "UNKNOWN");
storeRet = storeTmpFile(dv_ind, "memory_partition", "boot", "UNKNOWN");
} else {
storeRet = storeTmpFile(dv_ind, "nps_mode", "boot", "UNKNOWN");
storeRet = storeTmpFile(dv_ind, "memory_partition", "boot", "UNKNOWN");
returnStatus = ret;
}
@@ -1350,9 +1351,9 @@ template <> rsmi_status_t storeParameter<rsmi_nps_mode_type_t>(uint32_t dv_ind)
// Stores the partition parameters for one device by calling
// storeParameter<> once for the compute partition type and once for the
// memory partition type.
//   dv_ind - device index, forwarded unchanged to both storeParameter<> calls
// Returns the compute-partition status when it is not RSMI_STATUS_SUCCESS;
// otherwise returns the status of the memory-partition call. Both calls are
// always made, regardless of the first result.
// NOTE(review): this listing appears to interleave the pre-rename (npsRet /
// rsmi_nps_mode_type_t) and post-rename (ret / rsmi_memory_partition_type_t)
// forms of the same statements - confirm which set is live before building.
rsmi_status_t Device::storeDevicePartitions(uint32_t dv_ind) {
rsmi_status_t returnStatus = RSMI_STATUS_SUCCESS;
returnStatus = storeParameter<rsmi_compute_partition_type_t>(dv_ind);
rsmi_status_t npsRet = storeParameter<rsmi_nps_mode_type_t>(dv_ind);
if (returnStatus == RSMI_STATUS_SUCCESS) { // only record earliest error
returnStatus = npsRet;
rsmi_status_t ret = storeParameter<rsmi_memory_partition_type_t>(dv_ind);
// Overwrite only when the compute-partition call succeeded, so an earlier
// failure is never masked by a later result.
if (returnStatus == RSMI_STATUS_SUCCESS) { // only record earliest error
returnStatus = ret;
}
return returnStatus;
}
@@ -1381,10 +1382,11 @@ std::string Device::readBootPartitionState<rsmi_compute_partition_type_t>(
// or rsmi_compute_partition_type_t
// dv_ind - device index
// Specialization that reads the stored boot-time memory partition state.
//   dv_ind - device index, forwarded to readTmpFile
// Returns the second element of the pair produced by readTmpFile; the first
// element is discarded via std::ignore.
// NOTE(review): this listing appears to interleave the pre-rename
// ("nps_mode" / rsmi_nps_mode_type_t) and post-rename ("memory_partition" /
// rsmi_memory_partition_type_t) forms of the same lines - confirm which set
// is live before building.
template <>
std::string Device::readBootPartitionState<rsmi_nps_mode_type_t>(
std::string Device::readBootPartitionState<rsmi_memory_partition_type_t>(
uint32_t dv_ind) {
std::string boot_state;
std::tie(std::ignore, boot_state) = readTmpFile(dv_ind, "boot", "nps_mode");
std::tie(std::ignore, boot_state) = readTmpFile(dv_ind, "boot",
"memory_partition");
return boot_state;
}
+15 -3
Просмотреть файл
@@ -56,6 +56,7 @@
#include <sstream>
#include <string>
#include <unordered_set>
#include <regex>
#include "rocm_smi/rocm_smi_io_link.h"
#include "rocm_smi/rocm_smi_kfd.h"
@@ -183,6 +184,7 @@ int ReadKFDDeviceProperties(uint32_t kfd_node_id,
int ret;
std::ifstream fs;
std::string properties_path;
std::ostringstream ss;
assert(retVec != nullptr);
@@ -192,9 +194,14 @@ int ReadKFDDeviceProperties(uint32_t kfd_node_id,
return ret;
}
ss << __PRETTY_FUNCTION__ << " | properties file contains = {";
while (std::getline(fs, line)) {
retVec->push_back(line);
ss << line << ",\n";
}
ss << "}";
// Leaving below to debug any future properties file changes
// LOG_DEBUG(ss);
if (retVec->empty()) {
fs.close();
@@ -616,15 +623,20 @@ int KFDNode::ReadProperties(void) {
}
std::string key_str;
// std::string val_str;
std::string val_str;
uint64_t val_int; // Assume all properties are unsigned integers for now
std::istringstream fs;
std::ostringstream ss;
for (const auto & i : propVec) {
fs.str(i);
fs >> key_str;
fs >> val_int;
fs >> val_str;
// Leaving below to debug any new properties file changes
// ss << __PRETTY_FUNCTION__ << " | key = " << key_str
// << "; val = " << val_str;
// LOG_TRACE(ss);
val_int = std::stoull(val_str);
properties_[key_str] = val_int;
fs.str("");
+94 -67
Просмотреть файл
@@ -59,6 +59,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include <climits>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_device.h"
@@ -780,7 +781,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
myNode.s_node_id = node_id;
myNode.s_gpu_id = gpu_id;
myNode.s_unique_id = unique_id;
if(gpu_id != 0) { // only add gpu nodes, 0 = CPU
if (gpu_id != 0) { // only add gpu nodes, 0 = CPU
allSystemNodes.emplace(unique_id, myNode);
}
} else {
@@ -790,93 +791,119 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
}
ss << __PRETTY_FUNCTION__ << " | Ordered system nodes found = {";
for(auto i: allSystemNodes) {
for (auto i : allSystemNodes) {
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
<< "], "
;
<< "], ";
}
ss << "}";
LOG_DEBUG(ss);
uint32_t cardAdded = 0;
// Discover all root cards & gpu partitions associated with each
for (uint32_t node_id = 0; node_id < count; node_id++) {
for (uint32_t cardId = 0; cardId < count; cardId++) {
std::string path = kPathDRMRoot;
path += "/card";
path += std::to_string(node_id);
path += std::to_string(cardId);
uint64_t primary_unique_id = 0;
// each identified gpu card node is a primary node for
// potential matching unique ids
if (isAMDGPU(path) ||
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
std::string d_name = "card";
d_name += std::to_string(node_id);
AddToDeviceList(d_name);
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
std::string d_name = "card";
d_name += std::to_string(cardId);
AddToDeviceList(d_name);
ss << __PRETTY_FUNCTION__
<< " | Ordered system nodes seen in lookup = {";
for (auto i : allSystemNodes) {
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
<< "], ";
}
ss << "}";
LOG_DEBUG(ss);
ss << __PRETTY_FUNCTION__
<< " | Ordered system nodes seen in lookup = {";
for (auto i : allSystemNodes) {
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
<< "], ";
}
ss << "}";
LOG_DEBUG(ss);
uint64_t temp_primary_unique_id = 0;
if (allSystemNodes.empty()) {
continue;
}
uint64_t temp_primary_unique_id = 0;
if (allSystemNodes.empty()) {
cardAdded++;
ss << __PRETTY_FUNCTION__
<< " | allSystemNodes.empty() = true, continue...";
LOG_DEBUG(ss);
continue;
}
// get lowest key 1st to keep order of nodes matching card
uint32_t lowest_NodeId = 0;
uint32_t curr_NodeId = 0;
// get current partition
const int kSize = 256;
char computePartition[kSize];
std::string strCompPartition = "UNKNOWN";
uint32_t numMonDevices = 0;
rsmi_num_monitor_devices(&numMonDevices);
if (rsmi_dev_compute_partition_get(cardAdded, computePartition, kSize)
== RSMI_STATUS_SUCCESS) {
strCompPartition = computePartition;
}
uint64_t device_uuid = 0;
if (rsmi_dev_unique_id_get(cardAdded, &device_uuid)
!= RSMI_STATUS_SUCCESS) {
cardAdded++;
allSystemNodes.erase(device_uuid);
ss << __PRETTY_FUNCTION__
<< " | rsmi_dev_unique_id_get(cardId, &device_uuid)"
<< " was not successful, continue.. ";
LOG_DEBUG(ss);
continue;
}
for (auto it = allSystemNodes.begin(), end = allSystemNodes.end();
it != end; it = allSystemNodes.upper_bound(it->first)) {
curr_NodeId = it->second.s_node_id;
if (it == allSystemNodes.begin()) {
lowest_NodeId = it->second.s_node_id;
}
if (curr_NodeId <= lowest_NodeId) {
lowest_NodeId = curr_NodeId;
temp_primary_unique_id = it->second.s_unique_id;
}
}
ss << __PRETTY_FUNCTION__
<< " | lowest_NodeId = " << std::to_string(lowest_NodeId)
<< " | curr_NodeId = " << std::to_string(curr_NodeId)
<< " | temp_primary_unique_id = "
<< std::to_string(temp_primary_unique_id);
LOG_DEBUG(ss);
temp_primary_unique_id =
allSystemNodes.find(device_uuid)->second.s_unique_id;
auto temp_numb_nodes = allSystemNodes.count(temp_primary_unique_id);
if (temp_primary_unique_id != 0) {
primary_unique_id = temp_primary_unique_id;
} else {
allSystemNodes.erase(primary_unique_id);
continue;
}
ss << __PRETTY_FUNCTION__
<< " | device/node id (cardId) = " << std::to_string(cardId)
<< " | card id (cardAdded) = " << std::to_string(cardAdded)
<< " | numMonDevices = " << std::to_string(numMonDevices)
<< " | compute partition = " << strCompPartition
<< " | temp_primary_unique_id = "
<< std::to_string(temp_primary_unique_id)
<< " | Num of nodes matching temp_primary_unique_id = "
<< temp_numb_nodes
<< " | device_uuid (hex/uint) = "
<< print_unsigned_hex_and_int(device_uuid)
<< " | device_uuid (uint64_t) = " << device_uuid;
LOG_DEBUG(ss);
auto numb_nodes = allSystemNodes.count(primary_unique_id);
ss << __PRETTY_FUNCTION__ << " | REFRESH - primary_unique_id = "
<< std::to_string(primary_unique_id) << " has "
<< std::to_string(numb_nodes) << " known gpu nodes";
LOG_DEBUG(ss);
while (numb_nodes > 1) {
std::string secNode = "card";
secNode += std::to_string(node_id); // add the primary node id
AddToDeviceList(secNode);
numb_nodes--;
}
// remove already added nodes associated with current card
auto erasedNodes = allSystemNodes.erase(primary_unique_id);
ss << __PRETTY_FUNCTION__ << " | After finding primary_unique_id = "
<< std::to_string(primary_unique_id) << " erased "
<< std::to_string(erasedNodes) << " nodes";
LOG_DEBUG(ss);
if (temp_primary_unique_id != 0) {
primary_unique_id = temp_primary_unique_id;
} else {
cardAdded++;
// remove already added nodes associated with current card
auto erasedNodes = allSystemNodes.erase(0);
continue;
}
auto numb_nodes = allSystemNodes.count(primary_unique_id);
ss << __PRETTY_FUNCTION__ << " | REFRESH - primary_unique_id = "
<< std::to_string(primary_unique_id) << " has "
<< std::to_string(numb_nodes) << " known gpu nodes";
LOG_DEBUG(ss);
while (numb_nodes > 1) {
std::string secNode = "card";
secNode += std::to_string(cardId); // add the primary node id
AddToDeviceList(secNode);
numb_nodes--;
cardAdded++;
}
// remove already added nodes associated with current card
auto erasedNodes = allSystemNodes.erase(primary_unique_id);
ss << __PRETTY_FUNCTION__ << " | After finding primary_unique_id = "
<< std::to_string(primary_unique_id) << " erased "
<< std::to_string(erasedNodes) << " nodes";
LOG_DEBUG(ss);
cardAdded++;
}
}
+50 -25
Просмотреть файл
@@ -45,8 +45,12 @@
#include <stdint.h>
#include <stddef.h>
#include <unistd.h>
#include <iostream>
#include <chrono> // NOLINT [build]
#include <map>
#include <string>
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
@@ -134,6 +138,26 @@ void TestComputePartitionReadWrite::Run(void) {
return;
}
// Confirm system supports compute partition, before executing wait
ret = rsmi_dev_compute_partition_get(0, orig_char_computePartition, 255);
if (ret == RSMI_STATUS_SUCCESS) {
// Adding a delay - since changing partitions depends on gpus not
// being in an active state, we'll wait a few seconds before starting
// full testing
auto start = std::chrono::high_resolution_clock::now();
int waitTime = 20;
std::cout << "** Waiting for "
<< std::dec << waitTime
<< " seconds, for any GPU"
<< " activity to clear up. **" << std::endl;
sleep(waitTime);
auto stop = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << "** Waiting took " << duration.count() / 1000000
<< " seconds **" << std::endl;
}
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
if (dv_ind != 0) {
IF_VERB(STANDARD) {
@@ -142,7 +166,7 @@ void TestComputePartitionReadWrite::Run(void) {
}
PrintDeviceHeader(dv_ind);
//Standard checks to see if API is supported, before running full tests
// Standard checks to see if API is supported, before running full tests
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
255);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
@@ -169,12 +193,12 @@ void TestComputePartitionReadWrite::Run(void) {
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
// Verify api support checking functionality is working
uint32_t length = 2;
char smallBuffer[length];
err = rsmi_dev_compute_partition_get(dv_ind, smallBuffer, length);
uint32_t kLength = 2;
char smallBuffer[kLength];
err = rsmi_dev_compute_partition_get(dv_ind, smallBuffer, kLength);
size_t size = sizeof(smallBuffer)/sizeof(*smallBuffer);
ASSERT_EQ(err, RSMI_STATUS_INSUFFICIENT_SIZE);
ASSERT_EQ((size_t)length, size);
ASSERT_EQ((size_t)kLength, size);
IF_VERB(STANDARD) {
if (err == RSMI_STATUS_INSUFFICIENT_SIZE) {
std::cout << "\t**"
@@ -207,9 +231,9 @@ void TestComputePartitionReadWrite::Run(void) {
}
// Verify api support checking functionality is working
rsmi_compute_partition_type new_computePartition
= rsmi_compute_partition_type::RSMI_COMPUTE_PARTITION_INVALID;
err = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
rsmi_compute_partition_type_t newPartition
= rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_INVALID;
err = rsmi_dev_compute_partition_set(dv_ind, newPartition);
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
(err == RSMI_STATUS_PERMISSION));
@@ -246,24 +270,24 @@ void TestComputePartitionReadWrite::Run(void) {
* //!< work together with shared memory
*/
for (int partition = RSMI_COMPUTE_PARTITION_CPX;
partition <= RSMI_COMPUTE_PARTITION_QPX;
for (int partition =
rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_CPX;
partition <= rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_QPX;
partition++) {
new_computePartition
= static_cast<rsmi_compute_partition_type>(partition);
newPartition = static_cast<rsmi_compute_partition_type_t>(partition);
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "======== TEST RSMI_COMPUTE_PARTITION_"
<< computePartitionString(new_computePartition)
<< computePartitionString(newPartition)
<< " ===============" << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Attempting to set compute partition to: "
<< computePartitionString(new_computePartition) << std::endl;
<< computePartitionString(newPartition) << std::endl;
}
ret = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
bool isSettingUnavailable = false;
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
(ret == RSMI_STATUS_SETTING_UNAVAILABLE));
@@ -282,7 +306,7 @@ void TestComputePartitionReadWrite::Run(void) {
}
if (isSettingUnavailable) {
ASSERT_EQ(RSMI_STATUS_SETTING_UNAVAILABLE, ret);
ASSERT_STRNE(computePartitionString(new_computePartition).c_str(),
ASSERT_STRNE(computePartitionString(newPartition).c_str(),
current_char_computePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
@@ -290,19 +314,19 @@ void TestComputePartitionReadWrite::Run(void) {
<< "RSMI_STATUS_SETTING_UNAVAILABLE,\n\t current compute "
<< "partition (" << current_char_computePartition
<< ") did not update to ("
<< computePartitionString(new_computePartition) << ")"
<< computePartitionString(newPartition) << ")"
<< std::endl;
}
} else {
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(computePartitionString(new_computePartition).c_str(),
ASSERT_STREQ(computePartitionString(newPartition).c_str(),
current_char_computePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed current compute partition ("
<< current_char_computePartition << ") matches"
<< "\n\t requested compute partition ("
<< computePartitionString(new_computePartition) << ")"
<< computePartitionString(newPartition) << ")"
<< std::endl;
}
}
@@ -355,26 +379,27 @@ void TestComputePartitionReadWrite::Run(void) {
<< "=========== TEST RETURN TO ORIGINAL COMPUTE PARTITION "
<< "SETTING ========" << std::endl;
}
new_computePartition
= mapStringToRSMIComputePartitionTypes.at(orig_char_computePartition);
ret = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
newPartition
= mapStringToRSMIComputePartitionTypes.at(
std::string(orig_char_computePartition));
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**" << "Returning compute partition to: "
<< computePartitionString(new_computePartition) << std::endl;
<< computePartitionString(newPartition) << std::endl;
}
ret = rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
255);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**" << "Attempted to set compute partition: "
<< computePartitionString(new_computePartition) << std::endl
<< computePartitionString(newPartition) << std::endl
<< "\t**" << "Current compute partition: "
<< current_char_computePartition
<< std::endl;
}
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(computePartitionString(new_computePartition).c_str(),
ASSERT_STREQ(computePartitionString(newPartition).c_str(),
current_char_computePartition);
}
}
@@ -47,45 +47,47 @@
#include <stddef.h>
#include <iostream>
#include <string>
#include <map>
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi_test/functional/npsmode_read_write.h"
#include "rocm_smi_test/functional/memorypartition_read_write.h"
#include "rocm_smi_test/test_common.h"
// Constructor: default-constructs the TestBase part, then sets the title and
// description strings for this test case via set_title / set_description.
// NOTE(review): this listing appears to show both the pre-rename
// (TestNPSModeReadWrite) and post-rename (TestMemoryPartitionReadWrite)
// versions of the constructor back to back - confirm which one is live.
TestNPSModeReadWrite::TestNPSModeReadWrite() : TestBase() {
set_title("RSMI NPS Mode Read Test");
set_description("The NPS Mode tests verifies that the memory "
"parition setting can be read and updated properly.");
TestMemoryPartitionReadWrite::TestMemoryPartitionReadWrite() : TestBase() {
set_title("RSMI Memory Partition Read Test");
set_description("The memory partition tests verifies that the memory "
"partition settings can be read and updated properly.");
}
// Destructor: empty body, performs no work of its own.
// NOTE(review): pre-rename and post-rename signatures both appear here -
// confirm which one is live.
TestNPSModeReadWrite::~TestNPSModeReadWrite(void) {
TestMemoryPartitionReadWrite::~TestMemoryPartitionReadWrite(void) {
}
// Test setup: delegates entirely to TestBase::SetUp(); adds nothing else.
// NOTE(review): pre-rename and post-rename signatures both appear here -
// confirm which one is live.
void TestNPSModeReadWrite::SetUp(void) {
void TestMemoryPartitionReadWrite::SetUp(void) {
TestBase::SetUp();
return;
}
// Displays test information by delegating to TestBase::DisplayTestInfo().
// NOTE(review): pre-rename and post-rename signatures both appear here -
// confirm which one is live.
void TestNPSModeReadWrite::DisplayTestInfo(void) {
void TestMemoryPartitionReadWrite::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
// Displays results by delegating to TestBase::DisplayResults(); const, so it
// does not modify this test object.
// NOTE(review): pre-rename and post-rename signatures both appear here -
// confirm which one is live.
void TestNPSModeReadWrite::DisplayResults(void) const {
void TestMemoryPartitionReadWrite::DisplayResults(void) const {
TestBase::DisplayResults();
return;
}
// Test teardown: delegates entirely to TestBase::Close().
// NOTE(review): pre-rename and post-rename signatures both appear here -
// confirm which one is live.
void TestNPSModeReadWrite::Close() {
void TestMemoryPartitionReadWrite::Close() {
// This will close handles opened within rsmitst utility calls and call
// rsmi_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
static const std::string
npsModeString(rsmi_nps_mode_type npsModeType) {
switch (npsModeType) {
memoryPartitionString(rsmi_memory_partition_type memoryPartitionType) {
switch (memoryPartitionType) {
case RSMI_MEMORY_PARTITION_NPS1:
return "NPS1";
case RSMI_MEMORY_PARTITION_NPS2:
@@ -99,20 +101,20 @@ npsModeString(rsmi_nps_mode_type npsModeType) {
}
}
// File-local lookup table from a memory partition name ("NPS1", "NPS2",
// "NPS4", "NPS8") to the matching RSMI_MEMORY_PARTITION_* enumerator.
// Only these four names are present; there is no entry for any other string
// (e.g. "UNKNOWN"), so a lookup with std::map::at() on such a name throws
// std::out_of_range.
// NOTE(review): the pre-rename declaration (rsmi_nps_mode_type_t /
// mapStringToRSMINpsModeTypes) and the post-rename declaration both appear
// here - confirm which one is live.
static const std::map<std::string, rsmi_nps_mode_type_t>
mapStringToRSMINpsModeTypes {
static const std::map<std::string, rsmi_memory_partition_type_t>
mapStringToRSMIMemoryPartitionTypes {
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
{"NPS8", RSMI_MEMORY_PARTITION_NPS8}
};
void TestNPSModeReadWrite::Run(void) {
void TestMemoryPartitionReadWrite::Run(void) {
rsmi_status_t ret, err;
char orig_nps_mode[255];
char current_nps_mode[255];
orig_nps_mode[0] = '\0';
current_nps_mode[0] = '\0';
char orig_memory_partition[255];
char current_memory_partition[255];
orig_memory_partition[0] = '\0';
current_memory_partition[0] = '\0';
TestBase::Run();
if (setup_failed_) {
@@ -128,8 +130,8 @@ void TestNPSModeReadWrite::Run(void) {
}
PrintDeviceHeader(dv_ind);
//Standard checks to see if API is supported, before running full tests
ret = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 255);
// Standard checks to see if API is supported, before running full tests
ret = rsmi_dev_memory_partition_get(dv_ind, orig_memory_partition, 255);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << ": "
@@ -140,36 +142,35 @@ void TestNPSModeReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
}
IF_VERB(STANDARD) {
std::cout << std::endl << "\t**"
<< "NPS Mode: "
<< orig_nps_mode << std::endl;
std::cout << std::endl << "\t**Current Memory Partition: "
<< orig_memory_partition << std::endl;
}
if ((orig_nps_mode == nullptr) ||
(orig_nps_mode[0] == '\0')) {
std::cout << "***System nps mode value is not defined or received unexpected data. "
"Skip nps mode test." << std::endl;
if ((orig_memory_partition == nullptr) ||
(orig_memory_partition[0] == '\0')) {
std::cout << "***System memory partition value is not defined or received"
" unexpected data. Skip memory partition test." << std::endl;
continue;
}
ASSERT_TRUE(ret == RSMI_STATUS_SUCCESS);
// Verify api support checking functionality is working
uint32_t length = 2;
char smallBuffer[length];
err = rsmi_dev_nps_mode_get(dv_ind, smallBuffer, length);
uint32_t kLen = 2;
char smallBuffer[kLen];
err = rsmi_dev_memory_partition_get(dv_ind, smallBuffer, kLen);
size_t size = sizeof(smallBuffer)/sizeof(*smallBuffer);
ASSERT_EQ(err, RSMI_STATUS_INSUFFICIENT_SIZE);
ASSERT_EQ((size_t)length, size);
ASSERT_EQ((size_t)kLen, size);
if (err == RSMI_STATUS_INSUFFICIENT_SIZE) {
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed RSMI_STATUS_INSUFFICIENT_SIZE was returned "
<< "and size matches length requested." << std::endl;
<< "and size matches kLen requested." << std::endl;
}
}
// Verify api support checking functionality is working
err = rsmi_dev_nps_mode_get(dv_ind, nullptr, 255);
err = rsmi_dev_memory_partition_get(dv_ind, nullptr, 255);
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
if (err == RSMI_STATUS_INVALID_ARGS) {
@@ -181,7 +182,7 @@ void TestNPSModeReadWrite::Run(void) {
}
// Verify api support checking functionality is working
err = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 0);
err = rsmi_dev_memory_partition_get(dv_ind, orig_memory_partition, 0);
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
(err == RSMI_STATUS_NOT_SUPPORTED));
if (err == RSMI_STATUS_INVALID_ARGS) {
@@ -193,12 +194,12 @@ void TestNPSModeReadWrite::Run(void) {
}
/******************************/
/* rsmi_dev_nps_mode_set(...) */
/* rsmi_dev_memory_partition_set(...) */
/******************************/
// Verify api support checking functionality is working
rsmi_nps_mode_type new_nps_mode;
err = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
// Note: new_nps_mode is not set
rsmi_memory_partition_type new_memory_partition;
err = rsmi_dev_memory_partition_set(dv_ind, new_memory_partition);
// Note: new_memory_partition is not set
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
(err == RSMI_STATUS_NOT_SUPPORTED));
if (err == RSMI_STATUS_INVALID_ARGS) {
@@ -210,9 +211,9 @@ void TestNPSModeReadWrite::Run(void) {
} else if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << ": "
<< "rsmi_dev_nps_mode_set not supported on this device"
<< "\n\t (if rsmi_dev_nps_mode_get works, then likely "
<< "need to set in bios)"
<< "rsmi_dev_memory_partition_set not supported on this "
<< "device\n\t (if rsmi_dev_memory_partition_get works, "
<< "then likely need to set in bios)"
<< std::endl;
}
continue;
@@ -222,8 +223,9 @@ void TestNPSModeReadWrite::Run(void) {
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
// Verify api support checking functionality is working
new_nps_mode = rsmi_nps_mode_type::RSMI_MEMORY_PARTITION_UNKNOWN;
err = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
new_memory_partition =
rsmi_memory_partition_type::RSMI_MEMORY_PARTITION_UNKNOWN;
err = rsmi_dev_memory_partition_set(dv_ind, new_memory_partition);
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
(err == RSMI_STATUS_PERMISSION));
@@ -242,101 +244,106 @@ void TestNPSModeReadWrite::Run(void) {
}
// Re-run original get, so we can reset to later
ret = rsmi_dev_nps_mode_get(dv_ind, orig_nps_mode, 255);
ret = rsmi_dev_memory_partition_get(dv_ind, orig_memory_partition, 255);
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
for (int partition = RSMI_MEMORY_PARTITION_NPS1;
partition <= RSMI_MEMORY_PARTITION_NPS8;
partition++) {
new_nps_mode = static_cast<rsmi_nps_mode_type>(partition);
new_memory_partition = static_cast<rsmi_memory_partition_type>(partition);
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "======== TEST RSMI_MEMORY_PARTITION_"
<< npsModeString(new_nps_mode)
<< memoryPartitionString(new_memory_partition)
<< " ===============" << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Attempting to set nps mode to: "
<< npsModeString(new_nps_mode) << std::endl;
<< "Attempting to set memory partition to: "
<< memoryPartitionString(new_memory_partition) << std::endl;
}
ret = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
ret = rsmi_dev_memory_partition_set(dv_ind, new_memory_partition);
CHK_ERR_ASRT(ret)
ret = rsmi_dev_nps_mode_get(dv_ind, current_nps_mode, 255);
ret = rsmi_dev_memory_partition_get(dv_ind, current_memory_partition,
255);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Current nps mode: " << current_nps_mode << std::endl;
<< "Current memory partition: " << current_memory_partition
<< std::endl;
}
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(npsModeString(new_nps_mode).c_str(), current_nps_mode);
ASSERT_STREQ(memoryPartitionString(new_memory_partition).c_str(),
current_memory_partition);
}
/* TEST RETURN TO BOOT NPS MODE SETTING */
/* TEST RETURN TO BOOT MEMORY PARTITION SETTING */
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "=========== TEST RETURN TO BOOT NPS MODE SETTING "
<< "========" << std::endl;
<< "=========== TEST RETURN TO BOOT MEMORY PARTITION "
<< "SETTING ========" << std::endl;
}
std::string oldMode = current_nps_mode;
std::string oldMode = current_memory_partition;
bool wasResetSuccess = false;
ret = rsmi_dev_nps_mode_reset(dv_ind);
ret = rsmi_dev_memory_partition_reset(dv_ind);
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
(ret == RSMI_STATUS_NOT_SUPPORTED));
if (ret == RSMI_STATUS_SUCCESS) {
wasResetSuccess = true;
}
ret = rsmi_dev_nps_mode_get(dv_ind, current_nps_mode, 255);
ret = rsmi_dev_memory_partition_get(dv_ind, current_memory_partition, 255);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Current nps mode: " << current_nps_mode << std::endl;
<< "Current memory partition: " << current_memory_partition
<< std::endl;
}
if (wasResetSuccess) {
ASSERT_STRNE(oldMode.c_str(), current_nps_mode);
ASSERT_STRNE(oldMode.c_str(), current_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed prior nps mode (" << oldMode << ") is not "
<< "equal to current nps mode ("
<< current_nps_mode << ")" << std::endl;
<< "Confirmed prior memory partition (" << oldMode << ") is "
<< "not equal to current memory partition ("
<< current_memory_partition << ")" << std::endl;
}
} else {
ASSERT_STREQ(oldMode.c_str(), current_nps_mode);
ASSERT_STREQ(oldMode.c_str(), current_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed prior nps mode (" << oldMode << ") is equal"
<< " to current nps mode ("
<< current_nps_mode << ")" << std::endl;
<< "Confirmed prior memory partition (" << oldMode << ") is "
<< "equal to current memory partition ("
<< current_memory_partition << ")" << std::endl;
}
}
/* TEST RETURN TO ORIGINAL NPS MODE SETTING */
/* TEST RETURN TO ORIGINAL MEMORY PARTITION SETTING */
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "=========== TEST RETURN TO ORIGINAL NPS MODE "
<< "=========== TEST RETURN TO ORIGINAL MEMORY PARTITION "
<< "SETTING ========" << std::endl;
}
new_nps_mode
= mapStringToRSMINpsModeTypes.at(orig_nps_mode);
new_memory_partition
= mapStringToRSMIMemoryPartitionTypes.at(orig_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**" << "Returning nps mode to: "
<< npsModeString(new_nps_mode) << std::endl;
std::cout << "\t**" << "Returning memory partition to: "
<< memoryPartitionString(new_memory_partition) << std::endl;
}
ret = rsmi_dev_nps_mode_set(dv_ind, new_nps_mode);
ret = rsmi_dev_memory_partition_set(dv_ind, new_memory_partition);
CHK_ERR_ASRT(ret)
ret = rsmi_dev_nps_mode_get(dv_ind, current_nps_mode, 255);
ret = rsmi_dev_memory_partition_get(dv_ind, current_memory_partition, 255);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**" << "Attempted to set nps mode: "
<< npsModeString(new_nps_mode) << std::endl
<< "\t**" << "Current nps mode: " << current_nps_mode
std::cout << "\t**" << "Attempted to set memory partition: "
<< memoryPartitionString(new_memory_partition) << std::endl
<< "\t**" << "Current memory partition: "
<< current_memory_partition
<< std::endl;
}
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(npsModeString(new_nps_mode).c_str(), current_nps_mode);
ASSERT_STREQ(memoryPartitionString(new_memory_partition).c_str(), current_memory_partition);
}
}
@@ -42,17 +42,17 @@
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
#include "rocm_smi_test/test_base.h"
class TestNPSModeReadWrite : public TestBase {
class TestMemoryPartitionReadWrite : public TestBase {
public:
TestNPSModeReadWrite();
TestMemoryPartitionReadWrite();
// @Brief: Destructor for test case of TestNPSModeReadWrite
virtual ~TestNPSModeReadWrite();
// @Brief: Destructor for test case of TestMemoryPartitionReadWrite
virtual ~TestMemoryPartitionReadWrite();
// @Brief: Setup the environment for measurement
virtual void SetUp();
@@ -70,4 +70,4 @@ class TestNPSModeReadWrite : public TestBase {
virtual void DisplayTestInfo(void);
};
#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_NPSMODE_READ_WRITE_H_
#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
+3 -3
Просмотреть файл
@@ -87,7 +87,7 @@
#include "rocm_smi_test/functional/gpu_metrics_read.h"
#include "rocm_smi_test/functional/metrics_counter_read.h"
#include "rocm_smi_test/functional/perf_determinism.h"
#include "functional/npsmode_read_write.h"
#include "functional/memorypartition_read_write.h"
static RSMITstGlobals *sRSMIGlvalues = nullptr;
@@ -271,8 +271,8 @@ TEST(rsmitstReadWrite, TestComputePartitionReadWrite) {
TestComputePartitionReadWrite tst;
RunGenericTest(&tst);
}
// Registers the memory partition read/write case in the rsmitstReadWrite
// suite: constructs the test object on the stack and hands its address to
// RunGenericTest.
// NOTE(review): the pre-rename (TestNPSModeReadWrite) and post-rename
// (TestMemoryPartitionReadWrite) forms of the TEST header and the local
// declaration both appear here - confirm which pair is live.
TEST(rsmitstReadWrite, TestNPSModeReadWrite) {
TestNPSModeReadWrite tst;
TEST(rsmitstReadWrite, TestMemoryPartitionReadWrite) {
TestMemoryPartitionReadWrite tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestEvtNotifReadWrite) {