Add Current (Instant) Socket Power
* Updates:
- rocm_smi_logger:
General cleanup &
Aligned to cpplint rules for usage
- rocm_smi_monitor:
Fixed MonitorTypes
from not displaying properly in logs
& Added socket power label + current
socket power MonitorTypes
- rocm_smi API:
Added rsmi_dev_current_socket_power_get API
- rocm_smi CLI:
General cleanup,
Concise info now displays device data
in variable width (see printLogSpacer's
new field),
printLogSpacer now has an adjustable
parameter (contentSizeToFit) that overrides appWidth,
Added Socket Power to base rocm-smi +
--showpower CLI calls,
--showpower & base rocm-smi CLI defaults
to printing socket power (if not available,
displays average power)
- Cleaned up temp label references
- power_read gtests:
Added current socket power to testing
Change-Id: Ica57e6f98ad96e2584e7c7955e188f68d2dab89d
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/rocm_smi_lib commit: f078375350]
Tá an tiomantas seo le fáil i:
tiomanta ag
Dmitrii Galantsev
tuismitheoir
80c47e3c09
tiomantas
fd5066437b
@@ -1707,6 +1707,30 @@ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask);
|
||||
rsmi_status_t
|
||||
rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
|
||||
|
||||
/**
|
||||
* @brief Get the current socket power (also known as instant
|
||||
* power) of the device index provided.
|
||||
*
|
||||
* @details Given a device index @p dv_ind and a pointer to a uint64_t
|
||||
* @p socket_power, this function will write the current socket power
|
||||
* (in microwatts) to the uint64_t pointed to by @p socket_power.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] socket_power a pointer to uint64_t to which the current
|
||||
* socket power will be written to. If this parameter is nullptr,
|
||||
* this function will return ::RSMI_STATUS_INVALID_ARGS if the function is
|
||||
* supported with the provided arguments and ::RSMI_STATUS_NOT_SUPPORTED
|
||||
* if it is not supported with the provided arguments.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power);
|
||||
|
||||
/**
|
||||
* @brief Get the energy accumulator counter of the device with provided
|
||||
* device index.
|
||||
|
||||
@@ -130,18 +130,18 @@ class Logger {
|
||||
break;
|
||||
}
|
||||
return *getInstance();
|
||||
};
|
||||
}
|
||||
|
||||
Logger &operator<<(const char* s) {
|
||||
return operator<<(std::string(s));
|
||||
};
|
||||
}
|
||||
|
||||
template <class T> Logger &operator<<(const T &v) {
|
||||
std::ostringstream s;
|
||||
s << v;
|
||||
std::string str = s.str();
|
||||
return operator<<(str);
|
||||
};
|
||||
}
|
||||
|
||||
// Interface for Error Log
|
||||
void error(const char* text) throw();
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -67,6 +67,8 @@ enum MonitorTypes {
|
||||
kMonPowerCapMax,
|
||||
kMonPowerCapMin,
|
||||
kMonPowerAve,
|
||||
kMonPowerInput,
|
||||
kMonPowerLabel,
|
||||
kMonTempMax,
|
||||
kMonTempMin,
|
||||
kMonTempMaxHyst,
|
||||
@@ -94,45 +96,47 @@ enum MonitorTypes {
|
||||
kMonInvalid = 0xFFFFFFFF,
|
||||
};
|
||||
|
||||
const std::map<MonitorTypes,std::string> monitorTypesToString {
|
||||
{MonitorTypes::kMonName, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTemp, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanSpeed, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonMaxFanSpeed, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanRPMs, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanCntrlEnable, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCap, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapDefault, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerAve, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMaxHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMinHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritical, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCriticalHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempEmergency, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempEmergencyHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritMinHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempOffset, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempLowest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempHighest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempLabel, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVolt, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMinCrit, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMaxCrit, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltAverage, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltLowest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltHighest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltLabel, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonInvalid, "amd::smi::kMonName"},
|
||||
const std::map<MonitorTypes, std::string> monitorTypesToString{
|
||||
{MonitorTypes::kMonName, "MonitorTypes::kMonName"},
|
||||
{MonitorTypes::kMonTemp, "MonitorTypes::kMonTemp"},
|
||||
{MonitorTypes::kMonFanSpeed, "MonitorTypes::kMonFanSpeed"},
|
||||
{MonitorTypes::kMonMaxFanSpeed, "MonitorTypes::kMonMaxFanSpeed"},
|
||||
{MonitorTypes::kMonFanRPMs, "MonitorTypes::kMonFanRPMs"},
|
||||
{MonitorTypes::kMonFanCntrlEnable, "MonitorTypes::kMonFanCntrlEnable"},
|
||||
{MonitorTypes::kMonPowerCap, "MonitorTypes::kMonPowerCap"},
|
||||
{MonitorTypes::kMonPowerCapDefault, "MonitorTypes::kMonPowerCapDefault"},
|
||||
{MonitorTypes::kMonPowerCapMax, "MonitorTypes::kMonPowerCapMax"},
|
||||
{MonitorTypes::kMonPowerCapMin, "MonitorTypes::kMonPowerCapMin"},
|
||||
{MonitorTypes::kMonPowerAve, "MonitorTypes::kMonPowerAve"},
|
||||
{MonitorTypes::kMonPowerInput, "MonitorTypes::kMonPowerInput"},
|
||||
{MonitorTypes::kMonPowerLabel, "MonitorTypes::kMonPowerLabel"},
|
||||
{MonitorTypes::kMonTempMax, "MonitorTypes::kMonTempMax"},
|
||||
{MonitorTypes::kMonTempMin, "MonitorTypes::kMonTempMin"},
|
||||
{MonitorTypes::kMonTempMaxHyst, "MonitorTypes::kMonTempMaxHyst"},
|
||||
{MonitorTypes::kMonTempMinHyst, "MonitorTypes::kMonTempMinHyst"},
|
||||
{MonitorTypes::kMonTempCritical, "MonitorTypes::kMonTempCritical"},
|
||||
{MonitorTypes::kMonTempCriticalHyst, "MonitorTypes::kMonTempCriticalHyst"},
|
||||
{MonitorTypes::kMonTempEmergency, "MonitorTypes::kMonTempEmergency"},
|
||||
{MonitorTypes::kMonTempEmergencyHyst,
|
||||
"MonitorTypes::kMonTempEmergencyHyst"},
|
||||
{MonitorTypes::kMonTempCritMin, "MonitorTypes::kMonTempCritMin"},
|
||||
{MonitorTypes::kMonTempCritMinHyst, "MonitorTypes::kMonTempCritMinHyst"},
|
||||
{MonitorTypes::kMonTempOffset, "MonitorTypes::kMonTempOffset"},
|
||||
{MonitorTypes::kMonTempLowest, "MonitorTypes::kMonTempLowest"},
|
||||
{MonitorTypes::kMonTempHighest, "MonitorTypes::kMonTempHighest"},
|
||||
{MonitorTypes::kMonTempLabel, "MonitorTypes::kMonTempLabel"},
|
||||
{MonitorTypes::kMonVolt, "MonitorTypes::kMonVolt"},
|
||||
{MonitorTypes::kMonVoltMax, "MonitorTypes::kMonVoltMax"},
|
||||
{MonitorTypes::kMonVoltMinCrit, "MonitorTypes::kMonVoltMinCrit"},
|
||||
{MonitorTypes::kMonVoltMin, "MonitorTypes::kMonVoltMin"},
|
||||
{MonitorTypes::kMonVoltMaxCrit, "MonitorTypes::kMonVoltMaxCrit"},
|
||||
{MonitorTypes::kMonVoltAverage, "MonitorTypes::kMonVoltAverage"},
|
||||
{MonitorTypes::kMonVoltLowest, "MonitorTypes::kMonVoltLowest"},
|
||||
{MonitorTypes::kMonVoltHighest, "MonitorTypes::kMonVoltHighest"},
|
||||
{MonitorTypes::kMonVoltLabel, "MonitorTypes::kMonVoltLabel"},
|
||||
{MonitorTypes::kMonInvalid, "MonitorTypes::kMonInvalid"},
|
||||
};
|
||||
|
||||
|
||||
class Monitor {
|
||||
public:
|
||||
explicit Monitor(std::string path, RocmSMI_env_vars const *e);
|
||||
|
||||
@@ -45,9 +45,8 @@ CLOCK_JSON_VERSION = 1
|
||||
|
||||
headerString = ' ROCm System Management Interface '
|
||||
footerString = ' End of ROCm SMI Log '
|
||||
|
||||
# Output formatting
|
||||
appWidth = 100
|
||||
appWidth = 90
|
||||
deviceList = []
|
||||
|
||||
# Enable or disable serialized format
|
||||
@@ -383,8 +382,8 @@ def getPidList():
|
||||
return
|
||||
|
||||
|
||||
def getPower(device, silent=False):
|
||||
""" Return the current power level of a given device
|
||||
def getAvgPower(device, silent=False):
|
||||
""" Return the average power level of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
@@ -393,7 +392,21 @@ def getPower(device, silent=False):
|
||||
power = c_uint32()
|
||||
ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power))
|
||||
if rsmi_ret_ok(ret, device, 'get_power_avg', silent):
|
||||
return power.value / 1000000
|
||||
return str(power.value / 1000000)
|
||||
return 'N/A'
|
||||
|
||||
def getCurrentSocketPower(device, silent=False):
    """ Return the current (also known as instant)
    socket power of a given device

    The library reports the reading in microwatts; it is converted to watts
    here and returned as a string so callers can compare it against the
    'N/A' sentinel.

    @param device: DRM device identifier
    @param silent=Turn on to silence error output
    (you plan to handle manually). Default is off.
    @return: socket power in watts as a string, or 'N/A' if the call failed
    """
    # rsmi_dev_current_socket_power_get() writes through a uint64_t*, so the
    # receiving buffer must be 64 bits wide. A c_uint32 would be overrun by
    # four bytes and would truncate the reading.
    power = c_uint64()
    ret = rocmsmi.rsmi_dev_current_socket_power_get(device, byref(power))
    if rsmi_ret_ok(ret, device, 'get_socket_power', silent):
        return str(power.value / 1000000)
    return 'N/A'
|
||||
|
||||
|
||||
@@ -437,7 +450,7 @@ def findFirstAvailableTemp(device):
|
||||
temp = c_int64(0)
|
||||
metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT
|
||||
ret_temp = "N/A"
|
||||
ret_temp_type = "(Unknown)"
|
||||
ret_temp_type = temp_type_lst[0]
|
||||
for i, templist_val in enumerate(temp_type_lst):
|
||||
ret = rocmsmi.rsmi_dev_temp_metric_get(c_uint32(device), i, metric, byref(temp))
|
||||
if rsmi_ret_ok(ret, device, 'get_temp_metric_' + templist_val, silent=True):
|
||||
@@ -448,6 +461,37 @@ def findFirstAvailableTemp(device):
|
||||
continue
|
||||
return (ret_temp_type, ret_temp)
|
||||
|
||||
def getTemperatureLabel(deviceList):
    """ Discovers the first identified temperature label

    Only the first device in the list is probed. The label is returned in
    lower case with any surrounding parentheses removed.

    @param deviceList: List of DRM devices (can be a single-item list)
    @return: temperature label string
    """
    # With no devices to probe, fall back to the first known temperature type
    if not deviceList:
        return temp_type_lst[0].lower()
    (temp_type, _) = findFirstAvailableTemp(deviceList[0])
    return temp_type.lower().replace('(', '').replace(')', '')
|
||||
|
||||
def getPowerLabel(deviceList):
    """ Discovers the first identified power label

    Only the first device in the list is probed: if it reports a usable
    current socket power, the socket label is chosen; otherwise the average
    power label is used.

    @param deviceList: List of DRM devices (can be a single-item list)
    @return: an rsmi_power_label member (a string label value)
    """
    # Default label is AvgPower
    if not deviceList:
        return rsmi_power_label.AVG_POWER
    # Errors are silenced: an unavailable reading simply selects the default
    socketPower = getCurrentSocketPower(deviceList[0], True)
    if socketPower != '0.0' and socketPower != 'N/A':
        return rsmi_power_label.CURRENT_SOCKET_POWER
    return rsmi_power_label.AVG_POWER
|
||||
|
||||
def getVbiosVersion(device, silent=False):
|
||||
""" Returns the VBIOS version for a given device
|
||||
|
||||
@@ -679,23 +723,35 @@ def printListLog(metricName, valuesList):
|
||||
print(listStr + line)
|
||||
|
||||
|
||||
def printLogSpacer(displayString=None, fill='='):
|
||||
def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0):
|
||||
""" Prints [name of the option]/[name of the program] in the spacer to explain data below
|
||||
|
||||
If no parameters are given, a default fill of the '=' string is used in the spacer
|
||||
|
||||
@param displayString: name of item to be displayed inside of the log spacer
|
||||
@param fill: padding string which surrounds the given display string
|
||||
@param contentSizeToFit: providing an integer > 0 allows
|
||||
ability to dynamically change output padding/fill based on this value
|
||||
instead of appWidth. Handy for concise info output.
|
||||
"""
|
||||
global appWidth, PRINT_JSON
|
||||
resizeValue = appWidth
|
||||
if contentSizeToFit != 0:
|
||||
resizeValue = contentSizeToFit
|
||||
if resizeValue % 2: # if odd -> make even
|
||||
resizeValue += 1
|
||||
# leaving below to check if resizing works properly
|
||||
# print("resizeVal=" +str(resizeValue) + "; appWidth=" + str(appWidth) +
|
||||
# "; contentSizeToFit=" + str(contentSizeToFit) + "; fill=" + fill)
|
||||
|
||||
if not PRINT_JSON:
|
||||
if displayString:
|
||||
if len(displayString) % 2:
|
||||
displayString += fill
|
||||
logSpacer = fill * int((appWidth - (len(displayString))) / 2) + displayString + fill * int(
|
||||
(appWidth - (len(displayString))) / 2)
|
||||
logSpacer = fill * int((resizeValue - (len(displayString))) / 2) + displayString + fill * int(
|
||||
(resizeValue - (len(displayString))) / 2)
|
||||
else:
|
||||
logSpacer = fill * appWidth
|
||||
logSpacer = fill * resizeValue
|
||||
print(logSpacer)
|
||||
|
||||
|
||||
@@ -1630,22 +1686,15 @@ def showAllConcise(deviceList):
|
||||
print('ERROR: Cannot print JSON/CSV output for concise output')
|
||||
sys.exit(1)
|
||||
|
||||
""" Place holder for the actual max size """
|
||||
MAX_ALL_CONCISE_WIDTH = 100
|
||||
appWidth_temp = appWidth
|
||||
appWidth = MAX_ALL_CONCISE_WIDTH
|
||||
silent = True
|
||||
|
||||
printLogSpacer(' Concise Info ')
|
||||
deviceList.sort()
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
available_temp_type = temp_type.lower()
|
||||
available_temp_type = available_temp_type.replace('(', '')
|
||||
available_temp_type = available_temp_type.replace(')', '')
|
||||
header = ['GPU', '[Model : Revision]', 'Temp', 'AvgPwr', 'Partitions', 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
|
||||
subheader = ['', 'Name (20 chars)', temp_type, '', '(Mem, Compute)', '', '', '', '', '', '', '']
|
||||
available_temp_type = getTemperatureLabel(deviceList)
|
||||
temp_type = "(" + available_temp_type.capitalize() + ")"
|
||||
header=['Device', '[Model : Revision]', 'Temp', 'Power', 'Partitions',
|
||||
'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
|
||||
subheader = ['', 'Name (20 chars)', temp_type, getPowerLabel(deviceList),
|
||||
'(Mem, Compute)', '', '', '', '', '', '', '']
|
||||
# add additional spaces to match header
|
||||
for idx, item in enumerate(subheader):
|
||||
header_size = len(header[idx])
|
||||
@@ -1667,11 +1716,17 @@ def showAllConcise(deviceList):
|
||||
temp_val = str(getTemp(device, available_temp_type, silent))
|
||||
if temp_val != 'N/A':
|
||||
temp_val += degree_sign + 'C'
|
||||
avgPwr = str(getPower(device))
|
||||
if avgPwr != '0.0' and avgPwr != 'N/A':
|
||||
socketPwr = getCurrentSocketPower(device, True)
|
||||
avgPwr = getAvgPower(device, True)
|
||||
powerVal = 'N/A'
|
||||
if socketPwr != '0.0' and socketPwr != 'N/A':
|
||||
socketPwr += 'W'
|
||||
powerVal=socketPwr
|
||||
elif avgPwr != '0.0' and avgPwr != 'N/A':
|
||||
avgPwr += 'W'
|
||||
powerVal=avgPwr
|
||||
else:
|
||||
avgPwr = 'N/A'
|
||||
powerVal = 'N/A'
|
||||
combined_partition = (getMemoryPartition(device, silent) + ", "
|
||||
+ getComputePartition(device, silent))
|
||||
sclk = showCurrentClocks([device], 'sclk', concise=silent)
|
||||
@@ -1704,10 +1759,10 @@ def showAllConcise(deviceList):
|
||||
'', '', '', '']
|
||||
gpu_dev_product_info_top_name = gpu_dev_product_info_names[1]
|
||||
|
||||
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, avgPwr,
|
||||
combined_partition, sclk, mclk,
|
||||
fan, str(perf).lower(), pwrCap,
|
||||
mem_use_pct, gpu_busy]
|
||||
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val,
|
||||
powerVal, combined_partition, sclk, mclk,
|
||||
fan, str(perf).lower(), pwrCap, mem_use_pct,
|
||||
gpu_busy]
|
||||
|
||||
val_widths = {}
|
||||
for device in deviceList:
|
||||
@@ -1716,10 +1771,17 @@ def showAllConcise(deviceList):
|
||||
for device in deviceList:
|
||||
for col in range(len(val_widths[device])):
|
||||
max_widths[col] = max(max_widths[col], val_widths[device][col])
|
||||
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)), None)
|
||||
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader)),
|
||||
None, useItalics=True)
|
||||
printLogSpacer(fill='=')
|
||||
|
||||
########################
|
||||
# Display concise info #
|
||||
########################
|
||||
header_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header))
|
||||
subheader_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader))
|
||||
printLogSpacer(headerString, contentSizeToFit=len(header_output))
|
||||
printLogSpacer(' Concise Info ', contentSizeToFit=len(header_output))
|
||||
printLog(None, header_output, None)
|
||||
printLog(None, subheader_output, None, useItalics=True)
|
||||
printLogSpacer(fill='=', contentSizeToFit=len(header_output))
|
||||
|
||||
for device in deviceList:
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
@@ -1730,9 +1792,8 @@ def showAllConcise(deviceList):
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
zip(range(len(max_widths)), values['card%s_Info' % (str(device))])), None)
|
||||
|
||||
printLogSpacer()
|
||||
""" Restore original max size """
|
||||
appWidth = appWidth_temp
|
||||
printLogSpacer(contentSizeToFit=len(header_output))
|
||||
printLogSpacer(footerString, contentSizeToFit=len(header_output))
|
||||
|
||||
|
||||
def showAllConciseHw(deviceList):
|
||||
@@ -2360,23 +2421,25 @@ def showPids(verbose):
|
||||
|
||||
|
||||
def showPower(deviceList):
|
||||
""" Display current Average Graphics Package Power Consumption for a list of devices
|
||||
""" Display Current (also known as instant) Socket or Average
|
||||
Graphics Package Power Consumption for a list of devices
|
||||
|
||||
@param deviceList: List of DRM devices (can be a single-item list)
|
||||
"""
|
||||
secondaryPresent=False
|
||||
printLogSpacer(' Power Consumption ')
|
||||
for device in deviceList:
|
||||
if checkIfSecondaryDie(device):
|
||||
if str(getCurrentSocketPower(device, True)) != 'N/A':
|
||||
printLog(device, 'Current Socket Graphics Package Power (W)', getCurrentSocketPower(device))
|
||||
elif checkIfSecondaryDie(device):
|
||||
printLog(device, 'Average Graphics Package Power (W)', "N/A (Secondary die)")
|
||||
secondaryPresent=True
|
||||
elif str(getPower(device)) != '0.0':
|
||||
printLog(device, 'Average Graphics Package Power (W)', getPower(device))
|
||||
elif str(getAvgPower(device)) != '0.0':
|
||||
printLog(device, 'Average Graphics Package Power (W)', getAvgPower(device))
|
||||
else:
|
||||
printErrLog(device, 'Unable to get Average Graphics Package Power Consumption')
|
||||
printErrLog(device, 'Unable to get Average or Current Socket Graphics Package Power Consumption')
|
||||
if secondaryPresent:
|
||||
printLog(None, "\n\t\tPrimary die (usually one above or below the secondary) shows total (primary + secondary) socket power information", None)
|
||||
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
@@ -2872,13 +2935,8 @@ def getGraphColor(percentage):
|
||||
|
||||
def showTempGraph(deviceList):
|
||||
deviceList.sort()
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
printLogSpacer(' Temperature Graph ' + temp_type + ' ')
|
||||
temp_type = temp_type.lower()
|
||||
temp_type = temp_type.replace('(', '')
|
||||
temp_type = temp_type.replace(')', '')
|
||||
temp_type = getTemperatureLabel(deviceList)
|
||||
printLogSpacer(' Temperature Graph ' + temp_type.capitalize() + ' ')
|
||||
# Start a thread for constantly printing
|
||||
try:
|
||||
# Create a thread (call print function, devices, delay in ms)
|
||||
@@ -3547,6 +3605,11 @@ def save(deviceList, savefilepath):
|
||||
|
||||
|
||||
# The code below is for when this script is run as an executable instead of when imported as a module
|
||||
def isConciseInfoRequested(args):
    """ Decide whether the concise summary should be shown

    The decision is based on how many command-line tokens were given:
    no options at all, exactly one option that is alldevices/json/csv, or
    exactly two options that combine alldevices with json or csv.

    @param args: parsed command-line arguments (alldevices, json, csv are read)
    """
    argCount = len(sys.argv)
    if argCount == 1:
        return True
    if argCount == 2:
        return (args.alldevices or args.json or args.csv) or False
    if argCount == 3:
        return args.alldevices and (args.json or args.csv)
    return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description='AMD ROCm System Management Interface | ROCM-SMI version: %s' % __version__,
|
||||
@@ -3755,7 +3818,8 @@ if __name__ == '__main__':
|
||||
|
||||
if not PRINT_JSON:
|
||||
print('\n')
|
||||
printLogSpacer(headerString)
|
||||
if not isConciseInfoRequested(args):
|
||||
printLogSpacer(headerString)
|
||||
|
||||
if args.showallinfo:
|
||||
args.list = True
|
||||
@@ -3809,9 +3873,7 @@ if __name__ == '__main__':
|
||||
if not checkAmdGpus(deviceList):
|
||||
logging.warning('No AMD GPUs specified')
|
||||
|
||||
if len(sys.argv) == 1 or \
|
||||
len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \
|
||||
len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)):
|
||||
if isConciseInfoRequested(args):
|
||||
showAllConcise(deviceList)
|
||||
if args.showhw:
|
||||
showAllConciseHw(deviceList)
|
||||
@@ -4018,7 +4080,8 @@ if __name__ == '__main__':
|
||||
devCsv = formatCsv(deviceList)
|
||||
print(devCsv)
|
||||
|
||||
printLogSpacer(footerString)
|
||||
if not isConciseInfoRequested(args):
|
||||
printLogSpacer(footerString)
|
||||
|
||||
rsmi_ret_ok(rocmsmi.rsmi_shut_down())
|
||||
exit(RETCODE)
|
||||
|
||||
@@ -655,3 +655,8 @@ rsmi_nps_mode_type = rsmi_nps_mode_type_t
|
||||
# nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2]
|
||||
# will return string 'NPS2'
|
||||
nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
|
||||
|
||||
# String-valued enumeration of the power labels; members are str instances.
class rsmi_power_label(str, Enum):
|
||||
# Label for average power
AVG_POWER = '(Avg)'
|
||||
# Label for current (instant) socket power
CURRENT_SOCKET_POWER = '(Socket)'
|
||||
|
||||
|
||||
@@ -77,7 +77,6 @@
|
||||
#include "rocm_smi/rocm_smi64Config.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
using namespace amd::smi;
|
||||
|
||||
static const uint32_t kMaxOverdriveLevel = 20;
|
||||
@@ -2386,21 +2385,22 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
amd::smi::MonitorTypes mon_type = amd::smi::kMonInvalid;
|
||||
uint16_t val_ui16;
|
||||
|
||||
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes> kMetricTypeMap = {
|
||||
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
|
||||
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
|
||||
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
|
||||
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
|
||||
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
|
||||
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
|
||||
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
|
||||
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
|
||||
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
|
||||
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
|
||||
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
|
||||
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
|
||||
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
|
||||
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
|
||||
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes>
|
||||
kMetricTypeMap = {
|
||||
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
|
||||
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
|
||||
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
|
||||
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
|
||||
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
|
||||
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
|
||||
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
|
||||
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
|
||||
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
|
||||
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
|
||||
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
|
||||
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
|
||||
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
|
||||
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
|
||||
};
|
||||
|
||||
const auto mon_type_it = kMetricTypeMap.find(metric);
|
||||
@@ -2485,7 +2485,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
*temperature = static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
|
||||
*temperature =
|
||||
static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
|
||||
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======= "
|
||||
<< " | Success "
|
||||
@@ -2815,6 +2816,80 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
|
||||
CATCH
|
||||
}
|
||||
|
||||
/**
 * Read the current (instant) socket power of device dv_ind into
 * *socket_power.
 *
 * The hwmon power label for sensor index 1 is read first; the power value
 * (amd::smi::kMonPowerInput) is only fetched when that label reads "PPT".
 *
 * Returns RSMI_STATUS_INVALID_ARGS when socket_power is nullptr,
 * RSMI_STATUS_NOT_SUPPORTED when the device has no hwmon monitor or the
 * label is not "PPT", the translated errno when reading the label fails,
 * and otherwise whatever get_dev_mon_value() reports.
 */
rsmi_status_t
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) {
  TRY
  std::ostringstream ss;
  rsmi_status_t rsmiReturn = RSMI_STATUS_NOT_SUPPORTED;
  std::string val_str;
  uint32_t sensor_ind = 1;  // socket_power sysfs files have 1-based indices
  MonitorTypes mon_type = amd::smi::kMonPowerInput;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind="
     << std::to_string(dv_ind);
  LOG_TRACE(ss);
  if (socket_power == nullptr) {
    rsmiReturn = RSMI_STATUS_INVALID_ARGS;
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: socket_power was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }
  CHK_SUPPORT_SUBVAR_ONLY(socket_power, sensor_ind)
  DEVICE_MUTEX

  if (dev->monitor() == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: hwmon monitor was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  int ret = dev->monitor()->readMonitor(amd::smi::kMonPowerLabel,
                                        sensor_ind, &val_str);
  // Comparing against "PPT" already pins the label length, so no separate
  // size check is needed.
  if (ret != 0 || val_str != "PPT") {
    if (ret != 0) {
      rsmiReturn = amd::smi::ErrnoToRsmiStatus(ret);
    }
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: readMonitor() returned an error status"
       << " or Socket Power label did not show PPT or size of label data was"
       << " unexpected"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  rsmiReturn = get_dev_mon_value(mon_type, dv_ind, sensor_ind,
                                 socket_power);
  if (rsmiReturn != RSMI_STATUS_SUCCESS) {
    // Do not report success or read *socket_power here: the caller's buffer
    // may not have been written when the read failed.
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: get_dev_mon_value() returned an error status"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: " << monitorTypesToString.at(mon_type)
     << " | Data: " << *socket_power
     << " | Returning = "
     << getRSMIStatusString(rsmiReturn) << " |";
  LOG_TRACE(ss);
  return rsmiReturn;
  CATCH
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power,
|
||||
float *counter_resolution, uint64_t *timestamp) {
|
||||
|
||||
@@ -68,8 +68,6 @@
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
|
||||
@@ -61,7 +61,6 @@
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
using namespace amd::smi;
|
||||
|
||||
#define TRY try {
|
||||
|
||||
@@ -71,9 +71,8 @@
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
Logger* Logger::m_Instance = nullptr;
|
||||
ROCmLogging::Logger *ROCmLogging::Logger::m_Instance = nullptr;
|
||||
|
||||
// Log file name
|
||||
// WARNING: File name should be changed here and
|
||||
@@ -81,39 +80,39 @@ Logger* Logger::m_Instance = nullptr;
|
||||
// in one place will cause a mismatch in these scripts,
|
||||
// files may not have proper permissions, and logrotate
|
||||
// would not function properly.
|
||||
const std::string logPath = "/var/log/rocm_smi_lib/";
|
||||
const std::string logBaseFName = "ROCm-SMI-lib";
|
||||
const std::string logExtension = ".log";
|
||||
const std::string logFileName = logPath + logBaseFName + logExtension;
|
||||
#define LOGPATH "/var/log/rocm_smi_lib/"
|
||||
#define LOGBASE_FNAME "ROCm-SMI-lib"
|
||||
#define LOGEXTENSION ".log"
|
||||
const char *logFileName = LOGPATH LOGBASE_FNAME LOGEXTENSION;
|
||||
|
||||
Logger::Logger() {
|
||||
ROCmLogging::Logger::Logger() {
|
||||
initialize_resources();
|
||||
}
|
||||
|
||||
Logger::~Logger() {
|
||||
ROCmLogging::Logger::~Logger() {
|
||||
if (m_loggingIsOn) {
|
||||
destroy_resources();
|
||||
}
|
||||
}
|
||||
|
||||
Logger* Logger::getInstance() throw() {
|
||||
ROCmLogging::Logger* ROCmLogging::Logger::getInstance() throw() {
|
||||
if (m_Instance == nullptr) {
|
||||
m_Instance = new Logger();
|
||||
m_Instance = new ROCmLogging::Logger();
|
||||
}
|
||||
return m_Instance;
|
||||
}
|
||||
|
||||
void Logger::lock() {
|
||||
void ROCmLogging::Logger::lock() {
|
||||
m_Lock.lock();
|
||||
}
|
||||
|
||||
void Logger::unlock() {
|
||||
void ROCmLogging::Logger::unlock() {
|
||||
m_Lock.unlock();
|
||||
}
|
||||
|
||||
void Logger::logIntoFile(std::string& data) {
|
||||
void ROCmLogging::Logger::logIntoFile(std::string& data) {
|
||||
lock();
|
||||
if(!m_File.is_open()) {
|
||||
if (!m_File.is_open()) {
|
||||
initialize_resources();
|
||||
if (!m_File.is_open()) {
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessful."
|
||||
@@ -127,24 +126,24 @@ void Logger::logIntoFile(std::string& data) {
|
||||
unlock();
|
||||
}
|
||||
|
||||
void Logger::logOnConsole(std::string& data) {
|
||||
void ROCmLogging::Logger::logOnConsole(std::string& data) {
|
||||
std::cout << getCurrentTime() << " " << data << std::endl;
|
||||
}
|
||||
|
||||
// Returns: In string format, YY-MM-DD HH:MM:SS.microseconds
|
||||
std::string Logger::getCurrentTime(void) {
|
||||
using namespace std::chrono;
|
||||
std::string ROCmLogging::Logger::getCurrentTime(void) {
|
||||
std::string currentTime;
|
||||
|
||||
// get current time
|
||||
auto now = system_clock::now();
|
||||
auto now = std::chrono::system_clock::now();
|
||||
|
||||
// get number of microseconds elapsed within the current second
|
||||
// (remainder after division into seconds)
|
||||
auto ms = duration_cast<microseconds>(now.time_since_epoch()) % 1000000;
|
||||
auto ms = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch()) % 1000000;
|
||||
|
||||
// convert to std::time_t in order to convert to std::tm (broken-down time)
|
||||
auto timer = system_clock::to_time_t(now);
|
||||
auto timer = std::chrono::system_clock::to_time_t(now);
|
||||
|
||||
// convert to broken-down time
|
||||
std::tm bt = *std::localtime(&timer);
|
||||
@@ -159,7 +158,7 @@ std::string Logger::getCurrentTime(void) {
|
||||
}
|
||||
|
||||
// Interface for Error Log
|
||||
void Logger::error(const char* text) throw() {
|
||||
void ROCmLogging::Logger::error(const char* text) throw() {
|
||||
// By default, logging is disabled
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -182,18 +181,18 @@ void Logger::error(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::error(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::error(std::string& text) throw() {
|
||||
error(text.data());
|
||||
}
|
||||
|
||||
void Logger::error(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::error(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
error(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Alarm Log
|
||||
void Logger::alarm(const char* text) throw() {
|
||||
void ROCmLogging::Logger::alarm(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -216,18 +215,18 @@ void Logger::alarm(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::alarm(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::alarm(std::string& text) throw() {
|
||||
alarm(text.data());
|
||||
}
|
||||
|
||||
void Logger::alarm(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::alarm(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
alarm(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Always Log
|
||||
void Logger::always(const char* text) throw() {
|
||||
void ROCmLogging::Logger::always(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -250,18 +249,18 @@ void Logger::always(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::always(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::always(std::string& text) throw() {
|
||||
always(text.data());
|
||||
}
|
||||
|
||||
void Logger::always(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::always(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
always(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Buffer Log
|
||||
void Logger::buffer(const char* text) throw() {
|
||||
void ROCmLogging::Logger::buffer(const char* text) throw() {
|
||||
// Buffer is the special case. So don't add log level
|
||||
// and timestamp in the buffer message. Just log the raw bytes.
|
||||
if ((m_LogType == FILE_LOG) && (m_LogLevel >= LOG_LEVEL_BUFFER)) {
|
||||
@@ -284,18 +283,18 @@ void Logger::buffer(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::buffer(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::buffer(std::string& text) throw() {
|
||||
buffer(text.data());
|
||||
}
|
||||
|
||||
void Logger::buffer(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::buffer(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
buffer(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Info Log
|
||||
void Logger::info(const char* text) throw() {
|
||||
void ROCmLogging::Logger::info(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -318,18 +317,18 @@ void Logger::info(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::info(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::info(std::string& text) throw() {
|
||||
info(text.data());
|
||||
}
|
||||
|
||||
void Logger::info(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::info(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
info(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Trace Log
|
||||
void Logger::trace(const char* text) throw() {
|
||||
void ROCmLogging::Logger::trace(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -352,18 +351,18 @@ void Logger::trace(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::trace(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::trace(std::string& text) throw() {
|
||||
trace(text.data());
|
||||
}
|
||||
|
||||
void Logger::trace(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::trace(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
trace(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Debug Log
|
||||
void Logger::debug(const char* text) throw() {
|
||||
void ROCmLogging::Logger::debug(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -386,51 +385,53 @@ void Logger::debug(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::debug(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::debug(std::string& text) throw() {
|
||||
debug(text.data());
|
||||
}
|
||||
|
||||
void Logger::debug(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::debug(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
debug(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interfaces to control log levels
|
||||
void Logger::updateLogLevel(LogLevel logLevel) {
|
||||
void ROCmLogging::Logger::updateLogLevel(LogLevel logLevel) {
|
||||
m_LogLevel = logLevel;
|
||||
}
|
||||
|
||||
void Logger::enableAllLogLevels() {
|
||||
void ROCmLogging::Logger::enableAllLogLevels() {
|
||||
m_LogLevel = ENABLE_LOG;
|
||||
}
|
||||
|
||||
// Disable all log levels, except error and alarm
|
||||
void Logger::disableLog() {
|
||||
void ROCmLogging::Logger::disableLog() {
|
||||
m_LogLevel = DISABLE_LOG;
|
||||
}
|
||||
|
||||
// Interfaces to control log Types
|
||||
void Logger::updateLogType(LogType logType) {
|
||||
void ROCmLogging::Logger::updateLogType(LogType logType) {
|
||||
m_LogType = logType;
|
||||
}
|
||||
|
||||
void Logger::enableConsoleLogging() {
|
||||
void ROCmLogging::Logger::enableConsoleLogging() {
|
||||
m_LogType = CONSOLE;
|
||||
}
|
||||
|
||||
void Logger::enableFileLogging() {
|
||||
void ROCmLogging::Logger::enableFileLogging() {
|
||||
m_LogType = FILE_LOG;
|
||||
}
|
||||
|
||||
// Returns a string of details on current log settings
|
||||
std::string Logger::getLogSettings() {
|
||||
std::string ROCmLogging::Logger::getLogSettings() {
|
||||
std::string logSettings;
|
||||
|
||||
if (m_File.is_open()) {
|
||||
logSettings += "OpenStatus = File (" + logFileName + ") is open";
|
||||
logSettings += "OpenStatus = File (" + std::string(logFileName)
|
||||
+ ") is open";
|
||||
} else {
|
||||
logSettings += "OpenStatus = File (" + logFileName + ") is not open";
|
||||
logSettings += "OpenStatus = File (" + std::string(logFileName)
|
||||
+ ") is not open";
|
||||
}
|
||||
logSettings += ", ";
|
||||
|
||||
@@ -480,11 +481,11 @@ std::string Logger::getLogSettings() {
|
||||
|
||||
// Returns current reported enabled logging state. State is controlled by
|
||||
// user's environment variable RSMI_LOGGING.
|
||||
bool Logger::isLoggerEnabled() {
|
||||
bool ROCmLogging::Logger::isLoggerEnabled() {
|
||||
return m_loggingIsOn;
|
||||
}
|
||||
|
||||
void Logger::initialize_resources() {
|
||||
void ROCmLogging::Logger::initialize_resources() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -492,7 +493,7 @@ void Logger::initialize_resources() {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
m_File.open(logFileName.c_str(), std::ios::out | std::ios::app);
|
||||
m_File.open(logFileName, std::ios::out | std::ios::app);
|
||||
m_LogLevel = LOG_LEVEL_TRACE;
|
||||
// RSMI_LOGGING = 1, output to logs only
|
||||
// RSMI_LOGGING = 2, output to console only
|
||||
@@ -521,9 +522,9 @@ void Logger::initialize_resources() {
|
||||
if (m_File.fail()) {
|
||||
std::cout << "WARNING: Failed opening log file." << std::endl;
|
||||
}
|
||||
chmod(logFileName.c_str(), S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
|
||||
chmod(logFileName, S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
|
||||
}
|
||||
|
||||
void Logger::destroy_resources() {
|
||||
void ROCmLogging::Logger::destroy_resources() {
|
||||
m_File.close();
|
||||
}
|
||||
|
||||
@@ -68,7 +68,6 @@
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
static const char *kPathDRMRoot = "/sys/class/drm";
|
||||
static const char *kPathHWMonRoot = "/sys/class/hwmon";
|
||||
@@ -314,12 +313,12 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
int i_ret;
|
||||
|
||||
LOG_ALWAYS("=============== ROCM SMI initialize ================");
|
||||
Logger::getInstance()->enableAllLogLevels();
|
||||
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
|
||||
// Leaving below to allow developers to check current log settings
|
||||
// std::string logSettings = Logger::getInstance()->getLogSettings();
|
||||
// std::cout << "Current log settings:\n" << logSettings << std::endl;
|
||||
|
||||
if (Logger::getInstance()->isLoggerEnabled()) {
|
||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||
logSystemDetails();
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -58,8 +58,6 @@
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -80,6 +78,8 @@ static const char *kMonPowerCapName = "power#_cap";
|
||||
static const char *kMonPowerCapMaxName = "power#_cap_max";
|
||||
static const char *kMonPowerCapMinName = "power#_cap_min";
|
||||
static const char *kMonPowerAveName = "power#_average";
|
||||
static const char *kMonPowerInputName = "power#_input";
|
||||
static const char *kMonPowerLabelName = "power#_label";
|
||||
static const char *kMonTempMaxName = "temp#_max";
|
||||
static const char *kMonTempMinName = "temp#_min";
|
||||
static const char *kMonTempMaxHystName = "temp#_max_hyst";
|
||||
@@ -135,6 +135,8 @@ static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
|
||||
{kMonPowerCapMax, kMonPowerCapMaxName},
|
||||
{kMonPowerCapMin, kMonPowerCapMinName},
|
||||
{kMonPowerAve, kMonPowerAveName},
|
||||
{kMonPowerInput, kMonPowerInputName},
|
||||
{kMonPowerLabel, kMonPowerLabelName},
|
||||
{kMonTempMax, kMonTempMaxName},
|
||||
{kMonTempMin, kMonTempMinName},
|
||||
{kMonTempMaxHyst, kMonTempMaxHystName},
|
||||
@@ -202,7 +204,8 @@ static const std::map<const char *, monitor_depends_t> kMonFuncDependsMap = {
|
||||
.variants = {kMonInvalid},
|
||||
}
|
||||
},
|
||||
{"rsmi_dev_power_cap_default_get", { .mandatory_depends = {kMonPowerCapDefaultName},
|
||||
{"rsmi_dev_power_cap_default_get", { .mandatory_depends =
|
||||
{kMonPowerCapDefaultName},
|
||||
.variants = {kMonInvalid},
|
||||
}
|
||||
},
|
||||
@@ -613,7 +616,7 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) {
|
||||
supported_monitors = intersect;
|
||||
}
|
||||
if (!supported_monitors.empty()) {
|
||||
for (unsigned long & supported_monitor : supported_monitors) {
|
||||
for (uint64_t &supported_monitor : supported_monitors) {
|
||||
if (m_type == eDefaultMonitor) {
|
||||
assert(supported_monitor > 0);
|
||||
supported_monitor |=
|
||||
|
||||
@@ -73,7 +73,6 @@
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/functional/power_read.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
TestPowerRead::TestPowerRead() : TestBase() {
|
||||
set_title("RSMI Power Read Test");
|
||||
@@ -116,27 +117,48 @@ void TestPowerRead::Run(void) {
|
||||
val_ui64 << " uW" << std::endl;
|
||||
}
|
||||
|
||||
/* Average Power */
|
||||
err = rsmi_dev_power_ave_get(i, 0, &val_ui64);
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
std::cout <<
|
||||
"\t**Power average information is not supported for this device"
|
||||
"\t**Average Power Usage: not supported on this device"
|
||||
<< std::endl;
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_power_ave_get(i, 0, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
|
||||
continue;
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Average Power Usage: ";
|
||||
CHK_RSMI_PERM_ERR(err)
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
std::cout << static_cast<float>(val_ui64)/1000 << " mW" << std::endl;
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Average Power Usage: ";
|
||||
CHK_RSMI_PERM_ERR(err)
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
std::cout << static_cast<float>(val_ui64) / 1000 << " mW"
|
||||
<< std::endl;
|
||||
}
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_power_ave_get(i, 0, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
|
||||
}
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_power_ave_get(i, 0, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
|
||||
}
|
||||
|
||||
/* Current Socket Power */
|
||||
err = rsmi_dev_current_socket_power_get(i, &val_ui64);
|
||||
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
std::cout <<
|
||||
"\t**Current Socket Power: not supported"
|
||||
" on this device" << std::endl;
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Current Socket Power: ";
|
||||
CHK_RSMI_PERM_ERR(err)
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
std::cout << static_cast<float>(val_ui64) / 1000 << " mW"
|
||||
<< std::endl;
|
||||
}
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_current_socket_power_get(i, nullptr);
|
||||
// std::cout << "err = " << amd::smi::getRSMIStatusString(err);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
|
||||
}
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir